This script takes the models which have I have run a 5-fold cross validation on and will get the mean IOU for each grid cell/ecoregion. 
It will also export a table which has the final (non-spatial) mean across all five folds. 

In [1]:
import pandas as pd
import logging, os
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import tensorflow
from tensorflow.python.keras.optimizer_v2.adam import Adam
import segmentation_models as sm
import matplotlib.pyplot as plt
import numpy as np
#from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
from tensorflow.keras.models import Model
from keras_unet_collection import models
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
import warnings
import glob
warnings.filterwarnings("ignore")

Segmentation Models: using `tf.keras` framework.


Functions used to run analysis 

In [5]:
#image gen class to be used when predicting
min_max_vi = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop = True)
min_max_vi = min_max_vi[['6', '7', '8']]

class img_gen_vi(tensorflow.keras.utils.Sequence):

    """Helper to iterate over the data (as Numpy arrays).
    Inputs are batch size, the image size, the input paths (x) and target paths (y)
    """

    #will need pre defined variables batch_size, img_size, input_img_paths and target_img_paths
    def __init__(self, batch_size, img_size, input_img_paths):
	    self.batch_size = batch_size
	    self.img_size = img_size
	    self.input_img_paths = input_img_paths
	    self.target_img_paths = input_img_paths

    #number of batches the generator is supposed to produceis the length of the paths divided by the batch siize
    def __len__(self):
	    return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_img_paths = self.input_img_paths[i : i + self.batch_size] #for a given index get the input batch pathways (x)
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size] #for a given index get the input batch pathways (y)
		
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32") #create matrix of zeros which will have the dimension height, wideth, n_bands), 8 is the n_bands
        
  
         #start populating x by enumerating over the input img paths
        for j, path in enumerate(batch_img_paths):

           #load image
            img =  np.round(np.load(path), 3)
            
            if img.shape[2] == 4:
                
                img = img[:, :, :-1]

            else:
                
                img = img[:, :, 6:9]

            # img = img * 1000
            img = img.astype(float)
            img = np.round(img, 3)
            img[img == 0] = -999

            img[np.isnan(img)] = -999


            img[img == -999] = np.nan

            in_shape = img.shape
            
            #turn to dataframe to normalize
            img = img.reshape(img.shape[0] * img.shape[1], img.shape[2])
			
            img = pd.DataFrame(img)
			
            img.columns = min_max_vi.columns
			
            img = pd.concat([min_max_vi, img]).reset_index(drop = True)


            #normalize 0 to 1
            img = pd.DataFrame(scaler.fit_transform(img))
			
            img = img.iloc[2:]
#
#             img = img.values.reshape(in_shape)
            img = img.values.reshape(in_shape)

#             replace nan with -1
            img[np.isnan(img)] = -1

#apply standardization
# img = normalize(img, axis=(0,1))

            img = np.round(img, 3)
            #populate x
            x[j] = img#[:, :, 4:] index number is not included, 


        #do tthe same thing for y
        y = np.zeros((self.batch_size,) + self.img_size, dtype="uint8")

        for j, path in enumerate(batch_target_img_paths):

            #load image
            img =  np.round(np.load(path), 3)[:, :, -1]

            img = img.astype(int)

            img[img < 0] = 0
            img[img >1] = 0
            img[~np.isin(img, [0,1])] = 0

            img[np.isnan(img)] = 0
            img = img.astype(int)

            # img =  tf.keras.utils.to_categorical(img, num_classes = 2)
            # y[j] = np.expand_dims(img, 2) 
            y[j] = img
  
       
    #Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
    # y[j] -= 1

        return x, y



#function which prediicts the selected model and saves some metrics
def predict_model(model, generator, name, fid, count):
    
    '''
    model: tensorflow model to predict
    generator: keras generator with the images to predict on
    name: string, model name\
    fid: variable I was looping through
    count: count retained earlier
    '''
    #get the results from the nbac and mtbs model
    model_1_res = model.evaluate_generator(generator, 100)

    iou = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)

    #make new dataframe with scores
    in_df = pd.DataFrame({
        'Model': [name],
        'FID': [fid],
        'Count': [count],
        'IOU': [iou],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
                        }, index=[0])  # Explicitly setting index to [0] for a single row

    return in_df

def grid_predict(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, out_path, fold):
    
    
    '''
       model_1: nbac/mtbs
       model_2: nbac/mtbs 85
       model_3: nbac/mtbs NDSI
       model_4: combined
       model_5: combined 85
       model_6: combined NDSI
       model_7: eurasia 
       model_8: eurasia 85
       model_9: eurasia ndsi
       out_path: str of where to save
    '''
    
    os.makedirs(out_path, exist_ok = True)
    
    #for the grids I have two ids, FID which is the fishnet grid cells to loop through, and ID which is teh good anna polygon nids
    fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/model_iou_spatial/grid.shp')
    fish_good['Grid_ID'] = fish_good['Grid_ID'].astype(int)
    #all the fishnet ids to loop through
    all_fid = fish_good['Grid_ID'].unique().tolist()

    #get all the testing full pathways to predict on, will need to filter fish good with this
    testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()


    #now I need to get the chunked files which match the fire ids to make new training, validation and testing times
    #path to the chunked files
    chunked_85 =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128')
    chunked_old =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_old_subs_0_128')
    chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')


    def filter_chunked_85(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_ndsi(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.
    
        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.
    
        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]
        
        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked2(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        # filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_old(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_old_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked


    #new dnbr composite method
    testing_names_85 = filter_chunked_85(testing_names, chunked_85)
    testing_names_old = filter_chunked_old(testing_names, chunked_old)
    testing_names_ndsi = filter_chunked_ndsi(testing_names, chunked_ndsi)



    #save all dataframes
    final = []

    for fid in all_fid:

        # print(f"Processing {fid}")

        #sub shapefile for the grid
        sub_grid = fish_good[fish_good['Grid_ID'] == fid]

        #get the anna fire ids in this fid
        anna_in_fid = sub_grid['Fire_ID'].unique().tolist()

        count = len(anna_in_fid)

        #get full pathway to the anna ids in the fids for new dnbr method
        model_test_85 = filter_chunked2(anna_in_fid, testing_names_85)
        model_test_old = filter_chunked2(anna_in_fid, testing_names_old)
        model_test_ndsi = filter_chunked2(anna_in_fid, testing_names_ndsi)

        #get the batch sie
        if len(model_test_85) <= 45:

            batch_size_85 = len(model_test_85)

        else:
            batch_size_85 = 45

        if len(model_test_old) <= 45:

            batch_size_old = len(model_test_old)
            
        else:
            batch_size_old = 45

        if len(model_test_ndsi) <= 45:

            batch_size_ndsi = len(model_test_ndsi)
            
        else:
            batch_size_ndsi = 45

        
        if len(model_test_85) > 0 and len(model_test_old) > 0 and len(model_test_ndsi) > 0:

            #create an image generator for this fid and then predict
            models_vi_gen_85 =  img_gen_vi(batch_size_85, img_size, model_test_85)
            models_vi_gen_old =  img_gen_vi(batch_size_old, img_size, model_test_old)
            models_vi_gen_ndsi =  img_gen_vi(batch_size_ndsi, img_size, model_test_ndsi)


            mtbs_nbac = predict_model(model_1, models_vi_gen_old, 'MTBS_NBAC', fid, count)
            mtbs_nbac_85 = predict_model(model_2, models_vi_gen_85, 'MTBS_NBAC_85', fid, count)
            mtbs_nbac_ndsi = predict_model(model_3, models_vi_gen_ndsi, 'MTBS_NBAC_NDSI', fid, count)

            combined = predict_model(model_4, models_vi_gen_old, 'Combined', fid, count)
            combined_85 = predict_model(model_5, models_vi_gen_85, 'Combined_85', fid, count)
            combined_ndsi = predict_model(model_6, models_vi_gen_ndsi, 'Combined_NDSI', fid, count)

            
            russia = predict_model(model_7, models_vi_gen_old, 'Russia', fid, count)
            russia_85 = predict_model(model_8, models_vi_gen_85, 'Russia_85', fid, count)
            russia_ndsi = predict_model(model_9, models_vi_gen_ndsi, 'Russia_NDSI', fid, count)


            final.append(mtbs_nbac)
            final.append(mtbs_nbac_85)
            final.append(mtbs_nbac_ndsi)

            final.append(combined)
            final.append(combined_85)
            final.append(combined_ndsi)

            final.append(russia)
            final.append(russia_85)
            final.append(russia_ndsi)


    final = pd.concat(final).reset_index(drop=True)

    final['FID'] = final['FID'].astype(int)

    final.to_csv(os.path.join(out_path, f"{fold}.csv"), index = False)
    
    return print(f"Done Processing Fishnet grid Fold {fold}")

def ecoregion_predict(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, out_path, fold):
    
    '''
       model_1: nbac/mtbs
       model_2: nbac/mtbs 85
       model_3: nbac/mtbs NDSI
       model_4: combined
       model_5: combined 85
       model_6: combined NDSI
       model_7: eurasia 
       model_8: eurasia 85
       model_9: eurasia ndsi
       out_path: str of where to save
    '''
    
    os.makedirs(out_path, exist_ok = True)
        
    #function to predict ecoregions
    #for the grids I have two ids, FID which is the fishnet grid cells to loop through, and ID which is teh good anna polygon nids
    fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/model_iou_spatial/ecoregions.shp')

    #all the fishnet ids to loop through
    all_fid = fish_good['Grid_ID'].unique().tolist()

    #get all the testing full pathways to predict on, will need to filter fish good with this
    testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()


     #now I need to get the chunked files which match the fire ids to make new training, validation and testing times
    #path to the chunked files
    chunked_85 =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128')
    chunked_old =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_old_subs_0_128')
    chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')


    def filter_chunked_85(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_ndsi(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.
    
        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.
    
        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]
        
        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked2(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        # filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_old(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_old_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked


    #new dnbr composite method
    testing_names_85 = filter_chunked_85(testing_names, chunked_85)
    testing_names_old = filter_chunked_old(testing_names, chunked_old)
    testing_names_ndsi = filter_chunked_ndsi(testing_names, chunked_ndsi)



    #save all dataframes
    final = []

    for fid in all_fid:

        # print(f"Processing {fid}")

        #sub shapefile for the grid
        sub_grid = fish_good[fish_good['Grid_ID'] == fid]

        #get the anna fire ids in this fid
        anna_in_fid = sub_grid['Fire_ID'].unique().tolist()

        count = len(anna_in_fid)

        #get full pathway to the anna ids in the fids for new dnbr method
        model_test_85 = filter_chunked2(anna_in_fid, testing_names_85)
        model_test_old = filter_chunked2(anna_in_fid, testing_names_old)
        model_test_ndsi = filter_chunked2(anna_in_fid, testing_names_ndsi)

        #get the batch sie
        if len(model_test_85) <= 45:

            batch_size_85 = len(model_test_85)

        else:
            batch_size_85 = 45

        if len(model_test_old) <= 45:

            batch_size_old = len(model_test_old)

        else:
            batch_size_old = 45

        if len(model_test_ndsi) <= 45:

            batch_size_ndsi = len(model_test_ndsi)
            
        else:
            batch_size_ndsi = 45


        if len(model_test_85) > 0 and len(model_test_old) > 0 and len(model_test_ndsi) > 0:

            #create an image generator for this fid and then predict
            models_vi_gen_85 =  img_gen_vi(batch_size_85, img_size, model_test_85)
            models_vi_gen_old =  img_gen_vi(batch_size_old, img_size, model_test_old)
            models_vi_gen_ndsi =  img_gen_vi(batch_size_ndsi, img_size, model_test_ndsi)


            mtbs_nbac = predict_model(model_1, models_vi_gen_old, 'MTBS_NBAC', fid, count)
            mtbs_nbac_85 = predict_model(model_2, models_vi_gen_85, 'MTBS_NBAC_85', fid, count)
            mtbs_nbac_ndsi = predict_model(model_3, models_vi_gen_ndsi, 'MTBS_NBAC_NDSI', fid, count)

            combined = predict_model(model_4, models_vi_gen_old, 'Combined', fid, count)
            combined_85 = predict_model(model_5, models_vi_gen_85, 'Combined_85', fid, count)
            combined_ndsi = predict_model(model_6, models_vi_gen_ndsi, 'Combined_NDSI', fid, count)

            
            russia = predict_model(model_7, models_vi_gen_old, 'Russia', fid, count)
            russia_85 = predict_model(model_8, models_vi_gen_85, 'Russia_85', fid, count)
            russia_ndsi = predict_model(model_9, models_vi_gen_ndsi, 'Russia_NDSI', fid, count)


            final.append(mtbs_nbac)
            final.append(mtbs_nbac_85)
            final.append(mtbs_nbac_ndsi)

            final.append(combined)
            final.append(combined_85)
            final.append(combined_ndsi)

            final.append(russia)
            final.append(russia_85)
            final.append(russia_ndsi)

    final = pd.concat(final).reset_index(drop=True)

    final.to_csv(os.path.join(out_path, f"{fold}.csv"), index = False)
    
    return print(f'Done Processing Ecoregion grid Fold {fold}')


#function to save individual shapefiles for the fishnet grid
def grid_shapes(in_path, out_path, fold):
    
    '''
    in_path: str to pathway where csv is stored
    out_path: str to pathway to save the shapefiles
    fold: current fold to save the shapefile for
    '''
    
    
    os.makedirs(out_path, exist_ok = True)
    
    from pyproj import CRS

    #merge back to the original shapefile for plotting
    grid_metrics = pd.read_csv(os.path.join(in_path, f"{fold}.csv"))

    #fishnet good
    # fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/model_iou_spatial/grid.shp')
    fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/ea_grid_clip/ea_grid_clip.shp')
    # Defining the Albers Equal Area projection parameters
    # albers_ea_projection = CRS("+proj=aea +lat_0=56 +lon_0=100 +lat_1=50 +lat_2=70 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")

    #Transforming the GeoDataFrame to the new projection
    #fish_good= fish_good.to_crs(albers_ea_projection)


    fish_good = fish_good.rename(columns = {'Id': 'FID'})


    # # 
    # fish_good['FID'] = fish_good['FID'].astype(int)

    merged = fish_good.merge(grid_metrics, on = 'FID', how = 'inner')

    merged = merged[['FID', 'Model', 'Count', 'IOU', 'Precision', 'Recall', 'F-1', 'Accuracy', 'geometry']]

    #round floats to 2 digits
    merged.loc[:, merged.select_dtypes(include=['float64']).columns] = merged.select_dtypes(include=['float64']).round(2)

    #create a new column which will difference the combined model and the north america model from eurasia, do it so we subtract from russia, so larger values are better for russia
    na = merged[merged['Model'] == 'MTBS_NBAC']
    na_85 = merged[merged['Model'] == 'MTBS_NBAC_85']
    na_ndsi = merged[merged['Model'] == 'MTBS_NBAC_NDSI']

    combined = merged[merged['Model'] == 'Combined']
    combined_85 = merged[merged['Model'] == 'Combined_85']
    combined_ndsi = merged[merged['Model'] == 'Combined_NDSI']

    
    russ = merged[merged['Model'] == 'Russia']
    russ_85 = merged[merged['Model'] == 'Russia_85']
    russ_ndsi = merged[merged['Model'] == 'Russia_NDSI']


    #determine difference in na vs eruasia models
    na_russ_diff = russ['IOU'].values - na['IOU'].values
    na_russ_85_diff = russ_85['IOU'].values - na_85['IOU'].values
    na_russ_ndsi_diff = russ_ndsi['IOU'].values - na_ndsi['IOU'].values

    combined_russ_diff = russ['IOU'].values - combined['IOU'].values
    combined_russ_85_diff = russ_85['IOU'].values - combined_85['IOU'].values
    combined_russ_ndsi_diff = russ_ndsi['IOU'].values - combined_ndsi['IOU'].values

    #within the same na or eurasia models look at difference in IOU too, for instance na old and na 85 or ndsi
    na_85_diff = na['IOU'].values - na_85['IOU'].values
    na_ndsi_diff = na['IOU'].values - na_ndsi['IOU'].values
    na_85_ndsi_diff = na_85['IOU'].values - na_ndsi['IOU'].values

    combined_85_diff = combined['IOU'].values - combined_85['IOU'].values
    combined_ndsi_diff = combined['IOU'].values - combined_ndsi['IOU'].values
    combined_85_ndsi_diff = combined_85['IOU'].values - combined_ndsi['IOU'].values

    russ_85_diff = russ['IOU'].values - russ_85['IOU'].values
    russ_ndsi_diff = russ['IOU'].values - russ_ndsi['IOU'].values
    russ_85_ndsi_diff = russ_85['IOU'].values - russ_ndsi['IOU'].values


    


    #for each unique model loop through and save individual files, this is for making maps in arc later easier
    models = merged['Model'].unique()

    for m in models:
        
        sub = merged[merged['Model'] == m]

        #difference across models
        sub['na_russ_IOU_diff'] = na_russ_diff
        sub['na_russ_85_IOU_diff'] = na_russ_85_diff
        sub['na_russ_ndsi_IOU_diff'] = na_russ_ndsi_diff

        sub['combined_russ_IOU_diff'] = combined_russ_diff
        sub['combined_russ_85_IOU_diff'] = combined_russ_85_diff
        sub['combined_russ_ndsi_IOU_diff'] = combined_russ_ndsi_diff

        #within same location differences
        sub['na_85_diff'] = na_85_diff
        sub['na_ndsi_diff'] = na_ndsi_diff
        sub['na_85_ndsi_diff'] = na_85_ndsi_diff

        sub['combined_85_diff'] = combined_85_diff
        sub['combined_ndsi_diff'] = combined_ndsi_diff
        sub['combined_85_ndsi_diff'] = combined_85_ndsi_diff

        sub['russ_85_diff'] = russ_85_diff
        sub['russ_ndsi_diff'] = russ_ndsi_diff
        sub['russ_85_ndsi_diff'] = russ_85_ndsi_diff

        sub.to_file(os.path.join(out_path,  f"{m}.shp"))

    return print(f"Done Processing Fishnet grid Shape {fold}")

def ecoregion_shapes(in_path, out_path, fold):   
                               
    '''
    in_path: str to pathway where csv is stored
    out_path: str to pathway to save the shapefiles
    fold: current fold to save the 
    shapefile for
    '''
    os.makedirs(out_path, exist_ok = True)

                               
    #function to save for the ecoregion grid
    from pyproj import CRS

    #merge back to the original shapefile for plotting
    grid_metrics = pd.read_csv(os.path.join(in_path, f"{fold}.csv"))

    #fishnet good
    # fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/model_iou_spatial/grid.shp')
    fish_good = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/raw_files/EcoRegion_AlbEAadj/EcoRegion_AlbEAadj/EcoRegion_g.shp')
    # Defining the Albers Equal Area projection parameters
    # albers_ea_projection = CRS("+proj=aea +lat_0=56 +lon_0=100 +lat_1=50 +lat_2=70 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")

    # Transforming the GeoDataFrame to the new projection
    # fish_good= fish_good.to_crs(albers_ea_projection)

    fish_good = fish_good.rename(columns = {'ecoregion': 'FID'})


    # # 
    # fish_good['FID'] = fish_good['FID'].astype(int)

    merged = fish_good.merge(grid_metrics, on = 'FID', how = 'inner')

    merged = merged[['FID', 'Model', 'Count', 'IOU', 'Precision', 'Recall', 'F-1', 'Accuracy', 'geometry']]

    #round floats to 2 digits
    merged.loc[:, merged.select_dtypes(include=['float64']).columns] = merged.select_dtypes(include=['float64']).round(2)

    #create a new column which will difference the combined model and the north america model from eurasia, do it so we subtract from russia, so larger values are better for russia
    na = merged[merged['Model'] == 'MTBS_NBAC']
    na_85 = merged[merged['Model'] == 'MTBS_NBAC_85']
    na_ndsi = merged[merged['Model'] == 'MTBS_NBAC_NDSI']

    combined = merged[merged['Model'] == 'Combined']
    combined_85 = merged[merged['Model'] == 'Combined_85']
    combined_ndsi = merged[merged['Model'] == 'Combined_NDSI']

    
    russ = merged[merged['Model'] == 'Russia']
    russ_85 = merged[merged['Model'] == 'Russia_85']
    russ_ndsi = merged[merged['Model'] == 'Russia_NDSI']


    #determine difference in na vs eruasia models
    na_russ_diff = russ['IOU'].values - na['IOU'].values
    na_russ_85_diff = russ_85['IOU'].values - na_85['IOU'].values
    na_russ_ndsi_diff = russ_ndsi['IOU'].values - na_ndsi['IOU'].values

    combined_russ_diff = russ['IOU'].values - combined['IOU'].values
    combined_russ_85_diff = russ_85['IOU'].values - combined_85['IOU'].values
    combined_russ_ndsi_diff = russ_ndsi['IOU'].values - combined_ndsi['IOU'].values

    #within the same na or eurasia models look at difference in IOU too, for instance na old and na 85 or ndsi
    na_85_diff = na['IOU'].values - na_85['IOU'].values
    na_ndsi_diff = na['IOU'].values - na_ndsi['IOU'].values
    na_85_ndsi_diff = na_85['IOU'].values - na_ndsi['IOU'].values

    combined_85_diff = combined['IOU'].values - combined_85['IOU'].values
    combined_ndsi_diff = combined['IOU'].values - combined_ndsi['IOU'].values
    combined_85_ndsi_diff = combined_85['IOU'].values - combined_ndsi['IOU'].values

    russ_85_diff = russ['IOU'].values - russ_85['IOU'].values
    russ_ndsi_diff = russ['IOU'].values - russ_ndsi['IOU'].values
    russ_85_ndsi_diff = russ_85['IOU'].values - russ_ndsi['IOU'].values
    
    #for each unique model loop through and save individual files, this is for making maps in arc later easier
    models = merged['Model'].unique()

    for m in models:
        
        sub = merged[merged['Model'] == m]

        #difference across models
        sub['na_russ_IOU_diff'] = na_russ_diff
        sub['na_russ_85_IOU_diff'] = na_russ_85_diff
        sub['na_russ_ndsi_IOU_diff'] = na_russ_ndsi_diff

        sub['combined_russ_IOU_diff'] = combined_russ_diff
        sub['combined_russ_85_IOU_diff'] = combined_russ_85_diff
        sub['combined_russ_ndsi_IOU_diff'] = combined_russ_ndsi_diff

        #within same location differences
        sub['na_85_diff'] = na_85_diff
        sub['na_ndsi_diff'] = na_ndsi_diff
        sub['na_85_ndsi_diff'] = na_85_ndsi_diff

        sub['combined_85_diff'] = combined_85_diff
        sub['combined_ndsi_diff'] = combined_ndsi_diff
        sub['combined_85_ndsi_diff'] = combined_85_ndsi_diff

        sub['russ_85_diff'] = russ_85_diff
        sub['russ_ndsi_diff'] = russ_ndsi_diff
        sub['russ_85_ndsi_diff'] = russ_85_ndsi_diff

        sub.to_file(os.path.join(out_path,  f"{m}.shp"))

    return print(f"Done Processing Ecoregion Shape {fold}")

In [3]:
#batch size and img size
BATCH_SIZE = 45
GPUS = ["GPU:0", "GPU:1", "GPU:2", "GPU:3"]
strategy = tensorflow.distribute.MirroredStrategy() #can add GPUS here to select specific ones
print('Number of devices: %d' % strategy.num_replicas_in_sync) 

batch_size = BATCH_SIZE * strategy.num_replicas_in_sync

#image size
img_size = (128, 128)

#number of classes to predict
num_classes = 1

#folds to loop through
folds = range(0, 5)

# folds = [0, 1, 3, 4]


#don't need to read in loops as no folds here
#nbac mtbs model
model_1 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})

#nbac mtbs model with 85% dnbr threshold
model_2 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm_85.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})
#nbac mtbs with NDSI
model_3 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm_ndsi.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})

for fold in folds:
#loop through the folds

     #combined original dnbr method
    model_4 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_old_dnbr_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    #combined 85
    model_5 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})

    #combined ndsi
    model_6 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    

    #russia original
    model_7 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    #russia old dnbr method
    model_8 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_old_dnbr_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})

    #russia dsi method
    model_9 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_{fold}.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    
    #predict the fishnet
    fish_out = "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/grid/csvs"
    fish_shp_out = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/grid/shapefiles/{fold}"
    
    grid_predict(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, fish_out, fold)
    grid_shapes(fish_out, fish_shp_out, fold)
    
    #predict the ecoregoins
    ecoregion_out = "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/ecoregion/csvs"
    ecoregion_shp_out = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/ecoregion/shapefiles/{fold}"
    
    ecoregion_predict(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, ecoregion_out, fold)
    ecoregion_shapes(ecoregion_out, ecoregion_shp_out, fold)
    
    



Number of devices: 4
Done Processing Fishnet grid Fold 0
Done Processing Fishnet grid Shape 0
Done Processing Ecoregion grid Fold 0
Done Processing Ecoregion Shape 0
Done Processing Fishnet grid Fold 1
Done Processing Fishnet grid Shape 1
Done Processing Ecoregion grid Fold 1
Done Processing Ecoregion Shape 1
Done Processing Fishnet grid Fold 2
Done Processing Fishnet grid Shape 2
Done Processing Ecoregion grid Fold 2
Done Processing Ecoregion Shape 2
Done Processing Fishnet grid Fold 3
Done Processing Fishnet grid Shape 3
Done Processing Ecoregion grid Fold 3
Done Processing Ecoregion Shape 3
Done Processing Fishnet grid Fold 4
Done Processing Fishnet grid Shape 4
Done Processing Ecoregion grid Fold 4
Done Processing Ecoregion Shape 4


In [6]:
't'

't'

Function to get the mean IoU regardless of spatial location, don't need to use the shapefiles for this, just predict IoU on all the val files

In [3]:
#batch size and img size
BATCH_SIZE = 45
GPUS = ["GPU:0", "GPU:1", "GPU:2", "GPU:3"]
strategy = tensorflow.distribute.MirroredStrategy() #can add GPUS here to select specific ones
print('Number of devices: %d' % strategy.num_replicas_in_sync) 

batch_size = BATCH_SIZE * strategy.num_replicas_in_sync

#image size
img_size = (128, 128)

#number of classes to predict
num_classes = 1

folds = range(0, 5)
def predict_model_single(model, generator, name):
    
    '''
    model: tensorflow model to predict
    generator: keras generator with the images to predict on
    name: string, model name\
    fid: variable I was looping through
    count: count retained earlier
    '''
    #get the results from the nbac and mtbs model
    model_1_res = model.evaluate_generator(generator, 100)

    iou = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)

    #make new dataframe with scores
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU': [iou],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
                        }, index=[0])  # Explicitly setting index to [0] for a single row

    return in_df

def get_final_mean(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, out_path, fold):
    
    '''
       model_1: nbac/mtbs
       model_2: nbac/mtbs 85
       model_3: nbac/mtbs NDSI
       model_4: combined
       model_5: combined 85
       model_6: combined NDSI
       model_7: eurasia 
       model_8: eurasia 85
       model_9: eurasia ndsi
       out_path: str of where to save
    '''
    
    os.makedirs(out_path, exist_ok = True)
        
    #get all the testing full pathways to predict on, will need to filter fish good with this
    testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()


    #now I need to get the chunked files which match the fire ids to make new training, validation and testing times
    #path to the chunked files
    chunked_85 =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128')
    # chunked_old =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_old_subs_0_128')
    chunked_old =  os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
    chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')

    def filter_chunked_85(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_ndsi(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.
    
        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.
    
        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]
        
        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked2(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        # filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_training_85_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked

    def filter_chunked_old(in_names, chunked):
        """
        Filters items in the 'chunked' list based on whether the specified part of
        each item (extracted by splitting the item's string) is in 'training_names'.

        Parameters:
        - training_names: List of integers to filter against.
        - chunked: List of strings, where each string is a filename that contains numbers.

        Returns:
        - List of strings from 'chunked' that match the filtering criteria.
        """
        # Filter the 'chunked' list
        filtered_chunked = [
            name for name in chunked 
            if int(name.split('_')[-1].split('.')[0]) in in_names
        ]

        filtered_chunked = ['/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128/' + i for i in filtered_chunked]
        return filtered_chunked


    #new dnbr composite method
    testing_names_85 = filter_chunked_85(testing_names, chunked_85)
    testing_names_old = filter_chunked_old(testing_names, chunked_old)
    testing_names_ndsi = filter_chunked_ndsi(testing_names, chunked_ndsi)
    
    batch_size = 45
    
    models_vi_gen_85 =  img_gen_vi(batch_size, img_size, testing_names_85)
    models_vi_gen_old =  img_gen_vi(batch_size, img_size, testing_names_old)
    models_vi_gen_ndsi =  img_gen_vi(batch_size, img_size, testing_names_ndsi)


    mtbs_nbac = predict_model_single(model_1, models_vi_gen_old, 'MTBS_NBAC')
    mtbs_nbac_85 = predict_model_single(model_2, models_vi_gen_85, 'MTBS_NBAC_85')
    mtbs_nbac_ndsi = predict_model_single(model_3, models_vi_gen_ndsi, 'MTBS_NBAC_NDSI')

    combined = predict_model_single(model_4, models_vi_gen_old, 'Combined')
    combined_85 = predict_model_single(model_5, models_vi_gen_85, 'Combined_85')
    combined_ndsi = predict_model_single(model_6, models_vi_gen_ndsi, 'Combined_NDSI')

    
    russia = predict_model_single(model_7, models_vi_gen_old, 'Russia')
    russia_85 = predict_model_single(model_8, models_vi_gen_85, 'Russia_85')
    russia_ndsi = predict_model_single(model_9, models_vi_gen_ndsi, 'Russia_NDSI')

    
    final = pd.concat([mtbs_nbac, mtbs_nbac_85, mtbs_nbac_ndsi, combined, combined_85, combined_ndsi, russia, russia_85, russia_ndsi])
    
    final['Fold'] = fold
    
    final.to_csv(os.path.join(out_path, f"{fold}.csv"), index = False)
    
    return print(f'Done Processing Final Median for fold {fold}')

out_path =  "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results/final_means_try2"

    
#don't need to read in loops as no folds here
#nbac mtbs model
model_1 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})

#nbac mtbs model with 85% dnbr threshold
model_2 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm_85.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})
#nbac mtbs with NDSI
model_3 = tensorflow.keras.models.load_model("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models/nbac_mtbs_regularize_50_global_norm_ndsi.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})

for fold in folds:
#loop through the folds

     #combined original dnbr method
    # model_4 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_old_dnbr_{fold}_t2.tf", 
    #                                            custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
    #                                                            'recall':sm.metrics.Recall(threshold = 0.5),
    #                                                             'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                              'iou_score': sm.metrics.IOUScore(threshold=0.5),
    #                                                               'accuracy': 'accuracy'})
    
    model_4 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})
    #combined 85
    model_5 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})

    #combined ndsi
    model_6 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    

    #russia monthly
    # model_7 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_t2.tf", 
    #                                            custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
    #                                                            'recall':sm.metrics.Recall(threshold = 0.5),
    #                                                             'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                              'iou_score': sm.metrics.IOUScore(threshold=0.5),
    #                                                               'accuracy': 'accuracy'})

    #russia old
    model_7 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_old.tf", 
                                           custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                           'recall':sm.metrics.Recall(threshold = 0.5),
                                                            'f1-score': sm.metrics.FScore(threshold=0.5),
                                                             'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                              'accuracy': 'accuracy'})

        #russia monthly
    model_8 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_t2.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    #russia old old
    # model_8 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_old_dnbr_{fold}_t2.tf", 
    #                                            custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
    #                                                            'recall':sm.metrics.Recall(threshold = 0.5),
    #                                                             'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                              'iou_score': sm.metrics.IOUScore(threshold=0.5),
    #                                                               'accuracy': 'accuracy'})

    #russia ndsi method
    model_9 = tensorflow.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_{fold}.tf", 
                                               custom_objects={'precision':sm.metrics.Precision(threshold=0.5), 
                                                               'recall':sm.metrics.Recall(threshold = 0.5),
                                                                'f1-score': sm.metrics.FScore(threshold=0.5),
                                                                 'iou_score': sm.metrics.IOUScore(threshold=0.5),
                                                                  'accuracy': 'accuracy'})
    
    
    get_final_mean(model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, out_path, fold)
    
    
    

Number of devices: 4
Done Processing Final Median for fold 0
Done Processing Final Median for fold 1
Done Processing Final Median for fold 2
Done Processing Final Median for fold 3
Done Processing Final Median for fold 4


In [4]:
't'

't'

In [1]:
import pandas as pd
import glob

# Set the directory where your CSV files are stored
directory_path = "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results/final_means_try2"

# Create a single DataFrame from all CSV files
combined_df = pd.concat(
    [pd.read_csv(f) for f in glob.glob(f"{directory_path}/*.csv")],
    ignore_index=True
)

# Calculate the mean of the 'IoU' column
mean_iou = combined_df.groupby('Model')['F-1'].mean().reset_index()

print(f"The mean of the IOU: {mean_iou}")

The mean of the IOU:             Model    F-1
0        Combined  0.786
1     Combined_85  0.796
2   Combined_NDSI  0.830
3       MTBS_NBAC  0.814
4    MTBS_NBAC_85  0.780
5  MTBS_NBAC_NDSI  0.804
6          Russia  0.832
7       Russia_85  0.814
8     Russia_NDSI  0.812


Now I need some functions to get the median for the shapefiles

In [4]:
def spatial_median(in_path, out_path):
    
    # Set the directory where your CSV files are stored
    directory_path = in_path
    os.makedirs(out_path, exist_ok = True)

    folds = [0, 1, 2, 3, 4]

    for_comb = []
    for fold in folds:

        in_files = os.listdir(os.path.join(directory_path, str(fold)))

        for f in in_files:

            if f.endswith('.shp'):
                f_name = f.split('.')[0]
                in_file = gpd.read_file(os.path.join(directory_path, str(fold), f))
                in_file = in_file[['FID', 'Model', 'IOU', 'geometry']]
                in_file['Fold'] = fold
                for_comb.append(in_file)


    # Ensure the concatenation retains GeoDataFrame structure
    merged = gpd.GeoDataFrame(pd.concat(for_comb, ignore_index=True))

    # Use dissolve to aggregate by 'FID' and 'Model' while calculating mean 'IOU'
    # and keeping the last geometry for each group (adjust according to needs)
    merged = merged.dissolve(by=['FID', 'Model'], aggfunc={'IOU': 'mean'}).reset_index()
    
    #round numeric columns to 2 digits
    numeric_cols = merged.select_dtypes(include=['number']).columns

    # Round all numeric columns to 2 decimal places
    merged[numeric_cols] = merged[numeric_cols].round(2)


     ##create a new column which will difference the combined model and the north america model from eurasia, do it so we subtract from russia, so larger values are better for russia
    na = merged[merged['Model'] == 'MTBS_NBAC']
    na_85 = merged[merged['Model'] == 'MTBS_NBAC_85']
    na_ndsi = merged[merged['Model'] == 'MTBS_NBAC_NDSI']
    
    combined = merged[merged['Model'] == 'Combined']
    combined_85 = merged[merged['Model'] == 'Combined_85']
    combined_ndsi = merged[merged['Model'] == 'Combined_NDSI']
    
    russ = merged[merged['Model'] == 'Russia']
    russ_85 = merged[merged['Model'] == 'Russia_85']
    russ_ndsi = merged[merged['Model'] == 'Russia_NDSI']

    #determine difference in na vs eruasia models
    na_russ_diff = russ['IOU'].values - na['IOU'].values
    na_russ_85_diff = russ_85['IOU'].values - na_85['IOU'].values
    na_russ_ndsi_diff = russ_ndsi['IOU'].values - na_ndsi['IOU'].values

    combined_russ_diff = russ['IOU'].values - combined['IOU'].values
    combined_russ_85_diff = russ_85['IOU'].values - combined_85['IOU'].values
    combined_russ_ndsi_diff = russ_ndsi['IOU'].values - combined_ndsi['IOU'].values

    #within the same na or eurasia models look at difference in IOU too, for instance na old and na 85 or ndsi
    na_85_diff = na['IOU'].values - na_85['IOU'].values
    na_ndsi_diff = na['IOU'].values - na_ndsi['IOU'].values
    na_85_ndsi_diff = na_85['IOU'].values - na_ndsi['IOU'].values

    combined_85_diff = combined['IOU'].values - combined_85['IOU'].values
    combined_ndsi_diff = combined['IOU'].values - combined_ndsi['IOU'].values
    combined_85_ndsi_diff = combined_85['IOU'].values - combined_ndsi['IOU'].values

    russ_85_diff = russ['IOU'].values - russ_85['IOU'].values
    russ_ndsi_diff = russ['IOU'].values - russ_ndsi['IOU'].values
    russ_85_ndsi_diff = russ_85['IOU'].values - russ_ndsi['IOU'].values
    
    #for each unique model loop through and save individual files, this is for making maps in arc later easier
    models = merged['Model'].unique()

    for m in models:
        
        sub = merged[merged['Model'] == m]

        #difference across models
        sub['na_russ_IOU_diff'] = na_russ_diff
        sub['na_russ_85_IOU_diff'] = na_russ_85_diff
        sub['na_russ_ndsi_IOU_diff'] = na_russ_ndsi_diff

        sub['combined_russ_IOU_diff'] = combined_russ_diff
        sub['combined_russ_85_IOU_diff'] = combined_russ_85_diff
        sub['combined_russ_ndsi_IOU_diff'] = combined_russ_ndsi_diff

        #within same location differences
        sub['na_85_diff'] = na_85_diff
        sub['na_ndsi_diff'] = na_ndsi_diff
        sub['na_85_ndsi_diff'] = na_85_ndsi_diff

        sub['combined_85_diff'] = combined_85_diff
        sub['combined_ndsi_diff'] = combined_ndsi_diff
        sub['combined_85_ndsi_diff'] = combined_85_ndsi_diff

        sub['russ_85_diff'] = russ_85_diff
        sub['russ_ndsi_diff'] = russ_ndsi_diff
        sub['russ_85_ndsi_diff'] = russ_85_ndsi_diff

        sub.to_file(os.path.join(out_path,  f"{m}.shp"))

    print("Done Processing final shapefiles")

    
#grid final shapefiles
spatial_median("/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/grid/shapefiles",  "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/grid/shapefiles_final")

#ecoregion final shapefiles
spatial_median("/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/ecoregion/shapefiles",  "/explore/nobackup/people/spotter5/cnn_mapping/Russia/cv_results_t2/ecoregion/shapefiles_final")



Done Processing final shapefiles
Done Processing final shapefiles


In [15]:
't'

't'

In [3]:
import geopandas as gpd

gdf = gpd.read_file("/explore/nobackup/people/spotter5/viirs_nrt/shared_data/shapefiles/ak_ca.shp")

gdf.to_file("/explore/nobackup/people/spotter5/viirs_nrt/shared_data/shapefiles/ak_ca.gpkg")