This script will get all the 128x128 numpy files and predict one model using an image generator and one manually. I will also predict manually on the original tif file and compare the three IOU scores. This is to ensure I get the same score for all of them.  If I don't I need to explore why.

In [1]:
import os
# Setup environment variables
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import rioxarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from MightyMosaic import MightyMosaic
import segmentation_models as sm
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

import warnings
import glob
import tensorflow
warnings.filterwarnings("ignore")



Segmentation Models: using `tf.keras` framework.


Image generator class and function to predict a model

In [2]:
#image gen class to be used when predicting
min_max_vi = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop = True)
min_max_vi = min_max_vi[['6', '7', '8']]

class img_gen_vi(tensorflow.keras.utils.Sequence):

    """Helper to iterate over the data (as Numpy arrays).
    Inputs are batch size, the image size, the input paths (x) and target paths (y)
    """

    #will need pre defined variables batch_size, img_size, input_img_paths and target_img_paths
    def __init__(self, batch_size, img_size, input_img_paths):
	    self.batch_size = batch_size
	    self.img_size = img_size
	    self.input_img_paths = input_img_paths
	    self.target_img_paths = input_img_paths

    #number of batches the generator is supposed to produceis the length of the paths divided by the batch siize
    def __len__(self):
	    return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_img_paths = self.input_img_paths[i : i + self.batch_size] #for a given index get the input batch pathways (x)
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size] #for a given index get the input batch pathways (y)
		
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32") #create matrix of zeros which will have the dimension height, wideth, n_bands), 8 is the n_bands
        
  
         #start populating x by enumerating over the input img paths
        for j, path in enumerate(batch_img_paths):

           #load image
            img =  np.round(np.load(path), 3)
            
            if img.shape[2] == 4:
                
                img = img[:, :, :-1]

            else:
                
                img = img[:, :, 6:9]

            # img = img * 1000
            img = img.astype(float)
            img = np.round(img, 3)
            img[img == 0] = -999

            img[np.isnan(img)] = -999


            img[img == -999] = np.nan

            in_shape = img.shape
            
            #turn to dataframe to normalize
            img = img.reshape(img.shape[0] * img.shape[1], img.shape[2])
			
            img = pd.DataFrame(img)
			
            img.columns = min_max_vi.columns
			
            img = pd.concat([min_max_vi, img]).reset_index(drop = True)


            #normalize 0 to 1
            img = pd.DataFrame(scaler.fit_transform(img))
			
            img = img.iloc[2:]
#
#             img = img.values.reshape(in_shape)
            img = img.values.reshape(in_shape)

#             replace nan with -1
            img[np.isnan(img)] = -1

#apply standardization
# img = normalize(img, axis=(0,1))

            img = np.round(img, 3)
            #populate x
            x[j] = img#[:, :, 4:] index number is not included, 


        #do tthe same thing for y
        y = np.zeros((self.batch_size,) + self.img_size, dtype="uint8")

        for j, path in enumerate(batch_target_img_paths):

            #load image
            img =  np.round(np.load(path), 3)[:, :, -1]

            img = img.astype(int)

            img[img < 0] = 0
            img[img >1] = 0
            img[~np.isin(img, [0,1])] = 0

            img[np.isnan(img)] = 0
            img = img.astype(int)

            # img =  tf.keras.utils.to_categorical(img, num_classes = 2)
            # y[j] = np.expand_dims(img, 2) 
            y[j] = img
  
       
    #Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
    # y[j] -= 1

        return x, y

def predict_model(model, generator, name):
    
    '''
    model: tensorflow model to predict
    generator: keras generator with the images to predict on
    name: string, model name\
    fid: variable I was looping through
    count: count retained earlier
    '''
    #get the results from the nbac and mtbs model
    model_1_res = model.evaluate_generator(generator, 100)
    # model_1_res = model.evaluate(models_vi_gen, 
    #                          steps=20,   # Total number of steps (batches)
    #                          workers=4,                  # Number of workers for parallel data loading
    #                          use_multiprocessing=True)   # Enable multiprocessing for faster data loading

    iou = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)

    #make new dataframe with scores
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU': [iou],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
                        }, index=[0])  # Explicitly setting index to [0] for a single row

    return in_df

Model to check with

In [5]:
model = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_0.tf", 
                                     custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                     'recall': sm.metrics.Recall(threshold=0.5),
                                                     'f1-score': sm.metrics.FScore(threshold=0.5),
                                                     'iou_score': sm.metrics.IOUScore(threshold=0.5)})



Pathways to the original tif and chunked path, uses median_1.tif here. I will run the generator on this one file first

In [5]:
import time

start_time = time.time()

# Input path where the original TIFF files are stored
in_path = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_monthly_ndsi_sliding'

# Path where the 128x128 chunks are stored (output path from previous script)
chunk_path = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_monthly_ndsi_sliding_subs_0_128'

#file of interest
in_file = 'median_6.tif' #median 1 is bigger

 # Extract the TIFF ID from file_name, e.g., for 'median_1.tif' the ID is 1
tif_id = in_file.split('_')[1].replace('.tif', '')

# Loop through the directory of chunks and select the ones associated with the current file_name
chunk_files = [f for f in os.listdir(chunk_path) if f.endswith(f'_{tif_id}.npy')]  # Dynamically select files based on file_name

#append the full pathway
chunk_files = [os.path.join(chunk_path, f) for f in chunk_files]

#image size
img_size = (128, 128)

#batch size
batch_size = len(chunk_files)
# batch_size = 1

#apply the generator
models_vi_gen =  img_gen_vi(batch_size, img_size, chunk_files)

#predict the model
gen_preds = predict_model(model, models_vi_gen, 'Comb_Sliding_1')

 # End the timer
end_time = time.time()

# Calculate total time in minutes
total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")

print(gen_preds)

Total execution time: 0.11 minutes
            Model   IOU  Precision  Recall   F-1  Accuracy
0  Comb_Sliding_1  0.83       0.89    0.92  0.91      0.98


Return IOU with intersection and union and time to compare to above

In [6]:
import time
import numpy as np
import pandas as pd
import os

start_time = time.time()

def predict_model(model, generator, name):
    '''
    model: tensorflow model to predict
    generator: keras generator with the images to predict on
    name: string, model name
    '''
    # Initialize variables to accumulate intersection and union
    total_intersection = 0
    total_union = 0
    
    # Get the results from the model (since generator length is 1)
    for i in range(len(generator)):
        # Extract the batch of images (x_batch) and ground truth labels (y_true)
        x_batch, y_true = generator[i]
        
        # Iterate through each sample in the batch (81 samples)
        for j in range(len(x_batch)):
            # Select the j-th image and its corresponding label
            x_sample = np.expand_dims(x_batch[j], axis=0)  # Add batch dimension for prediction
            y_true_sample = y_true[j]
            
            # Skip the calculation if y_true_sample is all zeros
            if np.all(y_true_sample == 0):
                continue
            
            # Predict on the individual sample
            y_pred_sample = model.predict(x_sample, verbose=0)
            
            # Squeeze the extra dimension in y_pred to match the shape of y_true
            y_pred_sample = np.squeeze(y_pred_sample, axis=1)  # Remove the batch dimension
            
            # Select the first channel from the prediction (assuming binary classification)
            y_pred_sample = y_pred_sample[0]  # Taking the first channel out of 3
            
            # Threshold predictions to binary (0 or 1) if necessary
            y_pred_sample = np.where(y_pred_sample > 0.5, 1, 0)

            y_pred_sample = y_pred_sample[:, :, 0]
            
            # Ensure y_pred and y_true have compatible shapes
            assert y_pred_sample.shape == y_true_sample.shape, f"Shape mismatch: y_pred {y_pred_sample.shape} and y_true {y_true_sample.shape}"
            
            # Calculate intersection and union for this sample
            intersection = np.logical_and(y_pred_sample, y_true_sample).sum()
            union = np.logical_or(y_pred_sample, y_true_sample).sum()
            
            # Accumulate intersection and union
            total_intersection += intersection
            total_union += union
    
    # Calculate IOU based on total intersection and total union
    iou_calculated = total_intersection / total_union if total_union > 0 else 0
    
    # Evaluate the model to get metrics including IOU (from model's perspective)
    model_1_res = model.evaluate(generator)
    
    iou_model = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)
    
    # Create a new dataframe with scores and the calculated IOU
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU (Model)': [iou_model],
        'IOU (Calculated)': [iou_calculated],
        'Total Intersection': [total_intersection],
        'Total Union': [total_union],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
    }, index=[0])  # Explicitly setting index to [0] for a single row
    
    return in_df


# Input path where the original TIFF files are stored
in_path = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_monthly_ndsi_sliding'

# Path where the 128x128 chunks are stored (output path from previous script)
chunk_path = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_monthly_ndsi_sliding_subs_0_128'

# File of interest
in_file = 'median_6.tif'

# Extract the TIFF ID from file_name, e.g., for 'median_1.tif' the ID is 1
tif_id = in_file.split('_')[1].replace('.tif', '')

# Loop through the directory of chunks and select the ones associated with the current file_name
chunk_files = [f for f in os.listdir(chunk_path) if f.endswith(f'_{tif_id}.npy')]  # Dynamically select files based on file_name

# Append the full pathway
chunk_files = [os.path.join(chunk_path, f) for f in chunk_files]

# Image size
img_size = (128, 128)

# Batch size
batch_size = len(chunk_files)

# Apply the generator
models_vi_gen = img_gen_vi(batch_size, img_size, chunk_files)

# Predict the model
gen_preds = predict_model(model, models_vi_gen, 'Comb_Sliding_1')

# End the timer
end_time = time.time()

# Calculate total time in minutes
total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")

print(gen_preds)


Total execution time: 0.17 minutes
            Model  IOU (Model)  IOU (Calculated)  Total Intersection   
0  Comb_Sliding_1         0.83          0.862402              136820  \

   Total Union  Precision  Recall   F-1  Accuracy  
0       158650       0.89    0.92  0.91      0.98  


In [7]:
136820/158650

0.8624015127639458

Now predict on the chunks without the generator

In [5]:
# Load min_max dataframe and use columns '6', '7', '8'
min_max = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop=True)
min_max = min_max[['6', '7', '8']]

# Fit the scaler once on the min_max data
scaler = MinMaxScaler()
scaler.fit(min_max)

# Function to normalize the first three bands (6, 7, 8)
def norm(img):
    img = img[:, :, :3]  # Normalize first three bands
    img = img.astype(float)
    img = np.round(img, 3)
    img[img == 0] = -999
    img[np.isnan(img)] = -999
    img[img == -999] = np.nan
    
    in_shape = img.shape
    img_flat = img.reshape(-1, img.shape[2])
    df_img = pd.DataFrame(img_flat, columns=['6', '7', '8'])
    df_img = pd.concat([min_max, df_img]).reset_index(drop=True)
    scaled_img = pd.DataFrame(scaler.transform(df_img)).iloc[len(min_max):]
    img_scaled = scaled_img.values.reshape(in_shape)
    img_scaled[np.isnan(img)] = -1
    
    return img_scaled

# Function to calculate IoU (Intersection over Union)
def calculate_iou(pred, y):
    pred_binary = pred > 0.5
    y_binary = y > 0.5
    intersection = np.logical_and(pred_binary, y_binary).sum()
    union = np.logical_or(pred_binary, y_binary).sum()
    return intersection, union

# Function to process all files in chunk_files without a generator
def process_chunks_and_calculate_iou(model, chunk_files):
    total_intersection = 0
    total_union = 0
    
    for chunk_file in chunk_files:
        try:
            # Load the chunk (npy file)
            img = np.load(chunk_file)
            
            # Ground truth is the last band
            y_true = img[:, :, -1]
            y_true = np.round(y_true).astype(int)
            y_true[y_true < 0] = 0
            y_true[~np.isin(y_true, [0, 1])] = 0
            
            # Normalize the input data (first three bands)
            img_normalized = norm(img)
            
            # Predict using the model
            pred = model.predict(np.expand_dims(img_normalized, axis=0))[0].squeeze()
            
            # Calculate IoU
            intersection, union = calculate_iou(pred, y_true)
            total_intersection += intersection
            total_union += union
            
        except Exception as e:
            print(f"Error processing {chunk_file}: {e}")
            continue
    
    # Calculate the final IoU
    overall_iou = total_intersection / total_union if total_union != 0 else 0
    return overall_iou

# Process all chunks and calculate IoU
overall_iou = process_chunks_and_calculate_iou(model, chunk_files)

print(f"Overall IoU for all chunk files: {overall_iou}")

Overall IoU for all chunk files: 0.8552701603283642


Now predict on the original tif file

In [7]:
# Helper function to load and process TIFF files
def load_tif(file_path):
    img = rioxarray.open_rasterio(file_path).to_numpy()
    img = np.moveaxis(img, 0, 2)  # Move bands to last axis
    return img

# Function to crop two arrays to the smallest common shape
def crop_to_smallest(pred, y):
    min_height = min(pred.shape[0], y.shape[0])
    min_width = min(pred.shape[1], y.shape[1])
    return pred[:min_height, :min_width], y[:min_height, :min_width]

# Path to the median_1.tif file
in_path = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_monthly_ndsi_sliding'
tif_file = 'median_1.tif'
tif_path = os.path.join(in_path, tif_file)

# Load the image from the TIFF file
img = load_tif(tif_path)

# Extract ground truth (last band)
y_true = img[:, :, -1]
y_true = np.round(y_true).astype(int)
y_true[y_true < 0] = 0
y_true[~np.isin(y_true, [0, 1])] = 0

# Normalize the input data (first three bands)
img_normalized = norm(img)

# Predict using the model
pred = model.predict(np.expand_dims(img_normalized, axis=0))[0].squeeze()

pred, y_true = crop_to_smallest(pred, y_true)

# Calculate IoU
intersection, union = calculate_iou(pred, y_true)
iou = intersection / union if union != 0 else 0

print(f"IoU for 'median_1.tif': {iou}")

IoU for 'median_1.tif': 0.826564719897466


Image generator with batch size of 1

In [9]:
# Assuming min_max_vi and scaler are already defined as in your original code

class img_gen_vi_one(tf.keras.utils.Sequence):

    """Helper to iterate over the data (as Numpy arrays).
    Inputs are batch size (now fixed to 1), the image size, the input paths (x), and target paths (y)
    """

    def __init__(self, img_size, input_img_paths):
        self.batch_size = 1  # Fixed batch size to 1 for individual processing
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = input_img_paths  # Assuming target paths are the same

    def __len__(self):
        return len(self.input_img_paths)  # One batch per image

    def __getitem__(self, idx):
        """Returns tuple (input, target) corresponding to batch #idx."""
        
        # Get the image path
        img_path = self.input_img_paths[idx]
        
        # Create empty arrays for x (input) and y (ground truth)
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        y = np.zeros((self.batch_size,) + self.img_size, dtype="uint8")
        
        # Load image
        img = np.round(np.load(img_path), 3)
        
        # Select the appropriate bands for normalization
        if img.shape[2] == 4:
            img = img[:, :, :-1]  # Drop the last band if it has 4 bands
        else:
            img = img[:, :, 6:9]  # Select bands 6 to 8 if it has more than 3 bands

        # Normalize the image
        img = img.astype(float)
        img = np.round(img, 3)
        img[img == 0] = -999
        img[np.isnan(img)] = -999
        img[img == -999] = np.nan

        in_shape = img.shape
        img = img.reshape(img.shape[0] * img.shape[1], img.shape[2])
        img = pd.DataFrame(img, columns=min_max_vi.columns)
        img = pd.concat([min_max_vi, img]).reset_index(drop=True)
        img = pd.DataFrame(scaler.transform(img))
        img = img.iloc[len(min_max_vi):]  # Remove the first rows from the min_max scaling
        img = img.values.reshape(in_shape)
        img[np.isnan(img)] = -1

        # Assign the normalized image to the input array
        x[0] = img

        # Ground truth (y) is the last band
        y_img = np.round(np.load(img_path), 3)[:, :, -1]
        y_img = y_img.astype(int)
        y_img[y_img < 0] = 0
        y_img[y_img > 1] = 0
        y_img[~np.isin(y_img, [0, 1])] = 0
        y_img[np.isnan(y_img)] = 0
        y[0] = y_img

        return x, y
        
# Initialize the generator with batch size 1
# models_vi_gen = img_gen_vi_one(img_size, chunk_files)

# # Predict the model using the generator with batch size 1
# gen_preds = predict_model(model, models_vi_gen, 'Comb_Sliding_1')

# print(gen_preds)

Depending on the results above I can use the image generator or not, I think there was a problem in my spatial evaluations before anyways. I think it was ok actually to use the generator, redo the code to get total predictions from the folds first, then re-do getting spatial on only the test set, and merging those IOU's, there is something wrong there. First do it with one fold, then do it with all to make sure things are staying consistent.  Ideally one script will do it all at the exact same time. Not all folds are done right now for NDSI and NDSI sliding so maybe do those, and match old with the folds that are done.  If something suspicisou re-run the fold. 

For each of the folds compare the testing set IOU's independently, I will do this for the combined models

In [32]:
import os
import pandas as pd
import tensorflow as tf
import segmentation_models as sm

# Function to load models for a specific fold
def load_models_for_fold(fold):
    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Filter function for chunked data
def filter_chunked(in_names, chunked, data_type):
    """
    General function to filter chunked data based on in_names and chunked paths.
    data_type: 'old', 'ndsi', or 'sliding' to modify paths accordingly.
    """
    filtered_chunked = [name for name in chunked if int(name.split('_')[-1].split('.')[0]) in in_names]
    base_path = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_{data_type}_subs_0_128/"
    return [os.path.join(base_path, i) for i in filtered_chunked]

# Main function to process a single fold
def process_fold(fold):
    # Load models
    model_1, model_2, model_3 = load_models_for_fold(fold)

    # Load testing data for the fold
    testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()

    # Load chunked data for old, ndsi, and sliding
    chunked_old = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
    chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')
    chunked_sliding = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128')

    # Filter chunked data
    testing_names_old = filter_chunked(testing_names, chunked_old, 'old')
    testing_names_ndsi = filter_chunked(testing_names, chunked_ndsi, 'monthly_ndsi')
    testing_names_sliding = filter_chunked(testing_names, chunked_sliding, 'monthly_ndsi_sliding')

    # Initialize image generators
    img_size = (128, 128)
    models_vi_gen_old = img_gen_vi_one(img_size, testing_names_old)
    models_vi_gen_ndsi = img_gen_vi_one(img_size, testing_names_ndsi)
    models_vi_gen_sliding = img_gen_vi_one(img_size, testing_names_sliding)

    # Predict on the models using the generator with batch size 1
    gen_preds_old = predict_model(model_1, models_vi_gen_old, f'Comb_Old_{fold}')
    gen_preds_ndsi = predict_model(model_2, models_vi_gen_ndsi, f'Comb_NDSI_{fold}')
    gen_preds_sliding = predict_model(model_3, models_vi_gen_sliding, f'Comb_Sliding_{fold}')

    # Combine results for the current fold
    fold_results = pd.concat([gen_preds_old, gen_preds_ndsi, gen_preds_sliding], ignore_index=True)
    
    return fold_results

# List of folds to process
folds = [0, 2, 4]

# Process each fold and combine results
all_results = pd.concat([process_fold(fold) for fold in folds], ignore_index=True)

# Save final results to CSV
# all_results.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/final_iou_results_all_folds.csv', index=False)

print("Final results saved to CSV.")


Final results saved to CSV.


Get IOU accumulated across all folds, note this isn't really a way we should do this as taking the mean across the 5 after each fold is fine

In [4]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import rioxarray
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import geopandas as gpd

# Function to load models for a specific fold
def load_models_for_fold(fold):
    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Load min_max dataframe and use columns '6', '7', '8'
min_max = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop=True)
min_max = min_max[['6', '7', '8']]

# Fit the scaler once on the min_max data
scaler = MinMaxScaler()
scaler.fit(min_max)

# Function to normalize the first three bands (6, 7, 8)
def norm(img):
    img = img[:, :, :3]  # Normalize first three bands
    img = img.astype(float)
    img = np.round(img, 3)
    img[img == 0] = -999
    img[np.isnan(img)] = -999
    img[img == -999] = np.nan
    
    in_shape = img.shape
    img_flat = img.reshape(-1, img.shape[2])
    df_img = pd.DataFrame(img_flat, columns=['6', '7', '8'])
    df_img = pd.concat([min_max, df_img]).reset_index(drop=True)
    scaled_img = pd.DataFrame(scaler.transform(df_img)).iloc[len(min_max):]
    img_scaled = scaled_img.values.reshape(in_shape)
    img_scaled[np.isnan(img)] = -1
    
    return img_scaled

# Filter function for chunked data
def filter_chunked(in_names, chunked, data_type):
    """
    General function to filter chunked data based on in_names and chunked paths.
    data_type: 'old', 'ndsi', or 'sliding' to modify paths accordingly.
    """
    filtered_chunked = [name for name in chunked if int(name.split('_')[-1].split('.')[0]) in in_names]
    base_path = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_{data_type}_subs_0_128/"
    
    return [os.path.join(base_path, i) for i in filtered_chunked]


# Helper function to load and process TIFF files
# def load_tif(file_path, poly_id):
#     tif_file = f'median_{poly_id}.tif'
#     tif_path = os.path.join(file_path, tif_file)
#     img = rioxarray.open_rasterio(tif_path).to_numpy()
#     img = np.moveaxis(img, 0, 2)  # Move bands to last axis
#     return img

# Function to calculate IoU (Intersection over Union)
def calculate_iou(pred, y):
    pred_binary = pred > 0.5
    y_binary = y > 0.5
    intersection = np.logical_and(pred_binary, y_binary).sum()
    union = np.logical_or(pred_binary, y_binary).sum()
    return intersection, union

# Function to crop two arrays to the smallest common shape
def crop_to_smallest(pred, y):
    min_height = min(pred.shape[0], y.shape[0])
    min_width = min(pred.shape[1], y.shape[1])
    pred_cropped = pred[:min_height, :min_width]
    y_cropped = y[:min_height, :min_width]
    return pred_cropped, y_cropped

# Function to process all files in chunk_files and calculate IoU
def process_chunks_and_calculate_iou(model, chunk_files, model_type):
    total_intersection = 0
    total_union = 0
    
    for chunk_file in chunk_files:
        try:
            # Load the chunk (npy file)
            img = np.load(chunk_file)
            
            # Ground truth is the last band
            y_true = img[:, :, -1]
            y_true = np.round(y_true).astype(int)
            y_true[y_true < 0] = 0
            y_true[~np.isin(y_true, [0, 1])] = 0
            
            # Normalize the input data (first three bands)
            img_normalized = norm(img)
            
            # Predict using the model
            pred = model.predict(np.expand_dims(img_normalized, axis=0),  verbose=0)[0].squeeze()
            
            # Crop predictions and ground truth to the smallest common shape
            pred, y_true = crop_to_smallest(pred, y_true)
            
            # Calculate IoU
            intersection, union = calculate_iou(pred, y_true)
            total_intersection += intersection
            total_union += union
            
        except Exception as e:
            print(f"Error processing {chunk_file} for {model_type} model: {e}")
            continue
    
    return total_intersection, total_union

# Main function to process a single fold and calculate IoU for old, ndsi, and sliding
def process_fold(fold):
    # Initialize accumulators for IoU sums
    total_intersection_old = 0
    total_union_old = 0

    total_intersection_ndsi = 0
    total_union_ndsi = 0

    total_intersection_sliding = 0
    total_union_sliding = 0

    # Load models
    model_1, model_2, model_3 = load_models_for_fold(fold)

    # Load testing data for the fold
    testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()

    # Load chunked data for old, ndsi, and sliding
    chunked_old = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
    chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')
    chunked_sliding = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128')

    # Filter chunked data based on test names
    testing_names_old = filter_chunked(testing_names, chunked_old, 'old')
    testing_names_ndsi = filter_chunked(testing_names, chunked_ndsi, 'monthly_ndsi')
    testing_names_sliding = filter_chunked(testing_names, chunked_sliding, 'monthly_ndsi_sliding')

    # Process chunk files for each model and accumulate IoU for old model
    total_intersection_old, total_union_old = process_chunks_and_calculate_iou(model_1, testing_names_old, 'old')
    
    # Process chunk files for NDSI model
    total_intersection_ndsi, total_union_ndsi = process_chunks_and_calculate_iou(model_2, testing_names_ndsi, 'ndsi')

    # Process chunk files for sliding NDSI model
    total_intersection_sliding, total_union_sliding = process_chunks_and_calculate_iou(model_3, testing_names_sliding, 'sliding')

    # Calculate final overall IoUs for each model
    overall_iou_old = total_intersection_old / total_union_old if total_union_old != 0 else 0
    overall_iou_ndsi = total_intersection_ndsi / total_union_ndsi if total_union_ndsi != 0 else 0
    overall_iou_sliding = total_intersection_sliding / total_union_sliding if total_union_sliding != 0 else 0

    return overall_iou_old, overall_iou_ndsi, overall_iou_sliding

# Main function to calculate IoU across all folds
def calculate_iou_across_folds(folds):
    # Initialize accumulators for IoU across all folds
    total_intersection_old = 0
    total_union_old = 0

    total_intersection_ndsi = 0
    total_union_ndsi = 0

    total_intersection_sliding = 0
    total_union_sliding = 0

    for fold in folds:
        print(f"Processing fold {fold}...")
        
        # Process IoU for the current fold
        fold_iou_old, fold_iou_ndsi, fold_iou_sliding = process_fold(fold)

        # Accumulate results
        total_intersection_old += fold_iou_old
        total_union_old += fold_iou_old

        total_intersection_ndsi += fold_iou_ndsi
        total_union_ndsi += fold_iou_ndsi

        total_intersection_sliding += fold_iou_sliding
        total_union_sliding += fold_iou_sliding

    # Calculate overall IoUs for all folds
    overall_iou_old = total_intersection_old / total_union_old if total_union_old != 0 else 0
    overall_iou_ndsi = total_intersection_ndsi / total_union_ndsi if total_union_ndsi != 0 else 0
    overall_iou_sliding = total_intersection_sliding / total_union_sliding if total_union_sliding != 0 else 0

    return overall_iou_old, overall_iou_ndsi, overall_iou_sliding

# List of folds to process
folds = [0, 2, 4]  # Example folds

# Calculate the IoU across all folds
iou_old, iou_ndsi, iou_sliding = calculate_iou_across_folds(folds)

# Print the final IoU values for each model across all folds
print(f"Overall IoU for old model across all folds: {iou_old}")
print(f"Overall IoU for NDSI model across all folds: {iou_ndsi}")
print(f"Overall IoU for Sliding NDSI model across all folds: {iou_sliding}")


Processing fold 0...


2024-10-16 14:02:55.102654: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401


KeyboardInterrupt: 

Do the above but use image generator and keep track of total intersection and union for all folds in teh respectie models. 

In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
import time

# Function to load models for a specific fold
def load_models_for_fold(fold):
    # model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})


    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Filter function for chunked data
def filter_chunked(in_names, chunked, data_type):
    filtered_chunked = [name for name in chunked if int(name.split('_')[-1].split('.')[0]) in in_names]
    base_path = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_{data_type}_subs_0_128/"
    return [os.path.join(base_path, i) for i in filtered_chunked]

# Function to predict using model and accumulate IoU across batches
def predict_model(model, generator, name):
    total_intersection = 0
    total_union = 0
    
    for i in range(len(generator)):
        x_batch, y_true = generator[i]
        for j in range(len(x_batch)):
            x_sample = np.expand_dims(x_batch[j], axis=0)
            y_true_sample = y_true[j]

            if np.all(y_true_sample == 0):
                continue
            
            y_pred_sample = model.predict(x_sample, verbose=0)
            y_pred_sample = np.squeeze(y_pred_sample, axis=1)[0]
            y_pred_sample = np.where(y_pred_sample > 0.5, 1, 0)
            y_pred_sample = y_pred_sample[:, :, 0]
            
            assert y_pred_sample.shape == y_true_sample.shape, f"Shape mismatch: y_pred {y_pred_sample.shape} and y_true {y_true_sample.shape}"
            
            intersection = np.logical_and(y_pred_sample, y_true_sample).sum()
            union = np.logical_or(y_pred_sample, y_true_sample).sum()
            
            total_intersection += intersection
            total_union += union
    
    iou_calculated = total_intersection / total_union if total_union > 0 else 0
    
    # Evaluate the model to get metrics including IOU (from model's perspective)
    model_1_res = model.evaluate(generator, verbose=0)
    
    iou_model = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)
    
    # Create a dataframe with the results
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU (Model)': [iou_model],
        'IOU (Calculated)': [iou_calculated],
        'Total Intersection': [total_intersection],
        'Total Union': [total_union],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
    }, index=[0])
    
    return in_df

# Function to process all folds dynamically for each model
def process_all_folds(folds, batch_size, img_size, output_path):
    total_intersections = {'old': 0, 'ndsi': 0, 'sliding': 0}
    total_unions = {'old': 0, 'ndsi': 0, 'sliding': 0}
    results = []

    for fold in folds:
        # Load models for the current fold
        model_1, model_2, model_3 = load_models_for_fold(fold)
        
        # Load testing data for the fold
        testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()

        # Load chunked data for old, ndsi, and sliding
        chunked_old = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
        chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')
        chunked_sliding = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128')

        # Filter chunked data based on test names
        testing_names_old = filter_chunked(testing_names, chunked_old, 'old')
        testing_names_ndsi = filter_chunked(testing_names, chunked_ndsi, 'monthly_ndsi')
        testing_names_sliding = filter_chunked(testing_names, chunked_sliding, 'monthly_ndsi_sliding')

        # Generate data for each model
        model_vi_gen_old = img_gen_vi(batch_size, img_size, testing_names_old)
        model_vi_gen_ndsi = img_gen_vi(batch_size, img_size, testing_names_ndsi)
        model_vi_gen_sliding = img_gen_vi(batch_size, img_size, testing_names_sliding)

        # Apply the generator and predict for each model
        result_old = predict_model(model_1, model_vi_gen_old, f'Comb_Old_{fold}')
        result_ndsi = predict_model(model_2, model_vi_gen_ndsi, f'Comb_NDSI_{fold}')
        result_sliding = predict_model(model_3, model_vi_gen_sliding, f'Comb_Sliding_{fold}')

        results.append(result_old)
        results.append(result_ndsi)
        results.append(result_sliding)

        # Accumulate the intersections and unions
        total_intersections['old'] += result_old['Total Intersection'].sum()
        total_unions['old'] += result_old['Total Union'].sum()
        total_intersections['ndsi'] += result_ndsi['Total Intersection'].sum()
        total_unions['ndsi'] += result_ndsi['Total Union'].sum()
        total_intersections['sliding'] += result_sliding['Total Intersection'].sum()
        total_unions['sliding'] += result_sliding['Total Union'].sum()

    # Calculate the final IoU for each model
    iou_old_final = total_intersections['old'] / total_unions['old'] if total_unions['old'] != 0 else 0
    iou_ndsi_final = total_intersections['ndsi'] / total_unions['ndsi'] if total_unions['ndsi'] != 0 else 0
    iou_sliding_final = total_intersections['sliding'] / total_unions['sliding'] if total_unions['sliding'] != 0 else 0

    # Create a final results dataframe
    final_results = pd.DataFrame({
        'Model': ['Overall_Old', 'Overall_NDSI', 'Overall_Sliding'],
        'IOU (Calculated)': [iou_old_final, iou_ndsi_final, iou_sliding_final],
        'Total Intersection': [total_intersections['old'], total_intersections['ndsi'], total_intersections['sliding']],
        'Total Union': [total_unions['old'], total_unions['ndsi'], total_unions['sliding']]
    })

    # Concatenate fold results with overall results
    all_results = pd.concat([pd.concat(results, ignore_index=True), final_results], ignore_index=True)

    # Save results to CSV
    output_file = os.path.join(output_path, 'russia_all_fold_iou_combined.csv')
    all_results.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

    # Return the final results
    return iou_old_final, iou_ndsi_final, iou_sliding_final

# Main entry point
# folds = [0, 2, 4]  # List of folds
folds = [0, 1, 2]  # List of folds

batch_size = 20  # Example batch size
img_size = (128, 128)  # Example image size
output_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp'
os.makedirs(output_path, exist_ok = True)

# Process all folds and get the final IoU for each model
start_time = time.time()
iou_old, iou_ndsi, iou_sliding = process_all_folds(folds, batch_size, img_size, output_path)
end_time = time.time()

total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")

# Print the final IoU for each model across all folds
print(f"Overall IoU for old model across all folds: {iou_old}")
print(f"Overall IoU for NDSI model across all folds: {iou_ndsi}")
print(f"Overall IoU for Sliding NDSI model across all folds: {iou_sliding}")


Save results per fold as well as overall

In [3]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
import time

# Function to load models for a specific fold
def load_models_for_fold(fold):


    # model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Filter function for chunked data
def filter_chunked(in_names, chunked, data_type):
    filtered_chunked = [name for name in chunked if int(name.split('_')[-1].split('.')[0]) in in_names]
    base_path = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_{data_type}_subs_0_128/"
    return [os.path.join(base_path, i) for i in filtered_chunked]

# Function to predict using model and accumulate IoU across batches
def predict_model(model, generator, name):
    total_intersection = 0
    total_union = 0
    
    for i in range(len(generator)):
        x_batch, y_true = generator[i]
        for j in range(len(x_batch)):
            x_sample = np.expand_dims(x_batch[j], axis=0)
            y_true_sample = y_true[j]

            if np.all(y_true_sample == 0):
                continue
            
            y_pred_sample = model.predict(x_sample, verbose=0)
            y_pred_sample = np.squeeze(y_pred_sample, axis=1)[0]
            y_pred_sample = np.where(y_pred_sample > 0.5, 1, 0)
            y_pred_sample = y_pred_sample[:, :, 0]
            
            assert y_pred_sample.shape == y_true_sample.shape, f"Shape mismatch: y_pred {y_pred_sample.shape} and y_true {y_true_sample.shape}"
            
            intersection = np.logical_and(y_pred_sample, y_true_sample).sum()
            union = np.logical_or(y_pred_sample, y_true_sample).sum()
            
            total_intersection += intersection
            total_union += union
    
    iou_calculated = total_intersection / total_union if total_union > 0 else 0
    
    # Evaluate the model to get metrics including IOU (from model's perspective)
    model_1_res = model.evaluate(generator, verbose=0)
    
    iou_model = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)
    
    # Create a dataframe with the results
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU (Model)': [iou_model],
        'IOU (Calculated)': [iou_calculated],
        'Total Intersection': [total_intersection],
        'Total Union': [total_union],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
    }, index=[0])
    
    return in_df

# Function to process all folds dynamically for each model
def process_all_folds(folds, batch_size, img_size, output_path):
    total_intersections = {'old': 0, 'ndsi': 0, 'sliding': 0}
    total_unions = {'old': 0, 'ndsi': 0, 'sliding': 0}
    results = []

    for fold in folds:
        # Load models for the current fold
        model_1, model_2, model_3 = load_models_for_fold(fold)
        
        # Load testing data for the fold
        testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()

        # Load chunked data for old, ndsi, and sliding
        chunked_old = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
        chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')
        chunked_sliding = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128')

        # Filter chunked data based on test names
        testing_names_old = filter_chunked(testing_names, chunked_old, 'old')
        testing_names_ndsi = filter_chunked(testing_names, chunked_ndsi, 'monthly_ndsi')
        testing_names_sliding = filter_chunked(testing_names, chunked_sliding, 'monthly_ndsi_sliding')

        # Generate data for each model
        model_vi_gen_old = img_gen_vi(batch_size, img_size, testing_names_old)
        model_vi_gen_ndsi = img_gen_vi(batch_size, img_size, testing_names_ndsi)
        model_vi_gen_sliding = img_gen_vi(batch_size, img_size, testing_names_sliding)

        # Apply the generator and predict for each model
        result_old = predict_model(model_1, model_vi_gen_old, f'Comb_Old_{fold}')
        result_ndsi = predict_model(model_2, model_vi_gen_ndsi, f'Comb_NDSI_{fold}')
        result_sliding = predict_model(model_3, model_vi_gen_sliding, f'Comb_Sliding_{fold}')

        results.append(result_old)
        results.append(result_ndsi)
        results.append(result_sliding)

        # Accumulate the intersections and unions
        total_intersections['old'] += result_old['Total Intersection'].sum()
        total_unions['old'] += result_old['Total Union'].sum()
        total_intersections['ndsi'] += result_ndsi['Total Intersection'].sum()
        total_unions['ndsi'] += result_ndsi['Total Union'].sum()
        total_intersections['sliding'] += result_sliding['Total Intersection'].sum()
        total_unions['sliding'] += result_sliding['Total Union'].sum()

        # Save IOU for each fold
        fold_iou = pd.concat([result_old, result_ndsi, result_sliding])
        fold_output_file = os.path.join(output_path, f'russia_{fold}_iou.csv')
        fold_iou.to_csv(fold_output_file, index=False)

    # Calculate the final IoU for each model
    iou_old_final = total_intersections['old'] / total_unions['old'] if total_unions['old'] != 0 else 0
    iou_ndsi_final = total_intersections['ndsi'] / total_unions['ndsi'] if total_unions['ndsi'] != 0 else 0
    iou_sliding_final = total_intersections['sliding'] / total_unions['sliding'] if total_unions['sliding'] != 0 else 0

    # Create a final results dataframe
    final_results = pd.DataFrame({
        'Model': ['Overall_Old', 'Overall_NDSI', 'Overall_Sliding'],
        'IOU (Calculated)': [iou_old_final, iou_ndsi_final, iou_sliding_final],
        'Total Intersection': [total_intersections['old'], total_intersections['ndsi'], total_intersections['sliding']],
        'Total Union': [total_unions['old'], total_unions['ndsi'], total_unions['sliding']]
    })

    # Concatenate fold results with overall results
    all_results = pd.concat([pd.concat(results, ignore_index=True), final_results], ignore_index=True)

    # Save results to CSV
    output_file = os.path.join(output_path, 'russia_all_fold_iou_combined.csv')
    all_results.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

    # Return the final results
    return iou_old_final, iou_ndsi_final, iou_sliding_final

# Main entry point
# folds = [0, 2, 4]  # List of folds
folds = [0, 1, 2]  # List of folds

batch_size = 20  # Example batch size
img_size = (128, 128)  # Example image size
output_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp'
os.makedirs(output_path, exist_ok=True)

# Process all folds and get the final IoU for each model
start_time = time.time()
iou_old, iou_ndsi, iou_sliding = process_all_folds(folds, batch_size, img_size, output_path)
end_time = time.time()

total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")

# Print the final IoU for each model across all folds
print(f"Overall IoU for old model across all folds: {iou_old}")
print(f"Overall IoU for NDSI model across all folds: {iou_ndsi}")
print(f"Overall IoU for Sliding NDSI model across all folds: {iou_sliding}")


Results saved to /explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp/russia_all_fold_iou_combined.csv
Total execution time: 416.95 minutes
Overall IoU for old model across all folds: 0.8069396442982794
Overall IoU for NDSI model across all folds: 0.7328799271405454
Overall IoU for Sliding NDSI model across all folds: 0.7347724887126243


In [5]:
't'

't'

Now get the IOU saved per ecoregion, we can't use a generator for this I don't think

Better way with all functions

In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import rioxarray
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import geopandas as gpd

# Function to load models for a specific fold
def load_models_for_fold(fold):
    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Load min_max dataframe and use columns '6', '7', '8'
min_max = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop=True)
min_max = min_max[['6', '7', '8']]

# Fit the scaler once on the min_max data
scaler = MinMaxScaler()
scaler.fit(min_max)

# Function to normalize the first three bands (6, 7, 8)
def norm(img):
    img = img[:, :, :3]  # Normalize first three bands
    img = img.astype(float)
    img = np.round(img, 3)
    img[img == 0] = -999
    img[np.isnan(img)] = -999
    img[img == -999] = np.nan
    
    in_shape = img.shape
    img_flat = img.reshape(-1, img.shape[2])
    df_img = pd.DataFrame(img_flat, columns=['6', '7', '8'])
    df_img = pd.concat([min_max, df_img]).reset_index(drop=True)
    scaled_img = pd.DataFrame(scaler.transform(df_img)).iloc[len(min_max):]
    img_scaled = scaled_img.values.reshape(in_shape)
    img_scaled[np.isnan(img)] = -1
    
    return img_scaled

# Function to calculate IoU (Intersection over Union)
def calculate_iou(pred, y):
    pred_binary = pred > 0.5
    y_binary = y > 0.5
    intersection = np.logical_and(pred_binary, y_binary).sum()
    union = np.logical_or(pred_binary, y_binary).sum()
    return intersection, union

# Function to process all files in chunk_files and calculate IoU
def process_chunks_and_calculate_iou(model, chunk_files):
    total_intersection = 0
    total_union = 0
    
    for chunk_file in chunk_files:
        try:
            # Load the chunk (npy file)
            img = np.load(chunk_file)
            
            # Ground truth is the last band
            y_true = img[:, :, -1]
            y_true = np.round(y_true).astype(int)
            y_true[y_true < 0] = 0
            y_true[~np.isin(y_true, [0, 1])] = 0
            
            # Normalize the input data (first three bands)
            img_normalized = norm(img)
            
            # Predict using the model
            pred = model.predict(np.expand_dims(img_normalized, axis=0), verbose=0)[0].squeeze()
            
            # Calculate IoU
            intersection, union = calculate_iou(pred, y_true)
            total_intersection += intersection
            total_union += union
            
        except Exception as e:
            print(f"Error processing {chunk_file}: {e}")
            continue
    
    return total_intersection, total_union

# Main function to calculate IoU for all ecoregions and folds
def calculate_iou_across_folds_ecoregion(ecoregions, eco_df, folds):
    iou_results = []
    
    # Loop through each ecoregion
    for ecoregion in ecoregions:
        print(f"Processing ecoregion {ecoregion}...")

        total_intersection_old = 0
        total_union_old = 0
        total_intersection_ndsi = 0
        total_union_ndsi = 0
        total_intersection_sliding = 0
        total_union_sliding = 0
        
        # Get IDs in this ecoregion
        sub_eco = eco_df[eco_df['ecoregion'] == ecoregion]
        
        for fold in folds:
            print(f"Processing fold {fold} for ecoregion {ecoregion}...")
            
            # Load models for the fold
            model_1, model_2, model_3 = load_models_for_fold(fold)
            
            # Load testing data for the fold
            testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()
            
            # Filter ecoregion and testing names
            sub_fold = sub_eco[sub_eco['ID'].isin(testing_names)]
            fold_ids = sub_fold['ID'].unique().tolist()
            
            if len(fold_ids) == 0:
                continue
            
            # Load chunked data
            old_base_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128'
            ndsi_base_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128'
            sliding_base_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128'
            
            # List all files in the directories
            chunked_old = os.listdir(old_base_path)
            chunked_ndsi = os.listdir(ndsi_base_path)
            chunked_sliding = os.listdir(sliding_base_path)
            
            # Filter chunked data by fold IDs and include full paths
            chunked_old_files = [os.path.join(old_base_path, f) for f in chunked_old if int(f.split('_')[-1].split('.')[0]) in fold_ids]
            chunked_ndsi_files = [os.path.join(ndsi_base_path, f) for f in chunked_ndsi if int(f.split('_')[-1].split('.')[0]) in fold_ids]
            chunked_sliding_files = [os.path.join(sliding_base_path, f) for f in chunked_sliding if int(f.split('_')[-1].split('.')[0]) in fold_ids]

            # Process and calculate IoU for each model
            intersection_old, union_old = process_chunks_and_calculate_iou(model_1, chunked_old_files)
            intersection_ndsi, union_ndsi = process_chunks_and_calculate_iou(model_2, chunked_ndsi_files)
            intersection_sliding, union_sliding = process_chunks_and_calculate_iou(model_3, chunked_sliding_files)
            
            # Accumulate results for this fold
            total_intersection_old += intersection_old
            total_union_old += union_old
            total_intersection_ndsi += intersection_ndsi
            total_union_ndsi += union_ndsi
            total_intersection_sliding += intersection_sliding
            total_union_sliding += union_sliding
        
        # Calculate final IoU for this ecoregion across all folds
        iou_old = total_intersection_old / total_union_old if total_union_old != 0 else 0
        iou_ndsi = total_intersection_ndsi / total_union_ndsi if total_union_ndsi != 0 else 0
        iou_sliding = total_intersection_sliding / total_union_sliding if total_union_sliding != 0 else 0
        
        # Store the results
        iou_results.append({
            'ecoregion': ecoregion,
            'iou_old': iou_old,
            'iou_ndsi': iou_ndsi,
            'iou_sliding': iou_sliding
        })
    
    return pd.DataFrame(iou_results)

# List of folds to process
folds = [0, 2, 4]  # Adjust the number of folds if necessary

# Load ecoregion shapefile
eco = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_good_eco_clip.shp')
all_ecoregions = eco['ecoregion'].unique().tolist()

# Calculate the IoU across all ecoregions and folds
iou_df = calculate_iou_across_folds_ecoregion(all_ecoregions, eco, folds)

# Save the results to CSV
iou_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/iou_ecoregion_results.csv', index=False)

# Print the final IoU values for each ecoregion
print(iou_df)


Segmentation Models: using `tf.keras` framework.
Processing ecoregion Montane Sub-Arctic...
Processing fold 0 for ecoregion Montane Sub-Arctic...
Processing fold 2 for ecoregion Montane Sub-Arctic...


KeyboardInterrupt: 

Do the ecoregion level calculations using the generator, but which will calculate the IoU by summing the intersection and union manually. 

In [4]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
import geopandas as gpd
import time

# Function to load models for a specific fold
def load_models_for_fold(fold):
    # model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    # model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
    #                                      custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
    #                                                      'recall': sm.metrics.Recall(threshold=0.5),
    #                                                      'f1-score': sm.metrics.FScore(threshold=0.5),
    #                                                      'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/russia_good_no_regularize_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})

    return model_1, model_2, model_3

# Function to filter chunked data for specific ecoregions
def filter_chunked(in_names, chunked, data_type):
    filtered_chunked = [name for name in chunked if int(name.split('_')[-1].split('.')[0]) in in_names]
    base_path = f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_{data_type}_subs_0_128/"
    return [os.path.join(base_path, i) for i in filtered_chunked]

# Function to predict using model and accumulate IoU using generator
def predict_model_with_generator(model, generator, name):
    total_intersection = 0
    total_union = 0
    
    for i in range(len(generator)):
        x_batch, y_true = generator[i]
        for j in range(len(x_batch)):
            x_sample = np.expand_dims(x_batch[j], axis=0)
            y_true_sample = y_true[j]

            if np.all(y_true_sample == 0):
                continue
            
            y_pred_sample = model.predict(x_sample, verbose=0)
            y_pred_sample = np.squeeze(y_pred_sample, axis=1)[0]
            y_pred_sample = np.where(y_pred_sample > 0.5, 1, 0)
            y_pred_sample = y_pred_sample[:, :, 0]
            
            intersection = np.logical_and(y_pred_sample, y_true_sample).sum()
            union = np.logical_or(y_pred_sample, y_true_sample).sum()
            
            total_intersection += intersection
            total_union += union

    iou_calculated = total_intersection / total_union if total_union > 0 else 0
    
    # Evaluate the model to get metrics including IOU (from model's perspective)
    model_1_res = model.evaluate(generator, verbose=0)
    
    iou_model = np.round(model_1_res[-2], 2)
    precision = np.round(model_1_res[-5], 2)
    recall = np.round(model_1_res[-4], 2)
    f1 = np.round(model_1_res[-3], 2)
    accuracy = np.round(model_1_res[-1], 2)
    
    # Create a dataframe with the results
    in_df = pd.DataFrame({
        'Model': [name],
        'IOU (Model)': [iou_model],
        'IOU (Calculated)': [iou_calculated],
        'Total Intersection': [total_intersection],
        'Total Union': [total_union],
        'Precision': [precision],
        'Recall': [recall],
        'F-1': [f1],
        'Accuracy': [accuracy]
    }, index=[0])
    
    return in_df, iou_calculated

# Main function to calculate IoU for all ecoregions and folds
def calculate_iou_across_folds_ecoregion(ecoregions, eco_df, folds, batch_size, img_size):
    iou_results = []
    
    for ecoregion in ecoregions:
        print(f"Processing ecoregion {ecoregion}...")

        total_intersections = {'old': 0, 'ndsi': 0, 'sliding': 0}
        total_unions = {'old': 0, 'ndsi': 0, 'sliding': 0}
        iou_per_fold = []

        sub_eco = eco_df[eco_df['ecoregion'] == ecoregion]
        
        for fold in folds:
            print(f"Processing fold {fold} for ecoregion {ecoregion}...")
            
            # Load models for the fold
            model_1, model_2, model_3 = load_models_for_fold(fold)
            
            # Load testing data for the fold
            testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()
            
            # Filter ecoregion and testing names
            sub_fold = sub_eco[sub_eco['ID'].isin(testing_names)]
            fold_ids = sub_fold['ID'].unique().tolist()
            
            if len(fold_ids) == 0:
                continue
            
            # Load chunked data
            chunked_old = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old_subs_0_128')
            chunked_ndsi = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_subs_0_128')
            chunked_sliding = os.listdir('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding_subs_0_128')
            
            # Filter chunked data by fold IDs and include full paths
            testing_names_old = filter_chunked(fold_ids, chunked_old, 'old')
            testing_names_ndsi = filter_chunked(fold_ids, chunked_ndsi, 'monthly_ndsi')
            testing_names_sliding = filter_chunked(fold_ids, chunked_sliding, 'monthly_ndsi_sliding')

            # Generate data for each model
            model_vi_gen_old = img_gen_vi(batch_size, img_size, testing_names_old)
            model_vi_gen_ndsi = img_gen_vi(batch_size, img_size, testing_names_ndsi)
            model_vi_gen_sliding = img_gen_vi(batch_size, img_size, testing_names_sliding)

            # Apply the generator and predict for each model
            result_old, iou_old = predict_model_with_generator(model_1, model_vi_gen_old, f'Comb_Old_{fold}')
            result_ndsi, iou_ndsi = predict_model_with_generator(model_2, model_vi_gen_ndsi, f'Comb_NDSI_{fold}')
            result_sliding, iou_sliding = predict_model_with_generator(model_3, model_vi_gen_sliding, f'Comb_Sliding_{fold}')
            
            # Record IoU for this fold
            iou_per_fold.append({
                'ecoregion': ecoregion,
                'fold': fold,
                'iou_old': iou_old,
                'iou_ndsi': iou_ndsi,
                'iou_sliding': iou_sliding
            })

            # Accumulate the intersections and unions
            total_intersections['old'] += result_old['Total Intersection'].sum()
            total_unions['old'] += result_old['Total Union'].sum()
            total_intersections['ndsi'] += result_ndsi['Total Intersection'].sum()
            total_unions['ndsi'] += result_ndsi['Total Union'].sum()
            total_intersections['sliding'] += result_sliding['Total Intersection'].sum()
            total_unions['sliding'] += result_sliding['Total Union'].sum()

        # Calculate final IoU for this ecoregion across all folds using the sum of intersections and unions
        iou_old_final = total_intersections['old'] / total_unions['old'] if total_unions['old'] != 0 else 0
        iou_ndsi_final = total_intersections['ndsi'] / total_unions['ndsi'] if total_unions['ndsi'] != 0 else 0
        iou_sliding_final = total_intersections['sliding'] / total_unions['sliding'] if total_unions['sliding'] != 0 else 0
        
        # Store final results
        iou_results.append({
            'ecoregion': ecoregion,
            'final_iou_old': iou_old_final,
            'final_iou_ndsi': iou_ndsi_final,
            'final_iou_sliding': iou_sliding_final
        })

    # Return fold-specific and final IoU results
    return pd.DataFrame(iou_per_fold), pd.DataFrame(iou_results)

# List of folds to process
folds = [0, 2, 4]

# Load ecoregion shapefile
eco = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_good_eco_clip.shp')
all_ecoregions = eco['ecoregion'].unique().tolist()

# Parameters for image generator
batch_size = 20
img_size = (128, 128)

start_time = time.time()
# Calculate IoU across all ecoregions and folds
iou_per_fold_df, final_iou_df = calculate_iou_across_folds_ecoregion(all_ecoregions, eco, folds, batch_size, img_size)

# Save the results to CSV
# iou_per_fold_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp/combined_iou_ecoregion_folds.csv', index=False)
# final_iou_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp/combined_iou_ecoregion_final.csv', index=False)

iou_per_fold_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp/russia_iou_ecoregion_folds.csv', index=False)
final_iou_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp/russia_iou_ecoregion_final.csv', index=False)

# Print the final IoU values for each ecoregion
print(final_iou_df)

end_time = time.time()

total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")


Processing ecoregion Montane Sub-Arctic...
Processing fold 0 for ecoregion Montane Sub-Arctic...
Processing fold 2 for ecoregion Montane Sub-Arctic...
Processing fold 4 for ecoregion Montane Sub-Arctic...
Processing ecoregion Arctic Deserts and Tundra...
Processing fold 0 for ecoregion Arctic Deserts and Tundra...
Processing fold 2 for ecoregion Arctic Deserts and Tundra...
Processing fold 4 for ecoregion Arctic Deserts and Tundra...
Processing ecoregion Wetlands...
Processing fold 0 for ecoregion Wetlands...
Processing fold 2 for ecoregion Wetlands...
Processing fold 4 for ecoregion Wetlands...
Processing ecoregion Montane Boreal...
Processing fold 0 for ecoregion Montane Boreal...
Processing fold 2 for ecoregion Montane Boreal...
Processing fold 4 for ecoregion Montane Boreal...
Processing ecoregion Central Taiga...
Processing fold 0 for ecoregion Central Taiga...
Processing fold 2 for ecoregion Central Taiga...
Processing fold 4 for ecoregion Central Taiga...
Processing ecoregion No

NameError: name 'fold_df' is not defined

Maybe it is faster to predict just on the actual tif files not the chunks, lets make a script to do that, note this actually results in tensorflow issues as it prefers same sized images

In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import rioxarray
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import geopandas as gpd

# Function to load models for a specific fold
def load_models_for_fold(fold):
    model_1 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_{fold}_old.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})
    model_2 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})
    model_3 = tf.keras.models.load_model(f"/explore/nobackup/people/spotter5/cnn_mapping/Russia/models/combined_good_ndsi_sliding_{fold}.tf", 
                                         custom_objects={'precision': sm.metrics.Precision(threshold=0.5), 
                                                         'recall': sm.metrics.Recall(threshold=0.5),
                                                         'f1-score': sm.metrics.FScore(threshold=0.5),
                                                         'iou_score': sm.metrics.IOUScore(threshold=0.5)})
    return model_1, model_2, model_3

# Load min_max dataframe and use columns '6', '7', '8'
min_max = pd.read_csv("/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/l8_sent_collection2_global_min_max_cutoff_proj.csv").reset_index(drop=True)
min_max = min_max[['6', '7', '8']]
scaler = MinMaxScaler()
scaler.fit(min_max)

# Function to normalize the first three bands (6, 7, 8)
def norm(img):
    img = img[:, :, :3]  # Normalize first three bands
    img = img.astype(float)
    img[img == 0] = -999
    img[np.isnan(img)] = -999
    img[img == -999] = np.nan
    
    in_shape = img.shape
    img_flat = img.reshape(-1, img.shape[2])
    df_img = pd.DataFrame(img_flat, columns=['6', '7', '8'])
    df_img = pd.concat([min_max, df_img]).reset_index(drop=True)
    scaled_img = pd.DataFrame(scaler.transform(df_img)).iloc[len(min_max):]
    img_scaled = scaled_img.values.reshape(in_shape)
    img_scaled[np.isnan(img)] = -1
    
    return img_scaled

# Function to calculate IoU (Intersection over Union)
def calculate_iou(pred, y):
    pred_binary = pred > 0.5
    y_binary = y > 0.5
    intersection = np.logical_and(pred_binary, y_binary).sum()
    union = np.logical_or(pred_binary, y_binary).sum()
    return intersection, union

# Helper function to load and process TIFF files
def load_tif(file_path, poly_id):
    tif_file = f'median_{poly_id}.tif'
    tif_path = os.path.join(file_path, tif_file)
    img = rioxarray.open_rasterio(tif_path).to_numpy()
    img = np.moveaxis(img, 0, 2)  # Move bands to last axis
    return img

# Function to crop two arrays to the smallest common shape
def crop_to_smallest(pred, y):
    min_height = min(pred.shape[0], y.shape[0])
    min_width = min(pred.shape[1], y.shape[1])
    pred_cropped = pred[:min_height, :min_width]
    y_cropped = y[:min_height, :min_width]
    return pred_cropped, y_cropped

# Main loop to process polygons and accumulate IoU sums
def process_tif_files_for_fold(fold, fold_ids, model_1, model_2, model_3, old_path, ndsi_path, sliding_path):
    total_intersection_old = 0
    total_union_old = 0
    total_intersection_ndsi = 0
    total_union_ndsi = 0
    total_intersection_sliding = 0
    total_union_sliding = 0
    
    for poly_id in fold_ids:
        try:
            # Load TIFFs for dNBR (band 1) and y (band 4) for each model
            y_old = load_tif(old_path, poly_id)[:, :, 3]  # y is band 4
            y_ndsi = load_tif(ndsi_path, poly_id)[:, :, 3]
            y_sliding = load_tif(sliding_path, poly_id)[:, :, 3]

            # Load and normalize images for model predictions
            dnbr_old_img = load_tif(old_path, poly_id)
            dnbr_ndsi_img = load_tif(ndsi_path, poly_id)
            dnbr_sliding_img = load_tif(sliding_path, poly_id)
        
            dnbr_old_norm = norm(dnbr_old_img)
            dnbr_ndsi_norm = norm(dnbr_ndsi_img)
            dnbr_sliding_norm = norm(dnbr_sliding_img)
        
            # Predict using the models
            pred_old = model_1.predict(np.expand_dims(dnbr_old_norm, axis=0), verbose=0)[0].squeeze()
            pred_ndsi = model_2.predict(np.expand_dims(dnbr_ndsi_norm, axis=0), verbose=0)[0].squeeze()
            pred_sliding = model_3.predict(np.expand_dims(dnbr_sliding_norm, axis=0), verbose=0)[0].squeeze()

            # Crop predictions and ground truth to smallest common shape
            pred_old, y_old = crop_to_smallest(pred_old, y_old)
            pred_ndsi, y_ndsi = crop_to_smallest(pred_ndsi, y_ndsi)
            pred_sliding, y_sliding = crop_to_smallest(pred_sliding, y_sliding)

            # Calculate IoU for each model and accumulate intersection and union
            intersection_old, union_old = calculate_iou(pred_old, y_old)
            intersection_ndsi, union_ndsi = calculate_iou(pred_ndsi, y_ndsi)
            intersection_sliding, union_sliding = calculate_iou(pred_sliding, y_sliding)

            # Accumulate for final IoU across all predictions
            total_intersection_old += intersection_old
            total_union_old += union_old

            total_intersection_ndsi += intersection_ndsi
            total_union_ndsi += union_ndsi

            total_intersection_sliding += intersection_sliding
            total_union_sliding += union_sliding

        except Exception as e:
            print(f"Error processing poly_id {poly_id}: {e}")
            continue

    return total_intersection_old, total_union_old, total_intersection_ndsi, total_union_ndsi, total_intersection_sliding, total_union_sliding

# Main function to calculate IoU for all ecoregions and folds
def calculate_iou_across_folds_ecoregion(ecoregions, eco_df, folds, tif_paths):
    iou_results = []
    
    # Loop through each ecoregion
    for ecoregion in ecoregions:
        print(f"Processing ecoregion {ecoregion}...")

        total_intersection_old = 0
        total_union_old = 0
        total_intersection_ndsi = 0
        total_union_ndsi = 0
        total_intersection_sliding = 0
        total_union_sliding = 0
        
        # Get IDs in this ecoregion
        sub_eco = eco_df[eco_df['ecoregion'] == ecoregion]
        
        for fold in folds:
            print(f"Processing fold {fold} for ecoregion {ecoregion}...")
            
            # Load models for the fold
            model_1, model_2, model_3 = load_models_for_fold(fold)
            
            # Load testing data for the fold
            testing_names = pd.read_csv(f'/explore/nobackup/people/spotter5/cnn_mapping/Russia/test_fold_{fold}.csv')['ID'].tolist()
            
            # Filter ecoregion and testing names
            sub_fold = sub_eco[sub_eco['ID'].isin(testing_names)]
            fold_ids = sub_fold['ID'].unique().tolist()
            
            if len(fold_ids) == 0:
                continue
            
            # Process TIFF files for the current fold
            intersection_old, union_old, intersection_ndsi, union_ndsi, intersection_sliding, union_sliding = process_tif_files_for_fold(
                fold, fold_ids, model_1, model_2, model_3, tif_paths['old'], tif_paths['ndsi'], tif_paths['sliding'])

            # Accumulate results for this fold
            total_intersection_old += intersection_old
            total_union_old += union_old
            total_intersection_ndsi += intersection_ndsi
            total_union_ndsi += union_ndsi
            total_intersection_sliding += intersection_sliding
            total_union_sliding += union_sliding
        
        # Calculate final IoU for this ecoregion across all folds
        iou_old = total_intersection_old / total_union_old if total_union_old != 0 else 0
        iou_ndsi = total_intersection_ndsi / total_union_ndsi if total_union_ndsi != 0 else 0
        iou_sliding = total_intersection_sliding / total_union_sliding if total_union_sliding != 0 else 0
        
        # Store the results
        iou_results.append({
            'ecoregion': ecoregion,
            'iou_old': iou_old,
            'iou_ndsi': iou_ndsi,
            'iou_sliding': iou_sliding
        })
    
    return pd.DataFrame(iou_results)

# List of folds to process
folds = [0, 2, 4]  # Adjust the number of folds if necessary

# Load ecoregion shapefile
eco = gpd.read_file('/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_good_eco_clip.shp')
all_ecoregions = eco['ecoregion'].unique().tolist()

# Define paths for the TIFF files
tif_paths = {
    'old': '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_old',
    'ndsi': '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi',
    'sliding': '/explore/nobackup/people/spotter5/cnn_mapping/Russia/anna_monthly_ndsi_sliding'
}

# Calculate the IoU across all ecoregions and folds
iou_df = calculate_iou_across_folds_ecoregion(all_ecoregions, eco, folds, tif_paths)

# Save the results to CSV
iou_df.to_csv('/explore/nobackup/people/spotter5/cnn_mapping/Russia/iou_ecoregion_results.csv', index=False)

# Print the final IoU values for each ecoregion
print(iou_df)


Segmentation Models: using `tf.keras` framework.
Processing ecoregion Montane Sub-Arctic...
Processing fold 0 for ecoregion Montane Sub-Arctic...
Processing fold 2 for ecoregion Montane Sub-Arctic...
Processing fold 4 for ecoregion Montane Sub-Arctic...
Processing ecoregion Arctic Deserts and Tundra...
Processing fold 0 for ecoregion Arctic Deserts and Tundra...
Processing fold 2 for ecoregion Arctic Deserts and Tundra...
Processing fold 4 for ecoregion Arctic Deserts and Tundra...
Processing ecoregion Wetlands...
Processing fold 0 for ecoregion Wetlands...
Processing fold 2 for ecoregion Wetlands...
Processing fold 4 for ecoregion Wetlands...
Error processing poly_id 1454: Graph execution error:

Detected at node 'unet_model/EfficientNetB7_backbone/block2a_expand_conv/Conv2D' defined at (most recent call last):
    File "/home/spotter5/.conda/envs/deeplearning3/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/s