In [1]:
import os
# Setup environment variables
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import rioxarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from MightyMosaic import MightyMosaic
import segmentation_models as sm
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

import warnings
import glob
import tensorflow
warnings.filterwarnings("ignore")



Segmentation Models: using `tf.keras` framework.


In [14]:
class img_gen_vi(tensorflow.keras.utils.Sequence):

    """Keras ``Sequence`` serving ``(x, y)`` batches built from ``.npy`` chips.

    Every file is loaded as a 3-D array indexed ``[row, col, band]``. Three
    bands become the model input (the first three when the file has exactly
    four bands, otherwise bands 6-8) and the last band becomes the binary
    target.

    Constructor arguments: the batch size, the image size as a
    ``(rows, cols)`` tuple, and the list of chip paths. Targets are read from
    the same files as the inputs.

    The class depends on two globals that are not defined here and must exist
    before the first batch is requested: ``min_max_vi`` (rows prepended to
    every image before scaling; presumably a DataFrame of per-band min/max
    values -- TODO confirm where it is built) and ``scaler`` (the object whose
    ``fit_transform`` performs the 0-1 scaling).
    """

    def __init__(self, batch_size, img_size, input_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        # The target mask is the last band of the very same files.
        self.target_img_paths = input_img_paths

    def __len__(self):
        # Floor division: a trailing partial batch is never served, so up to
        # batch_size - 1 files at the end of the list are not used.
        return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) corresponding to batch #idx.

        ``x`` is float32 with shape (batch_size, *img_size, 3) and ``y`` is
        uint8 with shape (batch_size, *img_size). A file that cannot be
        loaded or processed is reported on stdout and its slot is left
        all-zero in BOTH arrays, so a broken input is never paired with a
        real target.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_img_paths = self.input_img_paths[start:stop]
        batch_target_img_paths = self.target_img_paths[start:stop]

        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        y = np.zeros((self.batch_size,) + self.img_size, dtype="uint8")

        for j, (path, target_path) in enumerate(
            zip(batch_img_paths, batch_target_img_paths)
        ):
            try:
                chip = self._load_chip(path)
                # Read the file once; only hit the disk again if someone
                # pointed target_img_paths at a different file.
                target_chip = chip if target_path == path else self._load_chip(target_path)

                x[j] = self._normalize_bands(chip)
                y[j] = self._binary_mask(target_chip)

            except Exception as e:
                print(f"Error loading {path}: {e}")
                # Undo a possible half-written sample before moving on.
                x[j] = 0
                y[j] = 0
                continue  # Skip this file and continue with the next one

        return x, y

    @staticmethod
    def _load_chip(path):
        """Load one chip from disk, rounded to 3 decimals."""
        # SECURITY NOTE(review): allow_pickle=True lets np.load unpickle the
        # file, which can execute arbitrary code from an untrusted .npy. Kept
        # for backward compatibility; switch to the default (False) if no
        # chip was saved as an object array.
        return np.round(np.load(path, allow_pickle=True), 3)

    @staticmethod
    def _normalize_bands(chip):
        """Select the three input bands and scale them; missing pixels become -1."""
        if chip.shape[2] == 4:
            bands = chip[:, :, :-1]
        else:
            bands = chip[:, :, 6:9]

        # astype() copies, so the caller's array is left untouched.
        bands = np.round(bands.astype(float), 3)
        # 0 and -999 are treated as missing, exactly like NaN.
        bands[(bands == 0) | (bands == -999)] = np.nan

        in_shape = bands.shape

        # One row per pixel, one column per band, labelled like min_max_vi.
        pixels = pd.DataFrame(
            bands.reshape(in_shape[0] * in_shape[1], in_shape[2]),
            columns=min_max_vi.columns,
        )

        # Prepend the reference rows so they take part in the scaler fit.
        stacked = pd.concat([min_max_vi, pixels]).reset_index(drop=True)
        scaled = np.asarray(scaler.fit_transform(stacked))

        # Drop however many reference rows were prepended (not a fixed 2).
        scaled = scaled[len(min_max_vi):].reshape(in_shape)

        # np.where builds a new array, so a read-only buffer is never mutated.
        return np.round(np.where(np.isnan(scaled), -1.0, scaled), 3)

    @staticmethod
    def _binary_mask(chip):
        """Return the last band as an int mask where anything but 0/1 is 0."""
        band = chip[:, :, -1].astype(float)
        # Clear NaN/inf BEFORE the integer cast; casting them is undefined.
        band = np.where(np.isfinite(band), band, 0.0)
        mask = band.astype(int)
        mask[~np.isin(mask, [0, 1])] = 0
        return mask


In [2]:
# Requires the notebook-level globals `min_max_vi` and `scaler` to be defined before this generator is used.

class img_gen_vi_one(tf.keras.utils.Sequence):

    """Keras ``Sequence`` that serves one ``.npy`` chip per batch.

    Arguments are the image size as a ``(rows, cols)`` tuple and the list of
    chip paths; the batch size is fixed to 1. Each file is loaded as a 3-D
    array indexed ``[row, col, band]``: three bands form the input (the first
    three when the file has exactly four bands, otherwise bands 6-8) and the
    last band is the binary target.

    Unlike a best-effort loader, nothing is caught here: a file that cannot be
    read or processed raises to the caller.

    Depends on two globals that are not defined in this class:
    ``min_max_vi`` (rows prepended to every image before scaling; presumably
    a DataFrame of per-band min/max values -- TODO confirm) and ``scaler``
    (the object whose ``fit_transform`` performs the 0-1 scaling).
    """

    def __init__(self, img_size, input_img_paths):
        self.batch_size = 1  # Fixed batch size to 1 for individual processing
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = input_img_paths  # Targets come from the same files

    def __len__(self):
        return len(self.input_img_paths)  # One batch per image

    def __getitem__(self, idx):
        """Returns tuple (input, target) corresponding to batch #idx.

        ``x`` is float32 with shape (1, *img_size, 3); ``y`` is uint8 with
        shape (1, *img_size).
        """
        img_path = self.input_img_paths[idx]

        # Read the file once; input bands and target band share the array.
        chip = np.round(np.load(img_path), 3)

        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        y = np.zeros((self.batch_size,) + self.img_size, dtype="uint8")

        x[0] = self._normalize_bands(chip)
        y[0] = self._binary_mask(chip)

        return x, y

    @staticmethod
    def _normalize_bands(chip):
        """Select the three input bands and scale them; missing pixels become -1."""
        if chip.shape[2] == 4:
            bands = chip[:, :, :-1]  # Drop the last band if it has 4 bands
        else:
            bands = chip[:, :, 6:9]  # Otherwise use bands 6 to 8

        # astype() copies, so the loaded array is left untouched.
        bands = np.round(bands.astype(float), 3)
        # 0 and -999 are treated as missing, exactly like NaN.
        bands[(bands == 0) | (bands == -999)] = np.nan

        in_shape = bands.shape
        pixels = pd.DataFrame(
            bands.reshape(in_shape[0] * in_shape[1], in_shape[2]),
            columns=min_max_vi.columns,
        )
        stacked = pd.concat([min_max_vi, pixels]).reset_index(drop=True)

        # Fit on the stacked frame instead of calling transform() alone:
        # nothing in this class fits the scaler, so transform() either raised
        # on a fresh kernel or silently reused whatever data it was last
        # fitted on elsewhere. Fitting here is also what makes prepending the
        # min_max_vi rows meaningful.
        scaled = np.asarray(scaler.fit_transform(stacked))
        scaled = scaled[len(min_max_vi):].reshape(in_shape)

        # Round to 3 decimals like the batched img_gen_vi generator, so both
        # generators feed the model identical tensors for the same file.
        return np.round(np.where(np.isnan(scaled), -1.0, scaled), 3)

    @staticmethod
    def _binary_mask(chip):
        """Return the last band as an int mask where anything but 0/1 is 0."""
        band = chip[:, :, -1].astype(float)
        # Clear NaN/inf BEFORE the integer cast; casting them is undefined.
        band = np.where(np.isfinite(band), band, 0.0)
        mask = band.astype(int)
        mask[~np.isin(mask, [0, 1])] = 0
        return mask
        
# Initialize the generator with batch size 1
# models_vi_gen = img_gen_vi_one(img_size, chunk_files)

# # Predict the model using the generator with batch size 1
# gen_preds = predict_model(model, models_vi_gen, 'Comb_Sliding_1')

# print(gen_preds)

In [15]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["SM_FRAMEWORK"] = "tf.keras"
import numpy as np
import tensorflow as tf
import segmentation_models as sm
import pandas as pd
import time

# Load the three trained models that are compared below.

def _load_segmentation_model(model_path):
    """Load a saved Keras model from ``model_path``.

    A fresh set of segmentation_models metric objects is passed through
    ``custom_objects`` under the names 'precision', 'recall', 'f1-score' and
    'iou_score' so Keras can resolve those names while deserializing.
    """
    return tf.keras.models.load_model(
        model_path,
        custom_objects={
            'precision': sm.metrics.Precision(threshold=0.5),
            'recall': sm.metrics.Recall(threshold=0.5),
            'f1-score': sm.metrics.FScore(threshold=0.5),
            'iou_score': sm.metrics.IOUScore(threshold=0.5),
        },
    )


_MODEL_DIR = "/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/models"

model_1 = _load_segmentation_model(_MODEL_DIR + "/nbac_mtbs_regularize_50_global_norm_old.tf")

model_2 = _load_segmentation_model(_MODEL_DIR + "/nbac_mtbs_regularize_50_global_norm_ndsi.tf")

model_3 = _load_segmentation_model(_MODEL_DIR + "/nbac_mtbs_ndsi_sliding.tf")



# Function to predict using model and accumulate IoU across batches
def predict_model(model, generator, name):
    """Score ``model`` on every batch of ``generator`` and return a one-row summary.

    Two IoU figures are reported:

    * 'IOU (Calculated)' -- pooled over all samples: each sample is predicted
      on its own, thresholded at 0.5, and its intersection/union pixel counts
      are added to running totals. Samples whose ground truth is entirely
      zero are skipped. The value is 0 when the pooled union is 0.
    * 'IOU (Model)' -- taken from ``model.evaluate(generator)``, which sees
      every sample, including the ones skipped above.

    Parameters:
        model: object exposing ``predict(x, verbose=0)`` and
            ``evaluate(generator, verbose=0)``.
        generator: indexable with ``len()``; item ``i`` is an ``(x, y)`` pair.
        name: label written to the 'Model' column.

    Returns:
        pandas.DataFrame with a single row (index 0).
    """
    pooled_intersection = 0
    pooled_union = 0

    for batch_no in range(len(generator)):
        inputs, truths = generator[batch_no]

        for sample_no, sample in enumerate(inputs):
            truth = truths[sample_no]

            # Nothing labelled positive in this sample: leave it out of the pooled IoU.
            if not np.any(truth):
                continue

            raw = model.predict(np.expand_dims(sample, axis=0), verbose=0)
            # The prediction must carry a singleton axis 1; squeeze it, take
            # the only batch entry, binarise at 0.5 and keep channel 0.
            binary = np.where(np.squeeze(raw, axis=1)[0] > 0.5, 1, 0)
            pred_mask = binary[:, :, 0]

            assert pred_mask.shape == truth.shape, f"Shape mismatch: y_pred {pred_mask.shape} and y_true {truth.shape}"

            pooled_intersection += np.logical_and(pred_mask, truth).sum()
            pooled_union += np.logical_or(pred_mask, truth).sum()

    if pooled_union > 0:
        iou_calculated = pooled_intersection / pooled_union
    else:
        iou_calculated = 0

    # Metrics as computed by the model itself over the whole generator.
    # NOTE(review): assumes the last five entries returned by evaluate() are
    # precision, recall, f1, IoU, accuracy in that order -- confirm against
    # the metric order used when the model was compiled.
    scores = model.evaluate(generator, verbose=0)
    precision, recall, f1, iou_model, accuracy = (
        np.round(scores[pos], 2) for pos in (-5, -4, -3, -2, -1)
    )

    summary = {
        'Model': name,
        'IOU (Model)': iou_model,
        'IOU (Calculated)': iou_calculated,
        'Total Intersection': pooled_intersection,
        'Total Union': pooled_union,
        'Precision': precision,
        'Recall': recall,
        'F-1': f1,
        'Accuracy': accuracy,
    }

    return pd.DataFrame({column: [value] for column, value in summary.items()}, index=[0])

# Evaluate the three loaded models on their own test lists and write one CSV.
def process_all_folds(batch_size, img_size, output_path):
    """Evaluate model_1/model_2/model_3 on their test files and save the results.

    For each variant ('old', 'ndsi', 'sliding') the function reads two CSV
    files, joins their 'Files' columns into one list, wraps the list in an
    ``img_gen_vi`` generator and scores the matching global model with
    ``predict_model``. The per-model rows plus three 'Overall_*' rows are
    written to ``<output_path>/north_america.csv``.

    Parameters:
        batch_size: batch size handed to ``img_gen_vi``.
        img_size: image size tuple handed to ``img_gen_vi``.
        output_path: existing directory that receives the CSV.

    Returns:
        Tuple ``(iou_old, iou_ndsi, iou_sliding)`` of pooled IoU values.
    """
    csv_dir = '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training'

    # Variant -> CSV files, listed in the order their paths are concatenated.
    test_csvs = {
        'old': ('mtbs_old_testing_files.csv', 'nbac_old_testing_files.csv'),
        'ndsi': ('nbac_monthly_ndsi_testing_files.csv', 'mtbs_monthly_ndsi_testing_files.csv'),
        'sliding': ('nbac_monthly_ndsi_sliding_testing_files.csv', 'mtbs_monthly_ndsi_sliding_testing_files.csv'),
    }

    # Phase 1: read every file list before any model work starts.
    test_files = {}
    for variant, csv_names in test_csvs.items():
        paths = []
        for csv_name in csv_names:
            paths = paths + pd.read_csv(f"{csv_dir}/{csv_name}")['Files'].tolist()
        test_files[variant] = paths

    # Phase 2: one generator per variant.
    generators = {
        variant: img_gen_vi(batch_size, img_size, paths)
        for variant, paths in test_files.items()
    }

    # Phase 3: score each model on its own generator.
    scored = {}
    for variant, model, row_label in (
        ('old', model_1, 'Comb_Old'),
        ('ndsi', model_2, 'Comb_NDSI'),
        ('sliding', model_3, 'Comb_Sliding'),
    ):
        scored[variant] = predict_model(model, generators[variant], row_label)

    variants = list(scored)

    # Pooled pixel counts and the IoU derived from them (0 when the union is empty).
    intersections = {v: 0 + scored[v]['Total Intersection'].sum() for v in variants}
    unions = {v: 0 + scored[v]['Total Union'].sum() for v in variants}
    final_iou = {
        v: (intersections[v] / unions[v] if unions[v] != 0 else 0)
        for v in variants
    }

    final_results = pd.DataFrame({
        'Model': ['Overall_Old', 'Overall_NDSI', 'Overall_Sliding'],
        'IOU (Calculated)': [final_iou[v] for v in variants],
        'Total Intersection': [intersections[v] for v in variants],
        'Total Union': [unions[v] for v in variants],
    })

    # Per-model rows first, overall rows after them.
    per_model_rows = pd.concat(list(scored.values()), ignore_index=True)
    all_results = pd.concat([per_model_rows, final_results], ignore_index=True)

    output_file = os.path.join(output_path, 'north_america.csv')
    all_results.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

    return final_iou['old'], final_iou['ndsi'], final_iou['sliding']

# Run configuration and driver
batch_size = 20  # number of chips per generator batch
img_size = (128, 128)  # spatial size of every chip
output_path = '/explore/nobackup/people/spotter5/cnn_mapping/Russia/spatial_compare_temp'
os.makedirs(output_path, exist_ok=True)

# Time the full evaluation of all three models.
start_time = time.time()
iou_old, iou_ndsi, iou_sliding = process_all_folds(batch_size, img_size, output_path)
end_time = time.time()

total_time = (end_time - start_time) / 60
print(f"Total execution time: {total_time:.2f} minutes")

# Report the pooled IoU of each model.
for model_label, model_iou in (
    ("old", iou_old),
    ("NDSI", iou_ndsi),
    ("Sliding NDSI", iou_sliding),
):
    print(f"Overall IoU for {model_label} model across all folds: {model_iou}")


Error loading /explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy: Failed to interpret file '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy' as a pickle
Error loading /explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy: Failed to interpret file '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy' as a pickle
Error loading /explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy: Failed to interpret file '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy' as a pickle
Error loading /explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy: Failed to interpret file '/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy' as a pickle
Results saved to /explore/nobackup/peopl

In [13]:
# Debugging aid: load the chip that the evaluation run reported as unreadable,
# this time with np.load's default allow_pickle=False. The output of this cell
# shows the call raising ValueError instead of returning an array.
l = np.load('/explore/nobackup/people/spotter5/cnn_mapping/nbac_training/nbac_old_subs_0_128/5_1_5976.npy')

l

ValueError: Cannot load file containing pickled data when allow_pickle=False