In [1]:
import torch
from model import load_resnet_model, register_model_with_hook
from utils import sorted_file_paths
from dataloader import create_dataloader, create_dataset
import numpy as np
import pandas as pd
from scipy.stats import f_oneway
from lots import LOTS, calculate_activation_map 
from tqdm import tqdm

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [2]:
# Location of the test records and the batch size used for evaluation.
TEST_RECORDS_DIR = './data/test_records_with_land_cover/test'
TEST_BATCH_SIZE = 16

# Build the evaluation pipeline: file list -> dataset -> batched loader.
test_files = sorted_file_paths(TEST_RECORDS_DIR)
test_dataset = create_dataset(test_files, regression=True, half=False)
test_loader = create_dataloader(test_dataset, TEST_BATCH_SIZE)

In [3]:
# Build a ResNet-50 with a single output unit and restore trained weights.
model = load_resnet_model('resnet50', num_classes=1)

# Checkpoint under analysis. A previously used alternative was
# './ex_1/checkpoint_epoch_25.pth'.
CHECKPOINT_PATH = './ex_5/checkpoint_epoch_22.pth'

# map_location remaps the stored tensors onto the active device, so a
# checkpoint written from a CUDA run still loads when only the CPU is available.
statedict = torch.load(CHECKPOINT_PATH, map_location=device)
model.load_state_dict(statedict)

# Inference mode (affects layers such as dropout / batch-norm), then move the
# weights to the device the inputs will be sent to.
model.eval()
model = model.to(device)

# Hook-based accessor for intermediate feature maps; passed to LOTS later on.
get_feature_maps = register_model_with_hook(model)



In [4]:
# Land-cover classes. The position of a name in this list is the integer value
# that is matched against the land-cover mask below.
category_names = [
    "water",
    "trees",
    "grass",
    "flooded_vegetation",
    "crops",
    "shrub_and_scrub",
    "built",
    "bare",
    "snow_and_ice",
]

# Accumulators: one list per land-cover class (one entry per image), plus flat
# per-image lists for targets, model outputs and location strings.
category_areas = {name: [] for name in category_names}
average_intensities = {name: [] for name in category_names}
all_labels, all_predictions, all_locations = [], [], []

for data, location, labels in tqdm(test_loader):
    # Plain forward pass for the predictions; no gradients needed here.
    with torch.no_grad():
        data = data.to(device)
        labels = labels.to(device)
        predictions = model(data[:, :24, :, :])  # only the first 24 channels go to the model

    # Store flattened targets / outputs and a "<a> + <b>" string per sample location.
    all_labels.extend(labels.cpu().numpy().flatten())
    all_predictions.extend(predictions.cpu().numpy().flatten())
    all_locations.extend(f"{row[0]} + {row[1]}" for row in location.cpu().numpy())

    for i in range(data.shape[0]):
        # Original image (with a batch axis of 1) and its LOTS-perturbed counterpart.
        imageinit = data[i, :24, :, :].unsqueeze(0)
        imageadv = LOTS(imageinit, 10, model, get_feature_maps, device)

        # Un-normalised activation map derived from the original / perturbed pair.
        activation_map = calculate_activation_map(
            imageinit.squeeze(),
            imageadv.squeeze(),
            filter_size=5,
            normalize=False,
        )

        # Channel 24 is assumed to carry the land-cover mask.
        land_cover_mask = data[i, 24, :, :]

        for class_id, class_name in enumerate(category_names):
            class_mask = (land_cover_mask == class_id).float()
            pixel_count = class_mask.sum()
            category_areas[class_name].append(pixel_count.item())

            if pixel_count > 0:
                # Mean activation over the pixels that belong to this class.
                class_mean = (activation_map * class_mask).sum() / pixel_count
                average_intensities[class_name].append(class_mean.item())
            else:
                # Class absent from this image: record a missing value.
                average_intensities[class_name].append(np.nan)

# One row per image: per-class mean intensities, per-class areas ("<class>_area"),
# then the location string, target and prediction.
intensities_df = pd.DataFrame(average_intensities)
areas_df = pd.DataFrame(category_areas)
result_df = pd.concat(
    [intensities_df, areas_df.add_suffix("_area")],
    axis=1,
).assign(
    Location=all_locations,
    Label=all_labels,
    Prediction=all_predictions,
)

100%|██████████| 131/131 [14:39<00:00,  6.72s/it]


In [5]:
# Persist the per-image results table (row index omitted from the file).
results_csv_path = './analysis/quantitative/model_5.csv'
result_df.to_csv(results_csv_path, index=False)

In [10]:
# Average of the mean activation intensity for each land-cover category within
# each wealth index range.
# NOTE(review): the 'Wealth_Index_Range' and '<category>_mean_activation_intensity'
# columns are not created by any cell visible above this one (the loop cell names
# its intensity columns by bare category) -- confirm the cell that derives them
# is still present so the notebook survives Restart & Run All.
intensity_columns = [f'{category}_mean_activation_intensity' for category in category_names]

# Select the numeric intensity columns *before* aggregating: taking .mean() over
# the whole frame also hits the string 'Location' column, which raises on
# pandas >= 2.0. observed=False keeps ranges without any rows (shown as NaN)
# when the grouping column is categorical, independent of the pandas default.
grouped_means = (
    result_df
    .groupby('Wealth_Index_Range', observed=False)[intensity_columns]
    .mean()
)

Unnamed: 0_level_0,water_mean_activation_intensity,trees_mean_activation_intensity,grass_mean_activation_intensity,flooded_vegetation_mean_activation_intensity,crops_mean_activation_intensity,shrub_and_scrub_mean_activation_intensity,built_mean_activation_intensity,bare_mean_activation_intensity,snow_and_ice_mean_activation_intensity
Wealth_Index_Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Very Low,,,,,,,,,
Low,0.030511,3e-06,0.000578,0.001535,0.003978,0.000724,0.000463,0.007138,0.003332
Medium,0.013293,1.8e-05,0.001625,0.004418,0.001967,0.001371,0.002592,0.037332,0.016851
High,0.0072,3.5e-05,0.002366,0.003418,0.002379,0.001356,0.003599,0.020379,0.019043
Very High,0.001545,4.5e-05,0.00067,0.00061,2.8e-05,0.000378,0.000263,0.010379,0.045546


In [12]:
# Plain-text dump of the grouped means table.
grouped_means_text = str(grouped_means)
print(grouped_means_text)

                    water_mean_activation_intensity  \
Wealth_Index_Range                                    
Very Low                                        NaN   
Low                                        0.030511   
Medium                                     0.013293   
High                                       0.007200   
Very High                                  0.001545   

                    trees_mean_activation_intensity  \
Wealth_Index_Range                                    
Very Low                                        NaN   
Low                                        0.000003   
Medium                                     0.000018   
High                                       0.000035   
Very High                                  0.000045   

                    grass_mean_activation_intensity  \
Wealth_Index_Range                                    
Very Low                                        NaN   
Low                                        0.000578   
Medium  

In [13]:
# One-way ANOVA per land-cover category: does the mean activation intensity
# differ between wealth index ranges?
wealth_ranges = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
anova_results = {}

for category in category_names:
    intensity_column = f'{category}_mean_activation_intensity'

    # Non-missing intensities of this category, one Series per wealth range.
    samples_by_range = [
        result_df.loc[result_df['Wealth_Index_Range'] == wealth_range, intensity_column].dropna()
        for wealth_range in wealth_ranges
    ]

    # Ranges without any observation are left out: an empty group would turn
    # the whole test into NaN. Previously 'Very Low' was extracted but never
    # passed to the test; it is now included whenever it actually has data.
    populated_samples = [samples for samples in samples_by_range if len(samples) > 0]

    if len(populated_samples) >= 2:
        f_stat, p_value = f_oneway(*populated_samples)
    else:
        # ANOVA needs at least two groups to compare.
        f_stat, p_value = np.nan, np.nan

    # Store the results for each category
    anova_results[category] = {'F-Statistic': f_stat, 'P-Value': p_value}

    # Print the results for each category
    print(f"ANOVA for {category}:")
    print(f"F-Statistic: {f_stat}, P-Value: {p_value}\n")

ANOVA for water:
F-Statistic: 7.498186044618382, P-Value: 5.482564075481055e-05

ANOVA for trees:
F-Statistic: 1.8323226855670456, P-Value: 0.13917453781405828

ANOVA for grass:
F-Statistic: 2.6723232621319277, P-Value: 0.04594181731377569

ANOVA for flooded_vegetation:
F-Statistic: 4.225989302188007, P-Value: 0.005478317048878044

ANOVA for crops:
F-Statistic: 1.4725816896616353, P-Value: 0.2200833709278355

ANOVA for shrub_and_scrub:
F-Statistic: 2.873148439205878, P-Value: 0.03505236204556475

ANOVA for built:
F-Statistic: 2.6976480020414346, P-Value: 0.044417287934120434

ANOVA for bare:
F-Statistic: 11.349699332933843, P-Value: 2.3636551369030527e-07

ANOVA for snow_and_ice:
F-Statistic: 2.386597480354004, P-Value: 0.07022271508555081

