### This notebook explores a way to select a region among the 58 candidate regions, based on the following criteria: the AUCs of the classification tasks on ABCD and dHCP, and the shift of the gravity centers between adult and newborn crops.

In [64]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt


In [65]:
# Read the ABCD AUC values per region (one row per region, indexed by region name).
# NOTE: the module is imported as `pd`; the bare name `pandas` is not defined
# in a fresh kernel, so `pd.read_csv` must be used here.
abcd_auc_df = pd.read_csv('/neurospin/dico/rmenasria/Runs/03_main/Output/csv/prematurity_AUC_by_region_ABCD_3006_thresholded.csv', index_col="region")

# One Series per AUC column of the table
abcd_auc_27 = abcd_auc_df['AUC_27']
abcd_auc_32 = abcd_auc_df['AUC_27_32']
abcd_auc_37 = abcd_auc_df['AUC_37']



In [66]:
# Read the dHCP AUC values per region (one row per region, indexed by region name).
# NOTE: the module is imported as `pd`; the bare name `pandas` is not defined
# in a fresh kernel, so `pd.read_csv` must be used here.
dhcp_auc_df = pd.read_csv('/neurospin/dico/rmenasria/Runs/03_main/Output/csv/prematurity_AUC_by_region_thresholded_1205.csv', index_col="region")
# One Series per AUC column of the table
dhcp_auc_27 = dhcp_auc_df['AUC_27']
dhcp_auc_32 = dhcp_auc_df['AUC_27_32']
dhcp_auc_37 = dhcp_auc_df['AUC_37']

In [67]:
# Read the gravity center shift values per region
gravity_center_shifts = pd.read_csv('/neurospin/dico/rmenasria/Runs/03_main/Output/csv/all_regions_gravity_center_shifts.csv', index_col="region")
# Remove every "." from the region names (presumably so that they match the
# region names used as index in the AUC tables -- verify against those CSVs).
# regex=False forces a literal match: as a regular expression "." would match
# any character, and the default value of `regex` differs across pandas versions.
gravity_center_shifts.index = gravity_center_shifts.index.str.replace('.', '', regex=False)
# Per-axis shift columns and the overall distance column
shift_x = gravity_center_shifts['shift_x']
shift_y = gravity_center_shifts['shift_y']
shift_z = gravity_center_shifts['shift_z']
shift_distance = gravity_center_shifts['distance']

In [68]:
# Merge every per-region metric into a single DataFrame.
# Building a DataFrame from a dict of Series aligns them on their index labels,
# so a region missing from one source gets NaN in the corresponding column.
combined_df = pd.DataFrame(
    data=dict(
        abcd_auc_27=abcd_auc_27,
        abcd_auc_32=abcd_auc_32,
        abcd_auc_37=abcd_auc_37,
        dhcp_auc_27=dhcp_auc_27,
        dhcp_auc_32=dhcp_auc_32,
        dhcp_auc_37=dhcp_auc_37,
        shift_distance=shift_distance,
    )
)


In [69]:
def compute_weighted_region_scores(df, weights=None, min_valid=5, invert_shift=True):
    """Rank the rows of ``df`` by a weighted average of per-column z-scores.

    Every column is standardised (column mean subtracted, then divided by the
    column standard deviation), multiplied by its weight, and the weighted
    values are averaged per row over the metrics that are not NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per region, one column per metric.
    weights : dict, optional
        Maps a column name to its weight. Columns of ``df`` that are absent
        from the mapping are left out of the score. When omitted, every
        column gets a weight of 1.
    min_valid : int, default 5
        Minimum number of non-NaN weighted metrics a row needs to be kept.
    invert_shift : bool, default True
        When True and a ``shift_distance`` column exists, its z-score is
        negated so that a smaller shift contributes a higher score.

    Returns
    -------
    pandas.DataFrame
        The weighted z-scores of the retained rows, followed by the
        ``n_valid`` and ``global_score`` columns, sorted by ``global_score``
        in decreasing order. ``df`` itself is not modified.
    """
    # Column-wise standardisation (z-score)
    standardized = df.sub(df.mean()).div(df.std())

    # Flip the sign of the shift so that "smaller is better" becomes "higher is better"
    if invert_shift and "shift_distance" in standardized.columns:
        standardized["shift_distance"] = -standardized["shift_distance"]

    # Default: every metric counts equally
    if weights is None:
        weights = dict.fromkeys(standardized.columns, 1)

    # Only the metrics that actually have a weight take part in the score
    scored_cols = [name for name in standardized.columns if name in weights]
    col_weights = [weights[name] for name in scored_cols]

    # Scale each retained column by its own weight
    weighted = standardized[scored_cols].mul(col_weights, axis=1)

    # Count the usable metrics per row (done before any extra column is added)
    weighted["n_valid"] = weighted.notna().sum(axis=1)

    # Drop the rows that do not have enough usable metrics
    enough_metrics = weighted["n_valid"] >= min_valid
    filtered = weighted.loc[enough_metrics].copy()

    # Weighted mean: sum of weighted z-scores over the sum of the weights of
    # the metrics that are present for that row
    present = filtered[scored_cols].notna()
    weight_totals = present.mul(col_weights, axis=1).sum(axis=1)
    weighted_sums = filtered[scored_cols].sum(axis=1, skipna=True)
    filtered["global_score"] = weighted_sums / weight_totals

    # Best score first
    return filtered.sort_values(by="global_score", ascending=False)


In [87]:
# Relative weight of each metric in the global score: the six AUC columns
# add up to 10, and the gravity-center shift distance alone counts for 6.
weights = dict(
    abcd_auc_27=2,
    abcd_auc_32=2,
    abcd_auc_37=1,
    dhcp_auc_27=2,
    dhcp_auc_32=2,
    dhcp_auc_37=1,
    shift_distance=6,
)

# Score the regions that have at least 5 usable metrics; the shift z-score is negated
ranked = compute_weighted_region_scores(
    combined_df,
    weights=weights,
    min_valid=5,
    invert_shift=True,
)

# Show the 10 best regions according to the global score
print(ranked.head(10)[["global_score"]])


                                   global_score
region                                         
FCMpost-SpC_right                      0.778574
SC-sylv_right                          0.776293
SsP-SPaint_right                       0.755041
FPO-SCu-ScCal_right                    0.754442
FCMpost-SpC_left                       0.682105
ScCal-SLi_right                        0.557689
SC-sylv_left                           0.513005
fronto-parietal_medial_face_right      0.507278
fronto-parietal_medial_face_left       0.498242
SC-SPoC_left                           0.491670
