In [7]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import cohen_kappa_score, mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import spearmanr
from scipy.optimize import minimize
from functools import reduce
import os
import joblib

In [8]:
### Target groups: ordinal targets first, then the continuous percent-positive-area targets

ordinal_targets = ['ADNC', 'Braak', 'Thal', 'CERAD']
continuous_targets = [f'percent {marker} positive area'
                      for marker in ('6e10', 'AT8', 'GFAP', 'NeuN')]
all_target_columns = ordinal_targets + continuous_targets

### Integer class -> label string used in the submission format, one mapping per ordinal target
MAPS = {
    'ADNC': dict(enumerate(["Not AD", "Low", "Intermediate", "High"])),
    'Braak': {stage: f"Braak {numeral}"
              for stage, numeral in enumerate(['0', 'I', 'II', 'III', 'IV', 'V', 'VI'])},
    'CERAD': dict(enumerate(["Absent", "Sparse", "Moderate", "Frequent"])),
    'Thal': {phase: f"Thal {phase}" for phase in range(6)}
}

In [9]:
# 1. Reorder datasets

### Read the predefined column order from CSV
column_order = pd.read_csv('data/column_order.csv')['column_name'].tolist()

### Load datasets
df_mtg = pd.read_csv('data/dataset_mtg_dropped.csv')
df_a9 = pd.read_csv('data/dataset_a9_dropped.csv')

### Reorder datasets using the predefined column order
### (selecting by list raises KeyError if a listed column is missing from a dataset)
df_mtg = df_mtg[column_order]
df_a9 = df_a9[column_order]

### Save each reordered dataset under its own region's file name.
### The MTG frame used to be written to the a9 file and the A9 frame to the mtg file,
### so the file names did not match their contents.
df_mtg.to_csv('data/false_dataset_mtg_ordered.csv', index=False)
df_a9.to_csv('data/false_dataset_a9_ordered.csv', index=False)

In [10]:
# 2. Test models

def test_model(df, target, region):
    """Score the saved ``region``/``target`` model on ``df`` and write the results to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Test data. Must contain 'Donor ID' and every column listed in
        ``all_target_columns``; all remaining columns are passed to the model as features.
    target : str
        Target column to evaluate. Membership in ``ordinal_targets`` selects the
        ordinal (thresholded) branch; anything else is treated as continuous.
    region : str
        Region label used to build the model path and the output paths.

    Returns
    -------
    dict
        Metric name -> value. Ordinal targets report quadratic weighted kappa, MAE,
        Spearman correlation and the thresholds used; continuous targets report the
        concordance correlation coefficient, MSE and R2.

    Side effects
    ------------
    Writes ``./output/{region}/{region}_{target}_test_predictions.csv`` and
    ``./output/{region}/{region}_{target}_test_metrics.csv``.
    """
    ## Restore the fitted estimator from disk.
    ## NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
    model = joblib.load(f'./models/{region}/{region}_{target}_model.pkl')

    ## Features are whatever remains once the donor id and all target columns are removed
    remaining_targets = [name for name in all_target_columns if name != target]
    X = df.drop(columns=['Donor ID', target] + remaining_targets)
    y = df[target].values
    donor_ids = df['Donor ID'].values

    ## Raw model output; for ordinal targets it is binned into classes below
    y_pred = model.predict(X)

    if target not in ordinal_targets:
        ## Continuous targets (6e10, AT8, GFAP, NeuN) - Goal 2

        ## Concordance correlation coefficient (primary metric), using sample (ddof=1)
        ## variances to match the default normalisation of np.cov
        covariance = np.cov(y, y_pred)[0, 1]
        variance_sum = np.var(y, ddof=1) + np.var(y_pred, ddof=1)
        mean_gap = np.mean(y) - np.mean(y_pred)
        ccc = (2 * covariance) / (variance_sum + mean_gap**2)

        ## CCC primary, MSE and R2 secondary
        metrics = {
            'concordance_correlation_coefficient': ccc,
            'mean_squared_error': mean_squared_error(y, y_pred),
            'r2_score': r2_score(y, y_pred)
        }
        predicted_value = y_pred
    else:
        ## Ordinal targets (ADNC, Braak, Thal, CERAD) - Goal 1

        ## Thresholds are read from an attribute stored on the loaded model object
        optimized_thresholds = model.optimized_thresholds_

        ## Bin the continuous predictions into ordinal class indices
        y_classes = np.digitize(y_pred, bins=optimized_thresholds)

        ## QWK primary, MAE and Spearman secondary; thresholds are recorded for traceability
        metrics = {
            'quadratic_weighted_kappa': cohen_kappa_score(y, y_classes, weights='quadratic'),
            'mean_absolute_error': mean_absolute_error(y, y_classes),
            'spearman_correlation': spearmanr(y, y_classes)[0],
            'thresholds_used': str(optimized_thresholds.round(3))
        }
        predicted_value = y_classes

    ## One row per donor: true value next to the (class or continuous) prediction
    predictions_df = pd.DataFrame({'Donor ID': donor_ids, 'true_value': y, 'predicted_value': predicted_value})

    ## Save test predictions to CSV
    os.makedirs(f'./output/{region}', exist_ok=True)
    predictions_df.to_csv(f'./output/{region}/{region}_{target}_test_predictions.csv', index=False)

    ## Save test metrics to CSV as (metric_name, metric_value) rows
    metrics_df = pd.DataFrame(list(metrics.items()), columns=['metric_name', 'metric_value'])
    metrics_df.to_csv(f'./output/{region}/{region}_{target}_test_metrics.csv', index=False)

    return metrics

### Evaluate every target with the trained model of each region;
### test_metrics is keyed as test_metrics[region][target] -> metrics dict
datasets = dict(MTG=df_mtg, A9=df_a9)
test_metrics = {
    region_name: {target_name: test_model(region_df, target_name, region_name)
                  for target_name in all_target_columns}
    for region_name, region_df in datasets.items()
}

In [11]:
# 3. Format test submissions

def format_evaluation(f, mode='train'):
    """Turn one predictions CSV into a two-column frame in submission format.

    Parameters
    ----------
    f : str
        Path to a predictions CSV named ``{region}_{target}_{mode}_predictions.csv``
        that contains 'Donor ID' and 'predicted_value' columns.
    mode : str, default 'train'
        Tag embedded in the file name ('train' or 'test'); it is stripped together
        with the ``_predictions.csv`` suffix to recover the target name.

    Returns
    -------
    pandas.DataFrame
        'Donor ID' (as str) plus one ``predicted <target>`` column. Targets present
        in ``MAPS`` are rounded to int and mapped to their label strings; all other
        targets are clipped to [0, 100] and returned as float.
    """
    ## Drop the directory and the mode suffix, then discard the leading region token
    stem = f.rsplit('/', 1)[-1].replace(f'_{mode}_predictions.csv', '')
    target = stem.partition('_')[2]

    ## Load the predictions; donor ids are always handled as strings
    raw = pd.read_csv(f)
    donor_ids = raw['Donor ID'].astype(str)
    predicted = raw['predicted_value']

    ## Categorical targets become label strings, continuous ones clipped floats in [0,100]
    if target in MAPS:
        formatted = predicted.round().astype(int).map(MAPS[target])
    else:
        formatted = predicted.clip(0, 100).astype(float)

    ## Submission column name: "predicted <short target name>"
    col_name = f"predicted {target.replace('percent ','').replace(' positive area','')}"
    return pd.DataFrame({'Donor ID': donor_ids, col_name: formatted})

### Build one submission file per region by joining all per-target predictions on donor
for region in ['MTG', 'A9']:
    files = [f'./output/{region}/{region}_{t}_test_predictions.csv' for t in all_target_columns]

    ## Inner-join the formatted frames one at a time on 'Donor ID'
    formatted_frames = [format_evaluation(path, 'test') for path in files]
    submission = formatted_frames[0]
    for frame in formatted_frames[1:]:
        submission = submission.merge(frame, on='Donor ID')

    submission.to_csv(f'./output/{region}/submission_{region}_test.csv', index=False)

In [12]:
# 4. Compare models between regions

def compare_target(target):
    """Compare the MTG and A9 test scores of one target on its primary metric.

    Parameters
    ----------
    target : str
        Target name; must be a key of ``test_metrics['MTG']`` and ``test_metrics['A9']``.
        Targets in ``ordinal_targets`` are compared on quadratic weighted kappa (QWK),
        all others on the concordance correlation coefficient (CCC).

    Returns
    -------
    dict
        Keys 'Target', 'Metric', 'MTG_Score', 'A9_Score', 'Best_Region', 'Best_Score'
        and 'Difference'. Ties are reported as 'A9'. If exactly one score is NaN the
        other region is reported as best; if both are NaN the result is 'A9' with NaN.
    """
    ## Determine primary metric based on target type
    is_ordinal = target in ordinal_targets
    metric_name = 'quadratic_weighted_kappa' if is_ordinal else 'concordance_correlation_coefficient'

    ## Extract metrics from both regions
    mtg_metric = test_metrics['MTG'][target][metric_name]
    a9_metric = test_metrics['A9'][target][metric_name]

    ## NaN compares False against everything, so `>` and max() on their own can pick
    ## different regions (e.g. MTG=0.5, A9=NaN gave Best_Region='A9', Best_Score=0.5).
    ## Choose region and score together, preferring the region with a real score.
    mtg_is_nan, a9_is_nan = np.isnan(mtg_metric), np.isnan(a9_metric)
    mtg_is_best = (mtg_metric > a9_metric) or (a9_is_nan and not mtg_is_nan)
    best_region, best_score = ('MTG', mtg_metric) if mtg_is_best else ('A9', a9_metric)

    return {
        'Target': target,
        'Metric': 'QWK' if is_ordinal else 'CCC',
        'MTG_Score': mtg_metric,
        'A9_Score': a9_metric,
        'Best_Region': best_region,
        'Best_Score': best_score,
        'Difference': abs(mtg_metric - a9_metric)
    }

### One comparison row per target, written as a single table
comparison_rows = list(map(compare_target, all_target_columns))
pd.DataFrame(comparison_rows).to_csv('./output/model_comparison.csv', index=False)