# In [None]:
import os
import pandas as pd
import numpy as np

# Model identifiers; each one is the leading part of a metrics file name
# of the form '<model>_<scenario>_metrics.csv'.
models = ['bnn_mcd', 'bnn_dc', 'mdn', 'ens_nn', 'rnn']

# One entry per scenario: (scenario file tag, subscenario label, AC label).
# Keeping the three values side by side guarantees the derived lists below
# stay index-aligned.
_SCENARIO_TABLE = [
    ('prisma_1', 'Subscenario 1', 'N/A'),
    ('prisma_ood_a', 'Subscenario 2', 'ACOLITE'),
    ('prisma_ood_l', 'Subscenario 2', 'L2'),
    ('prisma_wd_a', 'Subscenario 3', 'ACOLITE'),
    ('prisma_wd_l', 'Subscenario 3', 'L2'),
]
scenarios, subscenarios, ac_types = (list(column) for column in zip(*_SCENARIO_TABLE))

# Values of the 'variable' column that are reported, in output column order.
variables = [
    'aph_443', 'aph_675',
    'aCDOM_443', 'aCDOM_675',
    'aNAP_443', 'aNAP_675',
]

def get_median_model_mdsa(file_path, *, target_variables=None, ranking_variables=None):
    """Return the rounded MdSA per variable for the median-ranked model in a CSV.

    The CSV must contain the columns 'model', 'variable' and 'MdSA'. Models
    are ranked by the sum of their MdSA over ``ranking_variables``; the model
    at position ``len // 2`` of the ascending ranking (the upper median for an
    even count) is selected, and its MdSA is reported for every entry of
    ``target_variables``.

    Args:
        file_path: Path of the metrics CSV file to read.
        target_variables: Values of the 'variable' column to report. Defaults
            to the module-level ``variables`` list.
        ranking_variables: Values of the 'variable' column whose MdSA is
            summed to rank the models. Defaults to
            ('aph_443', 'aCDOM_443', 'aNAP_443').

    Returns:
        dict mapping each target variable to the MdSA of the selected model
        rounded to the nearest integer, or ``np.nan`` when that value is
        missing. If the file cannot be read, lacks a required column, or has
        no model with valid ranking data, every value is ``np.nan`` and a
        message is printed.
    """
    if target_variables is None:
        target_variables = variables
    if ranking_variables is None:
        ranking_variables = ('aph_443', 'aCDOM_443', 'aNAP_443')
    target_variables = list(target_variables)

    def _all_missing():
        return {var: np.nan for var in target_variables}

    try:
        df = pd.read_csv(file_path)
        if not {'model', 'variable', 'MdSA'}.issubset(df.columns):
            print(f"Required columns not found in {file_path}")
            return _all_missing()

        ranking_rows = df[df['variable'].isin(list(ranking_variables))]

        # min_count=1 keeps an all-NaN group as NaN instead of 0, so a model
        # without any valid ranking value is dropped rather than ranked best.
        ranked = (
            ranking_rows.groupby('model')['MdSA']
            .sum(min_count=1)
            .dropna()
            .sort_values()
        )

        if ranked.empty:
            print(f"No valid data found in {file_path}")
            return _all_missing()

        median_model = ranked.index[len(ranked) // 2]
        model_rows = df[df['model'] == median_model]

        mdsa_values = {}
        for var in target_variables:
            var_mdsa = model_rows.loc[model_rows['variable'] == var, 'MdSA']
            value = var_mdsa.iloc[0] if len(var_mdsa) > 0 else np.nan
            # round() raises on NaN; without this guard a single missing
            # value would discard the results for every variable of the file.
            mdsa_values[var] = np.nan if pd.isna(value) else round(value)

        return mdsa_values

    except Exception as e:
        # Deliberate best-effort: an unreadable or malformed file yields an
        # all-NaN row instead of aborting the whole aggregation.
        print(f"Error processing {file_path}: {str(e)}")
        return _all_missing()

def process_data(base_path):
    """Build the MdSA summary table for every model/scenario combination.

    For each entry of ``models`` and each aligned triple taken from
    ``scenarios``, ``subscenarios`` and ``ac_types``, the file
    '<model>_<scenario>_metrics.csv' under ``base_path`` is passed to
    ``get_median_model_mdsa`` and the returned values become one row.

    Args:
        base_path: Directory that contains the metrics CSV files.

    Returns:
        pandas.DataFrame with the columns 'model', 'subscenario', 'AC'
        followed by one column per entry of ``variables``.
    """
    scenario_specs = list(zip(scenarios, subscenarios, ac_types))

    records = []
    for model_name in models:
        for scenario_tag, subscenario_label, ac_label in scenario_specs:
            metrics_csv = os.path.join(base_path, f'{model_name}_{scenario_tag}_metrics.csv')
            records.append({
                'model': model_name,
                'subscenario': subscenario_label,
                'AC': ac_label,
                **get_median_model_mdsa(metrics_csv),
            })

    # Identifying columns first, then the variables in their configured order.
    ordered_columns = ['model', 'subscenario', 'AC'] + variables
    summary = pd.DataFrame(records)
    return summary[ordered_columns]

# Directory holding the '<model>_<scenario>_metrics.csv' files; raw string so
# the Windows backslashes are kept literally.
base_path = r'C:\SwitchDrive\Data\pnn_model_estimates'

# Summary table: one row per model/scenario combination.
result_df = process_data(base_path=base_path)