### ***Data Preparation***

In [None]:
import pandas as pd
import glob
import os


# Directory containing the per-run CSV files (presumably a placeholder to be
# replaced with the real location before running).
folder_path = r"folder_path_of_saved_files"

# glob.glob returns matches in arbitrary, filesystem-dependent order. Sorting
# makes the load order -- and therefore the row order of the concatenated
# table and the order of the unique model/scenario/method arrays derived from
# it -- reproducible between runs and machines.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))

# Filled by the loading loop below: one DataFrame per CSV file.
dataframes = []

def extract_info(filename):
    """Parse the model, scenario and method encoded in a results file name.

    The directory part and a trailing ``.csv`` extension are ignored. The
    remaining name is split on underscores: the second field is the method and
    the last field has the form ``<model>-<scenario>``. That last field is
    split at its final hyphen, so the model name may itself contain hyphens.

    Args:
        filename: Path or bare file name of a results CSV.

    Returns:
        A ``(model, scenario, method)`` tuple of strings.

    Raises:
        ValueError: If the name has fewer than two underscore-separated
            fields, or its last field contains no hyphen.
    """
    base_name = os.path.basename(filename)
    # Strip only the extension; a blanket str.replace would also delete any
    # ".csv" that happens to occur in the middle of the name.
    if base_name.endswith('.csv'):
        base_name = base_name[:-len('.csv')]

    parts = base_name.split('_')
    if len(parts) < 2:
        raise ValueError(
            f"cannot parse method from file name {filename!r}: "
            "expected at least two '_'-separated fields"
        )
    method = parts[1]

    # rpartition splits at the last hyphen, matching rsplit('-', 1).
    model, hyphen, scenario = parts[-1].rpartition('-')
    if not hyphen:
        raise ValueError(
            f"cannot parse model/scenario from file name {filename!r}: "
            "expected the last field to look like '<model>-<scenario>'"
        )

    return model, scenario, method

# Read each CSV and tag all of its rows with the model, scenario and method
# parsed from the file name, then collect the tagged frame.
for file in csv_files:
    df = pd.read_csv(file)
    tags = zip(('model', 'scenario', 'method'),
               extract_info(os.path.basename(file)))
    for column, value in tags:
        # Scalar assignment broadcasts the tag to every row of the frame.
        df[column] = value
    dataframes.append(df)

  from pandas.core import (


In [2]:
# Tidy every loaded frame in place: split a combined "lat,lon" column into
# separate 'lat' and 'lon' columns, then remove the columns that are not
# carried forward (only those actually present in the frame).
for df in dataframes:
    if 'lat_lon' in df.columns:
        df[['lat', 'lon']] = df['lat_lon'].str.split(',', expand=True)
    unwanted = [
        name
        for name in ('lat_lon', 'intercept', 'Unnamed: 0', 'slope')
        if name in df.columns
    ]
    df.drop(columns=unwanted, inplace=True)

In [4]:
# Stack all per-file frames into one table with a fresh 0..n-1 index.
grid = pd.concat(dataframes, ignore_index=True)

# Normalise two model labels (exact-value matches, not substrings).
# The result is assigned back instead of calling replace(..., inplace=True) on
# grid["model"]: that chained in-place form operates on a temporary Series,
# which pandas warns about and which leaves `grid` unchanged once
# Copy-on-Write is active.
grid["model"] = grid["model"].replace({
    "gdfl-esm4": "gfdl-esm4",
    "mpi-esm1-2": "mpi-esm1-2-lr",
})

In [5]:
columns_to_convert = ['lat', 'lon']
# Parse the coordinate columns into a numeric dtype. Both columns are parsed
# before either is written back, so a parse failure leaves `grid` untouched.
numeric_columns = {
    name: pd.to_numeric(grid[name]) for name in columns_to_convert
}
for name, values in numeric_columns.items():
    grid[name] = values

In [8]:
# Rebuild a single "lat,lon" key from the numeric coordinates, drop the two
# separate columns, and group the table by that key (one group per grid cell).
lat_text = grid['lat'].astype(str)
lon_text = grid['lon'].astype(str)
grid['lat_lon'] = lat_text + ',' + lon_text
grid.drop(columns=["lat", "lon"], inplace=True)
grouped_df = grid.groupby('lat_lon')

### ***Sobol' Sensitivity Analysis***

In [10]:
# Distinct levels of each factor, in order of first appearance in `grid`.
# The sampling cells below index into these arrays by integer position.
gcm, scenario, method = (
    grid[column].unique() for column in ("model", "scenario", "method")
)

In [11]:
import pandas as pd 
from SALib.sample import saltelli 
from SALib.analyze import sobol
import numpy as np 
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/SALib.
warnings.filterwarnings(action="ignore")

In [14]:
# SALib problem definition, built from one entry per factor:
# factor name -> [lower, upper] sampling bounds.
# NOTE(review): the sampled values are truncated to integers and used as
# positions into the gcm / scenario / method arrays further down, so each
# upper bound presumably has to equal the number of unique values of that
# factor -- confirm against the loaded data.
_factor_bounds = {"gcm": [0, 17], "scenario": [0, 3], "method": [0, 3]}
problem = dict(
    num_vars=len(_factor_bounds),
    names=list(_factor_bounds),
    bounds=list(_factor_bounds.values()),
)

In [15]:
# Draw the Saltelli sample (base size 4096) over the problem bounds and
# truncate every value to an integer so it can be used as a level index.
X = saltelli.sample(problem, 4096).astype(int)

In [None]:
# Sobol' analysis per grid cell.
#
# For every "lat,lon" group the model output Y is assembled by looking up the
# 'scaling' value of each sampled (model, scenario, method) combination, and
# the first-order, second-order and total-order indices are collected.

# The sample matrix X is the same for every grid cell, so its integer codes
# are translated into (model, scenario, method) keys once, up front.
factor_columns = ["model", "scenario", "method"]
sample_keys = list(zip(
    np.asarray(gcm)[X[:, 0]],
    np.asarray(scenario)[X[:, 1]],
    np.asarray(method)[X[:, 2]],
))

results = []
for i, (lat_lon, group_data) in enumerate(grouped_df):
    # De-duplicate into a new frame rather than mutating the object yielded
    # by the groupby iteration.
    unique_rows = group_data.drop_duplicates()

    # One dictionary lookup per sample row replaces a boolean filter of the
    # whole frame per sample row, and yields a plain scalar instead of
    # relying on implicit conversion of a one-row Series.
    scaling_by_key = dict(zip(
        zip(*(unique_rows[column] for column in factor_columns)),
        unique_rows["scaling"],
    ))
    if len(scaling_by_key) != len(unique_rows):
        # Same combination present with conflicting values: ambiguous output.
        raise ValueError(
            f"grid cell {lat_lon!r} has more than one distinct row for the "
            "same (model, scenario, method) combination"
        )

    try:
        Y = np.array([scaling_by_key[key] for key in sample_keys], dtype=float)
    except KeyError as err:
        raise ValueError(
            f"grid cell {lat_lon!r} has no 'scaling' value for "
            f"(model, scenario, method) = {err.args[0]!r}"
        ) from err

    # Perform Sobol analysis for the current group
    Si = sobol.analyze(problem, Y, print_to_console=False)

    # Collect this cell's indices; the list becomes a DataFrame after the loop.
    results.append({
        'gcm': Si['S1'][0],
        'scenario': Si['S1'][1],
        'method': Si['S1'][2],
        'gcm:scenario': Si['S2'][0, 1],
        'gcm:method': Si['S2'][0, 2],
        'scenario:method': Si['S2'][1, 2],
        'gcm_T': Si['ST'][0],
        'scenario_T': Si['ST'][1],
        'method_T': Si['ST'][2],
        'lat_lon': lat_lon
    })
    # Progress report every 100 grid cells.
    if i % 100 == 0:
        print("done -> ", i)
result_df = pd.DataFrame(results)

In [17]:
# Write the per-cell sensitivity indices to disk.
# NOTE(review): the row index is written too (no index=False is passed), so
# re-reading the file will presumably yield an extra unnamed column -- confirm
# that this is intended.
result_df.to_csv(path_or_buf=r"save_pth.csv")