In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob


In [None]:
from glob import glob
# Load every over-regularisation result table and tag each row with the run
# parameters encoded in the name of the file it came from.
pattern = 'results/analyses/overregularisation/*.json'
files = glob(pattern)
if not files:
    # Without this guard pd.concat fails later with the much less helpful
    # "No objects to concatenate" (typically a wrong working directory).
    raise FileNotFoundError(f'no result files match {pattern!r}')

# File stem layout: <prefix>_<icd_code>_<et>_<m>_<e>_<ntrain>_<seed>
name_fields = ['icd_code', 'et', 'm', 'e', 'ntrain', 'seed']

df = []
for file in files:
    table = pd.read_json(file, orient='table').reset_index()
    parts = file.split('/')[-1].split('.')[0].split('_')
    if len(parts) != len(name_fields) + 1:
        raise ValueError(
            f'unexpected file name {file!r}: expected '
            f'{len(name_fields) + 1} "_"-separated parts, got {len(parts)}'
        )
    # parts[0] is the prefix and is not stored
    for field, value in zip(name_fields, parts[1:]):
        table[field] = value
    df.append(table)
df = pd.concat(df)

# Cast every object column that holds numbers (e.g. the values parsed from the
# file name) to float; columns that are not numeric are left untouched.
for col in df.columns:
    if df[col].dtype == 'object':
        try:
            df[col] = df[col].astype(float)
        except (ValueError, TypeError):
            # Not convertible (e.g. code strings): keep the column as it is.
            pass

df

In [None]:
# Keep only the et1 / m3 / e1 runs. Building a fresh frame (rename and
# reset_index return new objects) keeps the column assignments below from
# acting on a slice of the loaded data.
selected = (df['et'] == 'et1') & (df['m'] == 'm3') & (df['e'] == 'e1')
df = (
    df.loc[selected]
    .rename(columns={'index': 'alpha'})
    .reset_index(drop=True)
)

# 'my_effect' is 'effect_corr' with the sign inverted for two targets.
# NOTE(review): presumably so that all targets point in the same direction
# in the figure -- confirm.
df['my_effect'] = df['effect_corr']
flip = df['icd_code'].isin(['fluid-intelligence-custom', 'socialsupport-bin'])
df.loc[flip, 'my_effect'] = -df.loc[flip, 'my_effect']

In [None]:
# Sanity check: display the 'my_effect' values stored for fluid intelligence
df.set_index('icd_code')['my_effect'].loc['fluid-intelligence-custom']

In [None]:
# Mean of each metric over all rows sharing the same (icd_code, alpha)
metric_cols = ['r2_test', 'mae_test', 'my_effect']
a = df.groupby(['icd_code', 'alpha'])[metric_cols].mean().reset_index()

# For every icd_code, pick the row of `a` (and with it the alpha) at which a
# given metric is optimal; the results are indexed by icd_code.
per_code = a.groupby(['icd_code'])
b = a.loc[per_code['r2_test'].idxmax()].set_index('icd_code')    # highest mean r2_test
c = a.loc[per_code['my_effect'].idxmax()].set_index('icd_code')  # highest mean my_effect
d = a.loc[per_code['mae_test'].idxmin()].set_index('icd_code')   # lowest mean mae_test

a



In [None]:
# Shrink the default matplotlib font sizes so the small facet panels below
# stay readable.
plt.rcParams.update({
    'ytick.labelsize': 'xx-small',
    'xtick.labelsize': 'xx-small',
    'axes.titlesize': 'small',
    'axes.labelsize': 'x-small',
    'legend.fontsize': 'x-small',
})

In [None]:
def twin_lineplot(x, y, color, **kwargs):
    """Draw a seaborn line plot on a secondary y-axis of the current axes.

    Written to be passed to ``FacetGrid.map``: a twin y-axis is created on the
    currently active axes and the line is drawn on it, so a second quantity
    can share a panel with a curve that is already there.

    Parameters
    ----------
    x, y : array-like
        Data forwarded to ``sns.lineplot``.
    color : colour specification
        Line colour, forwarded to ``sns.lineplot``.
    **kwargs
        Any further keyword arguments for ``sns.lineplot``
        (e.g. ``errorbar``, ``label``).

    Returns
    -------
    None
    """
    ax = plt.twinx()
    sns.lineplot(x=x, y=y, color=color, ax=ax, **kwargs)

    # Label the secondary axis only once: last column, middle row(s).
    spec = ax.get_subplotspec()
    in_middle_row = not spec.is_first_row() and not spec.is_last_row()
    if spec.is_last_col() and in_middle_row:
        ax.set_ylabel('accuracy [R^2]')
    else:
        ax.set_ylabel('')

    # No per-panel legend. get_legend() returns None when seaborn did not
    # create one, so guard before removing.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # only 2 yticks, rotation 90
    ax.set_yticks([0.2, 0.6])
    ax.set_yticklabels([0.2, 0.6], rotation=90)


import matplotlib.lines as mlines  # only needed for the hand-built legend

# Facets in display order: icd_code value -> panel title shown in the figure.
panel_titles = {
    'F10': 'Alcohol Dependency',
    'F31': 'Bipolar Disorder',
    'F32': 'Depression',
    'G20': "Parkinson's",
    'G40': 'Epilepsy',
    'G47': 'Sleep Disorders',
    'fluid-intelligence-custom': 'Fluid Intelligence',
    'stress-bin': 'Severe Stress',
    'socialsupport-bin': 'Social Support',
}

g = sns.FacetGrid(df, height=1.5, col="icd_code", col_wrap=3,
                  col_order=list(panel_titles), sharey=False)
# Two curves per panel on different y-scales: effect size on the primary axis,
# r2 on a twin axis.
g.map(sns.lineplot, "alpha", "my_effect", errorbar='se', color='blue', label='effect size')
g.map(twin_lineplot, "alpha", "r2_test", errorbar='se', color='red', label='r2')
g.set(xscale="log")

# Mean effect per (icd_code, alpha); built once and reused for every panel.
mean_effect = a.set_index(['icd_code', 'alpha'])['my_effect']

for ax, code in zip(g.axes.flat, panel_titles):
    spec = ax.get_subplotspec()
    in_middle_row = not spec.is_first_row() and not spec.is_last_row()

    # y-label only once: first column, middle row(s)
    ax.set_ylabel('effect size [d]' if spec.is_first_col() and in_middle_row else '')

    # Dashed guides. Red marks the alpha with the lowest mean test MAE (`d`),
    # blue the alpha with the largest mean effect size (`c`); the horizontal
    # lines show the mean effect size reached at each of those alphas.
    # NOTE(review): the previous comment here read "max r2_test", but the code
    # uses the min-MAE optimum `d`, not the max-r2 optimum `b` -- confirm
    # which one is intended.
    alpha_min_mae = d.loc[code, 'alpha']
    alpha_max_effect = c.loc[code, 'alpha']
    ax.axvline(x=alpha_min_mae, color='red', linestyle='--', alpha=.5)
    ax.axvline(x=alpha_max_effect, color='blue', linestyle='--', alpha=.5)
    ax.axhline(y=mean_effect.loc[(code, alpha_max_effect)], color='blue', linestyle='--', alpha=.5)
    ax.axhline(y=mean_effect.loc[(code, alpha_min_mae)], color='red', linestyle='--', alpha=.5)

# Custom legend (red = r2, blue = effect size), centred below the
# second-to-last panel, no frame.
red_line = mlines.Line2D([], [], color='red', label='brain-age prediction accuracy [R^2]')
blue_line = mlines.Line2D([], [], color='blue', label='brain-age-gap effect size [d]')
g.axes[-2].legend(handles=[red_line, blue_line], loc='lower center',
                  bbox_to_anchor=(0.5, -.7), ncol=2, frameon=False)

g.set_yticklabels(rotation=90)
plt.subplots_adjust(hspace=0.4, wspace=0.5)

for ax, title in zip(g.axes.flat, panel_titles.values()):
    # keep every second y-tick only
    ax.set_yticks(ax.get_yticks()[::2])
    ax.set_title(title)

    # x-label only in the middle column(s)
    spec = ax.get_subplotspec()
    if not spec.is_first_col() and not spec.is_last_col():
        ax.set_xlabel('regularisation strength [$\\alpha$]')
    else:
        ax.set_xlabel('')

plt.savefig('fig2.png', dpi=300, bbox_inches='tight')

In [None]:
# NOTE(review): this cell does not look runnable in notebook order. `df_` and
# `r2_max_index` are not defined in any cell visible here, and the 'index'
# column of the `df` in scope at this point was renamed to 'alpha' earlier.
# The columns read here ('index', 'effect', 'file') match the frame built by
# the trainset_mixture loading cell further down, so this is presumably a
# leftover from out-of-order execution -- confirm, then either restore the
# missing definitions or delete the cell.

# Line plot of 'effect' against 'index', one line per 'file', log-scaled x-axis
g=sns.lineplot(data=df,x='index', y='effect', hue='file')
g.set_xscale('log')

# add "r2" column from df to plot
g=sns.lineplot(data=df_,x='index', y='r2')

# vertical line at r2_max_index
import matplotlib.pyplot as plt
plt.axvline(r2_max_index, color='k', linestyle='--')

In [None]:
# Load the train-set-mixture effect tables and tag each row with the
# parameters encoded in the name of the file it came from.
# Note: this rebinds `df`, replacing the over-regularisation frame used above.
pattern = 'results/analyses/trainset_mixture/effects_*_m*_e*.json'
files = glob(pattern)
print(files)
if not files:
    # Without this guard pd.concat fails later with the much less helpful
    # "No objects to concatenate" (typically a wrong working directory).
    raise FileNotFoundError(f'no result files match {pattern!r}')

df = []
for file in files:
    table = pd.read_json(file, orient='table')
    stem = file.split('/')[-1].split('.')[0]
    # File stem layout: <prefix>_<icd_code>_<matching>_<exclusion>
    parts = stem.split('_')
    if len(parts) != 4:
        raise ValueError(
            f'unexpected file name {file!r}: expected 4 "_"-separated parts, '
            f'got {len(parts)}'
        )
    table['icd_code'], table['matching'], table['exclusion'] = parts[1:]
    table['file'] = stem
    df.append(table)

df = pd.concat(df).reset_index()
# Invert the sign of the uncorrected effect.
# NOTE(review): the reason for the flip is not visible here -- confirm the
# intended direction ('effect_corr' is left as loaded).
df['effect'] = -df['effect']
df


In [None]:
# Regression of 'effect_corr' on 'fraction' (points are the mean per x value):
# one row of panels per icd_code, one column per matching setting, coloured by
# exclusion setting. Axis sharing is passed through `facet_kws`, because
# `sharex` / `sharey` are deprecated in the lmplot signature.
g = sns.lmplot(
    data=df,
    x='fraction',
    y='effect_corr',
    row='icd_code',
    col='matching',
    hue='exclusion',
    x_estimator=np.mean,
    facet_kws={'sharey': 'row', 'sharex': 'row'},
)

# Dashed zero reference line in every panel
for ax in g.axes.flat:
    ax.axhline(0, color='black', linestyle='--')



In [None]:
# Regression of 'effect' on 'fraction' (points are the mean per x value):
# one row of panels per icd_code, one column per matching setting, coloured by
# exclusion setting. Axis sharing is passed through `facet_kws`, because
# `sharex` / `sharey` are deprecated in the lmplot signature.
g = sns.lmplot(
    data=df,
    x='fraction',
    y='effect',
    row='icd_code',
    col='matching',
    hue='exclusion',
    x_estimator=np.mean,
    facet_kws={'sharey': 'row', 'sharex': 'row'},
)

# Dashed zero reference line in every panel
for ax in g.axes.flat:
    ax.axhline(0, color='black', linestyle='--')



In [None]:
import json
from io import StringIO

# Read the ICD-10 brain-age sweep file and parse its 'results' entry into a
# DataFrame (table-orient JSON).
with open('results/analyses/icd10_brainage_sweep/table_blocks.json') as f:
    data = json.load(f)

results = data['results']
# pandas >= 2.1 deprecates passing a literal JSON string to read_json, so JSON
# text is wrapped in a buffer. Any other value is handed over unchanged.
if isinstance(results, str) and results.lstrip().startswith(('{', '[')):
    results = StringIO(results)
df_sweep = pd.read_json(results, orient='table')


In [None]:
# Show sweep rows with n > 20, an ICD letter of F, G or O, and t_abs > 0
keep = (
    df_sweep['n'].gt(20)
    & df_sweep['icd_letter'].isin(['F', 'G', 'O'])
    & df_sweep['t_abs'].gt(0)
)
df_sweep[keep]