In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import numpy as np
import joblib
import imodelsx.process_results
import sys
sys.path.append('../experiments/')
results = []

# Inputs of this analysis: the experiment script and the directory of saved runs.
experiment_filename = '../experiments/08_figs_restructure.py'
results_dir = '../results/08_timed'

# Read every saved run into one DataFrame.
r = imodelsx.process_results.get_results_df(results_dir)

# Report which experiment arguments take more than one value
# (per the helper's name — presumably derived from the experiment script; confirm).
cols_varied = imodelsx.process_results.get_experiment_keys(r, experiment_filename)
print('experiment varied these params:', cols_varied)

# Presumably fills arguments absent from a run with the script's defaults — confirm.
r = imodelsx.process_results.fill_missing_args_with_default(r, experiment_filename)

# Number of runs in each combination of the varied arguments.
r.groupby(cols_varied).size()

In [None]:
# Mean training time over all runs, as a (figs, ftd) pair.
tuple(r[col].mean() for col in ('figs_training_time', 'ftd_training_time'))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def plot_histogram_for_dataset(df, dataset_name, train=False):
    """Draw a grouped bar chart of R2 per dataset and model, and save it as a PNG.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``dataset_name`` (x axis), ``model_name`` (hue)
        and ``train_r2`` or ``val_r2`` (bar height, selected by ``train``).
    dataset_name : str
        Used only as the prefix of the output file name.
    train : bool, default False
        Plot ``train_r2`` when True, ``val_r2`` otherwise.

    Side effects
    ------------
    Sets the global seaborn theme and writes
    ``plots/{dataset_name}_{train|val}_r2.png`` (``plots/`` is created if missing).
    """
    train_str = 'train' if train else 'val'
    split_label = 'Train' if train else 'Val'

    # A single theme call: every sns.set()/set_theme() call resets context,
    # style and font scale together, so stacking several of them silently
    # discards all but the last one.
    sns.set_theme(style="whitegrid", font_scale=1.25)

    g = sns.catplot(
        data=df,
        x='dataset_name',
        y=f'{train_str}_r2',
        hue='model_name',
        kind='bar',
        height=6,
        aspect=2,
    )

    # Title and axis labels; the x axis carries datasets, the hue carries models.
    g.figure.suptitle(f'Best {split_label} R2 Scores for FIGS, FT Distill, Ridge', fontsize=25)
    g.set_axis_labels("Dataset Name", f"{split_label} R2 Score", fontsize=25)
    sns.move_legend(g, bbox_to_anchor=(1, 0.5), loc="center left", markerscale=5, title="model")

    # Fixed y ticks every 0.1 up to 1.0, slanted x tick labels, and room for the suptitle.
    plt.yticks(ticks=np.arange(0, 1.09, 0.1))
    plt.xticks(rotation=45)
    plt.tight_layout(rect=[0, 0, 1, 0.97])

    # savefig does not create missing directories.
    os.makedirs('plots', exist_ok=True)
    g.savefig(f'plots/{dataset_name}_{train_str}_r2.png', bbox_inches='tight')
#hue=df[['distiller_name', 'binary_mapper_name']].apply(tuple, axis =1)

In [None]:
# How many runs used each max_rules setting.
r.loc[:, 'max_rules'].value_counts()

In [None]:
# Position of the run with the smallest ftd / figs validation-R2 ratio.
np.argmin(r['ftd_r2_score_val'].div(r['figs_r2_score_val']))

In [None]:
# Show the run with the smallest ftd / figs validation-R2 ratio. The position
# is computed here rather than hard-coded so it stays correct when the
# results directory changes.
worst_ratio_pos = int(np.argmin(r['ftd_r2_score_val'] / r['figs_r2_score_val']))
r.iloc[worst_ratio_pos, :]

In [None]:
# Percent change of ftd over figs validation R2, restricted to runs with max_rules == 20.
runs_20_rules = r.loc[r['max_rules'] == 20]
plt.hist(runs_20_rules['ftd_r2_score_val'].div(runs_20_rules['figs_r2_score_val']).mul(100).sub(100))

In [None]:
# Percent change of ftd over figs validation R2, restricted to runs with max_rules == 30.
runs_30_rules = r.loc[r['max_rules'] == 30]
plt.hist(runs_30_rules['ftd_r2_score_val'].div(runs_30_rules['figs_r2_score_val']).mul(100).sub(100))

In [None]:
# Percent change of ftd over figs validation R2 across all runs, 50 bins.
plt.hist(r['ftd_r2_score_val'].div(r['figs_r2_score_val']).mul(100).sub(100), bins=50)

In [None]:
def _first_by_ftd_val(sdf):
    """Return the first row of ``sdf`` after sorting by ``ftd_r2_score_val``, descending."""
    return sdf.sort_values(by='ftd_r2_score_val', ascending=False).iloc[0, :]


# For each dataset keep that single run, then only the interaction/timing columns.
metric_cols = [
    'figs_max_interaction_size',
    'ftd_max_interaction_size',
    'num_common_interactions',
    'figs_training_time',
    'ftd_training_time',
]
metrics = r.groupby(['dataset_name']).apply(_first_by_ftd_val)[metric_cols]

In [None]:
# Move the dataset_name group key from the index into a regular column.
# Guarded so that re-running this cell does not add a spurious 'index' column.
if 'dataset_name' not in metrics.columns:
    metrics = metrics.reset_index()

In [None]:
# Long format: one row per (dataset, model) holding that model's max interaction size.
df_melted = pd.melt(
    metrics,
    id_vars='dataset_name',
    value_vars=['figs_max_interaction_size', 'ftd_max_interaction_size'],
    var_name='model_name',
    value_name='max_interaction_size',
)

# catplot is figure-level: it creates its own figure, sized through height/aspect,
# so no separate plt.figure() is opened (that would only render an empty figure).
g = sns.catplot(
    data=df_melted,
    x='dataset_name',
    y='max_interaction_size',
    hue='model_name',
    kind='bar',
    height=5,
    aspect=2,
)

# Title and axis labels.
g.ax.set_title('Max Interaction Size by Dataset for Best Val R2 Model')
g.set_axis_labels('Dataset Name', 'Max Interaction Size')

# Save under its own name (the mean-interaction-size plots in this notebook
# write plots/interaction_size.png), then display.
os.makedirs('plots', exist_ok=True)
g.savefig('plots/interaction_size_best_val.png', bbox_inches='tight')
plt.show()

In [None]:
# NOTE(review): in the visible notebook `int_df` is first assigned in the cell
# after this one, so on a fresh kernel this display raises NameError — confirm
# the intended cell order.
int_df

In [None]:
# Per-dataset mean of each model's max interaction size, with short column names.
size_col_labels = {'figs_max_interaction_size': 'figs', 'ftd_max_interaction_size': 'ftd'}
mean_sizes = r.groupby(['dataset_name'])[list(size_col_labels)].mean()
int_df = mean_sizes.reset_index().rename(columns=size_col_labels)

# Long format for seaborn: one row per (dataset, model).
df_melted = int_df.melt(
    id_vars='dataset_name',
    value_vars=['figs', 'ftd'],
    var_name='model_name',
    value_name='max_interaction_size',
)

# Grouped bars: datasets on x, one bar per model.
bar_kwargs = dict(
    x='dataset_name',
    y='max_interaction_size',
    hue='model_name',
    kind='bar',
    height=5,
    aspect=2,
)
g = sns.catplot(data=df_melted, **bar_kwargs)

# Title and axis labels.
plt.title('Mean Max Interaction Size by Dataset')
plt.xlabel('Dataset Name')
plt.ylabel('Mean Max Interaction Size')

# Display, then write the figure to disk.
plt.show()
g.savefig('plots/interaction_size.png', bbox_inches='tight')

In [None]:
def _best_val_run_scores(prefix):
    """Per dataset, take the first run when sorted by ``{prefix}_r2_score_val``
    descending, and return its train/val R2 columns."""
    train_col = f'{prefix}_r2_score_train'
    val_col = f'{prefix}_r2_score_val'
    top_runs = r.groupby(['dataset_name']).apply(
        lambda sdf: sdf.sort_values(by=val_col, ascending=False).iloc[0, :]
    )
    return top_runs[[train_col, val_col]]


ftd = _best_val_run_scores('ftd')
figs = _best_val_run_scores('figs')
ridge_decoup = _best_val_run_scores('ridge_figs_decoup')
ridge_inter = _best_val_run_scores('ridge_figs_inter')
xgb = _best_val_run_scores('xgboost')

In [None]:
def _label_and_rename(scores, label, prefix):
    """Tag ``scores`` in place with ``model_name = label`` and return a copy whose
    ``{prefix}_r2_score_train/val`` columns are renamed to ``train_r2``/``val_r2``."""
    scores['model_name'] = label
    return scores.rename(columns={
        f'{prefix}_r2_score_train': 'train_r2',
        f'{prefix}_r2_score_val': 'val_r2',
    })


# Give every per-model frame the same schema so they can be stacked.
ftd = _label_and_rename(ftd, 'ft_distill', 'ftd')
figs = _label_and_rename(figs, 'figs', 'figs')
ridge_decoup = _label_and_rename(ridge_decoup, 'ridge_decoup', 'ridge_figs_decoup')
ridge_inter = _label_and_rename(ridge_inter, 'ridge_inter', 'ridge_figs_inter')
xgb = _label_and_rename(xgb, 'xgboost', 'xgboost')

In [None]:
# Stack the per-model frames row-wise and turn the index into a regular column.
per_model_frames = [ftd, figs, ridge_decoup, ridge_inter, xgb]
models = pd.concat(per_model_frames, axis=0).reset_index()

In [None]:
def show_p_improve(base, comp, round_degree=2):
    """Percent change of ``comp``'s scores relative to ``base``'s.

    Both frames must contain a ``model_name`` column; every other column is
    treated as a score. Rows and score columns are matched by position, so
    ``base`` and ``comp`` must list the same rows and scores in the same order.

    Parameters
    ----------
    base, comp : pandas.DataFrame
        Score frames of the reference model and of the compared model.
    round_degree : int, default 2
        Decimals kept on the ``comp / base`` ratio before it is converted to a
        percentage (2 decimals on the ratio means whole percents).

    Returns
    -------
    pandas.DataFrame
        ``round(comp / base, round_degree) * 100 - 100``, labelled with
        ``comp``'s score columns and ``comp``'s index.
    """
    # Label by name, not by position: model_name need not be the last column.
    score_cols = [col for col in comp.columns if col != 'model_name']
    comp_scores = comp.drop(columns=['model_name']).to_numpy(dtype=float)
    base_scores = base.drop(columns=['model_name']).to_numpy(dtype=float)

    ratio = np.round(comp_scores / base_scores, round_degree)
    # Second rounding only strips float noise (e.g. 1.1 * 100 - 100 == 10.000000000000014).
    percent = np.round(ratio * 100 - 100, max(round_degree - 2, 0))
    return pd.DataFrame(percent, columns=score_cols, index=comp.index)

In [None]:
# Percent change of FT-Distill relative to FIGS.
show_p_improve(base=figs, comp=ftd)

In [None]:
# Percent change of the ridge_decoup scores relative to FIGS.
show_p_improve(base=figs, comp=ridge_decoup)

In [None]:
# Percent change of the ridge_inter scores relative to FIGS.
show_p_improve(base=figs, comp=ridge_inter)

In [None]:
# Validation-R2 bar chart for the stacked per-model frame; file prefix 'figs_sim'.
plot_histogram_for_dataset(df=models, dataset_name='figs_sim', train=False)

In [None]:
# Distribution of num_common_interactions over all runs.
r.loc[:, 'num_common_interactions'].value_counts()

In [None]:
# NOTE(review): this cell repeats an earlier cell of the notebook verbatim
# (same data, same plot, same output file) — consider keeping only one.

# Per-dataset mean of each model's max interaction size, with short column names.
size_col_labels = {'figs_max_interaction_size': 'figs', 'ftd_max_interaction_size': 'ftd'}
mean_sizes = r.groupby(['dataset_name'])[list(size_col_labels)].mean()
int_df = mean_sizes.reset_index().rename(columns=size_col_labels)

# Long format for seaborn: one row per (dataset, model).
df_melted = int_df.melt(
    id_vars='dataset_name',
    value_vars=['figs', 'ftd'],
    var_name='model_name',
    value_name='max_interaction_size',
)

# Grouped bars: datasets on x, one bar per model.
bar_kwargs = dict(
    x='dataset_name',
    y='max_interaction_size',
    hue='model_name',
    kind='bar',
    height=5,
    aspect=2,
)
g = sns.catplot(data=df_melted, **bar_kwargs)

# Title and axis labels.
plt.title('Mean Max Interaction Size by Dataset')
plt.xlabel('Dataset Name')
plt.ylabel('Mean Max Interaction Size')

# Display, then write the figure to disk.
plt.show()
g.savefig('plots/interaction_size.png', bbox_inches='tight')

In [None]:
# Distribution of num_common_interactions over all runs
# (NOTE(review): same query as an earlier cell).
r.num_common_interactions.value_counts()

In [None]:
# Distribution of ftd_max_interaction_size over all runs.
r.loc[:, 'ftd_max_interaction_size'].value_counts()

In [None]:
def _first_by_ftd_val(sdf):
    """Return the first row of ``sdf`` after sorting by ``ftd_r2_score_val``, descending."""
    return sdf.sort_values(by='ftd_r2_score_val', ascending=False).iloc[0, :]


# Train/val R2 of that run, one row per dataset.
ftd = r.groupby(['dataset_name']).apply(_first_by_ftd_val)[['ftd_r2_score_train', 'ftd_r2_score_val']]

In [None]:
def _first_by_figs_val(sdf):
    """Return the first row of ``sdf`` after sorting by ``figs_r2_score_val``, descending."""
    return sdf.sort_values(by='figs_r2_score_val', ascending=False).iloc[0, :]


# Train/val R2 of that run, one row per dataset.
figs = r.groupby(['dataset_name']).apply(_first_by_figs_val)[['figs_r2_score_train', 'figs_r2_score_val']]

In [None]:
def _first_by_ridge_val(sdf):
    """Return the first row of ``sdf`` after sorting by ``ridge_r2_score_val``, descending."""
    return sdf.sort_values(by='ridge_r2_score_val', ascending=False).iloc[0, :]


# Train/val R2 of that run, one row per dataset.
ridge = r.groupby(['dataset_name']).apply(_first_by_ridge_val)[['ridge_r2_score_train', 'ridge_r2_score_val']]

In [None]:
def _label_and_rename(scores, label, prefix):
    """Tag ``scores`` in place with ``model_name = label`` and return a copy whose
    ``{prefix}_r2_score_train/val`` columns are renamed to ``train_r2``/``val_r2``."""
    scores['model_name'] = label
    return scores.rename(columns={
        f'{prefix}_r2_score_train': 'train_r2',
        f'{prefix}_r2_score_val': 'val_r2',
    })


# Give every per-model frame the same schema so they can be stacked.
ftd = _label_and_rename(ftd, 'ft_distill', 'ftd')
figs = _label_and_rename(figs, 'figs', 'figs')
ridge = _label_and_rename(ridge, 'ridge', 'ridge')

In [None]:
# Stack the three per-model frames row-wise and turn the index into a regular column.
per_model_frames = [ftd, figs, ridge]
models = pd.concat(per_model_frames, axis=0).reset_index()

In [None]:
# Validation-R2 bar chart for the stacked frame; writes under the 'figs_sim' prefix
# (NOTE(review): same output file as the earlier call with this prefix).
plot_histogram_for_dataset(df=models, dataset_name='figs_sim', train=False)

In [None]:
def _first_by_ridge_val(sdf):
    """Return the first row of ``sdf`` after sorting by ``ridge_r2_score_val``, descending."""
    return sdf.sort_values(by='ridge_r2_score_val', ascending=False).iloc[0, :]


# Settings and scores of that run, one row per dataset.
ridge_detail_cols = ['dataset_name', 'binary_mapper_frac', 'ridge_r2_score_train', 'ridge_r2_score_val', 'max_features']
r.groupby(['dataset_name']).apply(_first_by_ridge_val)[ridge_detail_cols]

In [None]:
# Mean val_r2 per (model, distiller, binary mapper), plus a combined label column.
# NOTE(review): the resulting frame has no 'dataset_name' column, while
# plot_histogram_for_dataset as defined in this notebook plots x='dataset_name'
# — confirm this cell still matches the current plotting helper.
df = (
    r.groupby(['model_name', 'distiller_name', 'binary_mapper_name'])['val_r2']
    .mean()
    .reset_index()
    .assign(**{
        'distiller+binary_mapper': lambda d: d['distiller_name'] + ' + ' + d['binary_mapper_name'],
    })
)
plot_histogram_for_dataset(df, 'all datasets')

In [None]:
# How many runs used each n_epochs setting.
r.loc[:, 'n_epochs'].value_counts()

In [None]:
# All runs whose model_name is 'ft_distill'.
r.loc[r['model_name'].eq('ft_distill')]

In [None]:
# Distinct dataset names, in order of first appearance.
datasets = pd.unique(r['dataset_name'])
datasets

In [None]:
# Mean teacher and distiller train R2 per (dataset, binary mapper).
train_r2_cols = ['teacher_r2_score_train_true', 'distiller_r2_score_train_true']
r.groupby(['dataset_name', 'binary_mapper_name'])[train_r2_cols].mean()

In [None]:
# Runs distilled with ft_distill at max_depth == 5 and max_features == 0.75.
is_selected_run = (
    r['distiller_name'].eq('ft_distill')
    & r['max_depth'].eq(5)
    & r['max_features'].eq(0.75)
)
r[is_selected_run]

In [None]:
# Random-forest runs. Hyperparameters kept: max_depth, max_features.
rf_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'n_epochs', 'max_rules', 'max_trees',
    'pre_interaction', 'post_interaction', 'pre_max_features',
    'post_max_features', 'size_interactions',
]
random_forest = r.loc[r['model_name'].eq('random_forest')].drop(columns=rf_dropped_cols)

# Mean teacher train/val R2 for every dataset x hyperparameter combination.
rf_group_keys = ['dataset_name', 'model_name', 'max_depth', 'max_features', 'bit', 'depth']
rf_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
random_forest_g = random_forest.groupby(rf_group_keys)[rf_score_cols].mean()

In [None]:
# Non-null entries per column for each (max_depth, max_features, seed) combination.
random_forest.groupby(by=['max_depth', 'max_features', 'seed']).count()

In [None]:
# random_forest_best_hyp = {}
# for d in datasets:
#     queried = random_forest_g.query(f"dataset_name == '{d}'")
#     d_best_hyp = dict(zip(['dataset_name','model_name', 'max_depth', 'max_features'], list(queried['r2_score_val_true'].idxmax())))
#     d_best_hyp['r2_val'] = queried['r2_score_val_true'].max()
#     del d_best_hyp['dataset_name']
#     del d_best_hyp['model_name']
#     random_forest_best_hyp[d] = d_best_hyp
# random_forest_best_hyp

In [None]:
# RF+ runs. Hyperparameters kept: max_depth, max_features.
rf_plus_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'n_epochs', 'max_rules', 'max_trees',
    'pre_interaction', 'post_interaction', 'pre_max_features',
    'post_max_features', 'size_interactions',
]
rf_plus = r.loc[r['model_name'].eq('rf_plus')].drop(columns=rf_plus_dropped_cols)

# Mean teacher train/val R2 for every dataset x hyperparameter combination.
rf_plus_group_keys = ['dataset_name', 'model_name', 'max_depth', 'max_features', 'bit', 'depth']
rf_plus_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
rf_plus_g = rf_plus.groupby(rf_plus_group_keys)[rf_plus_score_cols].mean()
rf_plus_g

In [None]:
# Non-null entries per column for each (max_depth, max_features, seed) combination.
rf_plus.groupby(by=['max_depth', 'max_features', 'seed']).count()

In [None]:
# rf_plus_best_hyp = {}
# for d in datasets:
#     queried = rf_plus_g.query(f"dataset_name == '{d}'")
#     d_best_hyp = dict(zip(['dataset_name','model_name', 'max_depth', 'max_features'], list(queried['r2_score_val_true'].idxmax())))
#     d_best_hyp['r2_val'] = queried['r2_score_val_true'].max()
#     del d_best_hyp['dataset_name']
#     del d_best_hyp['model_name']
#     rf_plus_best_hyp[d] = d_best_hyp
# rf_plus_best_hyp

In [None]:
# FIGS runs. Hyperparameters kept: max_rules, max_trees, max_features.
# NOTE(review): this rebinds `figs`, which earlier cells use for the
# per-dataset best-R2 frame — confirm the name reuse is intended.
figs_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'n_epochs', 'max_depth', 'pre_interaction',
    'post_interaction', 'pre_max_features', 'post_max_features',
    'size_interactions',
]
figs = r.loc[r['model_name'].eq('figs')].drop(columns=figs_dropped_cols)

# Mean teacher train/val R2 (2 decimals) for every dataset x hyperparameter combination.
figs_group_keys = ['dataset_name', 'model_name', 'max_rules', 'max_trees', 'max_features', 'bit', 'depth']
figs_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
figs_g = figs.groupby(figs_group_keys)[figs_score_cols].mean().round(2)
figs_g

In [None]:
# Non-null entries per column for each (max_rules, max_trees, max_features, seed) combination.
figs.groupby(by=['max_rules', 'max_trees', 'max_features', 'seed']).count()

In [None]:
# XGBoost runs. Hyperparameter kept: max_depth (max_features is dropped here).
xgboost_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'n_epochs', 'max_rules', 'max_trees',
    'pre_interaction', 'post_interaction', 'pre_max_features',
    'post_max_features', 'size_interactions', 'max_features',
]
xgboost = r.loc[r['model_name'].eq('xgboost')].drop(columns=xgboost_dropped_cols)

# Mean teacher train/val R2 (2 decimals) for every dataset x hyperparameter combination.
xgboost_group_keys = ['dataset_name', 'model_name', 'max_depth', 'bit', 'depth']
xgboost_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
xgboost_g = xgboost.groupby(xgboost_group_keys)[xgboost_score_cols].mean().round(2)
xgboost_g

In [None]:
# Non-null entries per column for each (max_depth, seed) combination.
xgboost.groupby(by=['max_depth', 'seed']).count()

In [None]:
# xgboost_best_hyp = {}
# for d in datasets:
#     queried = xgboost_g.query(f"dataset_name == '{d}'")
#     d_best_hyp = dict(zip(['dataset_name','model_name', 'max_depth'], list(queried['r2_score_val_true'].idxmax())))
#     d_best_hyp['r2_val'] = queried['r2_score_val_true'].max()
#     del d_best_hyp['dataset_name']
#     del d_best_hyp['model_name']
#     xgboost_best_hyp[d] = d_best_hyp
# xgboost_best_hyp

In [None]:
# ResNet runs. Hyperparameter kept: n_epochs.
resnet_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'max_rules', 'max_trees', 'pre_interaction',
    'post_interaction', 'pre_max_features', 'post_max_features',
    'size_interactions', 'max_features', 'max_depth',
]
resnet = r.loc[r['model_name'].eq('resnet')].drop(columns=resnet_dropped_cols)

# Mean teacher train/val R2 for every dataset x hyperparameter combination.
resnet_group_keys = ['dataset_name', 'model_name', 'n_epochs', 'bit', 'depth']
resnet_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
resnet_g = resnet.groupby(resnet_group_keys)[resnet_score_cols].mean()
resnet_g

In [None]:
# Non-null entries per column for each (n_epochs, seed) combination.
resnet.groupby(by=['n_epochs', 'seed']).count()

In [None]:
# resnet_best_hyp = {}
# for d in resnet['dataset_name'].unique():
#     queried = resnet_g.query(f"dataset_name == '{d}'")
#     d_best_hyp = dict(zip(['dataset_name','model_name','n_epochs'], list(queried['r2_score_val_true'].idxmax())))
#     d_best_hyp['r2_val'] = queried['r2_score_val_true'].max()
#     del d_best_hyp['dataset_name']
#     del d_best_hyp['model_name']
#     resnet_best_hyp[d] = d_best_hyp
# resnet_best_hyp

In [None]:
# FT-Transformer runs. Hyperparameter kept: n_epochs.
ft_transformer_dropped_cols = [
    'subsample_frac', 'save_dir', 'use_cache', 'cat_mappings', 'task_type',
    'save_dir_unique', 'gpu', 'max_rules', 'max_trees', 'pre_interaction',
    'post_interaction', 'pre_max_features', 'post_max_features',
    'size_interactions', 'max_features', 'max_depth',
]
ft_transformer = r.loc[r['model_name'].eq('ft_transformer')].drop(columns=ft_transformer_dropped_cols)

# Mean teacher train/val R2 for every dataset x hyperparameter combination.
ft_transformer_group_keys = ['dataset_name', 'model_name', 'n_epochs', 'bit', 'depth']
ft_transformer_score_cols = ['teacher_r2_score_train_true', 'teacher_r2_score_val_true']
ft_transformer_g = ft_transformer.groupby(ft_transformer_group_keys)[ft_transformer_score_cols].mean()
ft_transformer_g

In [None]:
# Non-null entries per column for each (n_epochs, seed) combination.
ft_transformer.groupby(by=['n_epochs', 'seed']).count()

In [None]:
# ft_transformer_best_hyp = {}
# for d in ft_transformer['dataset_name'].unique():
#     queried = ft_transformer_g.query(f"dataset_name == '{d}'")
#     d_best_hyp = dict(zip(['dataset_name','model_name','n_epochs'], list(queried['r2_score_val_true'].idxmax())))
#     d_best_hyp['r2_val'] = queried['r2_score_val_true'].max()
#     del d_best_hyp['dataset_name']
#     del d_best_hyp['model_name']
#     ft_transformer_best_hyp[d] = d_best_hyp
# ft_transformer_best_hyp

In [None]:
def _best_hyperparams_by_dataset(grouped, score_col='teacher_r2_score_val_true'):
    """Per dataset, pick the index entry of ``grouped`` with the highest ``score_col``.

    ``grouped`` is one of the ``*_g`` frames built in the cells above: its index
    levels are ``dataset_name``, ``model_name`` and the grouping keys of that
    model, and ``score_col`` is one of its columns.

    Returns ``{dataset: {level_name: value, ..., 'r2_val': best_score}}``; the
    ``dataset_name`` and ``model_name`` levels are left out of the inner dict,
    every other index level is kept. Datasets with no non-null score are skipped.
    """
    best = {}
    for dataset, rows in grouped.groupby(level='dataset_name'):
        scores = rows[score_col]
        if not scores.notna().any():
            continue
        # idxmax on a MultiIndex-ed Series returns the winning index tuple.
        params = dict(zip(scores.index.names, scores.idxmax()))
        params.pop('dataset_name', None)
        params.pop('model_name', None)
        params['r2_val'] = scores.max()
        best[dataset] = params
    return best


# Built directly from the grouped frames, so this cell runs on a fresh kernel
# (no reliance on *_best_hyp variables, no eval on constructed names).
best_hyp = {
    'random_forest': _best_hyperparams_by_dataset(random_forest_g),
    'rf_plus': _best_hyperparams_by_dataset(rf_plus_g),
    'figs': _best_hyperparams_by_dataset(figs_g),
    'xgboost': _best_hyperparams_by_dataset(xgboost_g),
    'resnet': _best_hyperparams_by_dataset(resnet_g),
    'ft_transformer': _best_hyperparams_by_dataset(ft_transformer_g),
}

In [None]:
import json 

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in Python types."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy booleans, integers, floats and arrays are converted to ``bool``,
        ``int``, ``float`` and (nested) ``list``. Anything else is passed to
        the base class, which raises ``TypeError``.
        """
        # np.bool_ is not a subclass of bool, so the stock encoder rejects it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Write the collected best hyperparameters as JSON (NumPy values go through NpEncoder).
# NOTE(review): absolute, user-specific path — consider a path relative to the repo.
hyperparams_path = '/home/mattyshen/interpretableDistillation/scripts/best_hyperparams/original_hyperparams.json'
with open(hyperparams_path, "w") as json_file:
    json.dump(best_hyp, json_file, cls=NpEncoder)