## Imports 

In [2]:
import os
import pickle
import time
import warnings

import pandas as pd
import sklearn.metrics
from autogluon.tabular import TabularPredictor

warnings.filterwarnings('ignore')

In [3]:
def report(predictor,df_test):
    """Build a per-model, per-class table of one-vs-rest classification metrics.

    For each model name in the fixed list below, class probabilities are
    requested with ``predictor.predict_proba(df_test, model=name)`` and the
    predicted class is the column with the highest probability. Every label
    that occurs in ``df_test['type']`` is then scored as a binary
    "this label vs. the rest" problem.

    Parameters
    ----------
    predictor
        Object exposing ``predict_proba(data, model=name)`` that returns a
        DataFrame with one probability column per class label (in this
        notebook, the fitted ``TabularPredictor``).
    df_test : pandas.DataFrame
        Evaluation data; must contain the ground-truth column ``'type'``.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by ``(model_name, metric)`` with metric one of
        ``'PRE'``, ``'REC'``, ``'F1'``, ``'AUC'``; columns are the 16 labels.
        Labels that never occur in ``df_test['type']`` keep the initial 0.0.
    """
    model_names = [
        'KNeighborsUnif',
        'KNeighborsDist',
        'NeuralNetFastAI',
        'LightGBMXT',
        'LightGBM',
        'RandomForestGini',
        'RandomForestEntr',
        'CatBoost',
        'ExtraTreesGini',
        'ExtraTreesEntr',
        'XGBoost',
        'NeuralNetTorch',
        'LightGBMLarge',
        'WeightedEnsemble_L2'
    ]
    labels = ['ENTP', 'ESFP', 'ISFJ', 'INTJ', 'ISFP', 'ESTP', 'INTP', 'INFJ', 'ESTJ', 'ENFP', 'ISTJ', 'ENTJ', 'INFP', 'ISTP', 'ESFJ', 'ENFJ']
    metric_names = ['PRE', 'REC', 'F1', 'AUC']

    # Same layout as before: (model, metric) rows x label columns, all zeros.
    df_report = pd.DataFrame(
        0.0,
        index=pd.MultiIndex.from_product([model_names, metric_names]),
        columns=labels,
    )

    y = df_test['type']
    # Computed once instead of once per (model, label) pair.
    observed = set(y)
    scored_labels = [label for label in labels if label in observed]

    for model_name in model_names:
        proba = predictor.predict_proba(df_test, model=model_name)
        yhat = proba.idxmax(axis=1)
        for label in scored_labels:
            y_true = (y == label).astype(int)
            y_pred = (yhat == label).astype(int)
            scores = {
                'PRE': sklearn.metrics.precision_score(y_true, y_pred),
                'REC': sklearn.metrics.recall_score(y_true, y_pred),
                'F1': sklearn.metrics.f1_score(y_true, y_pred),
                'AUC': sklearn.metrics.roc_auc_score(y_true, proba[label]),
            }
            for metric, value in scores.items():
                # One .loc call writes straight into df_report. The previous
                # chained form (df_report[label][model_name][metric] = ...)
                # assigns into a temporary and can leave the report at zero.
                df_report.loc[(model_name, metric), label] = value
    return df_report

In [4]:
def fit_predict_save(path,experiments_index):
    """Train a predictor for one scenario and save its metric report as CSV.

    The train/test frames are looked up by ``experiments_index`` in the
    notebook-level ``df_trains_dct`` / ``df_tests_dct``. A ``TabularPredictor``
    is fitted on the training frame, scored with ``report`` on the test frame,
    and the resulting table is written to ``f"{path}{experiments_index}.csv"``.

    Parameters
    ----------
    path : str
        Prefix of the output location, e.g. ``'results/'``. It is concatenated
        with ``experiments_index`` as-is, so include the trailing separator.
    experiments_index : str
        Scenario key such as ``'experiment1/scenario1'``; also used as the
        sub-folder of ``AutogluonModels/`` where the models are stored.

    Returns
    -------
    None
        The report is written to disk and the elapsed time is printed.
    """
    t1 = time.time()

    # Create the output folder up front: training is slow, and a missing
    # directory would otherwise only surface at to_csv, after the fit.
    csv_path = f"{path}{experiments_index}.csv"
    csv_dir = os.path.dirname(csv_path)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    df_train = df_trains_dct[experiments_index]
    df_test = df_tests_dct[experiments_index]

    def tree_criterion_variants():
        # Fresh list per call so 'RF' and 'XT' do not share mutable config objects.
        return [
            {'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}},
            {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}},
            {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}},
        ]

    # verbosity is an integer level; 0 is what the previous `False` evaluated to.
    predictor = TabularPredictor(
        label='type',
        eval_metric='acc',
        path=f"AutogluonModels/{experiments_index}",
        verbosity=0,
    )
    predictor.fit(
        df_train,
        hyperparameters = {
            'NN_TORCH': {},
            'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
            'CAT': {},
            'XGB': {},
            'FASTAI': {},
            'RF': tree_criterion_variants(),
            'XT': tree_criterion_variants(),
            'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
        },
    )
    df_report = report(predictor,df_test)
    df_report.to_csv(csv_path)
    t2 = time.time()
    print(f"{experiments_index} -- completed (time = {(t2-t1)/60:.4f} min)")

### Data 

In [5]:
# Train/test frames keyed by experiment index (e.g. 'experiment1/scenario1');
# fit_predict_save looks them up by that key.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# pickles that were produced by this project.
with open('experiment_setup/df_trains_dct.pickle', mode='rb') as train_pickle:
    df_trains_dct = pickle.load(train_pickle)

with open('experiment_setup/df_tests_dct.pickle', mode='rb') as test_pickle:
    df_tests_dct = pickle.load(test_pickle)

### Experiment

*Experiment1*

In [13]:
# Create the output folder (and its parent `results/`) before the slow fits
# start; unlike the shell `mkdir`, this raises if the folder cannot be created.
os.makedirs('results/experiment1', exist_ok=True)
for scenario in ('scenario1', 'scenario2'):
    fit_predict_save(path='results/', experiments_index=f'experiment1/{scenario}')

*Experiment2*

In [None]:
# The reports are written to results/experiment2/, so that is the folder that
# must exist (the previous command created experiment_setup/experiment2).
os.makedirs('results/experiment2', exist_ok=True)
for scenario in (
    'scenario0a',
    'scenario1a', 'scenario1b', 'scenario1c',
    'scenario2a', 'scenario2b', 'scenario2c',
    'scenario3a', 'scenario3b', 'scenario3c',
    'scenario4a', 'scenario4b', 'scenario4c',
):
    fit_predict_save('results/', f'experiment2/{scenario}')

*Experiment3*

In [None]:
# Create the output folder (and its parent `results/`) before the slow fits
# start; unlike the shell `mkdir`, this raises if the folder cannot be created.
os.makedirs('results/experiment3', exist_ok=True)
for scenario in (
    'scenario0a',
    'scenario1a', 'scenario1b',
    'scenario2a', 'scenario2b',
    'scenario3a', 'scenario3b',
    'scenario4a', 'scenario4b',
    'scenario5a', 'scenario5b',
    'scenario6a', 'scenario6b',
    'scenario7a', 'scenario7b',
):
    fit_predict_save('results/', f'experiment3/{scenario}')