In [1]:
!pip install session_info
!pip install pycaret

Collecting session_info
  Using cached session_info-1.0.0-py3-none-any.whl
Collecting stdlib-list
  Downloading stdlib_list-0.9.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 KB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stdlib-list, session_info
Successfully installed session_info-1.0.0 stdlib-list-0.9.0
Collecting pycaret
  Using cached pycaret-3.0.2-py3-none-any.whl (483 kB)
Collecting category-encoders>=2.4.0
  Using cached category_encoders-2.6.1-py2.py3-none-any.whl (81 kB)
Collecting joblib>=1.2.0
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting lightgbm>=3.0.0
  Using cached lightgbm-3.3.5-py3-none-manylinux1_x86_64.whl (2.0 MB)
Collecting deprecation>=2.1.0
  Using cached deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting schemdraw==0.15
  Using cached schemdraw-0.15-py3-none-any.whl (106 kB)
Collecting imbalanced-learn>=0.8.1
  Using cached imbalanced_learn-0.10.1

#### Import librairies

In [2]:
import pandas as pd
import os
import re

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset , random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

import random

from pycaret.classification import *
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix

import session_info

device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')
session_info.show()

#### Set the random seed for reproducibility

In [3]:
seed = 64
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

#### Get the data

In [4]:
raw_data= pd.read_csv('data/train.csv', low_memory=False)
data_test= pd.read_csv('data/test.csv', low_memory=False)

In [5]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 365 entries, Patient_ID to Type_of_Venom_Allergy_IGE_Venom
dtypes: float64(322), int64(32), object(11)
memory usage: 8.3+ MB


#### Looking which are the targets to predict

In [6]:
missing_cols = set(raw_data.columns) ^ set(data_test.columns)
print(missing_cols)
len(missing_cols)

{'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram', 'Type_of_Food_Allergy_Other', 'Type_of_Food_Allergy_TPO', 'Type_of_Food_Allergy_Egg', 'Venom_Allergy', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Food_Allergy', 'trustii_id', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Food_Allergy_Shellfish', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Tree_Nuts', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Severe_Allergy', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Food_Allergy_Fish', 'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Food_Allergy_Peanut', 'Type_of_Food_Allergy_Oral_Syndrom', 'Allergy_Present', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Other_Legumes', 'Respiratory_Allergy', 'Type_of_Respiratory_Allerg

30

## Data Pre-processing

### Preprocessing for the train set

In [7]:
liste_of_Targets =['Allergy_Present', 'Severe_Allergy', 'Respiratory_Allergy', 'Food_Allergy', 'Venom_Allergy',
                     'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_CONJ', 
                     'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                     'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',
                     'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',
                     'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Other',
                     'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish',
                     'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Mammalian_Milk', 
                     'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Peanut',
                     'Type_of_Food_Allergy_Shellfish', 'Type_of_Food_Allergy_TPO', 'Type_of_Food_Allergy_Tree_Nuts',
                     'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Venom_Allergy_IGE_Venom']
def preprocessing_data(df):
    df = df.drop('Food_Type_0', axis =1)
    df.replace(-1, 0, inplace=True)
    data_noNAN = df.fillna(-1)
    # obtain Targets
    Targets = data_noNAN.loc[:,liste_of_Targets]
    # filter feautures
    X1=data_noNAN.loc[:, ['Chip_Type','Age','Gender','French_Residence_Department','Blood_Month_sample']]
    X= data_noNAN.iloc[:, 8:-29]
    data = pd.concat( [X1, X] , axis=1)
    # handle the 'Treatment_of_rhinitis' feature
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].astype(str)
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].str.replace('.0', '', regex=True)
    
    ##  Get_dummies of the 'object' type columns
    
    columns_to_encode = ['Chip_Type', 'French_Residence_Department', 'French_Region',
         'Treatment_of_athsma', 'Age_of_onsets',
       'General_cofactors', 'Treatment_of_atopic_dematitis','Treatment_of_rhinitis']
    
    ### Split the columns using multiple delimiters and create dummy columns
    dummy_dfs = []
    for col in columns_to_encode:
        # Split the data in the column that use  delimiters
        data[col] = data[col].astype(str)
        data[col] = data[col].apply(lambda x: [i.strip() for i in re.split('[,.]', x)])

        # Create dummy columns
        dummy_df = pd.get_dummies(data[col].apply(pd.Series).stack(), prefix=f"{col}", prefix_sep='_').groupby(level=0).sum()
        dummy_dfs.append(dummy_df)

    ### Concatenate the original DataFrame with the dummy columns
    df_final = pd.concat([data] + dummy_dfs, axis=1)

    ### Drop the original columns from the final dataset
    df_final.drop(columns=columns_to_encode, inplace=True)
    
    # Converting all values into 'float16' type
    encode_data = df_final.astype('float16')
    print(encode_data.info())
    
    return encode_data,Targets

In [8]:
encode_data,Targets = preprocessing_data(raw_data)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 467 entries, Age to Treatment_of_rhinitis_9
dtypes: float16(467)
memory usage: 2.7 MB
None


### Preprocessing for the test set

In [9]:
def preprocessing_data_test(df):
    df = df.drop('Food_Type_0', axis =1)
    df.replace(-1, 0, inplace=True)
    data_test_noNAN = df.fillna(-1)
    # filter feautures
    X1=data_test_noNAN.loc[:, ['Chip_Type']]
    X= data_test_noNAN.iloc[:, 5:]
    data = pd.concat( [X1, X] , axis=1)
    # handle the 'Treatment_of_rhinitis' feature
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].astype(str)
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].str.replace('.0', '', regex=True)
    # handle 'Age_of_onsets' which don't have the same format in data test and train
    data['Age_of_onsets'] = data['Age_of_onsets'].astype(str)

    
    ##  Get_dummies of the 'object' type columns
    
    columns_to_encode = ['Chip_Type', 'French_Residence_Department', 'French_Region',
         'Treatment_of_athsma', 'Age_of_onsets',
       'General_cofactors', 'Treatment_of_atopic_dematitis','Treatment_of_rhinitis']
    
    ### Split the columns using multiple delimiters and create dummy columns
    dummy_dfs = []
    for col in columns_to_encode:
        # Split the data in the column that use  delimiters
        data[col] = data[col].astype(str)
        data[col] = data[col].apply(lambda x: [i.strip() for i in re.split('[,.]', x)])

        # Create dummy columns
        dummy_df = pd.get_dummies(data[col].apply(pd.Series).stack(), prefix=f"{col}", prefix_sep='_').groupby(level=0).sum()
        dummy_dfs.append(dummy_df)

    ### Concatenate the original DataFrame with the dummy columns
    df_final = pd.concat([data] + dummy_dfs, axis=1)

    ### Drop the original columns from the final dataset
    df_final.drop(columns=columns_to_encode, inplace=True)
    
    # Converting all values into 'float16' type
    encode_data = df_final.astype('float16')
    print(encode_data.info())
    
    return encode_data


In [10]:
encode_data_test = preprocessing_data_test(data_test)
missing_cols = set(encode_data.columns) ^ set(encode_data_test.columns)
print(missing_cols)
len(missing_cols)
encode_data_test = encode_data_test.reindex(columns=encode_data.columns, fill_value=0).astype('float16')
encode_data_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Columns: 444 entries, Age to Treatment_of_rhinitis_9
dtypes: float16(444)
memory usage: 508.3 KB
None
{'Treatment_of_athsma_8', 'Treatment_of_atopic_dematitis_7', 'French_Residence_Department_deptJJJ', 'French_Residence_Department_deptOOO', 'French_Residence_Department_deptIII', 'French_Residence_Department_deptTTT', 'French_Residence_Department_deptRRR', 'French_Residence_Department_deptU', 'French_Residence_Department_deptCCCC', 'French_Region_regionO', 'French_Residence_Department_deptHHH', 'French_Residence_Department_deptNNN', 'French_Residence_Department_deptDD', 'French_Residence_Department_deptQQQ', 'French_Residence_Department_deptPPP', 'French_Residence_Department_deptK', 'French_Residence_Department_deptUU', 'French_Residence_Department_deptW', 'General_cofactors_11', 'French_Residence_Department_deptMMM', 'French_Residence_Department_deptDDD', 'French_Residence_Department_deptAAAA', 'French_Region_regio

#### Pycaret part

In [11]:
import os

def obtain_all_methods_imbl (encode_data, Y, list_of_metods):
    
    liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']
    for method in list_of_metods:
        print(method)
        
        folder_path = f'Test_Imbl_{method}_tuned'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        
        for column in Y.columns:
            if column not in liste_column_monovalue :
                rows_with_9 = Y[column].isin([9])
                Targets_without_9 = Y[column][~rows_with_9].reset_index(drop=True)
                X = encode_data[~rows_with_9].reset_index(drop=True)

                dataset= pd.concat([X,Targets_without_9], axis = 1)
                model = setup(data= dataset, target= column, train_size=0.85, fix_imbalance=True, fix_imbalance_method=method)
                best = compare_models(sort = 'F1', include = ['rf', 'et', 'lightgbm','ada'])
                tuned_best= tune_model(best,n_iter= 50, optimize = 'F1',fold = 10)
                file_name = f"{column}_ETC_brutforce_imb_binary"
                file_path = os.path.join(folder_path, file_name)
                save_model(tuned_best, file_path)
            else:
                continue

In [None]:
List_methods_imbl=['RandomOverSampler','SMOTE','BorderlineSMOTE']  
obtain_all_methods_imbl (encode_data, Targets, List_methods_imbl) 

RandomOverSampler


Unnamed: 0,Description,Value
0,Session id,7951
1,Target,Allergy_Present
2,Target type,Binary
3,Original data shape,"(1759, 468)"
4,Transformed data shape,"(2590, 468)"
5,Transformed train set shape,"(2326, 468)"
6,Transformed test set shape,"(264, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8474,0.8875,0.9372,0.8759,0.9052,0.5151,0.5263,0.454
rf,Random Forest Classifier,0.8421,0.8779,0.914,0.8871,0.9001,0.5228,0.526,0.795
lightgbm,Light Gradient Boosting Machine,0.8354,0.887,0.9053,0.8859,0.8953,0.5096,0.5119,0.601
ada,Ada Boost Classifier,0.7886,0.8584,0.7893,0.9282,0.8528,0.486,0.5068,0.433


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.78,0.5,1.0,0.78,0.8764,0.0,0.0
1,0.78,0.5,1.0,0.78,0.8764,0.0,0.0
2,0.78,0.5,1.0,0.78,0.8764,0.0,0.0
3,0.7733,0.5,1.0,0.7733,0.8722,0.0,0.0
4,0.7733,0.5,1.0,0.7733,0.8722,0.0,0.0
5,0.7785,0.5,1.0,0.7785,0.8755,0.0,0.0
6,0.7785,0.5,1.0,0.7785,0.8755,0.0,0.0
7,0.7785,0.5,1.0,0.7785,0.8755,0.0,0.0
8,0.7785,0.5,1.0,0.7785,0.8755,0.0,0.0
9,0.7785,0.5,1.0,0.7785,0.8755,0.0,0.0


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2195
1,Target,Severe_Allergy
2,Target type,Binary
3,Original data shape,"(1670, 468)"
4,Transformed data shape,"(2021, 468)"
5,Transformed train set shape,"(1770, 468)"
6,Transformed test set shape,"(251, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8161,0.8904,0.8465,0.8572,0.8511,0.6105,0.6122,0.454
et,Extra Trees Classifier,0.8083,0.8898,0.8646,0.8346,0.8488,0.5871,0.5892,0.45
rf,Random Forest Classifier,0.7991,0.8804,0.8555,0.8299,0.8408,0.5682,0.5733,0.5
ada,Ada Boost Classifier,0.7674,0.8506,0.7617,0.8499,0.8026,0.5218,0.5274,0.42


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.831,0.9064,0.8864,0.8478,0.8667,0.6362,0.6374
1,0.831,0.8824,0.8977,0.8404,0.8681,0.6335,0.6362
2,0.831,0.9036,0.8864,0.8478,0.8667,0.6362,0.6374
3,0.7676,0.8723,0.8182,0.809,0.8136,0.5052,0.5052
4,0.8028,0.8709,0.7865,0.8861,0.8333,0.594,0.6004
5,0.8451,0.9317,0.8652,0.8851,0.875,0.6714,0.6717
6,0.8099,0.9105,0.8427,0.8523,0.8475,0.5951,0.5952
7,0.8028,0.8876,0.7978,0.8765,0.8353,0.591,0.5951
8,0.7817,0.8857,0.7753,0.8625,0.8166,0.5489,0.5537
9,0.8227,0.8864,0.8523,0.8621,0.8571,0.6235,0.6236


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6768
1,Target,Respiratory_Allergy
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2193, 468)"
5,Transformed train set shape,"(1966, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9109,0.9454,0.9461,0.9389,0.9422,0.7478,0.7502,0.545
et,Extra Trees Classifier,0.8984,0.9413,0.9623,0.911,0.9357,0.6937,0.7029,0.462
rf,Random Forest Classifier,0.8977,0.9261,0.9664,0.9066,0.9355,0.689,0.6985,0.514
ada,Ada Boost Classifier,0.857,0.9044,0.8789,0.9313,0.904,0.6242,0.6295,0.45


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.9582,0.9596,0.95,0.9548,0.7969,0.7971
1,0.9219,0.9551,0.9798,0.9238,0.951,0.7595,0.7675
2,0.8828,0.9209,0.9293,0.92,0.9246,0.6615,0.6617
3,0.9297,0.9446,0.9694,0.9406,0.9548,0.797,0.7989
4,0.9062,0.9289,0.949,0.93,0.9394,0.7326,0.7333
5,0.9062,0.9241,0.9388,0.9388,0.9388,0.7388,0.7388
6,0.9297,0.95,0.9592,0.9495,0.9543,0.8018,0.802
7,0.9297,0.9789,0.9694,0.9406,0.9548,0.797,0.7989
8,0.8906,0.931,0.898,0.9565,0.9263,0.715,0.7204
9,0.8906,0.9466,0.9388,0.92,0.9293,0.688,0.6887


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,8934
1,Target,Food_Allergy
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(1760, 468)"
5,Transformed train set shape,"(1518, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.7956,0.8698,0.7907,0.76,0.7742,0.5878,0.5893,0.522
rf,Random Forest Classifier,0.7934,0.8668,0.7823,0.76,0.7706,0.5828,0.5835,0.54
et,Extra Trees Classifier,0.7898,0.8724,0.7577,0.7691,0.7618,0.5739,0.5757,0.505
ada,Ada Boost Classifier,0.7685,0.8389,0.746,0.7386,0.7412,0.5319,0.5332,0.484


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7299,0.8186,0.7213,0.6875,0.704,0.4559,0.4564
1,0.8321,0.9105,0.8361,0.7969,0.816,0.6618,0.6624
2,0.7299,0.8182,0.7049,0.6935,0.6992,0.4542,0.4542
3,0.8321,0.9098,0.8361,0.7969,0.816,0.6618,0.6624
4,0.8248,0.8889,0.8525,0.7761,0.8125,0.6488,0.6513
5,0.8309,0.9024,0.8833,0.7681,0.8217,0.6623,0.6682
6,0.7868,0.8721,0.8,0.7385,0.768,0.5713,0.5729
7,0.875,0.9285,0.8833,0.8413,0.8618,0.7478,0.7486
8,0.8382,0.8809,0.8333,0.8065,0.8197,0.6731,0.6734
9,0.7941,0.8529,0.8197,0.7463,0.7813,0.5876,0.5899


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,426
1,Target,Venom_Allergy
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5473, 468)"
5,Transformed train set shape,"(5024, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9681,0.733,0.2833,0.1128,0.1549,0.143,0.1612,0.802
lightgbm,Light Gradient Boosting Machine,0.9854,0.7118,0.0333,0.0333,0.0333,0.0292,0.029,0.839
rf,Random Forest Classifier,0.9886,0.7428,0.0,0.0,0.0,-0.0005,-0.0006,0.681
et,Extra Trees Classifier,0.989,0.7226,0.0,0.0,0.0,0.0,0.0,0.652


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9528,0.5813,0.5,0.0833,0.1429,0.1311,0.1901
1,0.9567,0.8532,0.5,0.0909,0.1538,0.1424,0.1999
2,0.9606,0.9595,0.6667,0.1818,0.2857,0.2722,0.3348
3,0.9528,0.9336,0.3333,0.0909,0.1429,0.1266,0.1558
4,0.9685,0.8712,0.6667,0.2222,0.3333,0.3213,0.3733
5,0.9449,0.7636,0.3333,0.0769,0.125,0.1079,0.14
6,0.9803,0.5485,0.3333,0.25,0.2857,0.2759,0.2789
7,0.9685,0.8592,0.6667,0.2222,0.3333,0.3213,0.3733
8,0.9646,0.7185,0.3333,0.125,0.1818,0.1675,0.1889
9,0.9685,0.8811,0.3333,0.1429,0.2,0.1865,0.2042


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3484
1,Target,Type_of_Respiratory_Allergy_ARIA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1575, 468)"
5,Transformed train set shape,"(1348, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8789,0.9454,0.8692,0.8976,0.8827,0.7576,0.7589,0.526
et,Extra Trees Classifier,0.8672,0.9375,0.8678,0.8797,0.8723,0.7338,0.7363,0.549
rf,Random Forest Classifier,0.8398,0.9265,0.8663,0.8372,0.8504,0.678,0.6806,0.57
ada,Ada Boost Classifier,0.8367,0.9115,0.841,0.8488,0.8438,0.6727,0.6744,0.506


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.875,0.9418,0.8955,0.8696,0.8824,0.7491,0.7494
1,0.8281,0.9254,0.7313,0.9245,0.8167,0.659,0.6751
2,0.8828,0.9618,0.8806,0.8939,0.8872,0.7653,0.7654
3,0.8281,0.9161,0.806,0.8571,0.8308,0.6565,0.6578
4,0.8281,0.9011,0.791,0.8689,0.8281,0.657,0.6599
5,0.8906,0.9347,0.8657,0.9206,0.8923,0.7814,0.7829
6,0.9375,0.9797,0.9118,0.9688,0.9394,0.875,0.8767
7,0.9375,0.9637,0.9412,0.9412,0.9412,0.8745,0.8745
8,0.8516,0.9201,0.8235,0.8889,0.855,0.7034,0.7056
9,0.8906,0.9444,0.8529,0.9355,0.8923,0.7817,0.7851


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4551
1,Target,Type_of_Respiratory_Allergy_CONJ
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2121, 468)"
5,Transformed train set shape,"(1894, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.7602,0.7664,0.5012,0.5407,0.5175,0.359,0.3609,0.622
ada,Ada Boost Classifier,0.6906,0.7129,0.5889,0.4303,0.4948,0.2802,0.2894,0.572
rf,Random Forest Classifier,0.7719,0.7729,0.3842,0.6013,0.4664,0.3303,0.3451,0.659
et,Extra Trees Classifier,0.7812,0.7958,0.3389,0.6654,0.4432,0.3256,0.3568,0.614


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7969,0.8069,0.7353,0.5952,0.6579,0.5157,0.5215
1,0.7031,0.7744,0.6471,0.4583,0.5366,0.3274,0.338
2,0.75,0.7544,0.6176,0.525,0.5676,0.3934,0.3959
3,0.8359,0.8606,0.8182,0.6429,0.72,0.6063,0.6151
4,0.8281,0.8743,0.7576,0.641,0.6944,0.576,0.5799
5,0.7266,0.7828,0.6061,0.4762,0.5333,0.3439,0.3489
6,0.7031,0.7694,0.6667,0.449,0.5366,0.3302,0.3442
7,0.6875,0.7247,0.4848,0.4103,0.4444,0.2291,0.2307
8,0.7344,0.7783,0.6364,0.4884,0.5526,0.3684,0.3749
9,0.75,0.7879,0.697,0.5111,0.5897,0.416,0.4264


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3449
1,Target,Type_of_Respiratory_Allergy_GINA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1509, 468)"
5,Transformed train set shape,"(1282, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8789,0.9389,0.8908,0.8712,0.8804,0.7578,0.7591,0.635
lightgbm,Light Gradient Boosting Machine,0.8688,0.9398,0.8596,0.8767,0.8676,0.7375,0.7383,0.639
ada,Ada Boost Classifier,0.8516,0.9211,0.8534,0.8521,0.8517,0.7031,0.7049,0.606
rf,Random Forest Classifier,0.8484,0.9251,0.855,0.8456,0.8495,0.6969,0.6983,0.651


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.875,0.9524,0.8923,0.8657,0.8788,0.7498,0.7502
1,0.8672,0.9403,0.875,0.8615,0.8682,0.7344,0.7345
2,0.875,0.9573,0.8438,0.9,0.871,0.75,0.7515
3,0.8672,0.9526,0.8906,0.8507,0.8702,0.7344,0.7352
4,0.8984,0.9456,0.9062,0.8923,0.8992,0.7969,0.797
5,0.8984,0.9775,0.9844,0.84,0.9065,0.7969,0.8089
6,0.8672,0.9404,0.8906,0.8507,0.8702,0.7344,0.7352
7,0.8594,0.9272,0.8906,0.8382,0.8636,0.7188,0.7202
8,0.8906,0.9496,0.9531,0.8472,0.8971,0.7812,0.7874
9,0.8906,0.9307,0.8438,0.931,0.8852,0.7812,0.7847


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2565
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Gram
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1567, 468)"
5,Transformed train set shape,"(1340, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9406,0.9825,0.9393,0.9371,0.9378,0.881,0.8817,0.682
ada,Ada Boost Classifier,0.9109,0.9615,0.923,0.8952,0.9078,0.8217,0.824,0.642
rf,Random Forest Classifier,0.8914,0.9583,0.9213,0.8621,0.8899,0.783,0.7863,0.704
et,Extra Trees Classifier,0.8562,0.9333,0.8672,0.8416,0.8521,0.7124,0.7164,0.667


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.9741,0.9344,0.9194,0.9268,0.8592,0.8593
1,0.9297,0.9807,0.918,0.9333,0.9256,0.859,0.8591
2,0.9531,0.9924,0.9672,0.9365,0.9516,0.9062,0.9066
3,0.9531,0.979,0.9508,0.9508,0.9508,0.906,0.906
4,0.9531,0.9902,0.9836,0.9231,0.9524,0.9063,0.9081
5,0.9453,0.9936,0.9672,0.9219,0.944,0.8906,0.8916
6,0.9219,0.9839,0.9672,0.8806,0.9219,0.8441,0.8478
7,0.9219,0.9704,0.9344,0.9048,0.9194,0.8436,0.844
8,0.9297,0.9829,0.918,0.9333,0.9256,0.859,0.8591
9,0.9531,0.9878,0.918,0.9825,0.9492,0.9058,0.9076


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1457
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Herb
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1963, 468)"
5,Transformed train set shape,"(1736, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9359,0.9858,0.9102,0.8962,0.9013,0.8539,0.8558,0.792
ada,Ada Boost Classifier,0.9016,0.9485,0.8615,0.8396,0.8495,0.7764,0.7775,0.699
rf,Random Forest Classifier,0.8812,0.9478,0.8639,0.7921,0.8236,0.7347,0.7393,0.764
et,Extra Trees Classifier,0.8391,0.9211,0.6797,0.7978,0.7279,0.6156,0.624,0.728


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.9776,0.9286,0.8667,0.8966,0.8434,0.8445
1,0.9297,0.972,0.9762,0.8367,0.9011,0.8471,0.8531
2,0.9062,0.9734,0.9268,0.8085,0.8636,0.7927,0.797
3,0.9688,0.9938,0.9756,0.9302,0.9524,0.9291,0.9297
4,0.9688,0.9902,0.9756,0.9302,0.9524,0.9291,0.9297
5,0.9375,0.9804,0.9756,0.8511,0.9091,0.8618,0.8665
6,0.8906,0.9605,0.8537,0.814,0.8333,0.752,0.7525
7,0.9219,0.972,0.9024,0.8605,0.881,0.8229,0.8234
8,0.9453,0.9815,0.878,0.9474,0.9114,0.8719,0.8732
9,0.9688,0.9919,0.9268,0.9744,0.95,0.9273,0.9279


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6977
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Tree
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1591, 468)"
5,Transformed train set shape,"(1364, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9125,0.9691,0.9097,0.9037,0.9064,0.8243,0.8248,0.815
rf,Random Forest Classifier,0.9039,0.9519,0.9281,0.8746,0.9003,0.8077,0.8095,0.801
ada,Ada Boost Classifier,0.8711,0.9241,0.8579,0.8679,0.8614,0.741,0.7432,0.757
et,Extra Trees Classifier,0.8633,0.9415,0.8762,0.8403,0.8566,0.7261,0.7287,0.775


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9375,0.9756,0.9492,0.918,0.9333,0.8745,0.875
1,0.9375,0.9598,0.9831,0.8923,0.9355,0.8752,0.879
2,0.8984,0.9678,0.8833,0.8983,0.8908,0.7959,0.796
3,0.9219,0.9809,0.9833,0.8676,0.9219,0.8444,0.851
4,0.9219,0.9681,0.9333,0.9032,0.918,0.8434,0.8439
5,0.8828,0.9602,0.9333,0.8358,0.8819,0.7663,0.7709
6,0.9219,0.9837,0.95,0.8906,0.9194,0.8438,0.8454
7,0.8984,0.9544,0.9,0.8852,0.8926,0.7963,0.7964
8,0.9219,0.9643,0.95,0.8906,0.9194,0.8438,0.8454
9,0.9062,0.9759,0.9667,0.8529,0.9062,0.8132,0.8196


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,7359
1,Target,Type_of_Respiratory_Allergy_IGE_Dander_Animals
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1665, 468)"
5,Transformed train set shape,"(1438, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9312,0.9787,0.918,0.9256,0.9214,0.8603,0.8608,0.827
rf,Random Forest Classifier,0.8891,0.9548,0.9073,0.8533,0.878,0.7766,0.7802,0.837
ada,Ada Boost Classifier,0.8883,0.9406,0.8716,0.8738,0.8725,0.7731,0.7734,0.809
et,Extra Trees Classifier,0.8188,0.9094,0.8039,0.7887,0.7954,0.6328,0.6341,0.818


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.973,0.9286,0.9123,0.9204,0.8574,0.8575
1,0.9453,0.9881,0.9286,0.9455,0.9369,0.8887,0.8888
2,0.9062,0.9683,0.8929,0.8929,0.8929,0.8095,0.8095
3,0.9219,0.9883,0.9286,0.8966,0.9123,0.8419,0.8423
4,0.9297,0.9891,0.8571,0.9796,0.9143,0.8551,0.8606
5,0.9141,0.9603,0.875,0.9245,0.8991,0.8244,0.8253
6,0.9688,0.9913,0.9464,0.9815,0.9636,0.9363,0.9367
7,0.9375,0.9777,0.8929,0.9615,0.9259,0.872,0.8738
8,0.9453,0.9764,0.9643,0.9153,0.9391,0.8895,0.8905
9,0.9062,0.9602,0.9474,0.8571,0.9,0.8122,0.8158


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3398
1,Target,Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1631, 468)"
5,Transformed train set shape,"(1404, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9234,0.975,0.9049,0.925,0.9142,0.8451,0.8462,0.914
rf,Random Forest Classifier,0.8891,0.9559,0.9049,0.8584,0.8803,0.7771,0.7792,0.865
ada,Ada Boost Classifier,0.875,0.9281,0.853,0.8699,0.8606,0.7473,0.7485,0.862
et,Extra Trees Classifier,0.8516,0.9283,0.8253,0.8463,0.8341,0.6999,0.7022,0.896


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9531,0.9778,0.9483,0.9483,0.9483,0.9054,0.9054
1,0.9219,0.9554,0.9138,0.9138,0.9138,0.8424,0.8424
2,0.9141,0.9441,0.931,0.8852,0.9076,0.8274,0.8283
3,0.9141,0.9672,0.9138,0.8983,0.906,0.8269,0.827
4,0.9375,0.9798,0.931,0.931,0.931,0.8739,0.8739
5,0.9062,0.951,0.8276,0.96,0.8889,0.8086,0.8152
6,0.9219,0.9761,0.9138,0.9138,0.9138,0.8424,0.8424
7,0.8984,0.965,0.8966,0.8814,0.8889,0.7954,0.7955
8,0.9297,0.9854,0.9474,0.9,0.9231,0.8584,0.8594
9,0.9453,0.979,0.9123,0.963,0.9369,0.8887,0.8897


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,553
1,Target,Type_of_Respiratory_Allergy_IGE_Molds_Yeast
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2207, 468)"
5,Transformed train set shape,"(1980, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9719,0.9919,0.9379,0.9398,0.9378,0.9196,0.9205,1.006
ada,Ada Boost Classifier,0.9484,0.9686,0.9,0.8779,0.8867,0.8534,0.8552,0.945
rf,Random Forest Classifier,0.9055,0.9561,0.7207,0.8438,0.7737,0.7148,0.7208,0.958
et,Extra Trees Classifier,0.8367,0.8993,0.3552,0.833,0.4939,0.4155,0.4708,0.913


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9922,0.9986,0.9655,1.0,0.9825,0.9774,0.9777
1,0.9688,0.9934,0.8966,0.963,0.9286,0.9086,0.9096
2,0.9844,0.9822,0.9655,0.9655,0.9655,0.9554,0.9554
3,0.9531,0.9854,0.931,0.871,0.9,0.8694,0.8703
4,0.9766,0.9979,0.9655,0.9333,0.9492,0.9339,0.9342
5,0.9844,0.9892,1.0,0.9355,0.9667,0.9565,0.9574
6,0.9609,0.9728,0.9655,0.875,0.918,0.8925,0.8943
7,0.9531,0.9808,0.8621,0.9259,0.8929,0.8629,0.8638
8,0.9688,0.9885,0.8966,0.963,0.9286,0.9086,0.9096
9,0.9844,0.999,0.931,1.0,0.9643,0.9543,0.9553


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1520
1,Target,Type_of_Food_Allergy_Aromatics
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2914, 468)"
5,Transformed train set shape,"(2672, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9275,0.6873,0.2167,0.069,0.1028,0.0744,0.0896,0.99
rf,Random Forest Classifier,0.9773,0.7835,0.0,0.0,0.0,-0.0022,-0.0026,1.027
et,Extra Trees Classifier,0.978,0.767,0.0,0.0,0.0,-0.0011,-0.0013,0.972
lightgbm,Light Gradient Boosting Machine,0.9744,0.7086,0.0,0.0,0.0,-0.0056,-0.0061,1.03


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9489,0.7388,0.6667,0.25,0.3636,0.3427,0.3881
1,0.9343,0.8234,0.6667,0.2,0.3077,0.2836,0.3415
2,0.9343,0.7065,0.0,0.0,0.0,-0.0301,-0.032
3,0.927,0.7363,0.3333,0.1111,0.1667,0.1384,0.1616
4,0.9124,0.709,0.0,0.0,0.0,-0.034,-0.0397
5,0.9118,0.8694,0.0,0.0,0.0,-0.0251,-0.0344
6,0.9191,0.5013,0.0,0.0,0.0,-0.0331,-0.0375
7,0.8971,0.6015,0.3333,0.0769,0.125,0.0925,0.1214
8,0.9485,0.6441,0.3333,0.1667,0.2222,0.1987,0.2115
9,0.9265,0.792,0.6667,0.1818,0.2857,0.2601,0.3227


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6751
1,Target,Type_of_Food_Allergy_Egg
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2900, 468)"
5,Transformed train set shape,"(2658, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9495,0.7465,0.275,0.1872,0.2019,0.1794,0.1926,1.07
lightgbm,Light Gradient Boosting Machine,0.9663,0.8547,0.05,0.1333,0.0686,0.0572,0.067,1.089
rf,Random Forest Classifier,0.9722,0.8558,0.025,0.1,0.04,0.0361,0.0458,1.053
et,Extra Trees Classifier,0.9729,0.873,0.0,0.0,0.0,-0.0012,-0.0015,1.029


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9197,0.8327,0.5,0.1818,0.2667,0.2339,0.2678
1,0.9562,0.9737,0.75,0.375,0.5,0.4797,0.5115
2,0.8978,0.9305,0.75,0.1875,0.3,0.2657,0.3419
3,0.9124,0.7312,0.5,0.1667,0.25,0.2156,0.253
4,0.9343,0.7331,0.25,0.1429,0.1818,0.1502,0.1567
5,0.9044,0.8872,0.0,0.0,0.0,-0.0351,-0.0423
6,0.9412,0.9048,0.6667,0.2222,0.3333,0.3105,0.3628
7,0.9706,0.9048,0.6667,0.4,0.5,0.4858,0.5027
8,0.9632,0.9211,0.3333,0.25,0.2857,0.2672,0.2702
9,0.9412,0.8939,0.5,0.25,0.3333,0.3061,0.3264


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,237
1,Target,Type_of_Food_Allergy_Fish
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2908, 468)"
5,Transformed train set shape,"(2666, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9553,0.7942,0.2167,0.1679,0.1787,0.1574,0.1636,1.141
rf,Random Forest Classifier,0.9766,0.8859,0.0,0.0,0.0,0.0,0.0,1.098
et,Extra Trees Classifier,0.9766,0.9111,0.0,0.0,0.0,0.0,0.0,1.073
lightgbm,Light Gradient Boosting Machine,0.9722,0.8022,0.0,0.0,0.0,-0.0051,-0.0053,1.098


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9489,0.7214,0.0,0.0,0.0,-0.0257,-0.0259
1,0.9708,0.7139,0.3333,0.3333,0.3333,0.3184,0.3184
2,0.9635,0.9254,0.3333,0.25,0.2857,0.2674,0.2703
3,0.9562,0.7613,0.25,0.25,0.25,0.2274,0.2274
4,0.9708,0.6673,0.25,0.5,0.3333,0.3201,0.3404
5,0.9338,0.3659,0.0,0.0,0.0,-0.0303,-0.0323
6,0.9485,0.6967,0.0,0.0,0.0,-0.0259,-0.0261
7,0.9485,0.8672,0.3333,0.1667,0.2222,0.1987,0.2115
8,0.9706,0.7669,0.3333,0.3333,0.3333,0.3183,0.3183
9,0.9779,0.787,0.0,0.0,0.0,0.0,0.0


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1195
1,Target,Type_of_Food_Allergy_Fruits_and_Vegetables
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2808, 468)"
5,Transformed train set shape,"(2566, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.8557,0.6962,0.3528,0.1714,0.229,0.1609,0.1742,1.167
rf,Random Forest Classifier,0.9407,0.8169,0.1222,0.6333,0.1986,0.1806,0.2518,1.186
lightgbm,Light Gradient Boosting Machine,0.9297,0.795,0.0625,0.2,0.0945,0.0698,0.0824,1.174
et,Extra Trees Classifier,0.9399,0.8549,0.0,0.0,0.0,0.0,0.0,1.117


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8832,0.7694,0.25,0.1667,0.2,0.1397,0.1431
1,0.9051,0.7578,0.375,0.2727,0.3158,0.2662,0.2701
2,0.8759,0.53,0.375,0.2,0.2609,0.1999,0.2118
3,0.8613,0.4401,0.2222,0.1429,0.1739,0.1021,0.1051
4,0.8613,0.7769,0.4444,0.2222,0.2963,0.2287,0.2457
5,0.8603,0.4482,0.0,0.0,0.0,-0.0731,-0.0742
6,0.8897,0.7734,0.625,0.2941,0.4,0.3478,0.378
7,0.8309,0.7861,0.5,0.1739,0.2581,0.1871,0.2207
8,0.8824,0.7197,0.5,0.25,0.3333,0.2766,0.2967
9,0.8529,0.709,0.375,0.1667,0.2308,0.1626,0.179


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3010
1,Target,Type_of_Food_Allergy_Mammalian_Milk
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2930, 468)"
5,Transformed train set shape,"(2688, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.967,0.7956,0.15,0.065,0.0905,0.0754,0.0831,1.183
lightgbm,Light Gradient Boosting Machine,0.9773,0.7417,0.05,0.05,0.05,0.0438,0.0435,1.222
rf,Random Forest Classifier,0.9846,0.8851,0.0,0.0,0.0,0.0,0.0,1.191
et,Extra Trees Classifier,0.9846,0.8644,0.0,0.0,0.0,0.0,0.0,1.157


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.927,0.8444,0.0,0.0,0.0,-0.0239,-0.0303
1,0.9416,0.8444,0.5,0.125,0.2,0.1809,0.2292
2,0.9051,0.7037,0.5,0.0769,0.1333,0.1108,0.1683
3,0.8905,0.7852,0.5,0.0667,0.1176,0.0943,0.1522
4,0.927,0.8507,0.3333,0.1111,0.1667,0.1384,0.1616
5,0.9044,0.9104,0.5,0.0769,0.1333,0.1107,0.168
6,0.8971,0.903,1.0,0.125,0.2222,0.2013,0.3346
7,0.9338,0.7948,0.5,0.1111,0.1818,0.1616,0.2132
8,0.9044,0.9403,0.5,0.0769,0.1333,0.1107,0.168
9,0.9338,0.9627,1.0,0.1818,0.3077,0.29,0.4118


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1877
1,Target,Type_of_Food_Allergy_Oral_Syndrom
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2788, 468)"
5,Transformed train set shape,"(2546, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.8382,0.8181,0.5533,0.238,0.3257,0.2539,0.2856,1.285
lightgbm,Light Gradient Boosting Machine,0.9297,0.8498,0.1867,0.465,0.2503,0.2222,0.2542,1.239
et,Extra Trees Classifier,0.9348,0.8724,0.0656,0.5,0.1148,0.1051,0.1684,1.267
rf,Random Forest Classifier,0.9341,0.8482,0.0544,0.4,0.0948,0.0862,0.136,1.25


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8978,0.8915,0.5556,0.3333,0.4167,0.3645,0.3788
1,0.8905,0.7144,0.3333,0.25,0.2857,0.2277,0.2305
2,0.8467,0.7969,0.4444,0.2,0.2759,0.2037,0.2241
3,0.8759,0.8803,0.4,0.2667,0.32,0.2547,0.2611
4,0.8905,0.8079,0.5,0.3333,0.4,0.3424,0.3509
5,0.9044,0.8583,0.3333,0.3,0.3158,0.2646,0.265
6,0.9044,0.8154,0.4444,0.3333,0.381,0.3303,0.3343
7,0.8971,0.7988,0.3333,0.2727,0.3,0.245,0.2465
8,0.8971,0.8101,0.4444,0.3077,0.3636,0.3096,0.3158
9,0.8897,0.783,0.3333,0.25,0.2857,0.2273,0.23


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,899
1,Target,Type_of_Food_Allergy_Other_Legumes
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2888, 468)"
5,Transformed train set shape,"(2646, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9392,0.807,0.295,0.1862,0.222,0.1927,0.2013,1.336
lightgbm,Light Gradient Boosting Machine,0.9663,0.8162,0.05,0.0833,0.0619,0.0555,0.0574,1.338
rf,Random Forest Classifier,0.9692,0.8083,0.025,0.1,0.04,0.0381,0.0479,1.289
et,Extra Trees Classifier,0.9692,0.8364,0.0,0.0,0.0,0.0,0.0,1.316


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9343,0.8271,0.5,0.2222,0.3077,0.2785,0.304
1,0.9343,0.938,0.5,0.2222,0.3077,0.2785,0.304
2,0.9489,0.9117,0.5,0.2857,0.3636,0.3391,0.3536
3,0.8978,0.5167,0.0,0.0,0.0,-0.0492,-0.0516
4,0.9489,0.703,0.2,0.25,0.2222,0.1961,0.1975
5,0.9338,0.6989,0.5,0.2222,0.3077,0.2783,0.3038
6,0.9191,0.9451,0.75,0.2308,0.3529,0.3225,0.3874
7,0.9265,0.9223,0.75,0.25,0.375,0.3462,0.4061
8,0.9559,0.7273,0.25,0.25,0.25,0.2273,0.2273
9,0.9485,0.9072,0.25,0.2,0.2222,0.1959,0.1973


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4475
1,Target,Type_of_Food_Allergy_Peanut
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2788, 468)"
5,Transformed train set shape,"(2546, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9334,0.8749,0.35,0.4771,0.3985,0.3661,0.3728,1.386
ada,Ada Boost Classifier,0.885,0.8115,0.5444,0.3066,0.382,0.3254,0.3476,1.326
rf,Random Forest Classifier,0.9384,0.903,0.2933,0.5773,0.3743,0.3475,0.3757,1.369
et,Extra Trees Classifier,0.9297,0.8874,0.0322,0.15,0.053,0.0418,0.0548,1.328


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9051,0.8559,0.6667,0.375,0.48,0.4323,0.454
1,0.8978,0.8941,0.5556,0.3333,0.4167,0.3645,0.3788
2,0.9197,0.9349,0.7778,0.4375,0.56,0.5196,0.5457
3,0.8978,0.8772,0.6,0.375,0.4615,0.4084,0.4222
4,0.9416,0.8433,0.8,0.5714,0.6667,0.6356,0.6465
5,0.9265,0.8959,0.6667,0.4615,0.5455,0.5069,0.517
6,0.8897,0.8976,0.7778,0.35,0.4828,0.4308,0.4741
7,0.8971,0.7725,0.3333,0.2727,0.3,0.245,0.2465
8,0.8971,0.9274,0.6667,0.3529,0.4615,0.4105,0.436
9,0.9044,0.8311,0.5556,0.3571,0.4348,0.3853,0.3965


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3742
1,Target,Type_of_Food_Allergy_Shellfish
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2904, 468)"
5,Transformed train set shape,"(2662, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9421,0.8298,0.2833,0.1217,0.1654,0.1399,0.1564,1.449
et,Extra Trees Classifier,0.9758,0.8325,0.0333,0.1,0.05,0.0494,0.0573,1.388
lightgbm,Light Gradient Boosting Machine,0.9751,0.8362,0.0333,0.1,0.05,0.0483,0.0558,1.486
rf,Random Forest Classifier,0.9751,0.8029,0.0,0.0,0.0,0.0,0.0,1.42


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9416,0.7662,0.0,0.0,0.0,-0.0281,-0.0291
1,0.9562,0.8553,0.25,0.25,0.25,0.2274,0.2274
2,0.9343,0.7237,0.5,0.2222,0.3077,0.2785,0.304
3,0.9197,0.6015,0.0,0.0,0.0,-0.0386,-0.0402
4,0.9416,0.7763,0.25,0.1667,0.2,0.171,0.1747
5,0.9485,0.8321,0.3333,0.1667,0.2222,0.1987,0.2115
6,0.9412,0.9223,0.3333,0.1429,0.2,0.1745,0.1916
7,0.9191,0.7945,0.0,0.0,0.0,-0.0331,-0.0375
8,0.9412,0.9825,1.0,0.2727,0.4286,0.4081,0.5063
9,0.9779,0.9637,0.3333,0.5,0.4,0.3892,0.3975


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,8984
1,Target,Type_of_Food_Allergy_TPO
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2888, 468)"
5,Transformed train set shape,"(2646, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9143,0.6525,0.24,0.1,0.1368,0.1005,0.1133,1.43
lightgbm,Light Gradient Boosting Machine,0.9663,0.8118,0.045,0.0833,0.0571,0.0502,0.0526,1.47
rf,Random Forest Classifier,0.97,0.8437,0.02,0.1,0.0333,0.0325,0.0441,1.473
et,Extra Trees Classifier,0.9678,0.8375,0.0,0.0,0.0,-0.0024,-0.003,1.453


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8686,0.8853,0.75,0.15,0.25,0.2116,0.2967
1,0.8832,0.7895,0.75,0.1667,0.2727,0.2362,0.3176
2,0.9197,0.9511,1.0,0.2667,0.4211,0.3931,0.4946
3,0.8686,0.5545,0.4,0.1176,0.1818,0.1329,0.1629
4,0.9051,0.8106,0.4,0.1667,0.2353,0.1938,0.2151
5,0.9191,0.715,0.5,0.1818,0.2667,0.2336,0.2676
6,0.8971,0.8617,0.5,0.1429,0.2222,0.1849,0.2275
7,0.8824,0.7614,0.5,0.125,0.2,0.1605,0.2066
8,0.8971,0.7708,0.75,0.1875,0.3,0.2654,0.3417
9,0.8676,0.7936,0.5,0.1111,0.1818,0.1404,0.1889


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3373
1,Target,Type_of_Food_Allergy_Tree_Nuts
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2728, 468)"
5,Transformed train set shape,"(2486, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.8477,0.7654,0.4673,0.2883,0.3527,0.2728,0.2851,1.545
rf,Random Forest Classifier,0.9128,0.8552,0.1936,0.5411,0.2703,0.2373,0.2784,1.567
lightgbm,Light Gradient Boosting Machine,0.9019,0.8417,0.1955,0.4101,0.2531,0.2095,0.2311,1.612
et,Extra Trees Classifier,0.9143,0.8894,0.0885,0.5167,0.1432,0.1273,0.1858,1.526


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7518,0.668,0.4167,0.1562,0.2273,0.1144,0.1341
1,0.8321,0.829,0.6667,0.2963,0.4103,0.3289,0.3658
2,0.8467,0.679,0.4167,0.2632,0.3226,0.2411,0.2492
3,0.8467,0.723,0.6154,0.3333,0.4324,0.3528,0.375
4,0.7956,0.8099,0.6154,0.2581,0.3636,0.2654,0.3011
5,0.8529,0.8871,0.75,0.3462,0.4737,0.4014,0.4421
6,0.8382,0.797,0.6667,0.3077,0.4211,0.3415,0.3762
7,0.875,0.8814,0.75,0.3913,0.5143,0.4506,0.4821
8,0.8309,0.8562,0.6667,0.2963,0.4103,0.3282,0.3651
9,0.8456,0.6794,0.5,0.2857,0.3636,0.2831,0.2975


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1605
1,Target,Type_of_Venom_Allergy_ATCD_Venom
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5493, 468)"
5,Transformed train set shape,"(5044, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9846,0.6509,0.1,0.05,0.065,0.0585,0.063,1.715
rf,Random Forest Classifier,0.9929,0.669,0.0,0.0,0.0,0.0,0.0,1.652
et,Extra Trees Classifier,0.9929,0.6651,0.0,0.0,0.0,0.0,0.0,1.615
lightgbm,Light Gradient Boosting Machine,0.9894,0.6183,0.0,0.0,0.0,-0.0037,-0.0038,1.672


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9882,0.1067,0.0,0.0,0.0,-0.0053,-0.0056
1,0.9921,0.3597,0.0,0.0,0.0,-0.004,-0.004
2,0.9843,0.6806,0.0,0.0,0.0,-0.0079,-0.0079
3,0.9843,0.6905,0.5,0.25,0.3333,0.3263,0.3465
4,0.9843,0.6647,0.0,0.0,0.0,-0.0079,-0.0079
5,0.9803,0.7698,0.0,0.0,0.0,-0.0095,-0.0097
6,0.9764,0.6825,0.0,0.0,0.0,-0.0106,-0.0113
7,0.9803,0.7421,0.0,0.0,0.0,-0.0095,-0.0097
8,0.9921,0.873,0.5,0.5,0.5,0.496,0.496
9,0.9921,0.7123,0.0,0.0,0.0,0.0,0.0


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,634
1,Target,Type_of_Venom_Allergy_IGE_Venom
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5491, 468)"
5,Transformed train set shape,"(5042, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9819,0.7212,0.25,0.1926,0.1908,0.1835,0.1975,1.816
lightgbm,Light Gradient Boosting Machine,0.9886,0.6544,0.15,0.2,0.1667,0.1621,0.166,1.799
rf,Random Forest Classifier,0.9929,0.9315,0.05,0.1,0.0667,0.0665,0.0706,1.646
et,Extra Trees Classifier,0.9925,0.9014,0.0,0.0,0.0,0.0,0.0,1.669


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9724,0.9763,1.0,0.125,0.2222,0.2167,0.3486
1,0.9724,0.6786,0.5,0.1429,0.2222,0.2126,0.2571
2,0.9685,0.9087,0.5,0.125,0.2,0.1898,0.239
3,0.9685,0.9861,1.0,0.2,0.3333,0.3245,0.4401
4,0.9685,0.6925,0.0,0.0,0.0,-0.012,-0.0139
5,0.9882,0.9702,0.5,0.3333,0.4,0.3943,0.4026
6,0.9843,0.9821,0.5,0.25,0.3333,0.3263,0.3465
7,0.9843,0.9841,0.5,0.25,0.3333,0.3263,0.3465
8,0.9843,0.9861,0.5,0.25,0.3333,0.3263,0.3465
9,0.9921,0.9683,0.5,0.5,0.5,0.496,0.496


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved
SMOTE


Unnamed: 0,Description,Value
0,Session id,7660
1,Target,Allergy_Present
2,Target type,Binary
3,Original data shape,"(1759, 468)"
4,Transformed data shape,"(2590, 468)"
5,Transformed train set shape,"(2326, 468)"
6,Transformed test set shape,"(264, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8441,0.8895,0.914,0.8891,0.901,0.5327,0.5366,1.691
rf,Random Forest Classifier,0.8334,0.8766,0.9123,0.8786,0.8948,0.4941,0.4989,1.74
lightgbm,Light Gradient Boosting Machine,0.8254,0.8797,0.9037,0.8759,0.8893,0.4751,0.4782,1.8
ada,Ada Boost Classifier,0.8154,0.8648,0.8642,0.8957,0.8793,0.4862,0.4892,1.724


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.86,0.9083,0.9145,0.9068,0.9106,0.5876,0.5877
1,0.8533,0.9244,0.8632,0.9439,0.9018,0.6146,0.6242
2,0.8467,0.9158,0.8803,0.9196,0.8996,0.5763,0.5787
3,0.8467,0.909,0.8793,0.9189,0.8987,0.5842,0.5866
4,0.8667,0.9011,0.8966,0.9286,0.9123,0.6349,0.6366
5,0.8255,0.9193,0.8966,0.8814,0.8889,0.4828,0.4832
6,0.7852,0.831,0.8276,0.8889,0.8571,0.4269,0.4314
7,0.8255,0.9185,0.8362,0.9327,0.8818,0.5522,0.5644
8,0.7919,0.8689,0.819,0.9048,0.8597,0.461,0.4696
9,0.8255,0.867,0.8879,0.8879,0.8879,0.494,0.494


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6654
1,Target,Severe_Allergy
2,Target type,Binary
3,Original data shape,"(1670, 468)"
4,Transformed data shape,"(2021, 468)"
5,Transformed train set shape,"(1770, 468)"
6,Transformed test set shape,"(251, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8097,0.8957,0.852,0.8464,0.8482,0.593,0.5955,1.751
et,Extra Trees Classifier,0.8069,0.8911,0.8588,0.8369,0.8471,0.5851,0.5871,1.698
rf,Random Forest Classifier,0.8055,0.8788,0.8644,0.8317,0.8469,0.58,0.583,1.764
ada,Ada Boost Classifier,0.7808,0.8578,0.8046,0.8382,0.8205,0.5393,0.5411,1.708


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7746,0.8916,0.8523,0.7979,0.8242,0.5114,0.5136
1,0.8451,0.8906,0.8977,0.8587,0.8778,0.6665,0.6678
2,0.8451,0.9194,0.8977,0.8587,0.8778,0.6665,0.6678
3,0.8099,0.9156,0.8295,0.8588,0.8439,0.6009,0.6015
4,0.7958,0.8847,0.8427,0.8333,0.838,0.5618,0.5619
5,0.8099,0.8815,0.8539,0.8444,0.8492,0.592,0.5921
6,0.7887,0.9006,0.809,0.8471,0.8276,0.5552,0.5562
7,0.831,0.904,0.8539,0.8736,0.8636,0.6415,0.6418
8,0.8592,0.925,0.8764,0.8966,0.8864,0.7012,0.7016
9,0.8369,0.9147,0.8523,0.8824,0.8671,0.6562,0.6569


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3652
1,Target,Respiratory_Allergy
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2193, 468)"
5,Transformed train set shape,"(1966, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9125,0.9457,0.939,0.947,0.9427,0.7569,0.7587,1.853
rf,Random Forest Classifier,0.8992,0.9246,0.9614,0.9126,0.9362,0.6971,0.7044,1.855
et,Extra Trees Classifier,0.8969,0.9426,0.9481,0.9206,0.9339,0.6984,0.702,1.765
ada,Ada Boost Classifier,0.8812,0.8933,0.9165,0.9283,0.922,0.6719,0.6739,1.833


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8984,0.9415,0.9091,0.9574,0.9326,0.7269,0.7309
1,0.9141,0.9352,0.9495,0.94,0.9447,0.7518,0.752
2,0.8906,0.9234,0.899,0.957,0.9271,0.7092,0.7148
3,0.8984,0.949,0.9184,0.9474,0.9326,0.7265,0.7279
4,0.9062,0.9449,0.9184,0.9574,0.9375,0.7503,0.7529
5,0.9375,0.9541,0.9592,0.9592,0.9592,0.8259,0.8259
6,0.9453,0.9568,0.9592,0.9691,0.9641,0.8494,0.8496
7,0.9062,0.9265,0.9184,0.9574,0.9375,0.7503,0.7529
8,0.9453,0.9354,0.9592,0.9691,0.9641,0.8494,0.8496
9,0.9219,0.9303,0.9286,0.9681,0.9479,0.7919,0.7947


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4997
1,Target,Food_Allergy
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(1760, 468)"
5,Transformed train set shape,"(1518, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8037,0.8692,0.7823,0.7789,0.7796,0.6027,0.604,1.817
rf,Random Forest Classifier,0.7897,0.8614,0.7691,0.7618,0.7643,0.5747,0.5761,1.822
lightgbm,Light Gradient Boosting Machine,0.7853,0.8639,0.7805,0.7501,0.7638,0.5675,0.5693,1.804
ada,Ada Boost Classifier,0.7824,0.8477,0.7806,0.7439,0.7611,0.5616,0.5631,1.772


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7737,0.8768,0.7869,0.7273,0.7559,0.5456,0.5471
1,0.8029,0.8624,0.7705,0.7833,0.7769,0.6004,0.6005
2,0.8394,0.9154,0.8197,0.8197,0.8197,0.6749,0.6749
3,0.781,0.8596,0.8525,0.7123,0.7761,0.5652,0.5739
4,0.8248,0.8962,0.8197,0.7937,0.8065,0.6465,0.6468
5,0.7941,0.8829,0.8667,0.7222,0.7879,0.5911,0.6004
6,0.8309,0.8923,0.8167,0.8033,0.8099,0.6576,0.6577
7,0.7426,0.8531,0.8167,0.6712,0.7368,0.4897,0.4987
8,0.8088,0.8711,0.7833,0.7833,0.7833,0.6123,0.6123
9,0.75,0.8575,0.7049,0.7288,0.7167,0.4931,0.4933


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2399
1,Target,Venom_Allergy
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5473, 468)"
5,Transformed train set shape,"(5024, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9839,0.7635,0.1167,0.1667,0.1233,0.1164,0.1248,2.604
rf,Random Forest Classifier,0.9898,0.856,0.0667,0.2,0.1,0.0994,0.115,2.262
lightgbm,Light Gradient Boosting Machine,0.987,0.8028,0.0667,0.15,0.09,0.086,0.0943,2.971
et,Extra Trees Classifier,0.989,0.8186,0.0333,0.1,0.05,0.0491,0.0568,1.998


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9764,0.9603,0.5,0.1667,0.25,0.241,0.2794
1,0.9803,0.7917,0.0,0.0,0.0,-0.0095,-0.0097
2,0.9724,0.8234,0.6667,0.25,0.3636,0.3525,0.3976
3,0.9803,0.9814,0.6667,0.3333,0.4444,0.4356,0.4629
4,0.9882,0.6946,0.3333,0.5,0.4,0.3943,0.4026
5,0.9921,0.7543,0.3333,1.0,0.5,0.497,0.5751
6,0.9724,0.8433,0.0,0.0,0.0,-0.0137,-0.0138
7,0.9843,0.9841,0.3333,0.3333,0.3333,0.3254,0.3254
8,0.9685,0.747,0.0,0.0,0.0,-0.015,-0.0155
9,0.9764,0.7875,0.0,0.0,0.0,-0.012,-0.012


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,7144
1,Target,Type_of_Respiratory_Allergy_ARIA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1575, 468)"
5,Transformed train set shape,"(1348, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.868,0.9346,0.8664,0.8813,0.8732,0.7354,0.7365,1.894
et,Extra Trees Classifier,0.8555,0.9331,0.8456,0.8757,0.8599,0.7107,0.7118,1.857
ada,Ada Boost Classifier,0.8484,0.9113,0.8308,0.8754,0.8519,0.6969,0.6988,1.889
rf,Random Forest Classifier,0.8375,0.9211,0.8679,0.8315,0.8488,0.6732,0.675,1.893


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.9753,0.8955,0.9677,0.9302,0.8596,0.8622
1,0.8359,0.8899,0.791,0.8833,0.8346,0.6728,0.6769
2,0.9062,0.9716,0.8806,0.9365,0.9077,0.8126,0.8142
3,0.875,0.9591,0.8657,0.8923,0.8788,0.7498,0.7502
4,0.8828,0.9293,0.8657,0.9062,0.8855,0.7656,0.7665
5,0.8047,0.9131,0.7463,0.8621,0.8,0.6111,0.6172
6,0.8906,0.936,0.9118,0.8857,0.8986,0.78,0.7803
7,0.8828,0.9395,0.8824,0.8955,0.8889,0.7649,0.765
8,0.8672,0.9353,0.8088,0.9322,0.8661,0.7357,0.743
9,0.8828,0.9292,0.8971,0.8841,0.8905,0.7645,0.7646


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6028
1,Target,Type_of_Respiratory_Allergy_CONJ
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2121, 468)"
5,Transformed train set shape,"(1894, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.782,0.7947,0.4201,0.617,0.4982,0.3662,0.3778,1.964
lightgbm,Light Gradient Boosting Machine,0.7688,0.7772,0.4324,0.5858,0.4928,0.3483,0.3575,2.054
ada,Ada Boost Classifier,0.7344,0.7398,0.4952,0.4936,0.4924,0.3132,0.3143,1.959
rf,Random Forest Classifier,0.7773,0.7742,0.3694,0.6302,0.4608,0.3334,0.3545,2.004


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7891,0.7885,0.6667,0.5789,0.6197,0.4748,0.477
1,0.7891,0.8268,0.6667,0.5789,0.6197,0.4748,0.477
2,0.8203,0.8144,0.7273,0.6316,0.6761,0.5526,0.5552
3,0.7656,0.815,0.6364,0.5385,0.5833,0.4219,0.4247
4,0.7969,0.8309,0.6364,0.6,0.6176,0.4795,0.4799
5,0.7031,0.7059,0.3939,0.4194,0.4062,0.2086,0.2088
6,0.7891,0.8278,0.6061,0.5882,0.597,0.4542,0.4543
7,0.7656,0.7922,0.7059,0.5455,0.6154,0.4508,0.4586
8,0.8125,0.8686,0.7647,0.619,0.6842,0.553,0.5592
9,0.7578,0.7309,0.6176,0.5385,0.5753,0.4071,0.4089


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,8492
1,Target,Type_of_Respiratory_Allergy_GINA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1509, 468)"
5,Transformed train set shape,"(1282, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8922,0.9499,0.8924,0.8931,0.8923,0.7844,0.7852,1.935
rf,Random Forest Classifier,0.8727,0.9391,0.8908,0.8609,0.875,0.7453,0.7468,2.028
lightgbm,Light Gradient Boosting Machine,0.8727,0.9494,0.8767,0.8712,0.8728,0.7453,0.7473,1.992
ada,Ada Boost Classifier,0.8641,0.9311,0.8658,0.865,0.8639,0.7281,0.7307,1.912


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8828,0.958,0.9231,0.8571,0.8889,0.7653,0.7676
1,0.9141,0.9697,0.9531,0.8841,0.9173,0.8281,0.8307
2,0.8906,0.9567,0.9219,0.8676,0.8939,0.7812,0.7828
3,0.8516,0.885,0.875,0.8358,0.855,0.7031,0.7039
4,0.875,0.9116,0.8906,0.8636,0.8769,0.75,0.7504
5,0.8984,0.9211,0.9375,0.8696,0.9023,0.7969,0.7993
6,0.9219,0.9521,0.9531,0.8971,0.9242,0.8438,0.8454
7,0.9141,0.9711,0.9219,0.9077,0.9147,0.8281,0.8282
8,0.8984,0.9779,0.9062,0.8923,0.8992,0.7969,0.797
9,0.875,0.9651,0.8594,0.8871,0.873,0.75,0.7504


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,5180
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Gram
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1567, 468)"
5,Transformed train set shape,"(1340, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9367,0.9832,0.9344,0.9332,0.9336,0.8732,0.8735,2.063
rf,Random Forest Classifier,0.9039,0.9568,0.941,0.8691,0.9032,0.8081,0.8116,2.12
ada,Ada Boost Classifier,0.8922,0.9513,0.8902,0.8859,0.8873,0.784,0.7851,2.01
et,Extra Trees Classifier,0.8719,0.9375,0.8721,0.8614,0.8663,0.7433,0.7441,2.04


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9531,0.9826,0.9672,0.9365,0.9516,0.9062,0.9066
1,0.9141,0.9716,0.9344,0.8906,0.912,0.8281,0.829
2,0.9844,0.9939,0.9672,1.0,0.9833,0.9686,0.9691
3,0.9609,0.9886,0.9344,0.9828,0.958,0.9215,0.9226
4,0.9531,0.9936,0.9508,0.9508,0.9508,0.906,0.906
5,0.9141,0.9687,0.8852,0.931,0.9076,0.8274,0.8283
6,0.9688,0.993,0.9672,0.9672,0.9672,0.9374,0.9374
7,0.9531,0.9905,0.9344,0.9661,0.95,0.9059,0.9064
8,0.9297,0.9799,0.9344,0.9194,0.9268,0.8592,0.8593
9,0.9219,0.9706,0.918,0.918,0.918,0.8434,0.8434


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6117
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Herb
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1963, 468)"
5,Transformed train set shape,"(1736, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9406,0.9871,0.9152,0.9046,0.9085,0.8647,0.8661,2.28
ada,Ada Boost Classifier,0.8992,0.9515,0.8422,0.8507,0.8437,0.7695,0.7722,2.172
rf,Random Forest Classifier,0.8781,0.9452,0.8497,0.7923,0.8181,0.7268,0.7298,2.168
et,Extra Trees Classifier,0.8445,0.9214,0.7402,0.772,0.7545,0.6409,0.6423,2.125


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.965,0.9512,0.8478,0.8966,0.8436,0.8468
1,0.9062,0.9767,0.9268,0.8085,0.8636,0.7927,0.797
2,0.9766,0.9927,0.9512,0.975,0.963,0.9458,0.946
3,0.9453,0.9809,0.9268,0.9048,0.9157,0.8752,0.8753
4,0.9688,0.9919,0.9512,0.9512,0.9512,0.9282,0.9282
5,0.9141,0.9781,0.8537,0.875,0.8642,0.8014,0.8015
6,0.9453,0.9947,0.9512,0.8864,0.9176,0.8768,0.878
7,0.9375,0.9818,0.9512,0.8667,0.907,0.8601,0.8622
8,0.9609,0.9892,0.9286,0.9512,0.9398,0.9109,0.911
9,0.9453,0.9826,0.9524,0.8889,0.9195,0.8782,0.8794


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,8124
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Tree
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1591, 468)"
5,Transformed train set shape,"(1364, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9273,0.9772,0.9214,0.9239,0.9222,0.854,0.8548,2.158
rf,Random Forest Classifier,0.9047,0.9593,0.9248,0.8795,0.9008,0.8092,0.8116,2.188
et,Extra Trees Classifier,0.8711,0.948,0.8797,0.8513,0.8645,0.7417,0.7432,2.135
ada,Ada Boost Classifier,0.8711,0.9298,0.8595,0.8652,0.8616,0.741,0.7421,2.124


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9141,0.9659,0.9333,0.8889,0.9106,0.828,0.8289
1,0.9375,0.9787,0.95,0.9194,0.9344,0.8748,0.8752
2,0.9297,0.9654,0.9667,0.8923,0.928,0.8595,0.8621
3,0.8906,0.9767,0.8833,0.8833,0.8833,0.7804,0.7804
4,0.8984,0.9696,0.9333,0.8615,0.896,0.7971,0.7995
5,0.9141,0.9873,0.9167,0.9016,0.9091,0.8276,0.8277
6,0.8906,0.9708,0.9833,0.8194,0.8939,0.7829,0.7969
7,0.9297,0.9738,0.9333,0.918,0.9256,0.859,0.8591
8,0.9297,0.9786,0.9661,0.8906,0.9268,0.8594,0.862
9,0.9375,0.9862,0.9322,0.9322,0.9322,0.8742,0.8742


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,8656
1,Target,Type_of_Respiratory_Allergy_IGE_Dander_Animals
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1665, 468)"
5,Transformed train set shape,"(1438, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9367,0.984,0.918,0.9381,0.9272,0.8713,0.8725,2.207
rf,Random Forest Classifier,0.8938,0.9608,0.9144,0.8547,0.883,0.7859,0.7883,2.237
ada,Ada Boost Classifier,0.8875,0.9396,0.87,0.8747,0.8713,0.7714,0.7729,2.107
et,Extra Trees Classifier,0.8234,0.9135,0.8182,0.7888,0.8021,0.6429,0.6449,2.17


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9297,0.9747,0.9286,0.9123,0.9204,0.8574,0.8575
1,0.9219,0.973,0.8929,0.9259,0.9091,0.8406,0.8411
2,0.9219,0.9896,0.9643,0.871,0.9153,0.8431,0.8469
3,0.9766,0.9888,0.9643,0.9818,0.973,0.9523,0.9524
4,0.9297,0.9789,0.8571,0.9796,0.9143,0.8551,0.8606
5,0.9531,0.9883,0.9286,0.963,0.9455,0.9044,0.9048
6,0.9297,0.9849,0.8929,0.9434,0.9174,0.8563,0.8573
7,0.9375,0.9678,0.8929,0.9615,0.9259,0.872,0.8738
8,0.9453,0.9864,0.9286,0.9455,0.9369,0.8887,0.8888
9,0.9297,0.9904,0.8596,0.98,0.9159,0.8559,0.8614


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3082
1,Target,Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1631, 468)"
5,Transformed train set shape,"(1404, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9234,0.973,0.9067,0.923,0.9145,0.8452,0.8457,2.294
rf,Random Forest Classifier,0.8883,0.9536,0.8912,0.8677,0.8783,0.7751,0.7771,2.288
ada,Ada Boost Classifier,0.8695,0.9256,0.8479,0.8614,0.8538,0.7361,0.7374,2.236
et,Extra Trees Classifier,0.8523,0.9273,0.827,0.844,0.8349,0.7014,0.7022,2.285


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9141,0.9781,0.9655,0.8615,0.9106,0.8284,0.8334
1,0.8906,0.9586,0.8793,0.8793,0.8793,0.7793,0.7793
2,0.9219,0.965,0.8793,0.9444,0.9107,0.8414,0.8431
3,0.9141,0.9768,0.8966,0.9123,0.9043,0.8263,0.8264
4,0.9375,0.9768,0.931,0.931,0.931,0.8739,0.8739
5,0.9297,0.9727,0.931,0.9153,0.9231,0.8583,0.8584
6,0.8594,0.9608,0.8448,0.8448,0.8448,0.7163,0.7163
7,0.8984,0.9631,0.8448,0.9245,0.8829,0.7935,0.7961
8,0.9375,0.979,0.9474,0.9153,0.931,0.8739,0.8744
9,0.9297,0.9753,0.9298,0.9138,0.9217,0.8579,0.858


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,970
1,Target,Type_of_Respiratory_Allergy_IGE_Molds_Yeast
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2207, 468)"
5,Transformed train set shape,"(1980, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9742,0.9926,0.9414,0.9449,0.9427,0.9261,0.9264,2.537
ada,Ada Boost Classifier,0.9312,0.9597,0.8448,0.8528,0.8472,0.8029,0.8042,2.342
rf,Random Forest Classifier,0.9055,0.9563,0.731,0.8362,0.7773,0.7178,0.7225,2.404
et,Extra Trees Classifier,0.8523,0.8963,0.5207,0.7491,0.6123,0.5254,0.5396,2.378


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9375,0.9788,0.7931,0.92,0.8519,0.8125,0.8161
1,0.9609,0.9885,0.931,0.9,0.9153,0.8899,0.8901
2,0.9766,0.9909,0.9655,0.9333,0.9492,0.9339,0.9342
3,0.9609,0.9836,0.931,0.9,0.9153,0.8899,0.8901
4,0.9922,0.9976,0.9655,1.0,0.9825,0.9774,0.9777
5,0.9531,0.9659,0.931,0.871,0.9,0.8694,0.8703
6,0.9766,0.9742,0.931,0.9643,0.9474,0.9323,0.9325
7,0.9531,0.9878,0.8966,0.8966,0.8966,0.8662,0.8662
8,0.9766,0.9941,0.9655,0.9333,0.9492,0.9339,0.9342
9,0.9766,0.9948,0.931,0.9643,0.9474,0.9323,0.9325


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,176
1,Target,Type_of_Food_Allergy_Aromatics
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2914, 468)"
5,Transformed train set shape,"(2672, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9641,0.8336,0.1333,0.1167,0.1194,0.1029,0.1054,2.538
rf,Random Forest Classifier,0.978,0.8103,0.0,0.0,0.0,-0.0011,-0.0013,2.492
et,Extra Trees Classifier,0.9773,0.8709,0.0,0.0,0.0,-0.0021,-0.0023,2.373
lightgbm,Light Gradient Boosting Machine,0.9751,0.7967,0.0,0.0,0.0,-0.005,-0.0055,3.141


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7956,0.8632,0.6667,0.069,0.125,0.0888,0.1667
1,0.854,0.6667,0.3333,0.0526,0.0909,0.0552,0.0843
2,0.781,0.7736,0.6667,0.0645,0.1176,0.0809,0.1575
3,0.8759,0.9167,1.0,0.15,0.2609,0.2316,0.3619
4,0.8759,0.9005,0.6667,0.1111,0.1905,0.1589,0.2371
5,0.8456,0.8825,0.5,0.0476,0.087,0.0618,0.1168
6,0.8824,0.7381,0.3333,0.0667,0.1111,0.0772,0.1069
7,0.8015,0.8233,1.0,0.1,0.1818,0.1476,0.2823
8,0.8015,0.8546,0.6667,0.0714,0.129,0.0929,0.1712
9,0.875,0.881,0.6667,0.1111,0.1905,0.1587,0.2368


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3937
1,Target,Type_of_Food_Allergy_Egg
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2900, 468)"
5,Transformed train set shape,"(2658, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9634,0.7162,0.2333,0.275,0.2448,0.2265,0.2311,2.524
lightgbm,Light Gradient Boosting Machine,0.9707,0.857,0.15,0.4,0.2133,0.2023,0.2304,2.929
rf,Random Forest Classifier,0.9714,0.9084,0.05,0.1333,0.0686,0.0626,0.0722,2.516
et,Extra Trees Classifier,0.9692,0.8996,0.0,0.0,0.0,-0.0061,-0.0069,2.404


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8978,0.8412,0.5,0.1429,0.2222,0.1852,0.2278
1,0.9051,0.7838,0.5,0.1538,0.2353,0.1996,0.2397
2,0.9197,0.8289,0.75,0.2308,0.3529,0.3227,0.3877
3,0.927,0.9323,0.75,0.25,0.375,0.3464,0.4064
4,0.9635,0.9718,0.75,0.4286,0.5455,0.5279,0.5505
5,0.9118,0.9398,0.6667,0.1538,0.25,0.2221,0.2917
6,0.9118,0.9687,1.0,0.2,0.3333,0.3079,0.4266
7,0.8971,0.9699,1.0,0.1765,0.3,0.2727,0.3974
8,0.9485,0.8546,0.3333,0.1667,0.2222,0.1987,0.2115
9,0.9485,0.8759,0.75,0.3333,0.4615,0.4387,0.4789


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,7775
1,Target,Type_of_Food_Allergy_Fish
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2908, 468)"
5,Transformed train set shape,"(2666, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9634,0.6468,0.0917,0.12,0.0983,0.0813,0.0843,2.654
rf,Random Forest Classifier,0.9766,0.8881,0.0,0.0,0.0,0.0,0.0,2.577
et,Extra Trees Classifier,0.9744,0.8947,0.0,0.0,0.0,-0.0034,-0.0041,2.493
lightgbm,Light Gradient Boosting Machine,0.9729,0.8285,0.0,0.0,0.0,-0.0053,-0.006,3.23


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9708,0.7736,0.3333,0.3333,0.3333,0.3184,0.3184
1,0.9343,0.7114,0.3333,0.125,0.1818,0.1549,0.1754
2,0.9635,0.8657,0.0,0.0,0.0,-0.0178,-0.0182
3,0.9562,0.7575,0.0,0.0,0.0,-0.0199,-0.0211
4,0.9708,0.4455,0.25,0.5,0.3333,0.3201,0.3404
5,0.9853,0.7444,0.3333,1.0,0.5,0.4944,0.5731
6,0.9779,0.9674,0.3333,0.5,0.4,0.3892,0.3975
7,0.9632,0.8596,0.3333,0.25,0.2857,0.2672,0.2702
8,0.9779,0.9098,0.3333,0.5,0.4,0.3892,0.3975
9,0.9559,0.604,0.0,0.0,0.0,-0.0226,-0.0226


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2786
1,Target,Type_of_Food_Allergy_Fruits_and_Vegetables
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2808, 468)"
5,Transformed train set shape,"(2566, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9091,0.7961,0.2569,0.2821,0.259,0.2123,0.217,2.633
lightgbm,Light Gradient Boosting Machine,0.9436,0.8658,0.1347,0.65,0.2176,0.2023,0.2741,2.965
rf,Random Forest Classifier,0.9355,0.8403,0.1208,0.48,0.182,0.1589,0.2062,2.552
et,Extra Trees Classifier,0.9333,0.8779,0.1097,0.28,0.1528,0.1294,0.1463,2.516


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9197,0.7258,0.25,0.2857,0.2667,0.2244,0.225
1,0.9343,0.8682,0.5,0.4444,0.4706,0.4357,0.4366
2,0.8978,0.7878,0.5,0.2857,0.3636,0.3125,0.3271
3,0.9124,0.8247,0.3333,0.3333,0.3333,0.2865,0.2865
4,0.927,0.7179,0.1111,0.3333,0.1667,0.1384,0.1616
5,0.9485,0.8789,0.5,0.5714,0.5333,0.5062,0.5075
6,0.9338,0.9492,0.625,0.4545,0.5263,0.4917,0.4989
7,0.9191,0.8564,0.375,0.3333,0.3529,0.31,0.3106
8,0.9044,0.9014,0.375,0.2727,0.3158,0.2658,0.2697
9,0.8603,0.7393,0.25,0.1333,0.1739,0.1053,0.1115


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3647
1,Target,Type_of_Food_Allergy_Mammalian_Milk
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2930, 468)"
5,Transformed train set shape,"(2688, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9824,0.8785,0.05,0.05,0.05,0.0463,0.0461,3.493
ada,Ada Boost Classifier,0.9751,0.652,0.05,0.05,0.05,0.0402,0.0401,2.897
rf,Random Forest Classifier,0.9846,0.9142,0.0,0.0,0.0,0.0,0.0,2.705
et,Extra Trees Classifier,0.9832,0.8957,0.0,0.0,0.0,-0.002,-0.0021,2.549


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9708,0.9111,0.0,0.0,0.0,-0.0148,-0.0148
1,0.9854,0.7704,0.5,0.5,0.5,0.4926,0.4926
2,0.9708,0.9519,0.0,0.0,0.0,-0.0148,-0.0148
3,0.9854,0.7074,0.5,0.5,0.5,0.4926,0.4926
4,0.9781,0.99,0.3333,0.5,0.4,0.3893,0.3976
5,0.9706,0.9664,0.5,0.25,0.3333,0.32,0.3403
6,0.9706,0.9142,0.0,0.0,0.0,-0.0149,-0.0149
7,0.9926,0.9963,1.0,0.6667,0.8,0.7964,0.8134
8,0.9853,0.9627,0.5,0.5,0.5,0.4925,0.4925
9,0.9412,0.7052,0.0,0.0,0.0,-0.0226,-0.0262


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3748
1,Target,Type_of_Food_Allergy_Oral_Syndrom
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2788, 468)"
5,Transformed train set shape,"(2546, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.8857,0.8088,0.2611,0.2218,0.2361,0.1757,0.1781,2.776
et,Extra Trees Classifier,0.9341,0.8719,0.1189,0.4833,0.1869,0.1689,0.2169,2.658
lightgbm,Light Gradient Boosting Machine,0.9268,0.8344,0.1089,0.3619,0.1591,0.134,0.1626,2.871
rf,Random Forest Classifier,0.9319,0.8356,0.0533,0.2,0.0838,0.0732,0.0905,2.694


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8613,0.8845,0.8889,0.3077,0.4571,0.3984,0.4728
1,0.8248,0.9054,0.8889,0.2581,0.4,0.332,0.4199
2,0.8467,0.77,0.4444,0.2,0.2759,0.2037,0.2241
3,0.8394,0.8618,0.7,0.2692,0.3889,0.3169,0.3651
4,0.8394,0.8276,0.8,0.2857,0.4211,0.3513,0.4145
5,0.8529,0.8495,0.5556,0.2381,0.3333,0.2653,0.2955
6,0.875,0.8775,0.7778,0.3182,0.4516,0.3948,0.4453
7,0.8603,0.8898,0.5556,0.25,0.3448,0.279,0.307
8,0.8088,0.8491,0.8889,0.2424,0.381,0.3091,0.4013
9,0.8309,0.8132,0.5556,0.2083,0.303,0.2288,0.2647


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1067
1,Target,Type_of_Food_Allergy_Other_Legumes
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2888, 468)"
5,Transformed train set shape,"(2646, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9538,0.6798,0.115,0.1667,0.1319,0.1098,0.1136,2.883
lightgbm,Light Gradient Boosting Machine,0.9678,0.8499,0.075,0.3,0.12,0.1129,0.1426,3.379
et,Extra Trees Classifier,0.9663,0.8485,0.1,0.1833,0.1186,0.1101,0.1197,2.669
rf,Random Forest Classifier,0.97,0.8366,0.075,0.25,0.1133,0.1094,0.1314,2.73


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9635,0.7519,0.0,0.0,0.0,-0.0118,-0.0149
1,0.9708,0.9774,0.75,0.5,0.6,0.5855,0.5985
2,0.9708,0.6692,0.25,0.5,0.3333,0.3201,0.3404
3,0.9635,0.8152,0.4,0.5,0.4444,0.4258,0.4287
4,0.9416,0.6424,0.0,0.0,0.0,-0.0281,-0.0291
5,0.9559,0.6477,0.25,0.25,0.25,0.2273,0.2273
6,0.9632,0.6667,0.5,0.4,0.4444,0.4257,0.4285
7,0.9412,0.8636,0.25,0.1667,0.2,0.1707,0.1745
8,0.9779,0.7462,0.25,1.0,0.4,0.3929,0.4944
9,0.9485,0.5852,0.0,0.0,0.0,-0.0259,-0.0261


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4749
1,Target,Type_of_Food_Allergy_Peanut
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2788, 468)"
5,Transformed train set shape,"(2546, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9304,0.8657,0.37,0.4825,0.4039,0.3687,0.3797,2.91
rf,Random Forest Classifier,0.9377,0.8955,0.3144,0.5442,0.3947,0.3658,0.3825,2.753
lightgbm,Light Gradient Boosting Machine,0.9341,0.8917,0.3167,0.505,0.3805,0.3492,0.3636,3.109
et,Extra Trees Classifier,0.937,0.9133,0.2389,0.6255,0.3371,0.3107,0.3547,2.716


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9489,0.9609,0.5556,0.625,0.5882,0.5611,0.5622
1,0.9416,0.9705,0.6667,0.5455,0.6,0.5688,0.5722
2,0.9562,0.9627,0.6667,0.6667,0.6667,0.6432,0.6432
3,0.927,0.9362,0.5,0.5,0.5,0.4606,0.4606
4,0.8978,0.8402,0.3,0.3,0.3,0.2449,0.2449
5,0.9485,0.8871,0.4444,0.6667,0.5333,0.5072,0.519
6,0.9412,0.8889,0.5556,0.5556,0.5556,0.5241,0.5241
7,0.9338,0.8119,0.4444,0.5,0.4706,0.4354,0.4363
8,0.9265,0.8539,0.4444,0.4444,0.4444,0.4051,0.4051
9,0.9118,0.944,0.4444,0.3636,0.4,0.3529,0.355


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3709
1,Target,Type_of_Food_Allergy_Shellfish
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2904, 468)"
5,Transformed train set shape,"(2662, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9678,0.825,0.1167,0.2533,0.1456,0.1325,0.1499,2.94
lightgbm,Light Gradient Boosting Machine,0.9707,0.8417,0.05,0.125,0.065,0.0568,0.0663,3.687
rf,Random Forest Classifier,0.9751,0.8071,0.025,0.1,0.04,0.0382,0.0482,2.857
et,Extra Trees Classifier,0.9751,0.832,0.0,0.0,0.0,0.0,0.0,2.777


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9635,0.9552,0.0,0.0,0.0,-0.0178,-0.0182
1,0.9708,0.7688,0.25,0.5,0.3333,0.3201,0.3404
2,0.9562,0.8327,0.0,0.0,0.0,-0.0199,-0.0211
3,0.9708,0.8985,0.25,0.5,0.3333,0.3201,0.3404
4,0.9708,0.8703,0.25,0.5,0.3333,0.3201,0.3404
5,0.9853,0.782,0.3333,1.0,0.5,0.4944,0.5731
6,0.9559,0.6917,0.0,0.0,0.0,-0.0226,-0.0226
7,0.9853,0.8596,0.6667,0.6667,0.6667,0.6591,0.6591
8,0.9632,0.8045,0.0,0.0,0.0,-0.018,-0.0183
9,0.9632,0.8145,0.3333,0.25,0.2857,0.2672,0.2702


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4169
1,Target,Type_of_Food_Allergy_TPO
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2888, 468)"
5,Transformed train set shape,"(2646, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9531,0.6802,0.12,0.0867,0.1,0.0797,0.0804,3.007
lightgbm,Light Gradient Boosting Machine,0.967,0.8171,0.05,0.1333,0.0686,0.0624,0.0716,3.456
rf,Random Forest Classifier,0.9692,0.8269,0.025,0.1,0.04,0.0381,0.0479,2.888
et,Extra Trees Classifier,0.9648,0.8437,0.025,0.05,0.0333,0.0252,0.0258,2.837


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9562,0.8656,0.5,0.3333,0.4,0.3782,0.3866
1,0.9416,0.906,0.25,0.1667,0.2,0.171,0.1747
2,0.9635,0.8289,0.5,0.4,0.4444,0.4258,0.4287
3,0.9635,0.9182,0.4,0.5,0.4444,0.4258,0.4287
4,0.9489,0.8379,0.4,0.3333,0.3636,0.3372,0.3388
5,0.9412,0.8409,0.5,0.25,0.3333,0.3061,0.3264
6,0.9265,0.9375,0.75,0.25,0.375,0.3462,0.4061
7,0.9559,0.8551,0.25,0.25,0.25,0.2273,0.2273
8,0.9632,0.8712,0.5,0.4,0.4444,0.4257,0.4285
9,0.9265,0.697,0.25,0.125,0.1667,0.1327,0.1414


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,633
1,Target,Type_of_Food_Allergy_Tree_Nuts
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2728, 468)"
5,Transformed train set shape,"(2486, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9246,0.8789,0.3192,0.7104,0.4286,0.3943,0.4359,2.895
rf,Random Forest Classifier,0.915,0.8678,0.3115,0.5703,0.3928,0.3518,0.3753,2.932
ada,Ada Boost Classifier,0.8967,0.7999,0.3705,0.4166,0.3842,0.3289,0.3338,3.05
lightgbm,Light Gradient Boosting Machine,0.918,0.8802,0.2872,0.6011,0.372,0.3358,0.3693,3.326


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8978,0.8907,0.5,0.4286,0.4615,0.4055,0.4069
1,0.9416,0.8807,0.5833,0.7,0.6364,0.6049,0.6079
2,0.8978,0.8833,0.4167,0.4167,0.4167,0.3607,0.3607
3,0.8832,0.8617,0.3846,0.3846,0.3846,0.3201,0.3201
4,0.8905,0.835,0.5385,0.4375,0.4828,0.4223,0.4251
5,0.9191,0.879,0.5833,0.5385,0.56,0.5155,0.516
6,0.9338,0.9556,0.5,0.6667,0.5714,0.5364,0.5429
7,0.9118,0.9214,0.6667,0.5,0.5714,0.5234,0.5301
8,0.9265,0.8817,0.5,0.6,0.5455,0.5058,0.5083
9,0.9044,0.9341,0.6667,0.4706,0.5517,0.5,0.5095


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,5115
1,Target,Type_of_Venom_Allergy_ATCD_Venom
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5493, 468)"
5,Transformed train set shape,"(5044, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9902,0.6764,0.2,0.2333,0.2067,0.2031,0.2079,3.614
lightgbm,Light Gradient Boosting Machine,0.9906,0.7052,0.1,0.15,0.1167,0.1128,0.1167,3.845
rf,Random Forest Classifier,0.9929,0.809,0.0,0.0,0.0,0.0,0.0,3.376
et,Extra Trees Classifier,0.9929,0.8067,0.0,0.0,0.0,0.0,0.0,3.025


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9961,1.0,1.0,0.5,0.6667,0.6649,0.7057
1,0.9921,0.5375,0.0,0.0,0.0,-0.004,-0.004
2,0.9961,0.6091,0.5,1.0,0.6667,0.6649,0.7057
3,0.9724,0.9286,0.5,0.1429,0.2222,0.2126,0.2571
4,0.9882,0.7123,0.5,0.3333,0.4,0.3943,0.4026
5,0.9921,0.5417,0.0,0.0,0.0,0.0,0.0
6,0.9882,0.5913,0.0,0.0,0.0,-0.0053,-0.0056
7,0.9882,0.6984,0.5,0.3333,0.4,0.3943,0.4026
8,0.9882,0.6984,0.0,0.0,0.0,-0.0053,-0.0056
9,0.9882,0.7004,0.0,0.0,0.0,-0.0053,-0.0056


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1974
1,Target,Type_of_Venom_Allergy_IGE_Venom
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5491, 468)"
5,Transformed train set shape,"(5042, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9886,0.8036,0.2,0.2333,0.2067,0.202,0.2066,3.631
lightgbm,Light Gradient Boosting Machine,0.9917,0.9331,0.15,0.1833,0.1567,0.1547,0.1596,3.947
rf,Random Forest Classifier,0.9921,0.9393,0.0,0.0,0.0,-0.0005,-0.0006,3.485
et,Extra Trees Classifier,0.9925,0.8418,0.0,0.0,0.0,0.0,0.0,3.05


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9843,0.9605,0.0,0.0,0.0,-0.0059,-0.0069
1,0.9921,0.9762,0.5,0.5,0.5,0.496,0.496
2,0.9921,0.996,0.5,0.5,0.5,0.496,0.496
3,0.9724,0.9921,1.0,0.2222,0.3636,0.3553,0.4648
4,0.9803,0.9821,0.5,0.2,0.2857,0.2776,0.308
5,0.9803,0.6687,0.0,0.0,0.0,-0.0095,-0.0097
6,0.9843,0.9861,0.5,0.25,0.3333,0.3263,0.3465
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,0.9646,0.9425,0.0,0.0,0.0,-0.0124,-0.015
9,0.9843,0.9841,0.5,0.25,0.3333,0.3263,0.3465


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved
BorderlineSMOTE


Unnamed: 0,Description,Value
0,Session id,2712
1,Target,Allergy_Present
2,Target type,Binary
3,Original data shape,"(1759, 468)"
4,Transformed data shape,"(2590, 468)"
5,Transformed train set shape,"(2326, 468)"
6,Transformed test set shape,"(264, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8468,0.8944,0.9149,0.8917,0.9028,0.5401,0.5435,3.048
rf,Random Forest Classifier,0.8435,0.8813,0.9166,0.8862,0.901,0.5274,0.5299,3.105
lightgbm,Light Gradient Boosting Machine,0.8375,0.891,0.914,0.8819,0.8974,0.5054,0.51,3.062
ada,Ada Boost Classifier,0.8107,0.8519,0.859,0.8946,0.8759,0.475,0.4787,3.051


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.78,0.8873,0.812,0.8962,0.852,0.4275,0.4355
1,0.8467,0.9166,0.8974,0.9052,0.9013,0.558,0.5581
2,0.7867,0.8829,0.8034,0.9126,0.8545,0.4606,0.4739
3,0.8733,0.9419,0.8621,0.9709,0.9132,0.6817,0.6985
4,0.84,0.8938,0.8621,0.9259,0.8929,0.5787,0.5844
5,0.8591,0.912,0.8534,0.9612,0.9041,0.6418,0.6582
6,0.8389,0.9127,0.8621,0.9259,0.8929,0.5702,0.5761
7,0.8121,0.9049,0.8362,0.9151,0.8739,0.5084,0.5164
8,0.8121,0.8947,0.8276,0.9231,0.8727,0.5178,0.5292
9,0.7987,0.895,0.8362,0.8981,0.8661,0.4627,0.4676


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,5042
1,Target,Severe_Allergy
2,Target type,Binary
3,Original data shape,"(1670, 468)"
4,Transformed data shape,"(2021, 468)"
5,Transformed train set shape,"(1770, 468)"
6,Transformed test set shape,"(251, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8161,0.9046,0.8621,0.8487,0.8542,0.6045,0.6076,2.979
et,Extra Trees Classifier,0.8132,0.9002,0.8475,0.8533,0.8496,0.6029,0.6048,2.99
rf,Random Forest Classifier,0.8062,0.8941,0.8599,0.8352,0.8469,0.5829,0.5847,3.06
ada,Ada Boost Classifier,0.778,0.8522,0.7978,0.8389,0.8175,0.5345,0.5363,2.943


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7817,0.8779,0.7841,0.8519,0.8166,0.5481,0.551
1,0.8662,0.9278,0.875,0.9059,0.8902,0.7191,0.7198
2,0.8099,0.9059,0.8523,0.8427,0.8475,0.5951,0.5952
3,0.7958,0.8721,0.875,0.8105,0.8415,0.5556,0.5588
4,0.7887,0.873,0.8539,0.8172,0.8352,0.5414,0.5425
5,0.8803,0.9334,0.9438,0.875,0.9081,0.7371,0.7415
6,0.8028,0.8728,0.8427,0.8427,0.8427,0.5785,0.5785
7,0.8028,0.8929,0.7865,0.8861,0.8333,0.594,0.6004
8,0.831,0.9095,0.8764,0.8571,0.8667,0.636,0.6363
9,0.8227,0.9016,0.875,0.8462,0.8603,0.6178,0.6185


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3799
1,Target,Respiratory_Allergy
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2193, 468)"
5,Transformed train set shape,"(1966, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9227,0.9534,0.9481,0.9515,0.9497,0.7822,0.7833,3.096
et,Extra Trees Classifier,0.9117,0.9536,0.9552,0.9318,0.9433,0.7436,0.7454,2.909
rf,Random Forest Classifier,0.9047,0.9365,0.9654,0.9154,0.9397,0.7131,0.7192,3.055
ada,Ada Boost Classifier,0.8945,0.9286,0.9257,0.9368,0.9309,0.7075,0.7097,2.962


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9766,0.9927,0.9697,1.0,0.9846,0.9355,0.9374
1,0.9141,0.908,0.9495,0.94,0.9447,0.7518,0.752
2,0.8828,0.9446,0.9192,0.9286,0.9239,0.6696,0.6698
3,0.9141,0.9585,0.9286,0.9579,0.943,0.7686,0.7701
4,0.9141,0.9177,0.949,0.9394,0.9442,0.7577,0.7579
5,0.9375,0.9827,0.9286,0.9891,0.9579,0.8372,0.8435
6,0.9141,0.9143,0.9286,0.9579,0.943,0.7686,0.7701
7,0.9297,0.982,0.9286,0.9785,0.9529,0.8148,0.8191
8,0.9219,0.8993,0.949,0.949,0.949,0.7823,0.7823
9,0.9375,0.9772,0.949,0.9688,0.9588,0.8298,0.8305


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4037
1,Target,Food_Allergy
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(1760, 468)"
5,Transformed train set shape,"(1518, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.7971,0.873,0.7889,0.7637,0.7752,0.5906,0.5921,2.994
et,Extra Trees Classifier,0.7934,0.8717,0.7855,0.7588,0.7709,0.5829,0.5846,2.944
rf,Random Forest Classifier,0.7876,0.8672,0.7691,0.7582,0.7627,0.5706,0.5718,3.062
ada,Ada Boost Classifier,0.7656,0.8256,0.7328,0.7387,0.7343,0.5248,0.5264,2.85


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7372,0.8127,0.6721,0.7193,0.6949,0.4646,0.4654
1,0.7956,0.8796,0.7377,0.7895,0.7627,0.5836,0.5846
2,0.7956,0.8725,0.8361,0.7391,0.7846,0.5916,0.5956
3,0.7883,0.8734,0.7541,0.7667,0.7603,0.5708,0.5709
4,0.8175,0.9019,0.8689,0.7571,0.8092,0.6359,0.6414
5,0.8456,0.8837,0.8361,0.8226,0.8293,0.6883,0.6884
6,0.7794,0.8537,0.8167,0.7206,0.7656,0.5588,0.5627
7,0.8529,0.9103,0.8167,0.8448,0.8305,0.7007,0.701
8,0.7868,0.8421,0.8,0.7385,0.768,0.5713,0.5729
9,0.8382,0.8788,0.8833,0.7794,0.8281,0.6765,0.6812


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4130
1,Target,Venom_Allergy
2,Target type,Binary
3,Original data shape,"(2989, 468)"
4,Transformed data shape,"(5473, 468)"
5,Transformed train set shape,"(5024, 468)"
6,Transformed test set shape,"(449, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9874,0.7532,0.0667,0.15,0.09,0.0866,0.0949,3.85
ada,Ada Boost Classifier,0.9831,0.708,0.0667,0.05,0.0571,0.0499,0.0502,3.61
rf,Random Forest Classifier,0.989,0.831,0.0,0.0,0.0,0.0,0.0,3.328
et,Extra Trees Classifier,0.989,0.7943,0.0,0.0,0.0,0.0,0.0,3.036


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9803,0.6746,0.0,0.0,0.0,-0.0095,-0.0097
1,0.9803,0.0754,0.0,0.0,0.0,-0.0095,-0.0097
2,0.9843,0.9243,0.0,0.0,0.0,-0.0059,-0.0069
3,0.9764,0.8406,0.3333,0.2,0.25,0.2388,0.2468
4,0.9724,0.9416,0.3333,0.1667,0.2222,0.2098,0.223
5,0.9724,0.8805,0.0,0.0,0.0,-0.0137,-0.0138
6,0.9764,0.8871,0.6667,0.2857,0.4,0.3899,0.4268
7,0.9882,0.7995,0.6667,0.5,0.5714,0.5656,0.5716
8,0.9803,0.9124,0.0,0.0,0.0,-0.0095,-0.0097
9,0.9843,0.8181,0.3333,0.3333,0.3333,0.3254,0.3254


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2583
1,Target,Type_of_Respiratory_Allergy_ARIA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1575, 468)"
5,Transformed train set shape,"(1348, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8695,0.9395,0.8561,0.8928,0.8734,0.7389,0.7408,2.905
et,Extra Trees Classifier,0.8609,0.9365,0.8559,0.8784,0.8662,0.7214,0.7231,2.96
rf,Random Forest Classifier,0.8375,0.9227,0.8621,0.8352,0.8476,0.6736,0.6757,3.061
ada,Ada Boost Classifier,0.8398,0.9186,0.8338,0.8596,0.8458,0.6793,0.6808,2.979


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8594,0.9316,0.806,0.9153,0.8571,0.7198,0.7254
1,0.875,0.92,0.8955,0.8696,0.8824,0.7491,0.7494
2,0.875,0.9337,0.8507,0.9048,0.8769,0.7502,0.7517
3,0.8828,0.944,0.8806,0.8939,0.8872,0.7653,0.7654
4,0.8359,0.9306,0.7761,0.8966,0.832,0.6733,0.68
5,0.8281,0.9131,0.8507,0.8261,0.8382,0.655,0.6553
6,0.8672,0.9261,0.8382,0.9048,0.8702,0.7346,0.7369
7,0.875,0.9381,0.8529,0.9062,0.8788,0.75,0.7515
8,0.9141,0.9556,0.8824,0.9524,0.916,0.8283,0.8308
9,0.875,0.9355,0.8824,0.8824,0.8824,0.749,0.749


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4374
1,Target,Type_of_Respiratory_Allergy_CONJ
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2121, 468)"
5,Transformed train set shape,"(1894, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.7594,0.7659,0.5014,0.544,0.5185,0.3594,0.3617,3.002
et,Extra Trees Classifier,0.7891,0.8013,0.4356,0.6454,0.5145,0.3875,0.4026,2.97
lightgbm,Light Gradient Boosting Machine,0.7766,0.7905,0.4567,0.59,0.513,0.3719,0.3778,3.078
rf,Random Forest Classifier,0.7883,0.7828,0.3906,0.6523,0.4848,0.3639,0.3841,3.083


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7578,0.7719,0.5152,0.5312,0.5231,0.3608,0.3609
1,0.7422,0.8026,0.6364,0.5,0.56,0.3814,0.3869
2,0.75,0.7474,0.4848,0.5161,0.5,0.3336,0.3338
3,0.7812,0.7869,0.6667,0.5641,0.6111,0.4604,0.4635
4,0.8047,0.8014,0.6667,0.6111,0.6377,0.5043,0.5052
5,0.7734,0.7518,0.6061,0.5556,0.5797,0.425,0.4258
6,0.7734,0.8207,0.6061,0.5556,0.5797,0.425,0.4258
7,0.7812,0.8285,0.5294,0.6,0.5625,0.4174,0.4189
8,0.8281,0.8351,0.6176,0.7,0.6562,0.5423,0.5442
9,0.7188,0.7459,0.6765,0.4792,0.561,0.3628,0.3745


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,7099
1,Target,Type_of_Respiratory_Allergy_GINA
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1509, 468)"
5,Transformed train set shape,"(1282, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8906,0.9459,0.8971,0.8863,0.8913,0.7812,0.782,2.989
lightgbm,Light Gradient Boosting Machine,0.8789,0.9439,0.8674,0.8881,0.8772,0.7578,0.7586,3.012
ada,Ada Boost Classifier,0.8602,0.9201,0.8642,0.8577,0.8606,0.7203,0.7209,2.959
rf,Random Forest Classifier,0.8531,0.9287,0.8502,0.8554,0.8524,0.7062,0.7069,3.035


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.875,0.9311,0.8923,0.8657,0.8788,0.7498,0.7502
1,0.9062,0.9547,0.9219,0.8939,0.9077,0.8125,0.8129
2,0.8672,0.9362,0.9375,0.8219,0.8759,0.7344,0.7417
3,0.8984,0.9583,0.9375,0.8696,0.9023,0.7969,0.7993
4,0.9219,0.9834,0.9062,0.9355,0.9206,0.8438,0.8442
5,0.9219,0.9711,0.9531,0.8971,0.9242,0.8438,0.8454
6,0.8906,0.9557,0.9062,0.8788,0.8923,0.7812,0.7816
7,0.8594,0.9524,0.9062,0.8286,0.8657,0.7188,0.7219
8,0.8438,0.9094,0.8125,0.8667,0.8387,0.6875,0.6888
9,0.8828,0.9375,0.8906,0.8769,0.8837,0.7656,0.7657


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,7442
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Gram
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1567, 468)"
5,Transformed train set shape,"(1340, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9289,0.9759,0.9344,0.9181,0.9259,0.8576,0.8582,2.964
rf,Random Forest Classifier,0.9016,0.959,0.9393,0.8672,0.9007,0.8034,0.8083,3.026
ada,Ada Boost Classifier,0.8859,0.9467,0.8951,0.8708,0.8821,0.7717,0.7732,2.895
et,Extra Trees Classifier,0.8555,0.9316,0.8672,0.8377,0.8508,0.7108,0.7136,3.019


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9453,0.9704,0.9672,0.9219,0.944,0.8906,0.8916
1,0.8984,0.9689,0.9016,0.8871,0.8943,0.7966,0.7967
2,0.9609,0.9949,0.9672,0.9516,0.9593,0.9218,0.9219
3,0.9375,0.9738,0.9508,0.9206,0.9355,0.8749,0.8753
4,0.9062,0.9535,0.918,0.8889,0.9032,0.8124,0.8128
5,0.9766,0.9831,0.9836,0.9677,0.9756,0.9531,0.9532
6,0.9375,0.9821,0.9016,0.9649,0.9322,0.8744,0.8761
7,0.9219,0.9736,0.8852,0.9474,0.9153,0.8429,0.8446
8,0.9297,0.9672,0.9016,0.9483,0.9244,0.8588,0.8597
9,0.9297,0.9753,0.9344,0.9194,0.9268,0.8592,0.8593


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,5074
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Herb
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1963, 468)"
5,Transformed train set shape,"(1736, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9438,0.987,0.9249,0.9034,0.9137,0.872,0.8725,3.07
rf,Random Forest Classifier,0.8922,0.9519,0.8517,0.8224,0.8354,0.7554,0.7571,3.084
ada,Ada Boost Classifier,0.8906,0.9479,0.8203,0.8398,0.8286,0.7484,0.7498,2.95
et,Extra Trees Classifier,0.85,0.9203,0.7427,0.7853,0.7613,0.6522,0.6546,2.92


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9688,0.991,0.9756,0.9302,0.9524,0.9291,0.9297
1,0.9297,0.9762,0.9024,0.881,0.8916,0.8396,0.8397
2,0.9688,0.9952,0.9756,0.9302,0.9524,0.9291,0.9297
3,0.9375,0.9717,0.9268,0.8837,0.9048,0.8583,0.8588
4,0.9531,0.9907,0.9512,0.907,0.9286,0.8937,0.8943
5,0.9453,0.9927,0.9512,0.8864,0.9176,0.8768,0.878
6,0.9609,0.9801,0.9268,0.95,0.9383,0.9097,0.9099
7,0.9609,0.9865,0.9024,0.9737,0.9367,0.9085,0.9099
8,0.9531,0.9936,0.9524,0.9091,0.9302,0.895,0.8955
9,0.9062,0.9637,0.881,0.8409,0.8605,0.7899,0.7904


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,6255
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Tree
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1591, 468)"
5,Transformed train set shape,"(1364, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9203,0.9787,0.9214,0.9114,0.9154,0.8401,0.8418,2.942
rf,Random Forest Classifier,0.8953,0.9594,0.9129,0.8709,0.8905,0.7903,0.7929,3.058
ada,Ada Boost Classifier,0.8859,0.9429,0.8812,0.8758,0.8781,0.7709,0.7715,2.901
et,Extra Trees Classifier,0.8719,0.9437,0.888,0.8478,0.8663,0.7435,0.7462,2.963


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9062,0.9667,0.9833,0.8429,0.9077,0.8136,0.8236
1,0.8984,0.9672,0.8833,0.8983,0.8908,0.7959,0.796
2,0.9297,0.9765,0.9333,0.918,0.9256,0.859,0.8591
3,0.9297,0.9824,0.9167,0.9322,0.9244,0.8587,0.8588
4,0.9062,0.9679,0.9167,0.8871,0.9016,0.8121,0.8125
5,0.9375,0.9752,0.95,0.9194,0.9344,0.8748,0.8752
6,0.9141,0.9831,0.9,0.9153,0.9076,0.8273,0.8274
7,0.9219,0.9586,0.9833,0.8676,0.9219,0.8444,0.851
8,0.8984,0.9659,0.8644,0.9107,0.887,0.7949,0.7958
9,0.9062,0.9725,0.8814,0.9123,0.8966,0.8109,0.8113


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2943
1,Target,Type_of_Respiratory_Allergy_IGE_Dander_Animals
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1665, 468)"
5,Transformed train set shape,"(1438, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9305,0.9802,0.9179,0.9243,0.9201,0.8586,0.8599,3.003
rf,Random Forest Classifier,0.8938,0.9579,0.9144,0.8548,0.8832,0.786,0.788,3.028
ada,Ada Boost Classifier,0.8773,0.935,0.861,0.8606,0.8602,0.751,0.7518,2.923
et,Extra Trees Classifier,0.8133,0.9077,0.8287,0.7663,0.7953,0.6242,0.6273,2.92


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9766,0.9978,1.0,0.95,0.9744,0.9528,0.9539
1,0.9375,0.9841,0.9464,0.9138,0.9298,0.8735,0.874
2,0.9062,0.9809,0.9464,0.8548,0.8983,0.8118,0.8154
3,0.9297,0.9816,0.8929,0.9434,0.9174,0.8563,0.8573
4,0.9609,0.9866,0.9643,0.9474,0.9558,0.9208,0.9209
5,0.9297,0.9633,0.9107,0.9273,0.9189,0.8569,0.857
6,0.9141,0.9697,0.9286,0.8814,0.9043,0.8264,0.8274
7,0.9062,0.9742,0.8571,0.9231,0.8889,0.808,0.8097
8,0.9609,0.9864,0.9286,0.9811,0.9541,0.9202,0.9212
9,0.8828,0.9588,0.8571,0.8727,0.8649,0.7614,0.7615


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,4636
1,Target,Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(1631, 468)"
5,Transformed train set shape,"(1404, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9188,0.9723,0.9082,0.9127,0.9096,0.8358,0.8371,2.971
rf,Random Forest Classifier,0.8922,0.9522,0.8996,0.8676,0.8828,0.7831,0.7844,3.01
ada,Ada Boost Classifier,0.8617,0.9166,0.8442,0.8499,0.8459,0.7205,0.7221,2.85
et,Extra Trees Classifier,0.8531,0.9267,0.8443,0.8346,0.8378,0.7037,0.706,2.932


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9219,0.9787,0.8947,0.9273,0.9107,0.8413,0.8417
1,0.8906,0.9592,0.8596,0.8909,0.875,0.7778,0.7782
2,0.9531,0.9823,1.0,0.9062,0.9508,0.9062,0.9103
3,0.9141,0.9697,0.9138,0.8983,0.906,0.8269,0.827
4,0.875,0.9623,0.8448,0.875,0.8596,0.747,0.7474
5,0.9297,0.9667,0.9655,0.8889,0.9256,0.8592,0.8618
6,0.9141,0.9796,0.9138,0.8983,0.906,0.8269,0.827
7,0.8906,0.9608,0.8621,0.8929,0.8772,0.7787,0.779
8,0.9375,0.9734,0.8793,0.9808,0.9273,0.8728,0.8768
9,0.9375,0.9813,0.9138,0.9464,0.9298,0.8735,0.874


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2694
1,Target,Type_of_Respiratory_Allergy_IGE_Molds_Yeast
2,Target type,Binary
3,Original data shape,"(1507, 468)"
4,Transformed data shape,"(2207, 468)"
5,Transformed train set shape,"(1980, 468)"
6,Transformed test set shape,"(227, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9727,0.9917,0.9379,0.9431,0.9393,0.9217,0.9227,3.127
ada,Ada Boost Classifier,0.9469,0.9668,0.8793,0.8866,0.8816,0.8474,0.8485,3.015
rf,Random Forest Classifier,0.9047,0.9524,0.6966,0.8585,0.7664,0.7077,0.7154,3.097
et,Extra Trees Classifier,0.8516,0.8881,0.5138,0.7577,0.6085,0.5217,0.5387,2.936


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9922,1.0,1.0,0.9667,0.9831,0.978,0.9782
1,0.9766,0.9976,1.0,0.9062,0.9508,0.9355,0.9374
2,0.9766,0.9868,1.0,0.9062,0.9508,0.9355,0.9374
3,0.9688,0.9927,0.8966,0.963,0.9286,0.9086,0.9096
4,0.9688,0.9941,0.931,0.931,0.931,0.9108,0.9108
5,0.9688,0.9896,0.8621,1.0,0.9259,0.9063,0.9103
6,0.9688,0.9962,0.8966,0.963,0.9286,0.9086,0.9096
7,0.9531,0.9613,0.8621,0.9259,0.8929,0.8629,0.8638
8,0.9297,0.9794,0.8621,0.8333,0.8475,0.8018,0.802
9,0.9766,0.9916,0.931,0.9643,0.9474,0.9323,0.9325


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,3090
1,Target,Type_of_Food_Allergy_Aromatics
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2914, 468)"
5,Transformed train set shape,"(2672, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9612,0.7806,0.0333,0.0333,0.0333,0.0163,0.0155,3.225
rf,Random Forest Classifier,0.9788,0.7452,0.0,0.0,0.0,0.0,0.0,3.038
et,Extra Trees Classifier,0.9788,0.8005,0.0,0.0,0.0,0.0,0.0,2.967
lightgbm,Light Gradient Boosting Machine,0.9744,0.7958,0.0,0.0,0.0,-0.0059,-0.0066,3.819


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9708,0.8109,0.3333,0.3333,0.3333,0.3184,0.3184
1,0.9635,0.8806,0.3333,0.25,0.2857,0.2674,0.2703
2,0.9781,0.9179,0.3333,0.5,0.4,0.3893,0.3976
3,0.9635,0.6692,0.0,0.0,0.0,-0.0178,-0.0182
4,0.9781,0.607,0.0,0.0,0.0,0.0,0.0
5,0.9559,0.5672,0.0,0.0,0.0,-0.02,-0.0213
6,0.9853,0.8622,0.3333,1.0,0.5,0.4944,0.5731
7,0.9559,0.9398,0.3333,0.2,0.25,0.2287,0.2367
8,0.9559,0.8221,0.0,0.0,0.0,-0.0226,-0.0226
9,0.9706,0.9223,0.3333,0.3333,0.3333,0.3183,0.3183


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,5033
1,Target,Type_of_Food_Allergy_Egg
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2900, 468)"
5,Transformed train set shape,"(2658, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9627,0.7716,0.2917,0.3608,0.2987,0.2806,0.2936,3.059
rf,Random Forest Classifier,0.9729,0.9047,0.025,0.1,0.04,0.0373,0.0473,3.055
et,Extra Trees Classifier,0.9729,0.8823,0.025,0.05,0.0333,0.0308,0.0325,2.881
lightgbm,Light Gradient Boosting Machine,0.9663,0.8951,0.025,0.05,0.0333,0.0229,0.0243,3.491


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9489,0.9718,0.5,0.2857,0.3636,0.3391,0.3536
1,0.9197,0.7613,0.5,0.1818,0.2667,0.2339,0.2678
2,0.9562,0.9436,0.25,0.25,0.25,0.2274,0.2274
3,0.9343,0.8929,0.5,0.2222,0.3077,0.2785,0.304
4,0.9051,0.8797,0.25,0.0909,0.1333,0.0946,0.1083
5,0.9485,0.7018,0.3333,0.1667,0.2222,0.1987,0.2115
6,0.9412,0.8897,0.3333,0.1429,0.2,0.1745,0.1916
7,0.9632,0.9875,1.0,0.375,0.5455,0.5304,0.6008
8,0.9485,0.5238,0.0,0.0,0.0,-0.0259,-0.0261
9,0.9559,0.9223,0.25,0.25,0.25,0.2273,0.2273


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,879
1,Target,Type_of_Food_Allergy_Fish
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2908, 468)"
5,Transformed train set shape,"(2666, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.959,0.615,0.1,0.145,0.1036,0.0848,0.0934,3.14
rf,Random Forest Classifier,0.9766,0.7586,0.0,0.0,0.0,0.0,0.0,2.996
et,Extra Trees Classifier,0.9751,0.8081,0.0,0.0,0.0,-0.0023,-0.0028,2.99
lightgbm,Light Gradient Boosting Machine,0.9722,0.8002,0.0,0.0,0.0,-0.0064,-0.0073,3.289


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9489,0.5547,0.0,0.0,0.0,-0.0257,-0.0259
1,0.9635,0.7711,0.3333,0.25,0.2857,0.2674,0.2703
2,0.9197,0.8881,0.3333,0.1,0.1538,0.1243,0.1498
3,0.8759,0.3882,0.25,0.0667,0.1053,0.062,0.078
4,0.8978,0.7829,0.25,0.0833,0.125,0.0849,0.0996
5,0.9412,0.7093,0.3333,0.1429,0.2,0.1745,0.1916
6,0.9485,0.7055,0.3333,0.1667,0.2222,0.1987,0.2115
7,0.8971,0.6855,0.0,0.0,0.0,-0.0359,-0.0446
8,0.9118,0.7882,0.3333,0.0909,0.1429,0.1121,0.1391
9,0.9338,0.7607,0.6667,0.2,0.3077,0.2834,0.3413


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,2233
1,Target,Type_of_Food_Allergy_Fruits_and_Vegetables
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2808, 468)"
5,Transformed train set shape,"(2566, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9172,0.7907,0.2819,0.352,0.3034,0.2608,0.2674,3.051
lightgbm,Light Gradient Boosting Machine,0.9355,0.8477,0.0972,0.4667,0.1538,0.1323,0.1814,3.256
et,Extra Trees Classifier,0.9385,0.8729,0.0986,0.44,0.1534,0.1363,0.1807,2.942
rf,Random Forest Classifier,0.9377,0.8361,0.0625,0.3667,0.103,0.0895,0.1309,3.074


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8978,0.7384,0.25,0.2,0.2222,0.1683,0.1695
1,0.8832,0.6657,0.125,0.1,0.1111,0.0494,0.0498
2,0.9635,0.9671,0.5,0.8,0.6154,0.5973,0.6155
3,0.8832,0.8064,0.2222,0.1818,0.2,0.1377,0.1385
4,0.9416,0.9245,0.4444,0.5714,0.5,0.4695,0.4737
5,0.8897,0.7021,0.375,0.2308,0.2857,0.2296,0.2376
6,0.9191,0.7871,0.375,0.3333,0.3529,0.31,0.3106
7,0.8971,0.8594,0.5,0.2857,0.3636,0.3121,0.3267
8,0.9559,0.9053,0.625,0.625,0.625,0.6016,0.6016
9,0.8971,0.8545,0.375,0.25,0.3,0.2468,0.2528


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1019
1,Target,Type_of_Food_Allergy_Mammalian_Milk
2,Target type,Binary
3,Original data shape,"(1607, 468)"
4,Transformed data shape,"(2930, 468)"
5,Transformed train set shape,"(2688, 468)"
6,Transformed test set shape,"(242, 468)"
7,Numeric features,467
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9846,0.8668,0.1333,0.2333,0.1567,0.1537,0.1665,3.881
ada,Ada Boost Classifier,0.9802,0.589,0.0833,0.15,0.1067,0.0988,0.1035,3.328
rf,Random Forest Classifier,0.9846,0.8807,0.0,0.0,0.0,0.0,0.0,3.023
et,Extra Trees Classifier,0.9839,0.8853,0.0,0.0,0.0,-0.001,-0.0011,2.978


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 50 candidates, totalling 500 fits


### Generate predictions from classifiers and include them into the dataset + evaluation 

In [55]:
data= pd.read_csv('data/train.csv', low_memory=False)
data_test= pd.read_csv('data/test.csv', low_memory=False)

### Define which target will be predict at the end with which model

In [56]:
list_of_Target_predict_with_pytorch_meta = ['Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Mammalian_Milk', 
                  'Venom_Allergy', 'Type_of_Food_Allergy_Tree_Nuts', 'Type_of_Food_Allergy_Aromatics', 
                  'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Venom_Allergy_ATCD_Venom',
                  'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Peanut', 
                  'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_TPO', 
                  'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Shellfish']

In [89]:
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        self.features = self.data.iloc[:, :-1].values
        self.labels = self.data.iloc[:, -1].values

        if self.transform is not None:
            self.features, self.labels = self.transform.fit_resample(self.features, self.labels)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        label = torch.tensor(int(self.labels[idx]), dtype=torch.long)

        return features, label
    
    @property
    def num_features(self):
        return self.features.shape[1]
    
    @property
    def num_classes(self):
        return len(set(self.labels))

    
class Allergy_Net(nn.Module):
    
    def __init__(self,input_size,hidden_size, num_class,dropout_rate):

        super(Allergy_Net,self).__init__()
        #self.subsampler = RandomFeatureSubsampler(p=0.2)
        self.linear1= nn.Linear(input_size,hidden_size)
        self.linear2= nn.Linear(hidden_size,int(hidden_size/8))
        self.linear3= nn.Linear(int(hidden_size/8),int(hidden_size/32))
        self.linear4= nn.Linear(int(hidden_size/32),num_class)

        self.dropout1 = nn.Dropout(dropout_rate)
        self.batchnorm1 = nn.BatchNorm1d(int(hidden_size/32))

    def forward(self,inputs):
        #inputs = self.subsampler(inputs)
        x = torch.relu(self.linear1(inputs))
        x= self.dropout1(x)
        x = torch.relu(self.linear2(x))
        x= self.dropout1(x)
        x = torch.relu(self.linear3(x))
        x= self.dropout1(x)
        x= self.batchnorm1(x)
        outputs= self.linear4(x)

        # no softmax because Cross entropy Loss
        return outputs
    


### Obtain pred with pycaret Smote only

In [58]:
def obtain_pred(data, folder_path):
    liste=[]
    for subdir, dirs, files in os.walk(folder_path):
        for file in files:
            print("Calling model:", file) 
            model = load_model(os.path.join(subdir, os.path.splitext(file)[0]))
            values=predict_model(model, data= data.reset_index())
            file_name = os.path.splitext(file)[0]
            values_name = '{}_{}'.format(file_name, 'values')
            globals()[values_name] = values
            globals()[values_name] = globals()[values_name].rename(columns={'prediction_label': 'pred_label ' + str(values_name), 'prediction_score': 'pred_score ' + str(values_name) })
            liste.append(globals()[values_name])
        return liste

In [74]:
folder_path_pycaret = 'Test_Imbl_SMOTE_tuned'
pred_pycaret= obtain_pred(encode_data.reset_index(drop=True), folder_path_pycaret)

Calling model: Type_of_Food_Allergy_Aromatics_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Respiratory_Allergy_CONJ_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Peanut_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Respiratory_Allergy_IGE_Pollen_Tree_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Venom_Allergy_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Food_Allergy_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Fruits_and_Vegetables_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Shellfish_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model

In [73]:
liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

merged_df = pd.DataFrame()
for i,df in enumerate(pred_pycaret):
    if i==0:
        merged_df = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df = pd.concat([merged_df, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_ETC_brutforce_imb_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df)), columns= added_list)
merged_df = pd.concat([merged_df, zeros_data], axis=1)
merged_df.info()
merged_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 524 entries, index to pred_label Type_of_Food_Allergy_Cereals_&_Seeds_ETC_brutforce_imb_binary_values
dtypes: float32(467), float64(27), int32(1), int64(29)
memory usage: 6.6 MB


Unnamed: 0,index,Age,Gender,Blood_Month_sample,Rural_or_urban_area,Sensitization,Skin_Symptoms,Act_d_1,Act_d_2,Act_d_5,...,pred_label Type_of_Food_Allergy_Fish_ETC_brutforce_imb_binary_values,pred_score Type_of_Food_Allergy_Fish_ETC_brutforce_imb_binary_values,pred_label Type_of_Venom_Allergy_ATCD_Venom_ETC_brutforce_imb_binary_values,pred_score Type_of_Venom_Allergy_ATCD_Venom_ETC_brutforce_imb_binary_values,pred_label Type_of_Respiratory_Allergy_IGE_Mite_Cockroach_ETC_brutforce_imb_binary_values,pred_score Type_of_Respiratory_Allergy_IGE_Mite_Cockroach_ETC_brutforce_imb_binary_values,pred_label Type_of_Respiratory_Allergy_IGE_Pollen_Herb_ETC_brutforce_imb_binary_values,pred_score Type_of_Respiratory_Allergy_IGE_Pollen_Herb_ETC_brutforce_imb_binary_values,pred_label Type_of_Food_Allergy_Other_ETC_brutforce_imb_binary_values,pred_label Type_of_Food_Allergy_Cereals_&_Seeds_ETC_brutforce_imb_binary_values
0,0,15.0,0.0,7.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0.6424,0,0.5383,0,0.9994,0,0.9961,0,0
1,1,72.0,1.0,5.0,9.0,1.0,9.0,0.0,0.0,0.0,...,0,0.6098,0,0.5492,0,0.9995,0,0.9961,0,0
2,2,67.0,1.0,6.0,9.0,0.0,9.0,0.0,0.0,0.0,...,0,0.6098,0,0.5624,0,0.9998,0,0.9975,0,0
3,3,13.0,1.0,9.0,1.0,1.0,9.0,0.0,2.150391,0.0,...,0,0.7264,0,0.7138,1,0.997,1,0.9558,0,0
4,4,28.0,1.0,12.0,9.0,1.0,1.0,0.0,74.0,0.0,...,0,0.6646,0,0.6503,1,0.9993,1,0.5495,0,0


### Obtain pred by pytorch_fold_meta

In [62]:
folder_path_fold = 'liste_classif_optim_2_no_oversampled'
folder_path_meta='liste_meta_4'
liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

def get_predictions_fold_imblearn_meta_classifier(df,list_of_targets, folder_path_fold,folder_path_meta):
    
    liste=[]
    for column in list_of_targets.columns:
        print(column)
        dataset_panda= pd.concat([df,list_of_targets[column]], axis = 1).reset_index(drop=True)
        dataset_all=CustomDataset(dataset_panda)
        train_all = DataLoader(dataset_all, batch_size=64, shuffle=False)

       
        if column not in liste_column_monovalue:
            input_size= 467 
            hidden_size= 1024
            num_class = 2 
            dropout_rate=0.3
            
            model_list = []
            target_string = f"{column}_fold"

            # Iterate over the files in the folder
            for filename in os.listdir(folder_path_fold):
                # Check if the file path contains the target string
                if target_string in filename:
                    # Load the model using the appropriate method
                    model = Allergy_Net(input_size,hidden_size,num_class,dropout_rate)
                    model_path = os.path.join(folder_path_fold, filename)
                    model.load_state_dict(torch.load(model_path))
                    model_list.append(model)

            # obtain prediction for each folds
            predictions_all = []
            for model in model_list:
                model.eval()
                pred_model=[]
                with torch.no_grad():
                    for data, labels in train_all:
                        outputs = model(data)
                        pred_model.extend(outputs)
                predictions_all.append(pred_model)

            # Combine predictions from different folds
            combined_pred = []
            for i in range(len(predictions_all[0])):
                combined_pred_mini = []
                for j in range(len(predictions_all)):
                    combined_pred_mini.extend(predictions_all[j][i])
                combined_pred.append(combined_pred_mini)
                
            # Extract values from tensors and convert to a list of lists
            combined_pred_values = [[value.item() for value in sample] for sample in combined_pred]
            num_folds = len(predictions_all)*2
            # Create a dictionary to store the fold predictions
            fold_predictions = {}
            for i in range(num_folds):
                fold_predictions[f"Value_{i+1}"] = [sample[i].item() for sample in combined_pred]

            # Convert the dictionary into a DataFrame
            df_combined_pred = pd.DataFrame(fold_predictions)


            target_string_for_meta_imbl = f"{column}_meta_model_pycaret_binary"
            for filename in os.listdir(folder_path_meta):
                # Check if the file path contains the target string
                if target_string_for_meta_imbl in filename:
                    model = load_model(os.path.join(folder_path_meta, os.path.splitext(filename)[0]))
                    values=predict_model(model, data= df_combined_pred.reset_index())
                    file_name = os.path.splitext(filename)[0]
                    values_name = '{}_{}'.format(file_name, 'values')
                    globals()[values_name] = values
                    globals()[values_name] = globals()[values_name].rename(columns={'prediction_label': 'pred_label ' + str(values_name), 'prediction_score': 'pred_score ' + str(values_name) })
                    liste.append(globals()[values_name])
                    print('done')
    return liste

In [63]:
pred_pytorch_meta=get_predictions_fold_imblearn_meta_classifier(encode_data,Targets, folder_path_fold,folder_path_meta)

Allergy_Present
Transformation Pipeline and Model Successfully Loaded
done
Severe_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Respiratory_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Food_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Venom_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_ARIA
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_CONJ
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_GINA
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Herb
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Tree
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Dander_Ani

In [64]:
print(len(pred_pytorch_meta))
pred_pytorch_meta[0]

27


Unnamed: 0,index,Value_1,Value_2,Value_3,Value_4,Value_5,Value_6,Value_7,Value_8,Value_9,Value_10,Value_11,Value_12,Value_13,Value_14,Value_15,Value_16,pred_label Allergy_Present_meta_model_pycaret_binary_values,pred_score Allergy_Present_meta_model_pycaret_binary_values
0,0,-0.748127,0.729992,-0.584334,0.499938,-0.773140,0.906865,-0.365902,0.172279,-0.376860,0.111737,-0.633374,0.795414,-0.585548,0.792772,-0.906501,0.757275,1,1.00
1,1,0.746651,-0.758935,0.341197,-0.434825,0.950985,-0.915116,0.418233,-0.683620,0.324738,-0.618463,0.632095,-0.547063,0.668416,-0.474834,0.404382,-0.541455,0,0.83
2,2,0.733035,-0.744944,0.363207,-0.454483,0.988670,-0.956325,0.420199,-0.676870,0.331931,-0.624628,0.657758,-0.573595,0.695211,-0.501030,0.441308,-0.576205,1,1.00
3,3,-0.814914,0.790831,-1.250587,1.151908,-1.247099,1.370371,-1.376382,1.003550,-0.441291,0.180355,-1.376143,1.567106,-1.131734,1.325826,-1.204989,1.048573,1,1.00
4,4,2.561285,-2.592341,1.212610,-1.193888,2.119431,-2.285433,-1.933193,1.469598,0.540509,-0.800075,1.962318,-2.170416,2.523063,-2.252595,3.708384,-3.600559,0,0.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2984,2984,-0.786242,0.773249,-0.847874,0.780075,-0.214157,0.318150,-0.090434,-0.136489,-0.115012,-0.155921,-0.068002,0.183874,-0.054706,0.246645,-0.621908,0.472636,1,0.74
2985,2985,0.634945,-0.641316,0.682803,-0.726152,1.071488,-1.048074,0.201980,-0.465010,0.309130,-0.606897,1.063268,-1.031496,0.911311,-0.705643,0.790858,-0.903036,0,0.88
2986,2986,-1.022694,1.008510,-1.007091,0.946322,-0.863912,1.000217,-0.347636,0.157731,-0.616937,0.385259,-0.822320,1.007491,-0.686823,0.898095,-0.907244,0.757713,1,1.00
2987,2987,-1.474948,1.441920,-0.980786,0.898808,-1.064659,1.189412,-0.841738,0.569061,-0.989572,0.851761,-1.010729,1.221723,-0.905806,1.116985,-1.044051,0.890789,1,1.00


In [65]:
liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

merged_df_pytorch = pd.DataFrame()
for i,df in enumerate(pred_pytorch_meta):
    if i==0:
        merged_df_pytorch = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df_pytorch = pd.concat([merged_df_pytorch, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_meta_model_pycaret_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df_pytorch)), columns= added_list)
merged_df_pytorch = pd.concat([merged_df_pytorch, zeros_data], axis=1)
merged_df_pytorch.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Data columns (total 73 columns):
 #   Column                                                                                      Non-Null Count  Dtype  
---  ------                                                                                      --------------  -----  
 0   index                                                                                       2989 non-null   int32  
 1   Value_1                                                                                     2989 non-null   float32
 2   Value_2                                                                                     2989 non-null   float32
 3   Value_3                                                                                     2989 non-null   float32
 4   Value_4                                                                                     2989 non-null   float32
 5   Value_5                                  

In [80]:
df_predictions_final= pd.DataFrame()

for target in Targets:
    if target not in list_of_Target_predict_with_pytorch_meta:
        column_target= merged_df[f'pred_label {target}_ETC_brutforce_imb_binary_values']
        df_predictions_final = pd.concat([df_predictions_final, column_target], axis=1)
        
    else:
        column_target= merged_df_pytorch[f'pred_label {target}_meta_model_pycaret_binary_values']
        df_predictions_final = pd.concat([df_predictions_final, column_target], axis=1) 
df_predictions_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2989 entries, 0 to 2988
Data columns (total 29 columns):
 #   Column                                                                                     Non-Null Count  Dtype
---  ------                                                                                     --------------  -----
 0   pred_label Allergy_Present_ETC_brutforce_imb_binary_values                                 2989 non-null   int64
 1   pred_label Severe_Allergy_ETC_brutforce_imb_binary_values                                  2989 non-null   int64
 2   pred_label Respiratory_Allergy_ETC_brutforce_imb_binary_values                             2989 non-null   int64
 3   pred_label Food_Allergy_ETC_brutforce_imb_binary_values                                    2989 non-null   int64
 4   pred_label Venom_Allergy_meta_model_pycaret_binary_values                                  2989 non-null   int64
 5   pred_label Type_of_Respiratory_Allergy_ARIA_ETC_brutforce_imb_binar

#### homogenize columns names for evaluation

In [81]:
for target in Targets:
    liste_string_to_check = [f' {target}_m',f' {target}_E']
    for elem in liste_string_to_check:
        for column in df_predictions_final.columns:
            if elem in column:
                new_column_name = column.replace(column, f'pred_label_{target}_values')
                df_predictions_final = df_predictions_final.rename(columns={column: new_column_name})
df_predictions_final.info()
df_predictions_final.head()

<class 'pandas.core.frame.DataFrame'>
Index: 2989 entries, 0 to 2988
Data columns (total 29 columns):
 #   Column                                                            Non-Null Count  Dtype
---  ------                                                            --------------  -----
 0   pred_label_Allergy_Present_values                                 2989 non-null   int64
 1   pred_label_Severe_Allergy_values                                  2989 non-null   int64
 2   pred_label_Respiratory_Allergy_values                             2989 non-null   int64
 3   pred_label_Food_Allergy_values                                    2989 non-null   int64
 4   pred_label_Venom_Allergy_values                                   2989 non-null   int64
 5   pred_label_Type_of_Respiratory_Allergy_ARIA_values                2989 non-null   int64
 6   pred_label_Type_of_Respiratory_Allergy_CONJ_values                2989 non-null   int64
 7   pred_label_Type_of_Respiratory_Allergy_GINA_values      

Unnamed: 0,pred_label_Allergy_Present_values,pred_label_Severe_Allergy_values,pred_label_Respiratory_Allergy_values,pred_label_Food_Allergy_values,pred_label_Venom_Allergy_values,pred_label_Type_of_Respiratory_Allergy_ARIA_values,pred_label_Type_of_Respiratory_Allergy_CONJ_values,pred_label_Type_of_Respiratory_Allergy_GINA_values,pred_label_Type_of_Respiratory_Allergy_IGE_Pollen_Gram_values,pred_label_Type_of_Respiratory_Allergy_IGE_Pollen_Herb_values,...,pred_label_Type_of_Food_Allergy_Fruits_and_Vegetables_values,pred_label_Type_of_Food_Allergy_Mammalian_Milk_values,pred_label_Type_of_Food_Allergy_Oral_Syndrom_values,pred_label_Type_of_Food_Allergy_Other_Legumes_values,pred_label_Type_of_Food_Allergy_Peanut_values,pred_label_Type_of_Food_Allergy_Shellfish_values,pred_label_Type_of_Food_Allergy_TPO_values,pred_label_Type_of_Food_Allergy_Tree_Nuts_values,pred_label_Type_of_Venom_Allergy_ATCD_Venom_values,pred_label_Type_of_Venom_Allergy_IGE_Venom_values
0,1,1,1,0,0,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,1,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,0,1,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,1,0,0,1,1,1,1,...,0,0,1,0,0,0,0,0,0,0


#### Evaluation binary for each methods

In [82]:
data_true= pd.read_csv('data/train.csv', low_memory=False)
data_true.head()

Unnamed: 0,Patient_ID,Chip_Code,Chip_Type,Chip_Image_Name,Age,Gender,Blood_Month_sample,French_Residence_Department,French_Region,Rural_or_urban_area,...,Type_of_Food_Allergy_Fruits_and_Vegetables,Type_of_Food_Allergy_Mammalian_Milk,Type_of_Food_Allergy_Oral_Syndrom,Type_of_Food_Allergy_Other_Legumes,Type_of_Food_Allergy_Peanut,Type_of_Food_Allergy_Shellfish,Type_of_Food_Allergy_TPO,Type_of_Food_Allergy_Tree_Nuts,Type_of_Venom_Allergy_ATCD_Venom,Type_of_Venom_Allergy_IGE_Venom
0,PCR0232,02AHX0C9,ALEX,02AHX0C9.bmp,15.0,0.0,7.0,deptGGG,regionF,1,...,0,0,0,0,0,0,0,0,0,0
1,QVW0271,AD53727_2,ISAC_V1,,72.0,1.0,5.0,deptY,regionD,9,...,9,9,9,9,9,9,9,9,0,0
2,QVW0489,AM91127_4,ISAC_V1,,67.0,1.0,6.0,deptY,regionD,9,...,0,0,1,0,0,0,0,0,0,0
3,TXV0075,881309006950,ISAC_V1,AC10727_4_881309006950_2013_10_28_21_20_5.bmp,13.0,1.0,9.0,deptEE,regionC,1,...,0,0,0,0,0,0,0,0,0,0
4,QVW0946,CSB5927_4,ISAC_V1,,28.0,1.0,12.0,deptY,regionD,9,...,9,9,9,9,9,9,9,9,0,0


In [83]:
target_columns = ['Type_of_Food_Allergy_Other_Legumes','Type_of_Food_Allergy_Cereals_&_Seeds', 'Allergy_Present', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Food_Allergy_Other',
                  'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach', 'Venom_Allergy', 'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Food_Allergy', 'Type_of_Food_Allergy_Oral_Syndrom','Type_of_Food_Allergy_Tree_Nuts', 'Severe_Allergy',
                  'Type_of_Food_Allergy_Aromatics', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Peanut',
                  'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish', 'Type_of_Respiratory_Allergy_GINA', 'Respiratory_Allergy', 'Type_of_Food_Allergy_TPO',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',
                  'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Food_Allergy_Shellfish']
f1_scores=[]
below_threshold_columns=[]
threshold =0.89
for column in target_columns:
    # Get the predicted and true labels for the column
    
    rows_with_9 = data_true[column].isin([9])
    Targets_without_9 = data_true[column][~rows_with_9]
    merged_df_removal = df_predictions_final[~rows_with_9]
        
    merged_df_mod = pd.concat([merged_df_removal, Targets_without_9] , axis=1).reset_index(drop=True)
    y_true = merged_df_mod[column]
    y_pred = merged_df_mod[f'pred_label_{column}_values']
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true, y_pred)
    
    # Calculate recall
    recall = recall_score(y_true, y_pred)
    
    # Calculate F1 score
    if (data_true[column] == 1).any():
        f1 = f1_score(y_true, y_pred,average='macro')
    else:
        f1 = f1_score(y_true, y_pred, pos_label=0)
    cm = confusion_matrix(y_true, y_pred)

    # Display the metrics and confusion matrix
    print(f"Metrics for {column}:")
    print(f"Accuracy: {accuracy}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print("Confusion Matrix:")
    print(cm)
    print()
    if f1 != 0:
        f1_scores.append(f1) # Append the F1 score to the list
    if f1 < threshold:
        below_threshold_columns.append(column)

mean_f1 = sum(f1_scores) / len(f1_scores)  # Calculate the mean of F1 scores

print(f"Mean F1 Score: {mean_f1}")
print(below_threshold_columns)

Metrics for Type_of_Food_Allergy_Other_Legumes:
Accuracy: 0.9931549471064095
Recall: 1.0
F1 Score: 0.9486779722035925
Confusion Matrix:
[[1546   11]
 [   0   50]]

Metrics for Type_of_Food_Allergy_Cereals_&_Seeds:
Accuracy: 1.0
Recall: 0.0
F1 Score: 1.0
Confusion Matrix:
[[1607]]

Metrics for Allergy_Present:
Accuracy: 0.9766913018760659
Recall: 0.9890350877192983
F1 Score: 0.9659484677778423
Confusion Matrix:
[[ 365   26]
 [  15 1353]]

Metrics for Type_of_Food_Allergy_Mammalian_Milk:
Accuracy: 0.9981331673926571
Recall: 1.0
F1 Score: 0.9712235798320332
Confusion Matrix:
[[1579    3]
 [   0   25]]

Metrics for Type_of_Food_Allergy_Other:
Accuracy: 1.0
Recall: 0.0
F1 Score: 1.0
Confusion Matrix:
[[1607]]

Metrics for Type_of_Respiratory_Allergy_IGE_Mite_Cockroach:
Accuracy: 0.9900464499004645
Recall: 0.9882352941176471
F1 Score: 0.9899495141487098
Confusion Matrix:
[[820   7]
 [  8 672]]

Metrics for Venom_Allergy:
Accuracy: 0.9959852793576447
Recall: 1.0
F1 Score: 0.9220599739243807
C

## Generate prediction for the test set

In [84]:
original_validation_data = pd.read_csv('data/test.csv')

data_test = original_validation_data.set_index('trustii_id')

In [85]:
encode_data_test = preprocessing_data_test(data_test)
missing_cols = set(encode_data.columns) ^ set(encode_data_test.columns)
print(missing_cols)
len(missing_cols)
encode_data_test = encode_data_test.reindex(columns=encode_data.columns, fill_value=0).astype('float16')
encode_data_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 1 to 1282
Columns: 443 entries, Gender to Treatment_of_rhinitis_9
dtypes: float16(443)
memory usage: 527.8 KB
None
{'Treatment_of_athsma_8', 'Treatment_of_atopic_dematitis_7', 'French_Residence_Department_deptJJJ', 'French_Residence_Department_deptOOO', 'French_Residence_Department_deptIII', 'French_Residence_Department_deptTTT', 'French_Residence_Department_deptRRR', 'French_Residence_Department_deptU', 'Age', 'French_Residence_Department_deptCCCC', 'French_Region_regionO', 'French_Residence_Department_deptHHH', 'French_Residence_Department_deptNNN', 'French_Residence_Department_deptDD', 'French_Residence_Department_deptQQQ', 'French_Residence_Department_deptPPP', 'French_Residence_Department_deptK', 'French_Residence_Department_deptUU', 'French_Residence_Department_deptW', 'General_cofactors_11', 'French_Residence_Department_deptMMM', 'French_Residence_Department_deptDDD', 'French_Residence_Department_deptAAAA', 'French_R

In [91]:
pred_pycaret= obtain_pred(encode_data_test.reset_index(), folder_path_pycaret)

liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

merged_df = pd.DataFrame()
for i,df in enumerate(pred_pycaret):
    if i==0:
        merged_df = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df = pd.concat([merged_df, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_ETC_brutforce_imb_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df)), columns= added_list)
merged_df = pd.concat([merged_df, zeros_data], axis=1)
merged_df.info()

Calling model: Type_of_Food_Allergy_Aromatics_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Respiratory_Allergy_CONJ_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Peanut_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Respiratory_Allergy_IGE_Pollen_Tree_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Venom_Allergy_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Food_Allergy_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Fruits_and_Vegetables_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model Successfully Loaded
Calling model: Type_of_Food_Allergy_Shellfish_ETC_brutforce_imb_binary.pkl
Transformation Pipeline and Model

In [92]:
merged_df.head()

Unnamed: 0,index,trustii_id,Age,Gender,Blood_Month_sample,Rural_or_urban_area,Sensitization,Skin_Symptoms,Act_d_1,Act_d_2,...,pred_label Type_of_Food_Allergy_Fish_ETC_brutforce_imb_binary_values,pred_score Type_of_Food_Allergy_Fish_ETC_brutforce_imb_binary_values,pred_label Type_of_Venom_Allergy_ATCD_Venom_ETC_brutforce_imb_binary_values,pred_score Type_of_Venom_Allergy_ATCD_Venom_ETC_brutforce_imb_binary_values,pred_label Type_of_Respiratory_Allergy_IGE_Mite_Cockroach_ETC_brutforce_imb_binary_values,pred_score Type_of_Respiratory_Allergy_IGE_Mite_Cockroach_ETC_brutforce_imb_binary_values,pred_label Type_of_Respiratory_Allergy_IGE_Pollen_Herb_ETC_brutforce_imb_binary_values,pred_score Type_of_Respiratory_Allergy_IGE_Pollen_Herb_ETC_brutforce_imb_binary_values,pred_label Type_of_Food_Allergy_Other_ETC_brutforce_imb_binary_values,pred_label Type_of_Food_Allergy_Cereals_&_Seeds_ETC_brutforce_imb_binary_values
0,0,1,0.0,1.0,6.0,9.0,1.0,0.0,0.0,0.0,...,0,0.5733,0,0.6803,1,0.9995,0,0.982,0,0
1,1,4,0.0,1.0,7.0,9.0,1.0,1.0,0.0,0.109985,...,0,0.5295,0,0.5489,1,0.995,0,0.9933,0,0
2,2,5,0.0,0.0,10.0,1.0,1.0,1.0,0.0,0.0,...,0,0.5427,0,0.6187,0,0.9095,1,0.9884,0,0
3,3,7,0.0,1.0,8.0,0.0,1.0,0.0,0.0,0.0,...,0,0.6487,0,0.5608,0,0.9567,0,0.9948,0,0
4,4,8,0.0,0.0,4.0,1.0,0.0,9.0,0.0,0.0,...,0,0.5544,0,0.5868,0,0.9998,0,0.9986,0,0


In [94]:
num_rows = len(encode_data_test)  # Specify the number of rows
data_array = np.random.choice([0, 1], size=(num_rows, 29))

# Create a DataFrame using the numpy array
Targets_test = pd.DataFrame(data_array, columns=['Allergy_Present', 'Severe_Allergy', 'Respiratory_Allergy', 'Food_Allergy', 'Venom_Allergy',
                     'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_CONJ', 
                     'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                     'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',
                     'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',
                     'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Other',
                     'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish',
                     'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Mammalian_Milk', 
                     'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Peanut',
                     'Type_of_Food_Allergy_Shellfish', 'Type_of_Food_Allergy_TPO', 'Type_of_Food_Allergy_Tree_Nuts',
                     'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Venom_Allergy_IGE_Venom'])

Targets_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 29 columns):
 #   Column                                          Non-Null Count  Dtype
---  ------                                          --------------  -----
 0   Allergy_Present                                 586 non-null    int64
 1   Severe_Allergy                                  586 non-null    int64
 2   Respiratory_Allergy                             586 non-null    int64
 3   Food_Allergy                                    586 non-null    int64
 4   Venom_Allergy                                   586 non-null    int64
 5   Type_of_Respiratory_Allergy_ARIA                586 non-null    int64
 6   Type_of_Respiratory_Allergy_CONJ                586 non-null    int64
 7   Type_of_Respiratory_Allergy_GINA                586 non-null    int64
 8   Type_of_Respiratory_Allergy_IGE_Pollen_Gram     586 non-null    int64
 9   Type_of_Respiratory_Allergy_IGE_Pollen_Herb     586 non-null    i

In [96]:
pred_pytorch_meta=get_predictions_fold_imblearn_meta_classifier(encode_data_test.reset_index(drop=True),Targets_test, folder_path_fold,folder_path_meta)

Allergy_Present
Transformation Pipeline and Model Successfully Loaded
done
Severe_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Respiratory_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Food_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Venom_Allergy
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_ARIA
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_CONJ
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_GINA
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Herb
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Pollen_Tree
Transformation Pipeline and Model Successfully Loaded
done
Type_of_Respiratory_Allergy_IGE_Dander_Ani

In [97]:
merged_df_pytorch = pd.DataFrame()
for i,df in enumerate(pred_pytorch_meta):
    if i==0:
        merged_df_pytorch = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df_pytorch = pd.concat([merged_df_pytorch, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_meta_model_pycaret_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df_pytorch)), columns= added_list)
merged_df_pytorch = pd.concat([merged_df_pytorch, zeros_data], axis=1)
merged_df_pytorch.info()

df_predictions_final= pd.DataFrame()

for target in Targets:
    if target not in list_of_Target_predict_with_pytorch_meta:
        column_target= merged_df[f'pred_label {target}_ETC_brutforce_imb_binary_values']
        df_predictions_final = pd.concat([df_predictions_final, column_target], axis=1)
        
    else:
        column_target= merged_df_pytorch[f'pred_label {target}_meta_model_pycaret_binary_values']
        df_predictions_final = pd.concat([df_predictions_final, column_target], axis=1) 
df_predictions_final.info()


for target in Targets:
    liste_string_to_check = [f' {target}_m',f' {target}_E']
    for elem in liste_string_to_check:
        for column in df_predictions_final.columns:
            if elem in column:
                new_column_name = column.replace(column, f'pred_label_{target}_values')
                df_predictions_final = df_predictions_final.rename(columns={column: new_column_name})
df_predictions_final.info()
df_predictions_final.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 73 columns):
 #   Column                                                                                      Non-Null Count  Dtype  
---  ------                                                                                      --------------  -----  
 0   index                                                                                       586 non-null    int32  
 1   Value_1                                                                                     586 non-null    float32
 2   Value_2                                                                                     586 non-null    float32
 3   Value_3                                                                                     586 non-null    float32
 4   Value_4                                                                                     586 non-null    float32
 5   Value_5                                    

Unnamed: 0,pred_label_Allergy_Present_values,pred_label_Severe_Allergy_values,pred_label_Respiratory_Allergy_values,pred_label_Food_Allergy_values,pred_label_Venom_Allergy_values,pred_label_Type_of_Respiratory_Allergy_ARIA_values,pred_label_Type_of_Respiratory_Allergy_CONJ_values,pred_label_Type_of_Respiratory_Allergy_GINA_values,pred_label_Type_of_Respiratory_Allergy_IGE_Pollen_Gram_values,pred_label_Type_of_Respiratory_Allergy_IGE_Pollen_Herb_values,...,pred_label_Type_of_Food_Allergy_Fruits_and_Vegetables_values,pred_label_Type_of_Food_Allergy_Mammalian_Milk_values,pred_label_Type_of_Food_Allergy_Oral_Syndrom_values,pred_label_Type_of_Food_Allergy_Other_Legumes_values,pred_label_Type_of_Food_Allergy_Peanut_values,pred_label_Type_of_Food_Allergy_Shellfish_values,pred_label_Type_of_Food_Allergy_TPO_values,pred_label_Type_of_Food_Allergy_Tree_Nuts_values,pred_label_Type_of_Venom_Allergy_ATCD_Venom_values,pred_label_Type_of_Venom_Allergy_IGE_Venom_values
0,1,0,1,0,0,0,0,1,1,0,...,0,0,0,0,1,0,0,0,0,0
1,1,1,1,1,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
2,1,1,1,1,0,1,0,1,1,1,...,1,1,0,0,0,0,1,0,0,0
3,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [99]:
target_columns = ['Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Other_Legumes', 'Allergy_Present', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Food_Allergy_Other',
                  'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach', 'Venom_Allergy', 'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Food_Allergy', 'Type_of_Food_Allergy_Oral_Syndrom','Type_of_Food_Allergy_Tree_Nuts', 'Severe_Allergy',
                  'Type_of_Food_Allergy_Aromatics', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Peanut',
                  'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish', 'Type_of_Respiratory_Allergy_GINA', 'Respiratory_Allergy', 'Type_of_Food_Allergy_TPO',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',
                  'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Food_Allergy_Shellfish']

for elem in target_columns:
    original_validation_data[elem]=  df_predictions_final[f'pred_label_{elem}_values']

In [100]:
original_validation_data.head(30)

Unnamed: 0,trustii_id,Patient_ID,Chip_Code,Chip_Type,Chip_Image_Name,Age,Gender,Blood_Month_sample,French_Residence_Department,French_Region,...,Type_of_Food_Allergy_Egg,Type_of_Food_Allergy_Fish,Type_of_Respiratory_Allergy_GINA,Respiratory_Allergy,Type_of_Food_Allergy_TPO,Type_of_Respiratory_Allergy_IGE_Pollen_Tree,Type_of_Food_Allergy_Fruits_and_Vegetables,Type_of_Respiratory_Allergy_IGE_Molds_Yeast,Type_of_Respiratory_Allergy_IGE_Dander_Animals,Type_of_Food_Allergy_Shellfish
0,1,PMP0156,22 262C 3858,ISAC_V2,,8.0,1.0,6.0,deptBBB,regionJ,...,0,0,1,1,0,1,0,1,1,0
1,4,PCR0234,02AHX0DC,ALEX,02AHX0DC.bmp,14.0,1.0,7.0,deptL,regionD,...,0,0,1,1,0,0,0,0,1,0
2,5,PCR0532,02AUN372,ALEX,02AUN372.png,32.0,0.0,10.0,deptUUU,regionF,...,0,0,1,1,1,1,1,1,1,0
3,7,GJH0147,EKF3830_4,ISAC_V2,EKF3830_4_2200444337_2023_2_17_11_58_24.bmp,65.0,1.0,8.0,deptQ,regionF,...,1,0,0,0,0,0,0,0,0,0
4,8,TXV0009,881204001164,ISAC_V1,1G20027_2_881204001164_2012_4_25_18_32_58.bmp,5.0,0.0,4.0,deptII,regionC,...,0,0,0,0,0,0,0,0,0,0
5,9,PCR0118,02AFA752,ALEX,,49.0,0.0,1.0,deptXXX,regionI,...,0,0,0,1,0,0,0,0,0,0
6,10,QVW0214,AB02627_3,ISAC_V1,,6.0,1.0,2.0,deptY,regionD,...,0,0,1,1,0,1,0,1,1,0
7,15,TXV0157,881602013302,ISAC_V1,BAF4027_4_881602013302_2016_2_23_16_38_11.bmp,13.0,1.0,2.0,deptRR,regionB,...,0,0,1,1,0,0,0,0,0,0
8,18,WQW0190,223112546,ISAC_V2,END0E30_1_223112546_2023_1_3_16_20_19.bmp,12.0,0.0,11.0,deptOO,regionL,...,1,0,0,0,0,0,0,0,0,0
9,23,TXV0282,881903001372,ISAC_V1,CXG1527_3_881903001372_2019_3_14_3_51_59.bmp,8.0,0.0,3.0,deptEE,regionC,...,0,0,1,1,0,0,0,0,0,0


In [102]:
original_validation_data.to_csv('Submission_mix_pycaret_SMOTE_Pytorch_resampling_KStrat_fold_meta_imblearn.csv', index=False, encoding='UTF-8')

#### Logics is the same than the first submission but an fix_imbalanced_method is set to give models more samples from the minority class and less from the majority class.
#### one notebook for the three best sampling methods previously test to compare the results when the models are tuned