# Using Pytorch with generation of sample after train/test split + KfoldStratified

In [2]:
!pip install session_info
!pip install imbalanced-learn
!pip install pycaret



#### Import librairies

In [3]:
import pandas as pd
import os
import re

from sklearn.model_selection import train_test_split, StratifiedKFold
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset , random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

import imblearn.over_sampling as oversampling
import imblearn.under_sampling as undersampling
import imblearn.combine as combination
import random

from torch.utils.data import TensorDataset # to recreate the modified dataset at each epoch

import session_info

device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')
session_info.show()

#### Set the random seed for reproducibility

In [4]:
seed = 64
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

#### Get the data

In [5]:
raw_data= pd.read_csv('data/train.csv', low_memory=False)
data_test= pd.read_csv('data/test.csv', low_memory=False)

In [6]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 365 entries, Patient_ID to Type_of_Venom_Allergy_IGE_Venom
dtypes: float64(322), int64(32), object(11)
memory usage: 8.3+ MB


#### Looking which are the targets to predict

In [7]:
missing_cols = set(raw_data.columns) ^ set(data_test.columns)
print(missing_cols)
len(missing_cols)

{'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_ARIA', 'Allergy_Present', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'trustii_id', 'Severe_Allergy', 'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Food_Allergy', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Venom_Allergy', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach', 'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Egg', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Shellfish', 'Respiratory_Allergy', 'Type_of_Food_Allergy_Peanut', 'Type_of_Food_Allergy_Fish', 'Type_of_Food_Allergy_Tree_Nuts', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_TPO', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Food_Allergy_Other', 'Type_of_Respiratory_Allerg

30

## Data Pre-processing

### Preprocessing for the train set

In [8]:
liste_of_Targets =['Allergy_Present', 'Severe_Allergy', 'Respiratory_Allergy', 'Food_Allergy', 'Venom_Allergy',
                     'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_CONJ', 
                     'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                     'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',
                     'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',
                     'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Other',
                     'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish',
                     'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Mammalian_Milk', 
                     'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Peanut',
                     'Type_of_Food_Allergy_Shellfish', 'Type_of_Food_Allergy_TPO', 'Type_of_Food_Allergy_Tree_Nuts',
                     'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Venom_Allergy_IGE_Venom']
def preprocessing_data(df):
    df = df.drop('Food_Type_0', axis =1)
    df.replace(-1, 0, inplace=True)
    data_noNAN = df.fillna(-1)
    # obtain Targets
    Targets = data_noNAN.loc[:,liste_of_Targets]
    # filter feautures
    X1=data_noNAN.loc[:, ['Chip_Type','Age','Gender','French_Residence_Department','Blood_Month_sample']]
    X= data_noNAN.iloc[:, 8:-29]
    data = pd.concat( [X1, X] , axis=1)
    # handle the 'Treatment_of_rhinitis' feature
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].astype(str)
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].str.replace('.0', '', regex=True)
    
    ##  Get_dummies of the 'object' type columns
    
    columns_to_encode = ['Chip_Type', 'French_Residence_Department', 'French_Region',
         'Treatment_of_athsma', 'Age_of_onsets',
       'General_cofactors', 'Treatment_of_atopic_dematitis','Treatment_of_rhinitis']
    
    ### Split the columns using multiple delimiters and create dummy columns
    dummy_dfs = []
    for col in columns_to_encode:
        # Split the data in the column that use  delimiters
        data[col] = data[col].astype(str)
        data[col] = data[col].apply(lambda x: [i.strip() for i in re.split('[,.]', x)])

        # Create dummy columns
        dummy_df = pd.get_dummies(data[col].apply(pd.Series).stack(), prefix=f"{col}", prefix_sep='_').groupby(level=0).sum()
        dummy_dfs.append(dummy_df)

    ### Concatenate the original DataFrame with the dummy columns
    df_final = pd.concat([data] + dummy_dfs, axis=1)

    ### Drop the original columns from the final dataset
    df_final.drop(columns=columns_to_encode, inplace=True)
    
    # Converting all values into 'float16' type
    encode_data = df_final.astype('float16')
    print(encode_data.info())
    
    return encode_data,Targets

In [9]:
encode_data,Targets = preprocessing_data(raw_data)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 467 entries, Age to Treatment_of_rhinitis_9
dtypes: float16(467)
memory usage: 2.7 MB
None


### Preprocessing for the test set

In [10]:
def preprocessing_data_test(df):
    df = df.drop('Food_Type_0', axis =1)
    df.replace(-1, 0, inplace=True)
    data_test_noNAN = df.fillna(-1)
    # filter feautures
    X1=data_test_noNAN.loc[:, ['Chip_Type']]
    X= data_test_noNAN.iloc[:, 5:]
    data = pd.concat( [X1, X] , axis=1)
    # handle the 'Treatment_of_rhinitis' feature
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].astype(str)
    data['Treatment_of_rhinitis'] = data['Treatment_of_rhinitis'].str.replace('.0', '', regex=True)
    # handle 'Age_of_onsets' which don't have the same format in data test and train
    data['Age_of_onsets'] = data['Age_of_onsets'].astype(str)

    
    ##  Get_dummies of the 'object' type columns
    
    columns_to_encode = ['Chip_Type', 'French_Residence_Department', 'French_Region',
         'Treatment_of_athsma', 'Age_of_onsets',
       'General_cofactors', 'Treatment_of_atopic_dematitis','Treatment_of_rhinitis']
    
    ### Split the columns using multiple delimiters and create dummy columns
    dummy_dfs = []
    for col in columns_to_encode:
        # Split the data in the column that use  delimiters
        data[col] = data[col].astype(str)
        data[col] = data[col].apply(lambda x: [i.strip() for i in re.split('[,.]', x)])

        # Create dummy columns
        dummy_df = pd.get_dummies(data[col].apply(pd.Series).stack(), prefix=f"{col}", prefix_sep='_').groupby(level=0).sum()
        dummy_dfs.append(dummy_df)

    ### Concatenate the original DataFrame with the dummy columns
    df_final = pd.concat([data] + dummy_dfs, axis=1)

    ### Drop the original columns from the final dataset
    df_final.drop(columns=columns_to_encode, inplace=True)
    
    # Converting all values into 'float16' type
    encode_data = df_final.astype('float16')
    print(encode_data.info())
    
    return encode_data


In [11]:
encode_data_test = preprocessing_data_test(data_test)
missing_cols = set(encode_data.columns) ^ set(encode_data_test.columns)
print(missing_cols)
len(missing_cols)
encode_data_test = encode_data_test.reindex(columns=encode_data.columns, fill_value=0).astype('float16')
encode_data_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Columns: 444 entries, Age to Treatment_of_rhinitis_9
dtypes: float16(444)
memory usage: 508.3 KB
None
{'French_Residence_Department_deptW', 'General_cofactors_11', 'French_Residence_Department_deptPPP', 'French_Residence_Department_deptJJJ', 'French_Residence_Department_deptCCCC', 'French_Residence_Department_deptAAAA', 'French_Residence_Department_deptIII', 'Treatment_of_athsma_8', 'French_Residence_Department_deptNNN', 'French_Residence_Department_deptOOO', 'French_Region_regionN', 'French_Residence_Department_deptP', 'French_Residence_Department_deptHHH', 'French_Residence_Department_deptK', 'French_Residence_Department_deptMMM', 'French_Region_regionO', 'French_Residence_Department_deptUU', 'French_Residence_Department_deptQQQ', 'French_Residence_Department_deptTTT', 'Treatment_of_atopic_dematitis_7', 'French_Residence_Department_deptZZZ', 'French_Residence_Department_deptT', 'French_Residence_Department_deptRR

## For all targets

#### General definitions

##### The custom dataset that will be used to store datas as tensors

In [12]:
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        self.features = self.data.iloc[:, :-1].values
        self.labels = self.data.iloc[:, -1].values

        if self.transform is not None:
            self.features, self.labels = self.transform.fit_resample(self.features, self.labels)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        label = torch.tensor(int(self.labels[idx]), dtype=torch.long)

        return features, label
    
    @property
    def num_features(self):
        return self.features.shape[1]
    
    @property
    def num_classes(self):
        return len(set(self.labels))

##### The dico of sampling technique that will be used to randomly chose the enhance data method at each epoch

In [13]:
sampling_techniques = {
    2: oversampling.RandomOverSampler(),
    #1: oversampling.SMOTE(),
    3: oversampling.BorderlineSMOTE(),
    #0: oversampling.SVMSMOTE(),
    #4: undersampling.TomekLinks(sampling_strategy='auto'),
    4: undersampling.TomekLinks(sampling_strategy='all'),
    5: combination.SMOTETomek(sampling_strategy='auto'),
    6: combination.SMOTETomek(sampling_strategy='all'),
}

##### The class_weight calculator that will be used after each imblearn transformation

In [14]:
def calculate_class_weights(dataset):
    # Convert labels to a PyTorch tensor
    labels_tensor = torch.from_numpy(dataset.labels)
    
    # Get the number of samples in each class
    class_counts = torch.bincount(labels_tensor)
    
    # Calculate the weight for each class as the inverse of its sample count
    total_samples = torch.sum(class_counts)
    class_weights = total_samples / (class_counts * len(class_counts))
    
    return class_weights

In [15]:
class RandomFeatureSubsampler(nn.Module):
    def __init__(self, p=0.2):
        super(RandomFeatureSubsampler, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            num_features = x.size(1)
            mask = torch.rand(num_features) < self.p
            mask = mask.view(1, -1).to(x.device)
            x = torch.where(mask, x, torch.tensor(-1, dtype=torch.float32).to(x.device))
        return x

##### The architecture of the models for generate folding models

In [16]:
class Allergy_Net(nn.Module):
    
    def __init__(self,input_size,hidden_size, num_class,dropout_rate):

        super(Allergy_Net,self).__init__()
        self.subsampler = RandomFeatureSubsampler(p=0.2)
        self.linear1= nn.Linear(input_size,hidden_size)
        self.linear2= nn.Linear(hidden_size,int(hidden_size/8))
        self.linear3= nn.Linear(int(hidden_size/8),int(hidden_size/32))
        self.linear4= nn.Linear(int(hidden_size/32),num_class)

        self.dropout1 = nn.Dropout(dropout_rate)
        self.batchnorm1 = nn.BatchNorm1d(int(hidden_size/32))

    def forward(self,inputs):
        #inputs = self.subsampler(inputs)
        x = torch.relu(self.linear1(inputs))
        x= self.dropout1(x)
        x = torch.relu(self.linear2(x))
        x= self.dropout1(x)
        x = torch.relu(self.linear3(x))
        x= self.dropout1(x)
        x= self.batchnorm1(x)
        outputs= self.linear4(x)

        # no softmax because Cross entropy Loss
        return outputs

##### how to obtain the stratified models

In [134]:
def obtain_fold_model_10(df_data,target,batch_size=64,nbr_of_fold=8,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7):
    
    print(target)
    # we take the rows where the target is not 9
    rows_with_9 = Targets[target].isin([9])
    Targets_without_9 = Targets[target][~rows_with_9]
    encode_data_without_9 = df_data[~rows_with_9]
    
    # we create the dataframe that we will used
    dataset_panda= pd.concat([encode_data_without_9,Targets_without_9], axis = 1).reset_index(drop=True)
    
    selected_technique = oversampling.SMOTE()
    encode_data_without_9_resampled,Targets_without_9_resampled  = selected_technique.fit_resample(encode_data_without_9,Targets_without_9)
    
    # define the number of fold
    k = nbr_of_fold
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=123)
    
    # create the full dataloader for final evaluation 
    batch_size=batch_size
    dataset_all= CustomDataset(dataset_panda)
    test_all = DataLoader(dataset_all, batch_size=batch_size, shuffle=False)

    # Convert the dataset to NumPy arrays to do the stratified k_folding
    X = encode_data_without_9#_resampled
    y = Targets_without_9#_resampled
    dataset_panda_resampled = pd.concat([X,y], axis = 1)
    # Create an empty list to store the models trained in each fold
    fold_models = []

    # Iterate over the folds
    for fold, (train_indices, test_indices) in enumerate(skf.split(X, y)):
        print(f"Fold {fold + 1}")

        # Split the data into train and test sets for the current fold
        train_data = dataset_panda_resampled.iloc[train_indices].reset_index(drop=True)
        test_data = dataset_panda_resampled.iloc[test_indices].reset_index(drop=True)
        
        # create the dataloader test for each fold progress evaluation
        dataset_test=CustomDataset(test_data)
        test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)
        dataset_train=CustomDataset(train_data)
        train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

        # define hyperparameters
        input_size= dataset_all.num_features
        num_class = dataset_all.num_classes
        hidden_size= hidden_size
        num_epochs= num_epochs
        learning_rate = learning_rate
        dropout_rate = dropout_rate
        weight_decay= weight_decay
        factor= factor
        print(input_size, num_class)

        # Create an instance of your model
        model = Allergy_Net(input_size,hidden_size,num_class,dropout_rate).to(device)

        # Define your loss function and optimizer
        criterion= nn.CrossEntropyLoss()
        optimizer = torch.optim.Adamax(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=factor, patience=5, verbose=True)

        # Initialization of some indicators that are used to save best model during training
        best_f1_score = 0.0
        best_model_state = None

        # Train your model
        optimizer.zero_grad()
        for epoch in range(num_epochs):
            model.train()
            # Update class weights
            class_weights = calculate_class_weights(dataset_train)
            criterion.weight = class_weights

            true_labels = []
            predicted_labels = []
            model.train()
            optimizer.zero_grad()
            for i,(data,labels) in enumerate (train_loader):
                optimizer.zero_grad()
                model=model.to(device)
                data= data.to(device)
                labels= labels.to(device)

                #forward
                outputs=model(data)
                loss= criterion(outputs,labels)

                #backward
                loss.backward()
                optimizer.step()
                
                # Calculate some metrics
                _, predictions = torch.max(outputs, 1)
                true_labels.extend(labels.cpu().numpy())
                predicted_labels.extend(predictions.cpu().numpy())

            acc = accuracy_score(true_labels, predicted_labels)
            f1 = f1_score(true_labels, predicted_labels, average='macro')
            
            print (f'epoch {epoch+1}/{num_epochs}, loss = {loss:.5f}, train_acc = {acc:.4f}, F1 Score_Train = {f1:.4f}')

            # Test 
            model.eval()
            with torch.no_grad():
                true_labels_test = []
                predicted_labels_test = []
                for i,(data,labels) in enumerate (test_loader):
                    data= data.to(device)
                    labels= labels.to(device)

                    outputs = model(data)

                    # return value and index of the best class
                    _, predictions = torch.max(outputs, 1)

                    true_labels_test.extend(labels.cpu().numpy())
                    predicted_labels_test.extend(predictions.cpu().numpy())
                    
                test_accuracy = accuracy_score(true_labels_test, predicted_labels_test)
                f1_test = f1_score(true_labels_test, predicted_labels_test, average='macro')

                lr_scheduler.step(f1_test + (f1*0.01))

                # Check if the current model is the best one based on f1 score

                if f1_test  > best_f1_score:
                    best_f1_score = f1_test
                    best_model_state = model.state_dict()
                    best_model= model
                    torch.save(model.state_dict(), 'best_model.pth')  # Save the best model
                    print('\033[91m'+'MODEL_SAVE')
                    print(f'Accuracy_test = {test_accuracy:.4f}, F1 Score_test = {f1_test:.4f}'+'\033[0m')

                print(' ')

        # Store the trained model for the current fold
        fold_models.append(best_model)

    for i, model in enumerate(fold_models):
        model_path = f"{target}_fold{i}"
        torch.save(model.state_dict(), model_path)

#### By Target: Type_of_Venom_Allergy_IGE_Venom

In [40]:
obtain_fold_model_10(encode_data,'Type_of_Venom_Allergy_IGE_Venom',batch_size=32,nbr_of_fold=10,hidden_size=1024,num_epochs=30,
                         learning_rate = 1e-3,dropout_rate = 0.6,weight_decay= 1e-3, factor=0.5)

Type_of_Venom_Allergy_IGE_Venom
Fold 1
467 2
BorderlineSMOTE()
epoch 1/30, loss = 0.38628, train_acc = 0.8013, F1 Score_Train = 0.7992
[91mMODEL_SAVE
Accuracy_test = 0.8194, F1 Score_test = 0.4680[0m
 
TomekLinks(sampling_strategy='all')
epoch 2/30, loss = 0.61487, train_acc = 0.5340, F1 Score_Train = 0.3532
 
SMOTETomek(sampling_strategy='all')
epoch 3/30, loss = 0.44751, train_acc = 0.7695, F1 Score_Train = 0.7692
[91mMODEL_SAVE
Accuracy_test = 0.8528, F1 Score_test = 0.4819[0m
 
TomekLinks(sampling_strategy='all')
epoch 4/30, loss = 0.55997, train_acc = 0.6185, F1 Score_Train = 0.3930
 
SMOTETomek(sampling_strategy='all')
epoch 5/30, loss = 0.29883, train_acc = 0.8285, F1 Score_Train = 0.8280
[91mMODEL_SAVE
Accuracy_test = 0.8829, F1 Score_test = 0.4958[0m
 
SMOTETomek(sampling_strategy='all')
epoch 6/30, loss = 0.37251, train_acc = 0.8850, F1 Score_Train = 0.8849
 
BorderlineSMOTE()
epoch 7/30, loss = 0.24352, train_acc = 0.9202, F1 Score_Train = 0.9200
[91mMODEL_SAVE
Accura

In [133]:
obtain_fold_model_10(encode_data,'Type_of_Venom_Allergy_IGE_Venom',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.05,weight_decay= 1e-9, factor=0.9)

Type_of_Venom_Allergy_IGE_Venom
Fold 1
467 2
epoch 1/30, loss = 0.61352, train_acc = 0.5277, F1 Score_Train = 0.3512
[91mMODEL_SAVE
Accuracy_test = 0.6711, F1 Score_test = 0.4239[0m
 
epoch 2/30, loss = 0.73464, train_acc = 0.8153, F1 Score_Train = 0.4707
[91mMODEL_SAVE
Accuracy_test = 0.9706, F1 Score_test = 0.5694[0m
 
epoch 3/30, loss = 0.43798, train_acc = 0.8750, F1 Score_Train = 0.5033
 
epoch 4/30, loss = 0.20805, train_acc = 0.9201, F1 Score_Train = 0.5344
 
epoch 5/30, loss = 0.31571, train_acc = 0.9449, F1 Score_Train = 0.5622
[91mMODEL_SAVE
Accuracy_test = 0.9813, F1 Score_test = 0.6064[0m
 
epoch 6/30, loss = 0.25311, train_acc = 0.9648, F1 Score_Train = 0.5945
 
epoch 7/30, loss = 0.23390, train_acc = 0.9859, F1 Score_Train = 0.7028
 
epoch 8/30, loss = 0.25020, train_acc = 0.9759, F1 Score_Train = 0.6318
[91mMODEL_SAVE
Accuracy_test = 0.9866, F1 Score_test = 0.6395[0m
 
epoch 9/30, loss = 0.53120, train_acc = 0.9820, F1 Score_Train = 0.6735
 
epoch 10/30, loss = 0

#### By Target: Type_of_Venom_Allergy_ATCD_Venom

In [54]:
obtain_fold_model_10(encode_data,'Type_of_Venom_Allergy_ATCD_Venom',batch_size=32,nbr_of_fold=10,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.55,weight_decay= 1e-2, factor=0.3)

Type_of_Venom_Allergy_ATCD_Venom
Fold 1
467 2
TomekLinks(sampling_strategy='all')
epoch 1/30, loss = 0.71338, train_acc = 0.7901, F1 Score_Train = 0.4517
[91mMODEL_SAVE
Accuracy_test = 0.9933, F1 Score_test = 0.4983[0m
 
SMOTETomek()
epoch 2/30, loss = 0.62085, train_acc = 0.5936, F1 Score_Train = 0.5675
[91mMODEL_SAVE
Accuracy_test = 0.8930, F1 Score_test = 0.5010[0m
 
SMOTETomek(sampling_strategy='all')
epoch 3/30, loss = 0.49620, train_acc = 0.7119, F1 Score_Train = 0.7083
 
RandomOverSampler()
epoch 4/30, loss = 0.54855, train_acc = 0.7799, F1 Score_Train = 0.7794
[91mMODEL_SAVE
Accuracy_test = 0.8194, F1 Score_test = 0.4845[0m
 
SMOTETomek()
epoch 5/30, loss = 0.38017, train_acc = 0.8572, F1 Score_Train = 0.8572
[91mMODEL_SAVE
Accuracy_test = 0.8595, F1 Score_test = 0.4848[0m
 
SMOTETomek(sampling_strategy='all')
epoch 6/30, loss = 0.30248, train_acc = 0.8856, F1 Score_Train = 0.8855
[91mMODEL_SAVE
Accuracy_test = 0.8796, F1 Score_test = 0.4942[0m
 
RandomOverSampler()
e

In [132]:
obtain_fold_model_10(encode_data,'Type_of_Venom_Allergy_ATCD_Venom',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.05,weight_decay= 1e-9, factor=0.9)

Type_of_Venom_Allergy_ATCD_Venom
Fold 1
467 2
epoch 1/30, loss = 0.63938, train_acc = 0.3950, F1 Score_Train = 0.2912
[91mMODEL_SAVE
Accuracy_test = 0.6230, F1 Score_test = 0.3969[0m
 
epoch 2/30, loss = 0.54870, train_acc = 0.6566, F1 Score_Train = 0.4098
[91mMODEL_SAVE
Accuracy_test = 0.8289, F1 Score_test = 0.4824[0m
 
epoch 3/30, loss = 0.42005, train_acc = 0.8979, F1 Score_Train = 0.4979
[91mMODEL_SAVE
Accuracy_test = 0.9385, F1 Score_test = 0.5241[0m
 
epoch 4/30, loss = 0.34667, train_acc = 0.9560, F1 Score_Train = 0.5213
 
epoch 5/30, loss = 0.30646, train_acc = 0.9820, F1 Score_Train = 0.5682
 
epoch 6/30, loss = 0.28877, train_acc = 0.9709, F1 Score_Train = 0.5508
 
epoch 7/30, loss = 0.23515, train_acc = 0.9633, F1 Score_Train = 0.5620
 
epoch 8/30, loss = 0.20779, train_acc = 0.9763, F1 Score_Train = 0.5861
 
epoch 9/30, loss = 0.93758, train_acc = 0.9832, F1 Score_Train = 0.6164
Epoch 00009: reducing learning rate of group 0 to 7.2000e-04.
 
epoch 10/30, loss = 0.719

#### By Target: Type_of_Food_Allergy_Tree_Nuts

In [58]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Tree_Nuts',batch_size=32,nbr_of_fold=10,hidden_size=1024,num_epochs=30,
                         learning_rate = 7e-3,dropout_rate = 0.55,weight_decay= 8e-4, factor=0.5)

Type_of_Food_Allergy_Tree_Nuts
Fold 1
467 2
SMOTETomek()
epoch 1/30, loss = 0.64656, train_acc = 0.6110, F1 Score_Train = 0.6078
[91mMODEL_SAVE
Accuracy_test = 0.5839, F1 Score_test = 0.4798[0m
 
RandomOverSampler()
epoch 2/30, loss = 0.59068, train_acc = 0.7378, F1 Score_Train = 0.7363
[91mMODEL_SAVE
Accuracy_test = 0.7702, F1 Score_test = 0.5943[0m
 
SMOTETomek()
epoch 3/30, loss = 0.99750, train_acc = 0.7840, F1 Score_Train = 0.7834
 
SMOTETomek()
epoch 4/30, loss = 0.98728, train_acc = 0.8222, F1 Score_Train = 0.8215
[91mMODEL_SAVE
Accuracy_test = 0.8012, F1 Score_test = 0.6083[0m
 
SMOTETomek()
epoch 5/30, loss = 0.62317, train_acc = 0.8459, F1 Score_Train = 0.8455
[91mMODEL_SAVE
Accuracy_test = 0.8323, F1 Score_test = 0.6227[0m
 
RandomOverSampler()
epoch 6/30, loss = 0.50407, train_acc = 0.8704, F1 Score_Train = 0.8702
 
RandomOverSampler()
epoch 7/30, loss = 0.11224, train_acc = 0.8765, F1 Score_Train = 0.8762
 
BorderlineSMOTE()
epoch 8/30, loss = 0.07265, train_acc = 

In [110]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Tree_Nuts',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 2e-2,dropout_rate = 0.5,weight_decay= 2e-9, factor=0.1)

Type_of_Food_Allergy_Tree_Nuts
Fold 1
467 2
epoch 1/30, loss = 0.64170, train_acc = 0.4893, F1 Score_Train = 0.3963
[91mMODEL_SAVE
Accuracy_test = 0.8706, F1 Score_test = 0.6682[0m
 
epoch 2/30, loss = 0.59468, train_acc = 0.7077, F1 Score_Train = 0.4913
 
epoch 3/30, loss = 0.60573, train_acc = 0.6166, F1 Score_Train = 0.4879
 
epoch 4/30, loss = 0.64002, train_acc = 0.6750, F1 Score_Train = 0.5310
 
epoch 5/30, loss = 0.61109, train_acc = 0.7411, F1 Score_Train = 0.5898
 
epoch 6/30, loss = 0.58738, train_acc = 0.7560, F1 Score_Train = 0.5936
 
epoch 7/30, loss = 0.95452, train_acc = 0.6821, F1 Score_Train = 0.5580
Epoch 00007: reducing learning rate of group 0 to 2.0000e-03.
 
epoch 8/30, loss = 0.36814, train_acc = 0.7838, F1 Score_Train = 0.6326
 
epoch 9/30, loss = 0.36647, train_acc = 0.7824, F1 Score_Train = 0.6243
 
epoch 10/30, loss = 0.56135, train_acc = 0.7802, F1 Score_Train = 0.6200
 
epoch 11/30, loss = 0.36627, train_acc = 0.7710, F1 Score_Train = 0.6167
 
epoch 12/30

#### By Target: Type_of_Food_Allergy_TPO

In [80]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_TPO',batch_size=32,nbr_of_fold=10,hidden_size=1024,num_epochs=30,
                         learning_rate = 2.5e-3,dropout_rate = 0.55,weight_decay= 1e-4, factor=0.5)

Type_of_Food_Allergy_TPO
Fold 1
467 2
epoch 1/30, loss = 0.68170, train_acc = 0.5353, F1 Score_Train = 0.5277
[91mMODEL_SAVE
Accuracy_test = 0.7212, F1 Score_test = 0.7211[0m
 
epoch 2/30, loss = 0.60343, train_acc = 0.5939, F1 Score_Train = 0.5886
 
epoch 3/30, loss = 0.55476, train_acc = 0.6538, F1 Score_Train = 0.6521
[91mMODEL_SAVE
Accuracy_test = 0.7532, F1 Score_test = 0.7475[0m
 
epoch 4/30, loss = 0.74353, train_acc = 0.6920, F1 Score_Train = 0.6914
[91mMODEL_SAVE
Accuracy_test = 0.7917, F1 Score_test = 0.7913[0m
 
epoch 5/30, loss = 0.44821, train_acc = 0.6956, F1 Score_Train = 0.6949
 
epoch 6/30, loss = 0.47391, train_acc = 0.7091, F1 Score_Train = 0.7084
[91mMODEL_SAVE
Accuracy_test = 0.8109, F1 Score_test = 0.8076[0m
 
epoch 7/30, loss = 0.55739, train_acc = 0.7302, F1 Score_Train = 0.7299
[91mMODEL_SAVE
Accuracy_test = 0.8622, F1 Score_test = 0.8619[0m
 
epoch 8/30, loss = 0.43544, train_acc = 0.7416, F1 Score_Train = 0.7406
 
epoch 9/30, loss = 0.51653, train_a

In [107]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_TPO',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 2.2e-3,dropout_rate = 0.4,weight_decay= 1e-9, factor=0.8)

Type_of_Food_Allergy_TPO
Fold 1
467 2
epoch 1/30, loss = 0.73000, train_acc = 0.3364, F1 Score_Train = 0.2738
[91mMODEL_SAVE
Accuracy_test = 0.3483, F1 Score_test = 0.2845[0m
 
epoch 2/30, loss = 0.67810, train_acc = 0.4445, F1 Score_Train = 0.3330
[91mMODEL_SAVE
Accuracy_test = 0.7164, F1 Score_test = 0.4770[0m
 
epoch 3/30, loss = 0.62430, train_acc = 0.5583, F1 Score_Train = 0.3935
[91mMODEL_SAVE
Accuracy_test = 0.8458, F1 Score_test = 0.5386[0m
 
epoch 4/30, loss = 0.67689, train_acc = 0.6351, F1 Score_Train = 0.4210
[91mMODEL_SAVE
Accuracy_test = 0.8756, F1 Score_test = 0.5631[0m
 
epoch 5/30, loss = 0.64263, train_acc = 0.6920, F1 Score_Train = 0.4516
[91mMODEL_SAVE
Accuracy_test = 0.8856, F1 Score_test = 0.5980[0m
 
epoch 6/30, loss = 0.69092, train_acc = 0.7831, F1 Score_Train = 0.4911
[91mMODEL_SAVE
Accuracy_test = 0.9154, F1 Score_test = 0.6080[0m
 
epoch 7/30, loss = 0.71646, train_acc = 0.8393, F1 Score_Train = 0.5308
 
epoch 8/30, loss = 0.46903, train_acc = 0.

#### By Target: Type_of_Food_Allergy_Shellfish

In [82]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Shellfish',batch_size=32,nbr_of_fold=10,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.5,weight_decay= 8e-5, factor=0.5)

Type_of_Food_Allergy_Shellfish
Fold 1
467 2
epoch 1/30, loss = 0.47234, train_acc = 0.5262, F1 Score_Train = 0.5249
[91mMODEL_SAVE
Accuracy_test = 0.6146, F1 Score_test = 0.5500[0m
 
epoch 2/30, loss = 0.65084, train_acc = 0.5656, F1 Score_Train = 0.5648
[91mMODEL_SAVE
Accuracy_test = 0.6178, F1 Score_test = 0.5574[0m
 
epoch 3/30, loss = 0.86137, train_acc = 0.5812, F1 Score_Train = 0.5811
[91mMODEL_SAVE
Accuracy_test = 0.6529, F1 Score_test = 0.6092[0m
 
epoch 4/30, loss = 0.63510, train_acc = 0.6110, F1 Score_Train = 0.6109
[91mMODEL_SAVE
Accuracy_test = 0.6656, F1 Score_test = 0.6253[0m
 
epoch 5/30, loss = 0.69844, train_acc = 0.6408, F1 Score_Train = 0.6408
 
epoch 6/30, loss = 1.07588, train_acc = 0.6869, F1 Score_Train = 0.6864
[91mMODEL_SAVE
Accuracy_test = 0.6815, F1 Score_test = 0.6456[0m
 
epoch 7/30, loss = 1.08163, train_acc = 0.6915, F1 Score_Train = 0.6909
[91mMODEL_SAVE
Accuracy_test = 0.7834, F1 Score_test = 0.7734[0m
 
epoch 8/30, loss = 0.68483, train_ac

In [97]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Shellfish',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 2e-2,dropout_rate = 0.27,weight_decay= 1e-7, factor=0.5)

Type_of_Food_Allergy_Shellfish
Fold 1
467 2
epoch 1/30, loss = 0.95608, train_acc = 0.7603, F1 Score_Train = 0.4515
[91mMODEL_SAVE
Accuracy_test = 0.9502, F1 Score_test = 0.4872[0m
 
epoch 2/30, loss = 0.51254, train_acc = 0.8862, F1 Score_Train = 0.4991
 
epoch 3/30, loss = 0.35976, train_acc = 0.9139, F1 Score_Train = 0.5225
[91mMODEL_SAVE
Accuracy_test = 0.9701, F1 Score_test = 0.4924[0m
 
epoch 4/30, loss = 0.58933, train_acc = 0.9225, F1 Score_Train = 0.5636
 
epoch 5/30, loss = 0.35323, train_acc = 0.9139, F1 Score_Train = 0.5919
 
epoch 6/30, loss = 0.37725, train_acc = 0.9189, F1 Score_Train = 0.5828
 
epoch 7/30, loss = 0.28975, train_acc = 0.9253, F1 Score_Train = 0.6132
 
epoch 8/30, loss = 0.27052, train_acc = 0.9374, F1 Score_Train = 0.6399
 
epoch 9/30, loss = 0.09723, train_acc = 0.9267, F1 Score_Train = 0.6303
[91mMODEL_SAVE
Accuracy_test = 0.9652, F1 Score_test = 0.6022[0m
 
epoch 10/30, loss = 0.12267, train_acc = 0.9431, F1 Score_Train = 0.6573
 
epoch 11/30, l

#### By Target: Type_of_Food_Allergy_Peanut

In [84]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Peanut',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 3e-3,dropout_rate = 0.5,weight_decay= 8e-4, factor=0.5)

Type_of_Food_Allergy_Peanut
Fold 1
467 2
epoch 1/30, loss = 0.69880, train_acc = 0.5764, F1 Score_Train = 0.5757
[91mMODEL_SAVE
Accuracy_test = 0.7200, F1 Score_test = 0.7197[0m
 
epoch 2/30, loss = 0.50995, train_acc = 0.6749, F1 Score_Train = 0.6745
[91mMODEL_SAVE
Accuracy_test = 0.7533, F1 Score_test = 0.7508[0m
 
epoch 3/30, loss = 0.42054, train_acc = 0.7317, F1 Score_Train = 0.7316
[91mMODEL_SAVE
Accuracy_test = 0.7833, F1 Score_test = 0.7821[0m
 
epoch 4/30, loss = 0.37890, train_acc = 0.7428, F1 Score_Train = 0.7426
[91mMODEL_SAVE
Accuracy_test = 0.7900, F1 Score_test = 0.7883[0m
 
epoch 5/30, loss = 0.33867, train_acc = 0.7617, F1 Score_Train = 0.7612
 
epoch 6/30, loss = 0.50097, train_acc = 0.7635, F1 Score_Train = 0.7631
[91mMODEL_SAVE
Accuracy_test = 0.8100, F1 Score_test = 0.8092[0m
 
epoch 7/30, loss = 0.69144, train_acc = 0.7698, F1 Score_Train = 0.7696
 
epoch 8/30, loss = 0.39899, train_acc = 0.7698, F1 Score_Train = 0.7695
 
epoch 9/30, loss = 0.80873, trai

In [85]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Peanut',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 5.5e-3,dropout_rate = 0.48,weight_decay= 1e-7, factor=0.1)

Type_of_Food_Allergy_Peanut
Fold 1
467 2
epoch 1/30, loss = 0.80962, train_acc = 0.4580, F1 Score_Train = 0.3667
[91mMODEL_SAVE
Accuracy_test = 0.5871, F1 Score_test = 0.4583[0m
 
epoch 2/30, loss = 0.62662, train_acc = 0.5889, F1 Score_Train = 0.4614
[91mMODEL_SAVE
Accuracy_test = 0.6816, F1 Score_test = 0.5006[0m
 
epoch 3/30, loss = 0.39288, train_acc = 0.6714, F1 Score_Train = 0.5129
[91mMODEL_SAVE
Accuracy_test = 0.7463, F1 Score_test = 0.5433[0m
 
epoch 4/30, loss = 0.55785, train_acc = 0.7340, F1 Score_Train = 0.5652
[91mMODEL_SAVE
Accuracy_test = 0.8159, F1 Score_test = 0.5845[0m
 
epoch 5/30, loss = 0.37176, train_acc = 0.7603, F1 Score_Train = 0.5788
 
epoch 6/30, loss = 0.37478, train_acc = 0.7859, F1 Score_Train = 0.6011
 
epoch 7/30, loss = 0.62746, train_acc = 0.7916, F1 Score_Train = 0.6089
 
epoch 8/30, loss = 0.40859, train_acc = 0.7994, F1 Score_Train = 0.6270
[91mMODEL_SAVE
Accuracy_test = 0.8806, F1 Score_test = 0.6339[0m
 
epoch 9/30, loss = 0.35935, trai

#### By Target: Type_of_Food_Allergy_Other_Legumes

In [86]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Other_Legumes',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.5,weight_decay= 8e-5, factor=0.5)

Type_of_Food_Allergy_Other_Legumes
Fold 1
467 2
epoch 1/30, loss = 0.59535, train_acc = 0.5378, F1 Score_Train = 0.5378
[91mMODEL_SAVE
Accuracy_test = 0.7212, F1 Score_test = 0.7085[0m
 
epoch 2/30, loss = 0.69776, train_acc = 0.5749, F1 Score_Train = 0.5745
[91mMODEL_SAVE
Accuracy_test = 0.7340, F1 Score_test = 0.7302[0m
 
epoch 3/30, loss = 0.51650, train_acc = 0.6317, F1 Score_Train = 0.6316
[91mMODEL_SAVE
Accuracy_test = 0.7692, F1 Score_test = 0.7691[0m
 
epoch 4/30, loss = 0.73881, train_acc = 0.6527, F1 Score_Train = 0.6527
[91mMODEL_SAVE
Accuracy_test = 0.7949, F1 Score_test = 0.7947[0m
 
epoch 5/30, loss = 0.59798, train_acc = 0.6913, F1 Score_Train = 0.6912
[91mMODEL_SAVE
Accuracy_test = 0.8045, F1 Score_test = 0.8045[0m
 
epoch 6/30, loss = 0.54194, train_acc = 0.7148, F1 Score_Train = 0.7148
 
epoch 7/30, loss = 0.49664, train_acc = 0.7259, F1 Score_Train = 0.7257
[91mMODEL_SAVE
Accuracy_test = 0.8077, F1 Score_test = 0.8074[0m
 
epoch 8/30, loss = 0.50565, trai

In [81]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Other_Legumes',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-3,dropout_rate = 0.38,weight_decay= 2e-8, factor=0.1)

Type_of_Food_Allergy_Other_Legumes
Fold 1
467 2
epoch 1/30, loss = 1.18024, train_acc = 0.5825, F1 Score_Train = 0.4043
[91mMODEL_SAVE
Accuracy_test = 0.8060, F1 Score_test = 0.4463[0m
 
epoch 2/30, loss = 0.33215, train_acc = 0.7205, F1 Score_Train = 0.4656
[91mMODEL_SAVE
Accuracy_test = 0.8607, F1 Score_test = 0.5247[0m
 
epoch 3/30, loss = 0.42782, train_acc = 0.7596, F1 Score_Train = 0.4880
 
epoch 4/30, loss = 0.41132, train_acc = 0.8179, F1 Score_Train = 0.5109
[91mMODEL_SAVE
Accuracy_test = 0.8905, F1 Score_test = 0.5477[0m
 
epoch 5/30, loss = 0.42804, train_acc = 0.8144, F1 Score_Train = 0.5308
[91mMODEL_SAVE
Accuracy_test = 0.8955, F1 Score_test = 0.5831[0m
 
epoch 6/30, loss = 0.24129, train_acc = 0.8421, F1 Score_Train = 0.5512
 
epoch 7/30, loss = 0.81886, train_acc = 0.8741, F1 Score_Train = 0.5759
[91mMODEL_SAVE
Accuracy_test = 0.9055, F1 Score_test = 0.6228[0m
 
epoch 8/30, loss = 0.53475, train_acc = 0.8556, F1 Score_Train = 0.5800
 
epoch 9/30, loss = 0.3597

#### By Target: Type_of_Food_Allergy_Oral_Syndrom

In [88]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Oral_Syndrom',batch_size=128,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 6.5e-3,dropout_rate = 0.5,weight_decay= 1e-4, factor=0.5)

Type_of_Food_Allergy_Oral_Syndrom
Fold 1
467 2
epoch 1/30, loss = 0.73151, train_acc = 0.5159, F1 Score_Train = 0.5156
[91mMODEL_SAVE
Accuracy_test = 0.6233, F1 Score_test = 0.6233[0m
 
epoch 2/30, loss = 0.61591, train_acc = 0.5193, F1 Score_Train = 0.5192
 
epoch 3/30, loss = 0.75533, train_acc = 0.5348, F1 Score_Train = 0.5344
 
epoch 4/30, loss = 0.66510, train_acc = 0.5600, F1 Score_Train = 0.5578
 
epoch 5/30, loss = 0.75404, train_acc = 0.5452, F1 Score_Train = 0.5409
 
epoch 6/30, loss = 0.66647, train_acc = 0.6008, F1 Score_Train = 0.5918
[91mMODEL_SAVE
Accuracy_test = 0.6800, F1 Score_test = 0.6798[0m
 
epoch 7/30, loss = 0.52988, train_acc = 0.6030, F1 Score_Train = 0.5991
[91mMODEL_SAVE
Accuracy_test = 0.7200, F1 Score_test = 0.7192[0m
 
epoch 8/30, loss = 0.65681, train_acc = 0.6227, F1 Score_Train = 0.6191
[91mMODEL_SAVE
Accuracy_test = 0.7900, F1 Score_test = 0.7900[0m
 
epoch 9/30, loss = 0.60005, train_acc = 0.6527, F1 Score_Train = 0.6511
 
epoch 10/30, loss =

In [77]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Oral_Syndrom',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 1e-2,dropout_rate = 0.42,weight_decay= 2e-6, factor=0.1)

Type_of_Food_Allergy_Oral_Syndrom
Fold 1
467 2
epoch 1/30, loss = 0.69893, train_acc = 0.5789, F1 Score_Train = 0.4038
[91mMODEL_SAVE
Accuracy_test = 0.7164, F1 Score_test = 0.4339[0m
 
epoch 2/30, loss = 0.64278, train_acc = 0.6892, F1 Score_Train = 0.4774
[91mMODEL_SAVE
Accuracy_test = 0.8308, F1 Score_test = 0.4813[0m
 
epoch 3/30, loss = 0.60096, train_acc = 0.7013, F1 Score_Train = 0.4653
[91mMODEL_SAVE
Accuracy_test = 0.9353, F1 Score_test = 0.4833[0m
 
epoch 4/30, loss = 0.61328, train_acc = 0.8186, F1 Score_Train = 0.4861
 
epoch 5/30, loss = 0.62908, train_acc = 0.8549, F1 Score_Train = 0.4885
 
epoch 6/30, loss = 0.69937, train_acc = 0.8926, F1 Score_Train = 0.5247
 
epoch 7/30, loss = 0.79534, train_acc = 0.7895, F1 Score_Train = 0.5073
 
epoch 8/30, loss = 0.93220, train_acc = 0.7283, F1 Score_Train = 0.5087
 
epoch 9/30, loss = 0.70055, train_acc = 0.7752, F1 Score_Train = 0.5127
 
epoch 10/30, loss = 0.65009, train_acc = 0.6942, F1 Score_Train = 0.5001
[91mMODEL_SA

#### By Target: Type_of_Food_Allergy_Mammalian_Milk

In [90]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Mammalian_Milk',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.3,weight_decay= 1e-4, factor=0.7)

Type_of_Food_Allergy_Mammalian_Milk
Fold 1
467 2
epoch 1/30, loss = 0.66743, train_acc = 0.5437, F1 Score_Train = 0.5428
[91mMODEL_SAVE
Accuracy_test = 0.5962, F1 Score_test = 0.5219[0m
 
epoch 2/30, loss = 0.66133, train_acc = 0.5975, F1 Score_Train = 0.5975
[91mMODEL_SAVE
Accuracy_test = 0.6593, F1 Score_test = 0.6342[0m
 
epoch 3/30, loss = 0.70237, train_acc = 0.6516, F1 Score_Train = 0.6516
[91mMODEL_SAVE
Accuracy_test = 0.6656, F1 Score_test = 0.6397[0m
 
epoch 4/30, loss = 0.54510, train_acc = 0.6660, F1 Score_Train = 0.6659
[91mMODEL_SAVE
Accuracy_test = 0.6877, F1 Score_test = 0.6687[0m
 
epoch 5/30, loss = 0.49369, train_acc = 0.6906, F1 Score_Train = 0.6904
[91mMODEL_SAVE
Accuracy_test = 0.7066, F1 Score_test = 0.7009[0m
 
epoch 6/30, loss = 0.79452, train_acc = 0.7194, F1 Score_Train = 0.7193
[91mMODEL_SAVE
Accuracy_test = 0.7224, F1 Score_test = 0.7167[0m
 
epoch 7/30, loss = 0.36317, train_acc = 0.7302, F1 Score_Train = 0.7302
[91mMODEL_SAVE
Accuracy_test = 0

In [73]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Mammalian_Milk',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 6e-4,dropout_rate = 0.1,weight_decay= 1e-6, factor=0.1)

Type_of_Food_Allergy_Mammalian_Milk
Fold 1
467 2
epoch 1/30, loss = 0.81862, train_acc = 0.4282, F1 Score_Train = 0.3176
[91mMODEL_SAVE
Accuracy_test = 0.6567, F1 Score_test = 0.4099[0m
 
epoch 2/30, loss = 0.74590, train_acc = 0.5413, F1 Score_Train = 0.3763
 
epoch 3/30, loss = 0.28981, train_acc = 0.5868, F1 Score_Train = 0.3996
[91mMODEL_SAVE
Accuracy_test = 0.7114, F1 Score_test = 0.4319[0m
 
epoch 4/30, loss = 0.60621, train_acc = 0.6828, F1 Score_Train = 0.4435
 
epoch 5/30, loss = 0.56516, train_acc = 0.7475, F1 Score_Train = 0.4750
[91mMODEL_SAVE
Accuracy_test = 0.8308, F1 Score_test = 0.4538[0m
 
epoch 6/30, loss = 0.42514, train_acc = 0.7895, F1 Score_Train = 0.5023
[91mMODEL_SAVE
Accuracy_test = 0.8657, F1 Score_test = 0.4640[0m
 
epoch 7/30, loss = 0.49582, train_acc = 0.8293, F1 Score_Train = 0.5210
[91mMODEL_SAVE
Accuracy_test = 0.8905, F1 Score_test = 0.4711[0m
 
epoch 8/30, loss = 0.47392, train_acc = 0.8535, F1 Score_Train = 0.5343
 
epoch 9/30, loss = 0.410

#### By Target: Type_of_Food_Allergy_Fruits_and_Vegetables

In [92]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Fruits_and_Vegetables',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 1e-3,dropout_rate = 0.45,weight_decay= 2e-3, factor=0.1)

Type_of_Food_Allergy_Fruits_and_Vegetables
Fold 1
467 2
epoch 1/30, loss = 0.65503, train_acc = 0.5460, F1 Score_Train = 0.5452
[91mMODEL_SAVE
Accuracy_test = 0.7185, F1 Score_test = 0.7185[0m
 
epoch 2/30, loss = 0.80092, train_acc = 0.5611, F1 Score_Train = 0.5608
[91mMODEL_SAVE
Accuracy_test = 0.7583, F1 Score_test = 0.7575[0m
 
epoch 3/30, loss = 0.58273, train_acc = 0.5901, F1 Score_Train = 0.5901
 
epoch 4/30, loss = 0.67479, train_acc = 0.5953, F1 Score_Train = 0.5952
 
epoch 5/30, loss = 0.76644, train_acc = 0.6387, F1 Score_Train = 0.6383
[91mMODEL_SAVE
Accuracy_test = 0.7881, F1 Score_test = 0.7857[0m
 
epoch 6/30, loss = 0.67092, train_acc = 0.6645, F1 Score_Train = 0.6640
[91mMODEL_SAVE
Accuracy_test = 0.7980, F1 Score_test = 0.7949[0m
 
epoch 7/30, loss = 0.51876, train_acc = 0.6876, F1 Score_Train = 0.6868
[91mMODEL_SAVE
Accuracy_test = 0.7980, F1 Score_test = 0.7968[0m
 
epoch 8/30, loss = 0.64003, train_acc = 0.7068, F1 Score_Train = 0.7065
 
epoch 9/30, loss 

In [61]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Fruits_and_Vegetables',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 7e-3,dropout_rate = 0.45,weight_decay= 2e-5, factor=0.1)

Type_of_Food_Allergy_Fruits_and_Vegetables
Fold 1
467 2
epoch 1/30, loss = 0.71983, train_acc = 0.5270, F1 Score_Train = 0.3897
[91mMODEL_SAVE
Accuracy_test = 0.7264, F1 Score_test = 0.4685[0m
 
epoch 2/30, loss = 0.81497, train_acc = 0.6230, F1 Score_Train = 0.4372
[91mMODEL_SAVE
Accuracy_test = 0.7363, F1 Score_test = 0.4736[0m
 
epoch 3/30, loss = 0.73788, train_acc = 0.6188, F1 Score_Train = 0.4403
[91mMODEL_SAVE
Accuracy_test = 0.8159, F1 Score_test = 0.4975[0m
 
epoch 4/30, loss = 0.84940, train_acc = 0.6728, F1 Score_Train = 0.4773
[91mMODEL_SAVE
Accuracy_test = 0.8060, F1 Score_test = 0.5468[0m
 
epoch 5/30, loss = 0.76431, train_acc = 0.6835, F1 Score_Train = 0.4864
 
epoch 6/30, loss = 0.64432, train_acc = 0.7333, F1 Score_Train = 0.5266
[91mMODEL_SAVE
Accuracy_test = 0.8408, F1 Score_test = 0.6072[0m
 
epoch 7/30, loss = 0.63890, train_acc = 0.6984, F1 Score_Train = 0.5155
 
epoch 8/30, loss = 0.48013, train_acc = 0.6863, F1 Score_Train = 0.5204
 
epoch 9/30, loss 

#### By Target: Type_of_Food_Allergy_Fish

In [94]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Fish',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-3,dropout_rate = 0.5,weight_decay= 1e-4, factor=0.7)

Type_of_Food_Allergy_Fish
Fold 1
467 2
epoch 1/30, loss = 0.91229, train_acc = 0.5676, F1 Score_Train = 0.5664
[91mMODEL_SAVE
Accuracy_test = 0.6815, F1 Score_test = 0.6594[0m
 
epoch 2/30, loss = 0.67913, train_acc = 0.6399, F1 Score_Train = 0.6399
[91mMODEL_SAVE
Accuracy_test = 0.7675, F1 Score_test = 0.7617[0m
 
epoch 3/30, loss = 0.48014, train_acc = 0.7181, F1 Score_Train = 0.7177
[91mMODEL_SAVE
Accuracy_test = 0.8662, F1 Score_test = 0.8659[0m
 
epoch 4/30, loss = 0.59946, train_acc = 0.7574, F1 Score_Train = 0.7573
[91mMODEL_SAVE
Accuracy_test = 0.8694, F1 Score_test = 0.8690[0m
 
epoch 5/30, loss = 1.00923, train_acc = 0.7780, F1 Score_Train = 0.7777
[91mMODEL_SAVE
Accuracy_test = 0.8981, F1 Score_test = 0.8980[0m
 
epoch 6/30, loss = 0.83703, train_acc = 0.7978, F1 Score_Train = 0.7976
[91mMODEL_SAVE
Accuracy_test = 0.9204, F1 Score_test = 0.9204[0m
 
epoch 7/30, loss = 0.15631, train_acc = 0.8102, F1 Score_Train = 0.8099
[91mMODEL_SAVE
Accuracy_test = 0.9236, F1 

In [49]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Fish',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 6e-3,dropout_rate = 0.45,weight_decay= 2e-4, factor=0.6)

Type_of_Food_Allergy_Fish
Fold 1
467 2
epoch 1/30, loss = 0.51755, train_acc = 0.8812, F1 Score_Train = 0.4801
[91mMODEL_SAVE
Accuracy_test = 0.9552, F1 Score_test = 0.4885[0m
 
epoch 2/30, loss = 0.46614, train_acc = 0.8962, F1 Score_Train = 0.4985
[91mMODEL_SAVE
Accuracy_test = 0.9801, F1 Score_test = 0.4950[0m
 
epoch 3/30, loss = 0.46390, train_acc = 0.9203, F1 Score_Train = 0.5202
 
epoch 4/30, loss = 0.56174, train_acc = 0.9410, F1 Score_Train = 0.5287
 
epoch 5/30, loss = 0.43420, train_acc = 0.8770, F1 Score_Train = 0.5407
[91mMODEL_SAVE
Accuracy_test = 0.9303, F1 Score_test = 0.5444[0m
 
epoch 6/30, loss = 0.36000, train_acc = 0.8414, F1 Score_Train = 0.5291
 
epoch 7/30, loss = 0.37282, train_acc = 0.8642, F1 Score_Train = 0.5566
 
epoch 8/30, loss = 0.54538, train_acc = 0.8826, F1 Score_Train = 0.5773
[91mMODEL_SAVE
Accuracy_test = 0.9403, F1 Score_test = 0.5560[0m
 
epoch 9/30, loss = 0.37589, train_acc = 0.8691, F1 Score_Train = 0.5678
 
epoch 10/30, loss = 0.33120

#### By Target: Type_of_Food_Allergy_Egg

In [96]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Egg',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-3,dropout_rate = 0.5,weight_decay= 1e-4, factor=0.7)

Type_of_Food_Allergy_Egg
Fold 1
467 2


ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 64])

In [32]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Egg',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 6e-4,dropout_rate = 0.28,weight_decay= 2e-9, factor=0.1)

Type_of_Food_Allergy_Egg
Fold 1
467 2
epoch 1/30, loss = 2.54945, train_acc = 0.4075, F1 Score_Train = 0.3081
[91mMODEL_SAVE
Accuracy_test = 0.5174, F1 Score_test = 0.3669[0m
 
epoch 2/30, loss = 0.72762, train_acc = 0.4552, F1 Score_Train = 0.3378
[91mMODEL_SAVE
Accuracy_test = 0.5473, F1 Score_test = 0.3902[0m
 
epoch 3/30, loss = 0.52638, train_acc = 0.5100, F1 Score_Train = 0.3676
[91mMODEL_SAVE
Accuracy_test = 0.6070, F1 Score_test = 0.4107[0m
 
epoch 4/30, loss = 0.70343, train_acc = 0.5270, F1 Score_Train = 0.3821
[91mMODEL_SAVE
Accuracy_test = 0.6716, F1 Score_test = 0.4646[0m
 
epoch 5/30, loss = 0.64142, train_acc = 0.5917, F1 Score_Train = 0.4220
[91mMODEL_SAVE
Accuracy_test = 0.7711, F1 Score_test = 0.5080[0m
 
epoch 6/30, loss = 0.70062, train_acc = 0.5953, F1 Score_Train = 0.4200
 
epoch 7/30, loss = 0.55244, train_acc = 0.6387, F1 Score_Train = 0.4396
 
epoch 8/30, loss = 0.60155, train_acc = 0.6558, F1 Score_Train = 0.4515
 
epoch 9/30, loss = 0.63548, train_a

#### By Target: Type_of_Food_Allergy_Aromatics

In [21]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Aromatics',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 6e-3,dropout_rate = 0.3,weight_decay= 2e-8, factor=0.1)

Type_of_Food_Allergy_Aromatics
Fold 1
467 2
epoch 1/30, loss = 0.34941, train_acc = 0.6095, F1 Score_Train = 0.3904
[91mMODEL_SAVE
Accuracy_test = 0.9751, F1 Score_test = 0.4937[0m
 
epoch 2/30, loss = 0.79819, train_acc = 0.8030, F1 Score_Train = 0.4659
 
epoch 3/30, loss = 0.41470, train_acc = 0.8620, F1 Score_Train = 0.5051
 
epoch 4/30, loss = 0.37906, train_acc = 0.9068, F1 Score_Train = 0.5298
 
epoch 5/30, loss = 0.85729, train_acc = 0.9196, F1 Score_Train = 0.5541
 
epoch 6/30, loss = 0.63922, train_acc = 0.9061, F1 Score_Train = 0.5677
 
epoch 7/30, loss = 0.17621, train_acc = 0.9232, F1 Score_Train = 0.5828
 
epoch 8/30, loss = 0.69321, train_acc = 0.9282, F1 Score_Train = 0.5957
Epoch 00008: reducing learning rate of group 0 to 6.0000e-04.
 
epoch 9/30, loss = 0.73052, train_acc = 0.9139, F1 Score_Train = 0.5818
 
epoch 10/30, loss = 0.48974, train_acc = 0.9189, F1 Score_Train = 0.5986
 
epoch 11/30, loss = 0.86485, train_acc = 0.9054, F1 Score_Train = 0.5718
 
epoch 12/30

In [19]:
obtain_fold_model_10(encode_data,'Type_of_Food_Allergy_Aromatics',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-3,dropout_rate = 0.3,weight_decay= 2e-6, factor=0.1)

Type_of_Food_Allergy_Aromatics
Fold 1
467 2
epoch 1/30, loss = 0.99448, train_acc = 0.6131, F1 Score_Train = 0.3967
[91mMODEL_SAVE
Accuracy_test = 0.8458, F1 Score_test = 0.4582[0m
 
epoch 2/30, loss = 0.46990, train_acc = 0.8862, F1 Score_Train = 0.4820
[91mMODEL_SAVE
Accuracy_test = 0.9751, F1 Score_test = 0.4937[0m
 
epoch 3/30, loss = 0.41633, train_acc = 0.9609, F1 Score_Train = 0.5239
[91mMODEL_SAVE
Accuracy_test = 0.9801, F1 Score_test = 0.4950[0m
 
epoch 4/30, loss = 0.36504, train_acc = 0.9616, F1 Score_Train = 0.5547
 
epoch 5/30, loss = 0.76073, train_acc = 0.9602, F1 Score_Train = 0.5656
 
epoch 6/30, loss = 0.36627, train_acc = 0.9595, F1 Score_Train = 0.5882
 
epoch 7/30, loss = 0.62924, train_acc = 0.9246, F1 Score_Train = 0.5529
 
epoch 8/30, loss = 0.32719, train_acc = 0.9346, F1 Score_Train = 0.5648
 
epoch 9/30, loss = 0.64732, train_acc = 0.8954, F1 Score_Train = 0.5569
 
epoch 10/30, loss = 0.17520, train_acc = 0.8812, F1 Score_Train = 0.5646
 
epoch 11/30, l

#### By Target: Type_of_Respiratory_Allergy_IGE_Molds_Yeast

In [None]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [46]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 2e-3,dropout_rate = 0.5,weight_decay= 3e-3, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Molds_Yeast
Fold 1
467 2
epoch 1/30, loss = 0.51152, train_acc = 0.5675, F1 Score_Train = 0.5241
[91mMODEL_SAVE
Accuracy_test = 0.7037, F1 Score_test = 0.6667[0m
 
epoch 2/30, loss = 0.23466, train_acc = 0.6973, F1 Score_Train = 0.6457
[91mMODEL_SAVE
Accuracy_test = 0.7302, F1 Score_test = 0.6844[0m
 
epoch 3/30, loss = 0.53175, train_acc = 0.7223, F1 Score_Train = 0.6697
[91mMODEL_SAVE
Accuracy_test = 0.7407, F1 Score_test = 0.6995[0m
 
epoch 4/30, loss = 0.34088, train_acc = 0.7428, F1 Score_Train = 0.6869
[91mMODEL_SAVE
Accuracy_test = 0.7513, F1 Score_test = 0.7091[0m
 
epoch 5/30, loss = 0.53738, train_acc = 0.7610, F1 Score_Train = 0.7090
[91mMODEL_SAVE
Accuracy_test = 0.7989, F1 Score_test = 0.7537[0m
 
epoch 6/30, loss = 0.79758, train_acc = 0.7678, F1 Score_Train = 0.7158
[91mMODEL_SAVE
Accuracy_test = 0.8201, F1 Score_test = 0.7688[0m
 
epoch 7/30, loss = 0.28063, train_acc = 0.8058, F1 Score_Train = 0.7554
[91mMODEL_SAVE
Accuracy_

#### By Target: Type_of_Respiratory_Allergy_IGE_Mite_Cockroach

In [102]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
Fold 1
467 2
epoch 1/30, loss = 0.69875, train_acc = 0.5625, F1 Score_Train = 0.5622
[91mMODEL_SAVE
Accuracy_test = 0.6867, F1 Score_test = 0.6631[0m
 
epoch 2/30, loss = 0.67069, train_acc = 0.6788, F1 Score_Train = 0.6771
 
epoch 3/30, loss = 0.47473, train_acc = 0.6868, F1 Score_Train = 0.6850
[91mMODEL_SAVE
Accuracy_test = 0.6988, F1 Score_test = 0.6760[0m
 
epoch 4/30, loss = 0.54976, train_acc = 0.7171, F1 Score_Train = 0.7155
 
epoch 5/30, loss = 0.70799, train_acc = 0.7110, F1 Score_Train = 0.7099
 
epoch 6/30, loss = 0.57863, train_acc = 0.7184, F1 Score_Train = 0.7174
 
epoch 7/30, loss = 0.37145, train_acc = 0.7419, F1 Score_Train = 0.7409
 
epoch 8/30, loss = 0.67268, train_acc = 0.7473, F1 Score_Train = 0.7467
 
epoch 9/30, loss = 0.50918, train_acc = 0.7386, F1 Score_Train = 0.7377
Epoch 00009: reducing learning rate of group 0 to 7.0000e-05.
 
epoch 10/30, loss = 0.54770, train_acc = 0.7312, F1 Score_Train = 0.7307
 
epo

KeyboardInterrupt: 

In [45]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 1e-3,dropout_rate = 0.45,weight_decay= 8e-5, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
Fold 1
467 2
epoch 1/30, loss = 0.40690, train_acc = 0.7155, F1 Score_Train = 0.7025
[91mMODEL_SAVE
Accuracy_test = 0.7619, F1 Score_test = 0.7573[0m
 
epoch 2/30, loss = 0.29648, train_acc = 0.7595, F1 Score_Train = 0.7492
[91mMODEL_SAVE
Accuracy_test = 0.7937, F1 Score_test = 0.7883[0m
 
epoch 3/30, loss = 0.27710, train_acc = 0.7830, F1 Score_Train = 0.7745
 
epoch 4/30, loss = 0.22984, train_acc = 0.7936, F1 Score_Train = 0.7868
[91mMODEL_SAVE
Accuracy_test = 0.8042, F1 Score_test = 0.7998[0m
 
epoch 5/30, loss = 0.25745, train_acc = 0.8065, F1 Score_Train = 0.8012
[91mMODEL_SAVE
Accuracy_test = 0.8201, F1 Score_test = 0.8164[0m
 
epoch 6/30, loss = 0.63294, train_acc = 0.8058, F1 Score_Train = 0.8013
 
epoch 7/30, loss = 0.79984, train_acc = 0.8225, F1 Score_Train = 0.8191
 
epoch 8/30, loss = 0.47358, train_acc = 0.8293, F1 Score_Train = 0.8262
 
epoch 9/30, loss = 0.37721, train_acc = 0.8247, F1 Score_Train = 0.8229
 
epoch 

#### By Target: Type_of_Respiratory_Allergy_IGE_Dander_Animals

In [None]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Dander_Animals',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [39]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Dander_Animals',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 1e-2,dropout_rate = 0.6,weight_decay= 8e-4, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Dander_Animals
Fold 1
467 2
epoch 1/30, loss = 0.96154, train_acc = 0.6927, F1 Score_Train = 0.6760
[91mMODEL_SAVE
Accuracy_test = 0.7619, F1 Score_test = 0.7539[0m
 
epoch 2/30, loss = 0.76990, train_acc = 0.7473, F1 Score_Train = 0.7398
 
epoch 3/30, loss = 0.31115, train_acc = 0.7496, F1 Score_Train = 0.7420
 
epoch 4/30, loss = 0.44229, train_acc = 0.7709, F1 Score_Train = 0.7671
[91mMODEL_SAVE
Accuracy_test = 0.7778, F1 Score_test = 0.7699[0m
 
epoch 5/30, loss = 0.31495, train_acc = 0.7838, F1 Score_Train = 0.7807
 
epoch 6/30, loss = 0.27303, train_acc = 0.7883, F1 Score_Train = 0.7851
[91mMODEL_SAVE
Accuracy_test = 0.7831, F1 Score_test = 0.7775[0m
 
epoch 7/30, loss = 0.95125, train_acc = 0.8118, F1 Score_Train = 0.8099
[91mMODEL_SAVE
Accuracy_test = 0.8148, F1 Score_test = 0.8112[0m
 
epoch 8/30, loss = 0.36206, train_acc = 0.8141, F1 Score_Train = 0.8109
 
epoch 9/30, loss = 0.51310, train_acc = 0.8103, F1 Score_Train = 0.8079
 
epoch 

#### By Target: Type_of_Respiratory_Allergy_IGE_Pollen_Tree

In [None]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [35]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-3,dropout_rate = 0.55,weight_decay= 3e-4, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Pollen_Tree
Fold 1
467 2
epoch 1/30, loss = 0.47697, train_acc = 0.6874, F1 Score_Train = 0.6802
[91mMODEL_SAVE
Accuracy_test = 0.7090, F1 Score_test = 0.7063[0m
 
epoch 2/30, loss = 0.70855, train_acc = 0.7754, F1 Score_Train = 0.7735
[91mMODEL_SAVE
Accuracy_test = 0.7884, F1 Score_test = 0.7862[0m
 
epoch 3/30, loss = 0.67610, train_acc = 0.8042, F1 Score_Train = 0.8037
[91mMODEL_SAVE
Accuracy_test = 0.8201, F1 Score_test = 0.8200[0m
 
epoch 4/30, loss = 0.25543, train_acc = 0.8118, F1 Score_Train = 0.8115
 
epoch 5/30, loss = 0.75101, train_acc = 0.8323, F1 Score_Train = 0.8323
 
epoch 6/30, loss = 0.34820, train_acc = 0.8376, F1 Score_Train = 0.8375
[91mMODEL_SAVE
Accuracy_test = 0.8413, F1 Score_test = 0.8412[0m
 
epoch 7/30, loss = 1.15903, train_acc = 0.8300, F1 Score_Train = 0.8300
 
epoch 8/30, loss = 0.14614, train_acc = 0.8505, F1 Score_Train = 0.8505
 
epoch 9/30, loss = 0.56216, train_acc = 0.8551, F1 Score_Train = 0.8549
 
epoch 10/

#### By Target: Type_of_Respiratory_Allergy_IGE_Pollen_Herb

In [None]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Herb',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [27]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Herb',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 9e-3,dropout_rate = 0.5 ,weight_decay= 5e-4, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Pollen_Herb
Fold 1
467 2
epoch 1/30, loss = 0.33015, train_acc = 0.7041, F1 Score_Train = 0.6610
[91mMODEL_SAVE
Accuracy_test = 0.7513, F1 Score_test = 0.7191[0m
 
epoch 2/30, loss = 0.37267, train_acc = 0.7648, F1 Score_Train = 0.7318
[91mMODEL_SAVE
Accuracy_test = 0.7831, F1 Score_test = 0.7486[0m
 
epoch 3/30, loss = 0.25562, train_acc = 0.7701, F1 Score_Train = 0.7445
 
epoch 4/30, loss = 0.17043, train_acc = 0.7754, F1 Score_Train = 0.7481
 
epoch 5/30, loss = 0.26653, train_acc = 0.7906, F1 Score_Train = 0.7682
[91mMODEL_SAVE
Accuracy_test = 0.7937, F1 Score_test = 0.7688[0m
 
epoch 6/30, loss = 1.26334, train_acc = 0.7883, F1 Score_Train = 0.7679
[91mMODEL_SAVE
Accuracy_test = 0.7989, F1 Score_test = 0.7738[0m
 
epoch 7/30, loss = 0.39955, train_acc = 0.8156, F1 Score_Train = 0.7949
[91mMODEL_SAVE
Accuracy_test = 0.8095, F1 Score_test = 0.7802[0m
 
epoch 8/30, loss = 0.75739, train_acc = 0.8171, F1 Score_Train = 0.7972
[91mMODEL_SAVE
Ac

#### By Target: Type_of_Respiratory_Allergy_IGE_Pollen_Gram

In [408]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Fold 1
467 2


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [19]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 2e-2,dropout_rate = 0.4 ,weight_decay= 1e-8, factor=0.1)

Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Fold 1
467 2
epoch 1/30, loss = 0.41042, train_acc = 0.6791, F1 Score_Train = 0.6742
[91mMODEL_SAVE
Accuracy_test = 0.8466, F1 Score_test = 0.8459[0m
 
epoch 2/30, loss = 0.60267, train_acc = 0.7451, F1 Score_Train = 0.7386
 
epoch 3/30, loss = 0.34788, train_acc = 0.7716, F1 Score_Train = 0.7683
[91mMODEL_SAVE
Accuracy_test = 0.8624, F1 Score_test = 0.8618[0m
 
epoch 4/30, loss = 1.26296, train_acc = 0.7792, F1 Score_Train = 0.7764
 
epoch 5/30, loss = 1.11821, train_acc = 0.8088, F1 Score_Train = 0.8074
[91mMODEL_SAVE
Accuracy_test = 0.8677, F1 Score_test = 0.8675[0m
 
epoch 6/30, loss = 0.16076, train_acc = 0.8065, F1 Score_Train = 0.8048
 
epoch 7/30, loss = 0.66355, train_acc = 0.8240, F1 Score_Train = 0.8235
 
epoch 8/30, loss = 0.62029, train_acc = 0.8346, F1 Score_Train = 0.8334
[91mMODEL_SAVE
Accuracy_test = 0.8730, F1 Score_test = 0.8724[0m
 
epoch 9/30, loss = 0.40653, train_acc = 0.8513, F1 Score_Train = 0.8507
 
epoch 10/

#### By Target: Type_of_Respiratory_Allergy_GINA

In [None]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_GINA',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [397]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_GINA',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 7e-3,dropout_rate = 0.4,weight_decay= 7e-3, factor=0.1)

Type_of_Respiratory_Allergy_GINA
Fold 1
467 2
epoch 1/30, loss = 0.54777, train_acc = 0.5774, F1 Score_Train = 0.5762
[91mMODEL_SAVE
Accuracy_test = 0.5926, F1 Score_test = 0.5669[0m
 
epoch 2/30, loss = 0.77270, train_acc = 0.6131, F1 Score_Train = 0.6104
[91mMODEL_SAVE
Accuracy_test = 0.6190, F1 Score_test = 0.5916[0m
 
epoch 3/30, loss = 0.48066, train_acc = 0.6442, F1 Score_Train = 0.6437
[91mMODEL_SAVE
Accuracy_test = 0.7566, F1 Score_test = 0.7566[0m
 
epoch 4/30, loss = 0.83430, train_acc = 0.6487, F1 Score_Train = 0.6487
 
epoch 5/30, loss = 0.55559, train_acc = 0.6487, F1 Score_Train = 0.6486
[91mMODEL_SAVE
Accuracy_test = 0.7619, F1 Score_test = 0.7606[0m
 
epoch 6/30, loss = 0.57472, train_acc = 0.6586, F1 Score_Train = 0.6583
 
epoch 7/30, loss = 0.74743, train_acc = 0.6684, F1 Score_Train = 0.6677
 
epoch 8/30, loss = 0.81954, train_acc = 0.6631, F1 Score_Train = 0.6623
 
epoch 9/30, loss = 0.54636, train_acc = 0.6882, F1 Score_Train = 0.6881
 
epoch 10/30, loss = 

#### By Target: Type_of_Respiratory_Allergy_CONJ

In [357]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_CONJ',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 3e-2,dropout_rate = 0.15,weight_decay= 1e-2, factor=0.1)

Type_of_Respiratory_Allergy_CONJ
Fold 1
467 2
epoch 1/30, loss = 0.79479, train_acc = 0.6100, F1 Score_Train = 0.5394
[91mMODEL_SAVE
Accuracy_test = 0.4921, F1 Score_test = 0.4914[0m
 
epoch 2/30, loss = 0.70635, train_acc = 0.6343, F1 Score_Train = 0.5509
[91mMODEL_SAVE
Accuracy_test = 0.7090, F1 Score_test = 0.5011[0m
 
epoch 3/30, loss = 0.68854, train_acc = 0.6510, F1 Score_Train = 0.5613
[91mMODEL_SAVE
Accuracy_test = 0.7302, F1 Score_test = 0.6130[0m
 
epoch 4/30, loss = 0.56964, train_acc = 0.6760, F1 Score_Train = 0.5608
 
epoch 5/30, loss = 0.70358, train_acc = 0.6601, F1 Score_Train = 0.5727
[91mMODEL_SAVE
Accuracy_test = 0.7354, F1 Score_test = 0.6354[0m
 
epoch 6/30, loss = 0.66585, train_acc = 0.6775, F1 Score_Train = 0.5647
[91mMODEL_SAVE
Accuracy_test = 0.7302, F1 Score_test = 0.6363[0m
 
epoch 7/30, loss = 0.59708, train_acc = 0.6897, F1 Score_Train = 0.5711
 
epoch 8/30, loss = 0.64139, train_acc = 0.6563, F1 Score_Train = 0.5705
 
epoch 9/30, loss = 0.61794,

In [378]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_CONJ',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-3,dropout_rate = 0.22,weight_decay= 5e-2, factor=0.1)

Type_of_Respiratory_Allergy_CONJ
Fold 1
467 2
epoch 1/30, loss = 0.79898, train_acc = 0.5622, F1 Score_Train = 0.5398
[91mMODEL_SAVE
Accuracy_test = 0.7143, F1 Score_test = 0.5936[0m
 
epoch 2/30, loss = 0.70124, train_acc = 0.6419, F1 Score_Train = 0.5551
[91mMODEL_SAVE
Accuracy_test = 0.7143, F1 Score_test = 0.6281[0m
 
epoch 3/30, loss = 0.67359, train_acc = 0.6275, F1 Score_Train = 0.5634
 
epoch 4/30, loss = 0.70256, train_acc = 0.6206, F1 Score_Train = 0.5643
 
epoch 5/30, loss = 0.68783, train_acc = 0.6161, F1 Score_Train = 0.5694
 
epoch 6/30, loss = 0.65754, train_acc = 0.6487, F1 Score_Train = 0.5661
 
epoch 7/30, loss = 0.68272, train_acc = 0.6464, F1 Score_Train = 0.5782
 
epoch 8/30, loss = 0.67422, train_acc = 0.6419, F1 Score_Train = 0.5696
Epoch 00008: reducing learning rate of group 0 to 8.0000e-04.
 
epoch 9/30, loss = 0.67265, train_acc = 0.6548, F1 Score_Train = 0.5903
 
epoch 10/30, loss = 0.67768, train_acc = 0.6578, F1 Score_Train = 0.5915
 
epoch 11/30, loss

#### By Target: Type_of_Respiratory_Allergy_ARIA

In [344]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_ARIA',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 4.5e-3,dropout_rate = 0.5,weight_decay= 1e-3, factor=0.1)

Type_of_Respiratory_Allergy_ARIA
Fold 1
467 2
epoch 1/30, loss = 0.54410, train_acc = 0.5273, F1 Score_Train = 0.5200
[91mMODEL_SAVE
Accuracy_test = 0.5979, F1 Score_test = 0.5759[0m
 
epoch 2/30, loss = 0.62727, train_acc = 0.5524, F1 Score_Train = 0.5489
[91mMODEL_SAVE
Accuracy_test = 0.6138, F1 Score_test = 0.5975[0m
 
epoch 3/30, loss = 0.59036, train_acc = 0.5948, F1 Score_Train = 0.5922
 
epoch 4/30, loss = 0.73536, train_acc = 0.5827, F1 Score_Train = 0.5797
[91mMODEL_SAVE
Accuracy_test = 0.6455, F1 Score_test = 0.6415[0m
 
epoch 5/30, loss = 0.67631, train_acc = 0.5979, F1 Score_Train = 0.5944
 
epoch 6/30, loss = 0.67100, train_acc = 0.6047, F1 Score_Train = 0.6024
 
epoch 7/30, loss = 0.54666, train_acc = 0.6267, F1 Score_Train = 0.6259
[91mMODEL_SAVE
Accuracy_test = 0.6455, F1 Score_test = 0.6449[0m
 
epoch 8/30, loss = 0.60107, train_acc = 0.6426, F1 Score_Train = 0.6424
[91mMODEL_SAVE
Accuracy_test = 0.6720, F1 Score_test = 0.6719[0m
 
epoch 9/30, loss = 0.77851,

In [368]:
obtain_fold_model_10(encode_data,'Type_of_Respiratory_Allergy_ARIA',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 5.5e-3,dropout_rate = 0.48,weight_decay= 1e-7, factor=0.1)

Type_of_Respiratory_Allergy_ARIA
Fold 1
467 2
epoch 1/30, loss = 0.74131, train_acc = 0.5402, F1 Score_Train = 0.5391
[91mMODEL_SAVE
Accuracy_test = 0.6349, F1 Score_test = 0.6339[0m
 
epoch 2/30, loss = 0.62170, train_acc = 0.5781, F1 Score_Train = 0.5767
 
epoch 3/30, loss = 0.59578, train_acc = 0.5728, F1 Score_Train = 0.5715
 
epoch 4/30, loss = 0.81572, train_acc = 0.5865, F1 Score_Train = 0.5862
[91mMODEL_SAVE
Accuracy_test = 0.6508, F1 Score_test = 0.6491[0m
 
epoch 5/30, loss = 0.64904, train_acc = 0.6055, F1 Score_Train = 0.6045
[91mMODEL_SAVE
Accuracy_test = 0.6614, F1 Score_test = 0.6586[0m
 
epoch 6/30, loss = 0.67204, train_acc = 0.6138, F1 Score_Train = 0.6137
 
epoch 7/30, loss = 0.58372, train_acc = 0.6275, F1 Score_Train = 0.6274
 
epoch 8/30, loss = 0.84188, train_acc = 0.6290, F1 Score_Train = 0.6281
 
epoch 9/30, loss = 0.75394, train_acc = 0.6540, F1 Score_Train = 0.6540
[91mMODEL_SAVE
Accuracy_test = 0.6720, F1 Score_test = 0.6704[0m
 
epoch 10/30, loss = 

#### By Target: Venom_Allergy

In [321]:
obtain_fold_model_10(encode_data,'Venom_Allergy',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 5e-4,dropout_rate = 0.1,weight_decay= 9e-5, factor=0.8)

Venom_Allergy
Fold 1
467 2
epoch 1/30, loss = 0.76707, train_acc = 0.3480, F1 Score_Train = 0.2689
[91mMODEL_SAVE
Accuracy_test = 0.3102, F1 Score_test = 0.2448[0m
 
epoch 2/30, loss = 0.66869, train_acc = 0.4834, F1 Score_Train = 0.3426
[91mMODEL_SAVE
Accuracy_test = 0.5374, F1 Score_test = 0.3598[0m
 
epoch 3/30, loss = 0.63543, train_acc = 0.5962, F1 Score_Train = 0.3938
[91mMODEL_SAVE
Accuracy_test = 0.6578, F1 Score_test = 0.4041[0m
 
epoch 4/30, loss = 0.57301, train_acc = 0.7113, F1 Score_Train = 0.4469
[91mMODEL_SAVE
Accuracy_test = 0.7112, F1 Score_test = 0.4330[0m
 
epoch 5/30, loss = 0.51069, train_acc = 0.7533, F1 Score_Train = 0.4662
[91mMODEL_SAVE
Accuracy_test = 0.8824, F1 Score_test = 0.4904[0m
 
epoch 6/30, loss = 0.52030, train_acc = 0.8558, F1 Score_Train = 0.5131
[91mMODEL_SAVE
Accuracy_test = 0.9385, F1 Score_test = 0.5241[0m
 
epoch 7/30, loss = 0.47517, train_acc = 0.8876, F1 Score_Train = 0.5299
 
epoch 8/30, loss = 0.88350, train_acc = 0.8811, F1 Sc

In [309]:
obtain_fold_model_10(encode_data,'Venom_Allergy',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-4,dropout_rate = 0.15,weight_decay= 1e-4, factor=0.1)

Venom_Allergy
Fold 1
467 2
epoch 1/30, loss = 0.57002, train_acc = 0.7025, F1 Score_Train = 0.4294
[91mMODEL_SAVE
Accuracy_test = 0.7888, F1 Score_test = 0.4531[0m
 
epoch 2/30, loss = 0.51095, train_acc = 0.7774, F1 Score_Train = 0.4600
[91mMODEL_SAVE
Accuracy_test = 0.8529, F1 Score_test = 0.4940[0m
 
epoch 3/30, loss = 0.45466, train_acc = 0.8203, F1 Score_Train = 0.4785
 
epoch 4/30, loss = 0.39499, train_acc = 0.8776, F1 Score_Train = 0.5075
[91mMODEL_SAVE
Accuracy_test = 0.9198, F1 Score_test = 0.5103[0m
 
epoch 5/30, loss = 0.51308, train_acc = 0.9025, F1 Score_Train = 0.5300
 
epoch 6/30, loss = 0.35039, train_acc = 0.9010, F1 Score_Train = 0.5288
[91mMODEL_SAVE
Accuracy_test = 0.9465, F1 Score_test = 0.5317[0m
 
epoch 7/30, loss = 0.60001, train_acc = 0.8983, F1 Score_Train = 0.5355
 
epoch 8/30, loss = 0.31357, train_acc = 0.8925, F1 Score_Train = 0.5282
 
epoch 9/30, loss = 0.31823, train_acc = 0.9247, F1 Score_Train = 0.5611
[91mMODEL_SAVE
Accuracy_test = 0.9545, F

#### By Target: Food_Allergy

In [None]:
obtain_fold_model_10(encode_data,'Food_Allergy',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [279]:
obtain_fold_model_10(encode_data,'Food_Allergy',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 3e-2,dropout_rate = 0.52,weight_decay= 1e-6, factor=0.1)

Food_Allergy
Fold 1
467 2
epoch 1/30, loss = 0.71669, train_acc = 0.5569, F1 Score_Train = 0.5435
[91mMODEL_SAVE
Accuracy_test = 0.6368, F1 Score_test = 0.6086[0m
 
epoch 2/30, loss = 0.68736, train_acc = 0.5960, F1 Score_Train = 0.5830
[91mMODEL_SAVE
Accuracy_test = 0.6567, F1 Score_test = 0.6152[0m
 
epoch 3/30, loss = 0.69343, train_acc = 0.6024, F1 Score_Train = 0.5914
[91mMODEL_SAVE
Accuracy_test = 0.6716, F1 Score_test = 0.6471[0m
 
epoch 4/30, loss = 0.56979, train_acc = 0.6479, F1 Score_Train = 0.6352
 
epoch 5/30, loss = 0.63355, train_acc = 0.6543, F1 Score_Train = 0.6473
[91mMODEL_SAVE
Accuracy_test = 0.7065, F1 Score_test = 0.7054[0m
 
epoch 6/30, loss = 0.56209, train_acc = 0.6764, F1 Score_Train = 0.6729
 
epoch 7/30, loss = 0.60751, train_acc = 0.6799, F1 Score_Train = 0.6789
[91mMODEL_SAVE
Accuracy_test = 0.7264, F1 Score_test = 0.7242[0m
 
epoch 8/30, loss = 0.55135, train_acc = 0.7034, F1 Score_Train = 0.7029
 
epoch 9/30, loss = 0.48125, train_acc = 0.7269,

#### By Target: Respiratory_Allergy

In [None]:
obtain_fold_model_10(encode_data,'Respiratory_Allergy',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [264]:
obtain_fold_model_10(encode_data,'Respiratory_Allergy',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 4e-3,dropout_rate = 0.35,weight_decay= 2e-3, factor=0.1)

Respiratory_Allergy
Fold 1
467 2
epoch 1/30, loss = 0.59048, train_acc = 0.6047, F1 Score_Train = 0.5630
[91mMODEL_SAVE
Accuracy_test = 0.7725, F1 Score_test = 0.6681[0m
 
epoch 2/30, loss = 0.55621, train_acc = 0.6214, F1 Score_Train = 0.5802
[91mMODEL_SAVE
Accuracy_test = 0.7884, F1 Score_test = 0.7038[0m
 
epoch 3/30, loss = 0.33197, train_acc = 0.6449, F1 Score_Train = 0.6013
[91mMODEL_SAVE
Accuracy_test = 0.7778, F1 Score_test = 0.7307[0m
 
epoch 4/30, loss = 0.39510, train_acc = 0.6783, F1 Score_Train = 0.6267
 
epoch 5/30, loss = 0.56801, train_acc = 0.7117, F1 Score_Train = 0.6546
 
epoch 6/30, loss = 0.43810, train_acc = 0.7185, F1 Score_Train = 0.6649
[91mMODEL_SAVE
Accuracy_test = 0.7989, F1 Score_test = 0.7449[0m
 
epoch 7/30, loss = 0.92649, train_acc = 0.7382, F1 Score_Train = 0.6852
 
epoch 8/30, loss = 0.25123, train_acc = 0.7557, F1 Score_Train = 0.6945
 
epoch 9/30, loss = 0.54574, train_acc = 0.7762, F1 Score_Train = 0.7134
[91mMODEL_SAVE
Accuracy_test = 0.8

#### By Target: Severe_Allergy

In [None]:
obtain_fold_model_10(encode_data,'Severe_Allergy',batch_size=32,nbr_of_fold=10,hidden_size=2048,num_epochs=30,
                         learning_rate = 1e-4,dropout_rate = 0.35,weight_decay= 1e-2, factor=0.7)

In [258]:
obtain_fold_model_10(encode_data,'Severe_Allergy',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 5e-3,dropout_rate = 0.56,weight_decay= 1e-3, factor=0.1)

Severe_Allergy
Fold 1
467 2
epoch 1/30, loss = 0.68359, train_acc = 0.5284, F1 Score_Train = 0.5280
[91mMODEL_SAVE
Accuracy_test = 0.5455, F1 Score_test = 0.5453[0m
 
epoch 2/30, loss = 0.61837, train_acc = 0.5606, F1 Score_Train = 0.5602
 
epoch 3/30, loss = 0.72147, train_acc = 0.5907, F1 Score_Train = 0.5906
[91mMODEL_SAVE
Accuracy_test = 0.5885, F1 Score_test = 0.5858[0m
 
epoch 4/30, loss = 0.73966, train_acc = 0.6146, F1 Score_Train = 0.6136
[91mMODEL_SAVE
Accuracy_test = 0.6507, F1 Score_test = 0.6388[0m
 
epoch 5/30, loss = 0.60767, train_acc = 0.6461, F1 Score_Train = 0.6387
 
epoch 6/30, loss = 0.70207, train_acc = 0.6523, F1 Score_Train = 0.6452
[91mMODEL_SAVE
Accuracy_test = 0.6459, F1 Score_test = 0.6430[0m
 
epoch 7/30, loss = 0.68800, train_acc = 0.6701, F1 Score_Train = 0.6662
 
epoch 8/30, loss = 0.52573, train_acc = 0.6865, F1 Score_Train = 0.6796
 
epoch 9/30, loss = 0.53653, train_acc = 0.6934, F1 Score_Train = 0.6866
 
epoch 10/30, loss = 0.68940, train_acc

#### By Target: Allergy_Present

In [182]:
obtain_fold_model_10(encode_data,'Allergy_Present',batch_size=32,nbr_of_fold=5,hidden_size=2048,num_epochs=30,
                         learning_rate = 5e-3,dropout_rate = 0.5,weight_decay= 1e-3, factor=0.1)

Allergy_Present
Fold 1
467 2
epoch 1/30, loss = 0.89030, train_acc = 0.5288, F1 Score_Train = 0.5079
[91mMODEL_SAVE
Accuracy_test = 0.5398, F1 Score_test = 0.5197[0m
 
epoch 2/30, loss = 0.61811, train_acc = 0.5430, F1 Score_Train = 0.5221
 
epoch 3/30, loss = 0.56388, train_acc = 0.5650, F1 Score_Train = 0.5418
[91mMODEL_SAVE
Accuracy_test = 0.5739, F1 Score_test = 0.5494[0m
 
epoch 4/30, loss = 0.60769, train_acc = 0.5800, F1 Score_Train = 0.5555
[91mMODEL_SAVE
Accuracy_test = 0.6648, F1 Score_test = 0.6223[0m
 
epoch 5/30, loss = 0.71722, train_acc = 0.6411, F1 Score_Train = 0.6074
[91mMODEL_SAVE
Accuracy_test = 0.7244, F1 Score_test = 0.6283[0m
 
epoch 6/30, loss = 0.64362, train_acc = 0.6354, F1 Score_Train = 0.6005
[91mMODEL_SAVE
Accuracy_test = 0.6932, F1 Score_test = 0.6375[0m
 
epoch 7/30, loss = 0.48562, train_acc = 0.6816, F1 Score_Train = 0.6354
[91mMODEL_SAVE
Accuracy_test = 0.7017, F1 Score_test = 0.6387[0m
 
epoch 8/30, loss = 0.53163, train_acc = 0.6823, F1 

In [211]:
obtain_fold_model_10(encode_data,'Allergy_Present',batch_size=32,nbr_of_fold=8,hidden_size=1024,num_epochs=30,
                         learning_rate = 8e-3,dropout_rate = 0.22,weight_decay= 2e-2, factor=0.1)

Allergy_Present
Fold 1
467 2
epoch 1/30, loss = 0.60085, train_acc = 0.5094, F1 Score_Train = 0.4926
[91mMODEL_SAVE
Accuracy_test = 0.5364, F1 Score_test = 0.5221[0m
 
epoch 2/30, loss = 1.00277, train_acc = 0.6153, F1 Score_Train = 0.5689
[91mMODEL_SAVE
Accuracy_test = 0.5591, F1 Score_test = 0.5416[0m
 
epoch 3/30, loss = 0.61635, train_acc = 0.5802, F1 Score_Train = 0.5519
[91mMODEL_SAVE
Accuracy_test = 0.7591, F1 Score_test = 0.6768[0m
 
epoch 4/30, loss = 0.60536, train_acc = 0.6706, F1 Score_Train = 0.6227
 
epoch 5/30, loss = 0.59612, train_acc = 0.6498, F1 Score_Train = 0.6054
 
epoch 6/30, loss = 0.66828, train_acc = 0.6400, F1 Score_Train = 0.5946
 
epoch 7/30, loss = 0.58507, train_acc = 0.6855, F1 Score_Train = 0.6341
 
epoch 8/30, loss = 0.59469, train_acc = 0.6342, F1 Score_Train = 0.6003
 
epoch 9/30, loss = 0.77938, train_acc = 0.6966, F1 Score_Train = 0.6409
 
epoch 10/30, loss = 0.66703, train_acc = 0.6732, F1 Score_Train = 0.6241
[91mMODEL_SAVE
Accuracy_test =

### obtain the dataframe with prediction of each fold and the corresponding labels test

In [None]:
# Dataset creation
column='Type_of_Respiratory_Allergy_GINA'
# we take the rows where the target is not 9
rows_with_9 = Targets[column].isin([9])
Targets_without_9 = Targets[column][~rows_with_9].reset_index(drop=True)
encode_data_without_9 = encode_data[~rows_with_9].reset_index(drop=True)

dataset_panda= pd.concat([encode_data_without_9,Targets_without_9], axis = 1).reset_index(drop=True)

In [None]:
dataset_all=CustomDataset(dataset_panda)   
get_all_fold_pred = DataLoader(dataset_all, batch_size=64, shuffle=False)


model_list = []
folder_path = "liste_classif_optim_2_no_oversampled"
target_string = f"{column}_fold"


input_size= 467 
hidden_size= 2048
num_class = 2 
dropout_rate=0.3

list_of_fold_model = []
# Iterate over the files in the folder
for filename in os.listdir(folder_path):
    # Check if the file path contains the target string
    if target_string in filename:
        # Load the model using the appropriate method
        model = Allergy_Net(input_size,hidden_size,num_class,dropout_rate)
        model_path = os.path.join(folder_path, filename)
        model.load_state_dict(torch.load(model_path))
        list_of_fold_model.append(model)
print(list_of_fold_model)


# obtain prediction for each folds
predictions_all = []
for model in list_of_fold_model:
    model.eval()
    pred_model=[]
    with torch.no_grad():
        for data, labels in get_all_fold_pred:
            outputs = model(data)
            pred_model.extend(outputs)
    predictions_all.append(pred_model)
    
# Combine predictions from different folds
combined_pred = []
for i in range(len(predictions_all[0])):
    combined_pred_mini = []
    for j in range(len(predictions_all)):
        combined_pred_mini.extend(predictions_all[j][i])
    combined_pred.append(combined_pred_mini)
    

In [None]:
# Extract values from tensors and convert to a list of lists
combined_pred_values = [[value.item() for value in sample] for sample in combined_pred]

# Get the number of folds
num_folds = len(predictions_all)*2

# Create a dictionary to store the fold predictions
fold_predictions = {}
for i in range(num_folds):
    fold_predictions[f"Value_{i+1}"] = [sample[i].item() for sample in combined_pred]
        
# Convert the dictionary into a DataFrame
df_combined_pred = pd.DataFrame(fold_predictions)

# Print the resulting DataFrame
print(df_combined_pred)

In [None]:
dataset_inter= pd.concat([df_combined_pred,Targets_without_9], axis = 1).reset_index(drop=True)
dataset_inter

In [135]:
!pip install pycaret



In [136]:
from pycaret.classification import *

In [None]:
column= 'Type_of_Respiratory_Allergy_GINA'

model = setup(data= dataset_inter, target= column, train_size=0.85, fix_imbalance=True, fix_imbalance_method='SMOTETomek')
best = compare_models(sort = 'F1', include = ['rf', 'et', 'lightgbm','ada'])
save_model(best, str(column) +'_meta_model_pycaret_binary')

#### method to generate all meta_classifier

In [139]:
def generate_meta_classifier_imblearn(df_data,list_of_targets):
    
    liste_column_monovalue=[]
    for column in list_of_targets.columns:
        print(column)
        # Dataset creation
        rows_with_9 = list_of_targets[column].isin([9])
        Targets_without_9 = list_of_targets[column][~rows_with_9].reset_index(drop=True)
        encode_data_without_9 = df_data[~rows_with_9].reset_index(drop=True)
        dataset_panda= pd.concat([encode_data_without_9,Targets_without_9], axis = 1).reset_index(drop=True)
        
        num_class = dataset_panda[column].nunique()
        
        if num_class >1:
            dataset_all=CustomDataset(dataset_panda)   
            get_all_fold_pred = DataLoader(dataset_all, batch_size=64, shuffle=False)
            model_list = []
            folder_path = "liste_classif_optim_2_no_oversampled"
            target_string = f"{column}_fold"

            input_size= 467 
            hidden_size= 1024
            num_class = 2 
            dropout_rate=0.3

            list_of_fold_model = []
            # Iterate over the files in the folder
            for filename in os.listdir(folder_path):
                # Check if the file path contains the target string
                if target_string in filename:
                    # Load the model using the appropriate method
                    model = Allergy_Net(input_size,hidden_size,num_class,dropout_rate)
                    model_path = os.path.join(folder_path, filename)
                    model.load_state_dict(torch.load(model_path))
                    list_of_fold_model.append(model)

            # obtain prediction for each folds
            predictions_all = []
            for model in list_of_fold_model:
                model.eval()
                pred_model=[]
                with torch.no_grad():
                    for data, labels in get_all_fold_pred:
                        outputs = model(data)
                        pred_model.extend(outputs)
                predictions_all.append(pred_model)

            # Combine predictions from different folds
            combined_pred = []
            for i in range(len(predictions_all[0])):
                combined_pred_mini = []
                for j in range(len(predictions_all)):
                    combined_pred_mini.extend(predictions_all[j][i])
                combined_pred.append(combined_pred_mini)
                
            # Extract values from tensors and convert to a list of lists
            combined_pred_values = [[value.item() for value in sample] for sample in combined_pred]
            num_folds = len(predictions_all)*2
            # Create a dictionary to store the fold predictions
            fold_predictions = {}
            for i in range(num_folds):
                fold_predictions[f"Value_{i+1}"] = [sample[i].item() for sample in combined_pred]

            # Convert the dictionary into a DataFrame
            df_combined_pred = pd.DataFrame(fold_predictions)
            dataset_inter= pd.concat([df_combined_pred,Targets_without_9], axis = 1).reset_index(drop=True)
            model = setup(data= dataset_inter, target= column, train_size=0.85,) #fix_imbalance=True, fix_imbalance_method='SMOTE')
            best = compare_models(sort = 'F1', include = ['rf', 'et', 'lightgbm','ada'])
            save_model(best, str(column) +'_meta_model_pycaret_binary')

In [140]:
generate_meta_classifier_imblearn(encode_data,Targets)

Allergy_Present


Unnamed: 0,Description,Value
0,Session id,2195
1,Target,Allergy_Present
2,Target type,Binary
3,Original data shape,"(1759, 17)"
4,Transformed data shape,"(1759, 17)"
5,Transformed train set shape,"(1495, 17)"
6,Transformed test set shape,"(264, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8863,0.9422,0.9217,0.9321,0.9264,0.6747,0.6782,0.528
et,Extra Trees Classifier,0.885,0.9427,0.9303,0.9232,0.9264,0.6614,0.6641,0.199
lightgbm,Light Gradient Boosting Machine,0.8803,0.9347,0.9252,0.9222,0.9233,0.6493,0.6525,0.166
ada,Ada Boost Classifier,0.8776,0.9356,0.9235,0.92,0.9215,0.643,0.645,0.136


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Severe_Allergy


Unnamed: 0,Description,Value
0,Session id,6768
1,Target,Severe_Allergy
2,Target type,Binary
3,Original data shape,"(1670, 17)"
4,Transformed data shape,"(1670, 17)"
5,Transformed train set shape,"(1419, 17)"
6,Transformed test set shape,"(251, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8901,0.9512,0.9005,0.9221,0.9107,0.7676,0.7694,0.246
et,Extra Trees Classifier,0.8894,0.9524,0.9028,0.9188,0.9105,0.7655,0.7664,0.2
lightgbm,Light Gradient Boosting Machine,0.8781,0.9485,0.9006,0.9043,0.9021,0.7403,0.7412,0.102
ada,Ada Boost Classifier,0.864,0.9443,0.8734,0.9059,0.8891,0.7133,0.7148,0.133


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Respiratory_Allergy


Unnamed: 0,Description,Value
0,Session id,8934
1,Target,Respiratory_Allergy
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9617,0.9875,0.9767,0.9739,0.9751,0.8923,0.8936,0.194
rf,Random Forest Classifier,0.9609,0.9864,0.9777,0.9719,0.9746,0.8899,0.8912,0.233
lightgbm,Light Gradient Boosting Machine,0.9609,0.9865,0.9767,0.973,0.9746,0.89,0.8918,0.109
ada,Ada Boost Classifier,0.9586,0.985,0.9746,0.9719,0.9731,0.8832,0.8847,0.138


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Food_Allergy


Unnamed: 0,Description,Value
0,Session id,426
1,Target,Food_Allergy
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8982,0.9583,0.8846,0.8883,0.8855,0.7939,0.7952,0.236
et,Extra Trees Classifier,0.8974,0.9595,0.8778,0.8919,0.8838,0.792,0.7936,0.205
lightgbm,Light Gradient Boosting Machine,0.8865,0.9581,0.8714,0.876,0.872,0.7701,0.7725,0.11
ada,Ada Boost Classifier,0.8799,0.9505,0.8565,0.873,0.8634,0.7563,0.7581,0.14


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Venom_Allergy


Unnamed: 0,Description,Value
0,Session id,3484
1,Target,Venom_Allergy
2,Target type,Binary
3,Original data shape,"(2989, 17)"
4,Transformed data shape,"(2989, 17)"
5,Transformed train set shape,"(2540, 17)"
6,Transformed test set shape,"(449, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9945,0.9409,0.6333,0.8333,0.69,0.6875,0.708,0.273
et,Extra Trees Classifier,0.9937,0.9415,0.5667,0.7583,0.6224,0.62,0.6394,0.193
lightgbm,Light Gradient Boosting Machine,0.9937,0.9292,0.5333,0.8083,0.6024,0.6001,0.6326,0.111
ada,Ada Boost Classifier,0.9909,0.9054,0.4333,0.7083,0.4905,0.487,0.5249,0.184


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_ARIA


Unnamed: 0,Description,Value
0,Session id,4551
1,Target,Type_of_Respiratory_Allergy_ARIA
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9367,0.984,0.9303,0.949,0.9392,0.8732,0.874,0.24
et,Extra Trees Classifier,0.9367,0.9853,0.9273,0.9519,0.9391,0.8733,0.8743,0.194
ada,Ada Boost Classifier,0.9352,0.9831,0.9258,0.9507,0.9374,0.8702,0.8718,0.136
lightgbm,Light Gradient Boosting Machine,0.932,0.9855,0.9229,0.9473,0.9345,0.8639,0.8651,0.111


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_CONJ


Unnamed: 0,Description,Value
0,Session id,3449
1,Target,Type_of_Respiratory_Allergy_CONJ
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8641,0.8927,0.6758,0.7789,0.7215,0.6324,0.6368,0.202
rf,Random Forest Classifier,0.8625,0.8941,0.6695,0.7729,0.7162,0.6262,0.63,0.265
ada,Ada Boost Classifier,0.8609,0.8912,0.637,0.7949,0.7033,0.6143,0.6234,0.141
lightgbm,Light Gradient Boosting Machine,0.8477,0.8815,0.652,0.7399,0.6888,0.589,0.5942,0.119


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_GINA


Unnamed: 0,Description,Value
0,Session id,2565
1,Target,Type_of_Respiratory_Allergy_GINA
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9133,0.9614,0.9173,0.9127,0.9142,0.8266,0.8279,0.199
rf,Random Forest Classifier,0.9109,0.96,0.9188,0.9064,0.912,0.8219,0.8229,0.254
ada,Ada Boost Classifier,0.9109,0.9622,0.9141,0.9094,0.9114,0.8219,0.8226,0.143
lightgbm,Light Gradient Boosting Machine,0.9008,0.9585,0.9032,0.9007,0.9013,0.8015,0.8027,0.119


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Pollen_Gram


Unnamed: 0,Description,Value
0,Session id,1457
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Gram
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.968,0.994,0.9721,0.9614,0.9666,0.9358,0.9362,0.235
et,Extra Trees Classifier,0.968,0.9952,0.9705,0.9627,0.9665,0.9358,0.9361,0.194
lightgbm,Light Gradient Boosting Machine,0.9609,0.9937,0.959,0.9595,0.9587,0.9217,0.9226,0.117
ada,Ada Boost Classifier,0.9523,0.9911,0.9541,0.947,0.9499,0.9045,0.9056,0.146


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Pollen_Herb


Unnamed: 0,Description,Value
0,Session id,6977
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Herb
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9398,0.9729,0.9053,0.9095,0.9063,0.862,0.8632,0.248
et,Extra Trees Classifier,0.9289,0.9735,0.8907,0.8913,0.8898,0.8374,0.8386,0.202
lightgbm,Light Gradient Boosting Machine,0.9266,0.9705,0.8835,0.8903,0.8858,0.8317,0.8328,0.119
ada,Ada Boost Classifier,0.925,0.9692,0.8909,0.8812,0.8848,0.8293,0.8306,0.145


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Pollen_Tree


Unnamed: 0,Description,Value
0,Session id,7359
1,Target,Type_of_Respiratory_Allergy_IGE_Pollen_Tree
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9484,0.9866,0.9481,0.9428,0.945,0.8965,0.8973,0.199
rf,Random Forest Classifier,0.9461,0.9865,0.9465,0.94,0.9426,0.8918,0.8928,0.245
lightgbm,Light Gradient Boosting Machine,0.9422,0.9862,0.9397,0.9375,0.9382,0.8839,0.8846,0.127
ada,Ada Boost Classifier,0.932,0.9784,0.9314,0.9249,0.9275,0.8636,0.8646,0.149


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Dander_Animals


Unnamed: 0,Description,Value
0,Session id,3398
1,Target,Type_of_Respiratory_Allergy_IGE_Dander_Animals
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9109,0.9555,0.909,0.8918,0.8997,0.8196,0.8207,0.165
rf,Random Forest Classifier,0.9078,0.9557,0.9055,0.8891,0.8963,0.8134,0.8148,0.256
et,Extra Trees Classifier,0.9008,0.9584,0.9037,0.8771,0.8891,0.7995,0.8013,0.208
lightgbm,Light Gradient Boosting Machine,0.8922,0.9527,0.893,0.8687,0.8797,0.7822,0.7839,0.127


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Mite_Cockroach


Unnamed: 0,Description,Value
0,Session id,553
1,Target,Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.932,0.9726,0.9429,0.9109,0.9262,0.8633,0.8643,0.208
rf,Random Forest Classifier,0.9266,0.9742,0.9342,0.9073,0.9201,0.8522,0.8533,0.247
ada,Ada Boost Classifier,0.9227,0.9735,0.9273,0.9057,0.9159,0.8443,0.8454,0.149
lightgbm,Light Gradient Boosting Machine,0.9227,0.9735,0.9222,0.9092,0.9153,0.8442,0.8448,0.137


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Respiratory_Allergy_IGE_Molds_Yeast


Unnamed: 0,Description,Value
0,Session id,1520
1,Target,Type_of_Respiratory_Allergy_IGE_Molds_Yeast
2,Target type,Binary
3,Original data shape,"(1507, 17)"
4,Transformed data shape,"(1507, 17)"
5,Transformed train set shape,"(1280, 17)"
6,Transformed test set shape,"(227, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9594,0.9823,0.9034,0.9174,0.9097,0.8835,0.8841,0.225
lightgbm,Light Gradient Boosting Machine,0.9586,0.9831,0.9,0.9172,0.908,0.8813,0.8818,0.134
rf,Random Forest Classifier,0.9555,0.9773,0.8897,0.9137,0.9007,0.872,0.8728,0.25
ada,Ada Boost Classifier,0.9516,0.9792,0.8828,0.9054,0.892,0.8608,0.8625,0.158


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Aromatics


Unnamed: 0,Description,Value
0,Session id,6751
1,Target,Type_of_Food_Allergy_Aromatics
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.989,0.9798,0.7333,0.85,0.7425,0.7375,0.7613,0.233
lightgbm,Light Gradient Boosting Machine,0.9861,0.9892,0.7333,0.775,0.7052,0.6987,0.723,0.11
et,Extra Trees Classifier,0.9875,0.9949,0.6667,0.825,0.6968,0.6911,0.7148,0.207
ada,Ada Boost Classifier,0.9868,0.9662,0.6333,0.7917,0.6529,0.6468,0.6753,0.164


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Other
Type_of_Food_Allergy_Cereals_&_Seeds
Type_of_Food_Allergy_Egg


Unnamed: 0,Description,Value
0,Session id,237
1,Target,Type_of_Food_Allergy_Egg
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9861,0.9638,0.5917,0.8417,0.6712,0.6648,0.6865,0.208
rf,Random Forest Classifier,0.9854,0.9769,0.6,0.8833,0.6662,0.6599,0.6951,0.25
lightgbm,Light Gradient Boosting Machine,0.9832,0.9905,0.5917,0.7333,0.6338,0.6259,0.6402,0.128
ada,Ada Boost Classifier,0.9824,0.9637,0.4833,0.75,0.5624,0.5556,0.5813,0.165


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Fish


Unnamed: 0,Description,Value
0,Session id,1195
1,Target,Type_of_Food_Allergy_Fish
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9934,0.9803,0.775,0.9417,0.8317,0.8285,0.8415,0.203
rf,Random Forest Classifier,0.9927,0.9627,0.8083,0.9017,0.8267,0.8232,0.837,0.245
ada,Ada Boost Classifier,0.9927,0.9637,0.775,0.9083,0.8183,0.8148,0.8261,0.164
lightgbm,Light Gradient Boosting Machine,0.9927,0.9642,0.775,0.9167,0.8174,0.8139,0.8278,0.114


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Fruits_and_Vegetables


Unnamed: 0,Description,Value
0,Session id,3010
1,Target,Type_of_Food_Allergy_Fruits_and_Vegetables
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9677,0.9846,0.7194,0.7957,0.7326,0.7162,0.7296,0.248
et,Extra Trees Classifier,0.9677,0.9859,0.6806,0.7941,0.7103,0.694,0.7079,0.213
lightgbm,Light Gradient Boosting Machine,0.9641,0.9801,0.6569,0.7575,0.6836,0.6653,0.6778,0.142
ada,Ada Boost Classifier,0.9583,0.9415,0.6,0.6911,0.6193,0.5983,0.6116,0.18


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Mammalian_Milk


Unnamed: 0,Description,Value
0,Session id,1877
1,Target,Type_of_Food_Allergy_Mammalian_Milk
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9949,0.9731,0.8,0.8417,0.799,0.7967,0.8075,0.233
et,Extra Trees Classifier,0.9956,0.9731,0.75,0.875,0.7857,0.7844,0.7976,0.211
ada,Ada Boost Classifier,0.9927,0.9469,0.65,0.7417,0.6657,0.663,0.6779,0.174
lightgbm,Light Gradient Boosting Machine,0.9919,0.9388,0.65,0.7267,0.655,0.652,0.6685,0.128


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Oral_Syndrom


Unnamed: 0,Description,Value
0,Session id,899
1,Target,Type_of_Food_Allergy_Oral_Syndrom
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9641,0.976,0.6222,0.8152,0.6971,0.6787,0.6903,0.259
et,Extra Trees Classifier,0.9627,0.9683,0.6222,0.8089,0.6938,0.6745,0.6862,0.223
lightgbm,Light Gradient Boosting Machine,0.9568,0.9656,0.6322,0.7074,0.6629,0.64,0.6438,0.144
ada,Ada Boost Classifier,0.9531,0.956,0.5878,0.6765,0.6239,0.5993,0.6037,0.169


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Other_Legumes


Unnamed: 0,Description,Value
0,Session id,4475
1,Target,Type_of_Food_Allergy_Other_Legumes
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9802,0.9775,0.6,0.7488,0.6442,0.6345,0.6499,0.214
rf,Random Forest Classifier,0.9795,0.9577,0.62,0.6733,0.62,0.6105,0.6237,0.252
lightgbm,Light Gradient Boosting Machine,0.9773,0.9865,0.545,0.6467,0.5792,0.5684,0.5766,0.139
ada,Ada Boost Classifier,0.9714,0.9504,0.435,0.59,0.4554,0.4427,0.4686,0.176


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Peanut


Unnamed: 0,Description,Value
0,Session id,3742
1,Target,Type_of_Food_Allergy_Peanut
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9663,0.9879,0.6956,0.79,0.7283,0.7109,0.7185,0.266
lightgbm,Light Gradient Boosting Machine,0.9648,0.9854,0.7056,0.7675,0.7216,0.7034,0.7113,0.14
ada,Ada Boost Classifier,0.9626,0.9788,0.6722,0.7693,0.7049,0.6853,0.6939,0.183
et,Extra Trees Classifier,0.9619,0.9862,0.6411,0.7631,0.6855,0.666,0.6746,0.222


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Shellfish


Unnamed: 0,Description,Value
0,Session id,8984
1,Target,Type_of_Food_Allergy_Shellfish
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9949,0.9978,0.9417,0.9071,0.9099,0.9074,0.9149,0.257
et,Extra Trees Classifier,0.9941,0.9995,0.9083,0.8988,0.8908,0.8879,0.8945,0.222
lightgbm,Light Gradient Boosting Machine,0.9927,0.9979,0.8583,0.8833,0.8562,0.8526,0.8601,0.13
ada,Ada Boost Classifier,0.9905,0.9959,0.8167,0.8233,0.7972,0.7926,0.8042,0.188


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_TPO


Unnamed: 0,Description,Value
0,Session id,3373
1,Target,Type_of_Food_Allergy_TPO
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9853,0.9444,0.66,0.85,0.7325,0.7252,0.7366,0.271
et,Extra Trees Classifier,0.9839,0.9555,0.615,0.8567,0.706,0.698,0.7132,0.24
ada,Ada Boost Classifier,0.981,0.9324,0.57,0.8017,0.6488,0.6394,0.6582,0.184
lightgbm,Light Gradient Boosting Machine,0.9788,0.9471,0.525,0.7567,0.6004,0.5901,0.6103,0.165


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Food_Allergy_Tree_Nuts


Unnamed: 0,Description,Value
0,Session id,1605
1,Target,Type_of_Food_Allergy_Tree_Nuts
2,Target type,Binary
3,Original data shape,"(1607, 17)"
4,Transformed data shape,"(1607, 17)"
5,Transformed train set shape,"(1365, 17)"
6,Transformed test set shape,"(242, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9575,0.9698,0.7449,0.7864,0.7593,0.7361,0.7398,0.284
et,Extra Trees Classifier,0.9553,0.965,0.7455,0.7704,0.7506,0.7262,0.7305,0.228
ada,Ada Boost Classifier,0.9487,0.9429,0.6962,0.7317,0.7081,0.6802,0.6835,0.191
lightgbm,Light Gradient Boosting Machine,0.9421,0.9641,0.6545,0.6987,0.6677,0.6364,0.6413,0.164


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Venom_Allergy_ATCD_Venom


Unnamed: 0,Description,Value
0,Session id,634
1,Target,Type_of_Venom_Allergy_ATCD_Venom
2,Target type,Binary
3,Original data shape,"(2989, 17)"
4,Transformed data shape,"(2989, 17)"
5,Transformed train set shape,"(2540, 17)"
6,Transformed test set shape,"(449, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9945,0.9917,0.5,0.45,0.44,0.4385,0.4558,0.168
et,Extra Trees Classifier,0.9941,0.9452,0.5,0.4333,0.4267,0.425,0.4447,0.243
ada,Ada Boost Classifier,0.9937,0.8877,0.45,0.4333,0.41,0.4082,0.4237,0.219
rf,Random Forest Classifier,0.9929,0.9446,0.5,0.4167,0.4067,0.4045,0.4295,0.294


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Type_of_Venom_Allergy_IGE_Venom


Unnamed: 0,Description,Value
0,Session id,7660
1,Target,Type_of_Venom_Allergy_IGE_Venom
2,Target type,Binary
3,Original data shape,"(2989, 17)"
4,Transformed data shape,"(2989, 17)"
5,Transformed train set shape,"(2540, 17)"
6,Transformed test set shape,"(449, 17)"
7,Numeric features,16
8,Preprocess,1
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9961,0.9722,0.75,0.7333,0.6933,0.6919,0.7157,0.292
et,Extra Trees Classifier,0.9961,0.9967,0.7,0.7667,0.68,0.6786,0.7048,0.239
lightgbm,Light Gradient Boosting Machine,0.9957,0.9972,0.7,0.7,0.6733,0.6716,0.6848,0.159
ada,Ada Boost Classifier,0.9949,0.9978,0.65,0.6167,0.58,0.5784,0.6046,0.222


Processing:   0%|          | 0/21 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved


In [None]:
columns_to_drop = ['Allergy_Present', 'Severe_Allergy', 'Respiratory_Allergy', 'Food_Allergy', 'Venom_Allergy',
                     'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_CONJ', 
                     'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                     'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',
                     'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',
                     'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Other',
                     'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish',
                     'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Mammalian_Milk', 
                     'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Peanut',
                     ]

Targets_2 = Targets.drop(columns=columns_to_drop)
Targets_2

In [None]:
generate_meta_classifier_imblearn(encode_data,Targets_2)

## Obtain the prediction for the train dataset

In [141]:
data= pd.read_csv('data/train.csv', low_memory=False)
data_test= pd.read_csv('data/test.csv', low_memory=False)

In [142]:
encode_data,Targets = preprocessing_data(data)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Columns: 467 entries, Age to Treatment_of_rhinitis_9
dtypes: float16(467)
memory usage: 2.7 MB
None


In [143]:
folder_path_fold = 'liste_classif_optim_2_no_oversampled'
folder_path_meta='liste_meta_imblearn_3'
liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

def get_predictions_fold_imblearn_meta_classifier(df,list_of_targets, folder_path_fold,folder_path_meta):
    
    liste=[]
    for column in list_of_targets.columns:
        print(column)
        dataset_panda= pd.concat([df,list_of_targets[column]], axis = 1).reset_index(drop=True)
        dataset_all=CustomDataset(dataset_panda)
        train_all = DataLoader(dataset_all, batch_size=64, shuffle=False)

       
        if column not in liste_column_monovalue:
            input_size= 467 
            hidden_size= 1024
            num_class = 2 
            dropout_rate=0.3
            
            model_list = []
            target_string = f"{column}_fold"

            # Iterate over the files in the folder
            for filename in os.listdir(folder_path_fold):
                # Check if the file path contains the target string
                if target_string in filename:
                    # Load the model using the appropriate method
                    model = Allergy_Net(input_size,hidden_size,num_class,dropout_rate)
                    model_path = os.path.join(folder_path_fold, filename)
                    model.load_state_dict(torch.load(model_path))
                    model_list.append(model)

            # obtain prediction for each folds
            predictions_all = []
            for model in model_list:
                model.eval()
                pred_model=[]
                with torch.no_grad():
                    for data, labels in train_all:
                        outputs = model(data)
                        pred_model.extend(outputs)
                predictions_all.append(pred_model)

            # Combine predictions from different folds
            combined_pred = []
            for i in range(len(predictions_all[0])):
                combined_pred_mini = []
                for j in range(len(predictions_all)):
                    combined_pred_mini.extend(predictions_all[j][i])
                combined_pred.append(combined_pred_mini)
                
            # Extract values from tensors and convert to a list of lists
            combined_pred_values = [[value.item() for value in sample] for sample in combined_pred]
            num_folds = len(predictions_all)*2
            # Create a dictionary to store the fold predictions
            fold_predictions = {}
            for i in range(num_folds):
                fold_predictions[f"Value_{i+1}"] = [sample[i].item() for sample in combined_pred]

            # Convert the dictionary into a DataFrame
            df_combined_pred = pd.DataFrame(fold_predictions)


            target_string_for_meta_imbl = f"{column}_meta_model_pycaret_binary"
            for filename in os.listdir(folder_path_meta):
                # Check if the file path contains the target string
                if target_string_for_meta_imbl in filename:
                    model = load_model(os.path.join(folder_path_meta, os.path.splitext(filename)[0]))
                    values=predict_model(model, data= df_combined_pred.reset_index())
                    file_name = os.path.splitext(filename)[0]
                    values_name = '{}_{}'.format(file_name, 'values')
                    globals()[values_name] = values
                    globals()[values_name] = globals()[values_name].rename(columns={'prediction_label': 'pred_label ' + str(values_name), 'prediction_score': 'pred_score ' + str(values_name) })
                    liste.append(globals()[values_name])
                    print('done')
    return liste

In [144]:
liste_df=get_predictions_fold_imblearn_meta_classifier(encode_data,Targets, folder_path_fold,folder_path_meta)

Allergy_Present
Transformation Pipeline and Model Successfully Loaded


done
Severe_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Respiratory_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Food_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Venom_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_ARIA
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_CONJ
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_GINA
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Herb
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Tree
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Dander_Animals
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Molds_Yeast
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Aromatics
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Other
Type_of_Food_Allergy_Cereals_&_Seeds
Type_of_Food_Allergy_Egg
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Fish
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Fruits_and_Vegetables
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Mammalian_Milk
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Oral_Syndrom
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Other_Legumes
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Peanut
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Shellfish
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_TPO
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Tree_Nuts
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Venom_Allergy_ATCD_Venom
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Venom_Allergy_IGE_Venom
Transformation Pipeline and Model Successfully Loaded


done


In [145]:
print(len(liste_df))
liste_df[0]

27


Unnamed: 0,index,Value_1,Value_2,Value_3,Value_4,Value_5,Value_6,Value_7,Value_8,Value_9,Value_10,Value_11,Value_12,Value_13,Value_14,Value_15,Value_16,pred_label Allergy_Present_meta_model_pycaret_binary_values,pred_score Allergy_Present_meta_model_pycaret_binary_values
0,0,-0.748127,0.729992,-0.584334,0.499938,-0.773140,0.906865,-0.365902,0.172279,-0.376860,0.111737,-0.633374,0.795414,-0.585548,0.792772,-0.906501,0.757275,1,1.00
1,1,0.746651,-0.758935,0.341197,-0.434825,0.950985,-0.915116,0.418233,-0.683620,0.324738,-0.618463,0.632095,-0.547063,0.668416,-0.474834,0.404382,-0.541455,1,0.52
2,2,0.733035,-0.744944,0.363207,-0.454483,0.988670,-0.956325,0.420199,-0.676870,0.331931,-0.624628,0.657758,-0.573595,0.695211,-0.501030,0.441308,-0.576205,1,0.77
3,3,-0.814914,0.790831,-1.250587,1.151908,-1.247099,1.370371,-1.376382,1.003550,-0.441291,0.180355,-1.376143,1.567106,-1.131734,1.325826,-1.204989,1.048573,1,1.00
4,4,2.561285,-2.592341,1.212610,-1.193888,2.119431,-2.285433,-1.933193,1.469598,0.540509,-0.800075,1.962318,-2.170416,2.523063,-2.252595,3.708384,-3.600559,0,0.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2984,2984,-0.786242,0.773249,-0.847874,0.780075,-0.214157,0.318150,-0.090434,-0.136489,-0.115012,-0.155921,-0.068002,0.183874,-0.054706,0.246645,-0.621908,0.472636,1,0.96
2985,2985,0.634945,-0.641316,0.682803,-0.726152,1.071488,-1.048074,0.201980,-0.465010,0.309130,-0.606897,1.063268,-1.031496,0.911311,-0.705643,0.790858,-0.903036,0,0.78
2986,2986,-1.022694,1.008510,-1.007091,0.946322,-0.863912,1.000217,-0.347636,0.157731,-0.616937,0.385259,-0.822320,1.007491,-0.686823,0.898095,-0.907244,0.757713,1,1.00
2987,2987,-1.474948,1.441920,-0.980786,0.898808,-1.064659,1.189412,-0.841738,0.569061,-0.989572,0.851761,-1.010729,1.221723,-0.905806,1.116985,-1.044051,0.890789,1,1.00


In [146]:
liste_column_monovalue=['Type_of_Food_Allergy_Other','Type_of_Food_Allergy_Cereals_&_Seeds']

merged_df = pd.DataFrame()
for i,df in enumerate(liste_df):
    if i==0:
        merged_df = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df = pd.concat([merged_df, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_meta_model_pycaret_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df)), columns= added_list)
merged_df = pd.concat([merged_df, zeros_data], axis=1)
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2989 entries, 0 to 2988
Data columns (total 73 columns):
 #   Column                                                                                      Non-Null Count  Dtype  
---  ------                                                                                      --------------  -----  
 0   index                                                                                       2989 non-null   int32  
 1   Value_1                                                                                     2989 non-null   float32
 2   Value_2                                                                                     2989 non-null   float32
 3   Value_3                                                                                     2989 non-null   float32
 4   Value_4                                                                                     2989 non-null   float32
 5   Value_5                                  

In [147]:
data_true= pd.read_csv('data/train.csv', low_memory=False)

In [148]:
target_columns = ['Type_of_Food_Allergy_Other_Legumes','Type_of_Food_Allergy_Cereals_&_Seeds', 'Allergy_Present', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Food_Allergy_Other',
                  'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach', 'Venom_Allergy', 'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Food_Allergy', 'Type_of_Food_Allergy_Oral_Syndrom','Type_of_Food_Allergy_Tree_Nuts', 'Severe_Allergy',
                  'Type_of_Food_Allergy_Aromatics', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Peanut',
                  'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish', 'Type_of_Respiratory_Allergy_GINA', 'Respiratory_Allergy', 'Type_of_Food_Allergy_TPO',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',
                  'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Food_Allergy_Shellfish']
f1_scores=[]
below_threshold_columns=[]
threshold =0.89
for column in target_columns:
    # Get the predicted and true labels for the column
    
    rows_with_9 = data_true[column].isin([9])
    Targets_without_9 = data_true[column][~rows_with_9]
    merged_df_removal = merged_df[~rows_with_9]
        
    merged_df_mod = pd.concat([merged_df_removal, Targets_without_9] , axis=1).reset_index(drop=True)
    y_true = merged_df_mod[column]
    y_pred = merged_df_mod['pred_label '+ column + '_meta_model_pycaret_binary_values']
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true, y_pred)
    
    # Calculate recall
    recall = recall_score(y_true, y_pred)
    
    # Calculate F1 score
    if (data_true[column] == 1).any():
        f1 = f1_score(y_true, y_pred,average='macro')
    else:
        f1 = f1_score(y_true, y_pred, pos_label=0)
    cm = confusion_matrix(y_true, y_pred)

    # Display the metrics and confusion matrix
    print(f"Metrics for {column}:")
    print(f"Accuracy: {accuracy}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print("Confusion Matrix:")
    print(cm)
    print()
    if f1 != 0:
        f1_scores.append(f1) # Append the F1 score to the list
    if f1 < threshold:
        below_threshold_columns.append(column)

mean_f1 = sum(f1_scores) / len(f1_scores)  # Calculate the mean of F1 scores

print(f"Mean F1 Score: {mean_f1}")
print(below_threshold_columns)

Metrics for Type_of_Food_Allergy_Other_Legumes:
Accuracy: 0.9975108898568762
Recall: 0.94
F1 Score: 0.9789499882109455
Confusion Matrix:
[[1556    1]
 [   3   47]]

Metrics for Type_of_Food_Allergy_Cereals_&_Seeds:
Accuracy: 1.0
Recall: 0.0
F1 Score: 1.0
Confusion Matrix:
[[1607]]

Metrics for Allergy_Present:
Accuracy: 0.984650369528141
Recall: 0.9890350877192983
F1 Score: 0.9778629209869465
Confusion Matrix:
[[ 379   12]
 [  15 1353]]

Metrics for Type_of_Food_Allergy_Mammalian_Milk:
Accuracy: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[1582    0]
 [   0   25]]

Metrics for Type_of_Food_Allergy_Other:
Accuracy: 1.0
Recall: 0.0
F1 Score: 1.0
Confusion Matrix:
[[1607]]

Metrics for Type_of_Respiratory_Allergy_IGE_Mite_Cockroach:
Accuracy: 0.9907100199071002
Recall: 0.9911764705882353
F1 Score: 0.9906232111301924
Confusion Matrix:
[[819   8]
 [  6 674]]

Metrics for Venom_Allergy:
Accuracy: 0.998327199732352
Recall: 0.8787878787878788
F1 Score: 0.9598948060486523
Confusion Matrix:

## Generate prediction for the test set

In [149]:
original_validation_data = pd.read_csv('data/test.csv')

data_test = original_validation_data.set_index('trustii_id')

In [150]:
encode_data_test = preprocessing_data_test(data_test)
missing_cols = set(encode_data.columns) ^ set(encode_data_test.columns)
print(missing_cols)
len(missing_cols)
encode_data_test = encode_data_test.reindex(columns=encode_data.columns, fill_value=0).astype('float16').reset_index(drop=True)
encode_data_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 1 to 1282
Columns: 443 entries, Gender to Treatment_of_rhinitis_9
dtypes: float16(443)
memory usage: 527.8 KB
None
{'French_Residence_Department_deptW', 'General_cofactors_11', 'French_Residence_Department_deptPPP', 'French_Residence_Department_deptJJJ', 'French_Residence_Department_deptCCCC', 'French_Residence_Department_deptAAAA', 'French_Residence_Department_deptIII', 'Treatment_of_athsma_8', 'French_Residence_Department_deptNNN', 'French_Residence_Department_deptOOO', 'French_Region_regionN', 'French_Residence_Department_deptP', 'French_Residence_Department_deptHHH', 'French_Residence_Department_deptK', 'French_Residence_Department_deptMMM', 'French_Region_regionO', 'French_Residence_Department_deptUU', 'French_Residence_Department_deptQQQ', 'French_Residence_Department_deptTTT', 'Treatment_of_atopic_dematitis_7', 'French_Residence_Department_deptZZZ', 'French_Residence_Department_deptT', 'French_Residence_Department_de

In [151]:
def check_column_order(df1, df2):
    return list(df1.columns) == list(df2.columns)

same_order = check_column_order(encode_data, encode_data_test)
print(same_order)

True


In [152]:
encode_data_test.values

array([[ 0.,  1.,  6., ...,  0.,  0.,  0.],
       [ 0.,  1.,  7., ...,  0.,  0.,  0.],
       [ 0.,  0., 10., ...,  0.,  0.,  1.],
       ...,
       [ 0.,  0.,  1., ...,  0.,  0.,  1.],
       [ 0.,  0., 10., ...,  0.,  0.,  1.],
       [ 0.,  0.,  1., ...,  0.,  0.,  1.]], dtype=float16)

In [153]:
# Create a random targets dataframe to call for obtaining test values

num_rows = len(encode_data_test)  # Specify the number of rows
data_array = np.random.choice([0, 1], size=(num_rows, 29))

# Create a DataFrame using the numpy array
Targets_test = pd.DataFrame(data_array, columns=['Allergy_Present', 'Severe_Allergy', 'Respiratory_Allergy', 'Food_Allergy', 'Venom_Allergy',
                     'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_CONJ', 
                     'Type_of_Respiratory_Allergy_GINA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                     'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Type_of_Respiratory_Allergy_IGE_Pollen_Tree',
                     'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach',
                     'Type_of_Respiratory_Allergy_IGE_Molds_Yeast', 'Type_of_Food_Allergy_Aromatics', 'Type_of_Food_Allergy_Other',
                     'Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish',
                     'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Food_Allergy_Mammalian_Milk', 
                     'Type_of_Food_Allergy_Oral_Syndrom', 'Type_of_Food_Allergy_Other_Legumes', 'Type_of_Food_Allergy_Peanut',
                     'Type_of_Food_Allergy_Shellfish', 'Type_of_Food_Allergy_TPO', 'Type_of_Food_Allergy_Tree_Nuts',
                     'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Venom_Allergy_IGE_Venom'])

Targets_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 29 columns):
 #   Column                                          Non-Null Count  Dtype
---  ------                                          --------------  -----
 0   Allergy_Present                                 586 non-null    int64
 1   Severe_Allergy                                  586 non-null    int64
 2   Respiratory_Allergy                             586 non-null    int64
 3   Food_Allergy                                    586 non-null    int64
 4   Venom_Allergy                                   586 non-null    int64
 5   Type_of_Respiratory_Allergy_ARIA                586 non-null    int64
 6   Type_of_Respiratory_Allergy_CONJ                586 non-null    int64
 7   Type_of_Respiratory_Allergy_GINA                586 non-null    int64
 8   Type_of_Respiratory_Allergy_IGE_Pollen_Gram     586 non-null    int64
 9   Type_of_Respiratory_Allergy_IGE_Pollen_Herb     586 non-null    i

In [154]:
liste_df=get_predictions_fold_imblearn_meta_classifier(encode_data_test,Targets_test, folder_path_fold,folder_path_meta)

Allergy_Present
Transformation Pipeline and Model Successfully Loaded


done
Severe_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Respiratory_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Food_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Venom_Allergy
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_ARIA
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_CONJ
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_GINA
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Gram
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Herb
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Pollen_Tree
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Dander_Animals
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Mite_Cockroach
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Respiratory_Allergy_IGE_Molds_Yeast
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Aromatics
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Other
Type_of_Food_Allergy_Cereals_&_Seeds
Type_of_Food_Allergy_Egg
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Fish
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Fruits_and_Vegetables
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Mammalian_Milk
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Oral_Syndrom
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Other_Legumes
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Peanut
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Shellfish
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_TPO
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Food_Allergy_Tree_Nuts
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Venom_Allergy_ATCD_Venom
Transformation Pipeline and Model Successfully Loaded


done
Type_of_Venom_Allergy_IGE_Venom
Transformation Pipeline and Model Successfully Loaded


done


In [155]:
print(len(liste_df))
liste_df[0]

27


Unnamed: 0,index,Value_1,Value_2,Value_3,Value_4,Value_5,Value_6,Value_7,Value_8,Value_9,Value_10,Value_11,Value_12,Value_13,Value_14,Value_15,Value_16,pred_label Allergy_Present_meta_model_pycaret_binary_values,pred_score Allergy_Present_meta_model_pycaret_binary_values
0,0,-1.102905,1.085340,-0.990184,0.920620,-0.895220,1.032566,-0.414570,0.213286,-0.728638,0.535829,-0.862765,1.042009,-0.710387,0.917287,-0.974983,0.824874,1,1.00
1,1,-0.843507,0.820890,-0.849093,0.760614,-0.822458,0.957897,-0.547661,0.325485,-0.965944,0.794912,-0.540695,0.715214,-0.437896,0.636596,0.364315,-0.492687,1,0.82
2,2,-0.925698,0.906453,-0.781579,0.711893,-0.919611,1.058947,-0.352484,0.164510,-0.425268,0.149075,-0.782476,0.959337,-0.282945,0.479212,-0.778476,0.625255,1,0.98
3,3,-0.877881,0.865550,-0.825042,0.754940,-0.798731,0.934011,-0.354227,0.167034,-0.571669,0.326683,-0.713648,0.902831,-0.565053,0.769340,-0.798405,0.645475,1,0.91
4,4,0.172201,-0.183142,0.080547,-0.156115,0.706231,-0.651334,-0.223131,0.039624,-0.038391,-0.248390,0.217860,-0.108217,0.319018,-0.116763,0.219283,-0.351579,0,0.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581,581,-0.505540,0.493876,-0.373515,0.288877,0.134134,-0.047215,-0.258242,0.070553,-0.120380,-0.150617,-0.272629,0.421549,-0.136955,0.338710,-0.132818,-0.005439,1,0.98
582,582,-0.723555,0.710466,-0.711373,0.649096,-0.492129,0.606592,-0.338129,0.153164,-0.456057,0.189084,-0.622614,0.791680,-0.470014,0.669289,-0.710529,0.557808,1,1.00
583,583,-0.185368,0.169611,-0.069823,0.012788,0.382955,-0.311873,-0.107826,-0.103661,-0.010831,-0.281991,-0.047415,0.161571,0.416175,-0.213512,0.101817,-0.231421,1,0.74
584,584,-0.489808,0.468365,-0.317988,0.252578,-0.620456,0.742858,-0.353308,0.161451,-0.209629,-0.066208,-0.745724,0.901566,0.422855,-0.215251,-0.465792,0.323702,1,0.96


In [156]:
merged_df = pd.DataFrame()
for i,df in enumerate(liste_df):
    if i==0:
        merged_df = df
    else:
        last_two_columns = df.iloc[:, -2:]
        merged_df = pd.concat([merged_df, last_two_columns], axis=1)
        
added_list = ['pred_label ' + s + '_meta_model_pycaret_binary_values' for s in liste_column_monovalue]
zeros_data = pd.DataFrame(0, index=np.arange(len(merged_df)), columns= added_list)
merged_df = pd.concat([merged_df, zeros_data], axis=1)
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 73 columns):
 #   Column                                                                                      Non-Null Count  Dtype  
---  ------                                                                                      --------------  -----  
 0   index                                                                                       586 non-null    int32  
 1   Value_1                                                                                     586 non-null    float32
 2   Value_2                                                                                     586 non-null    float32
 3   Value_3                                                                                     586 non-null    float32
 4   Value_4                                                                                     586 non-null    float32
 5   Value_5                                    

In [157]:
merged_df.head()

Unnamed: 0,index,Value_1,Value_2,Value_3,Value_4,Value_5,Value_6,Value_7,Value_8,Value_9,...,pred_label Type_of_Food_Allergy_TPO_meta_model_pycaret_binary_values,pred_score Type_of_Food_Allergy_TPO_meta_model_pycaret_binary_values,pred_label Type_of_Food_Allergy_Tree_Nuts_meta_model_pycaret_binary_values,pred_score Type_of_Food_Allergy_Tree_Nuts_meta_model_pycaret_binary_values,pred_label Type_of_Venom_Allergy_ATCD_Venom_meta_model_pycaret_binary_values,pred_score Type_of_Venom_Allergy_ATCD_Venom_meta_model_pycaret_binary_values,pred_label Type_of_Venom_Allergy_IGE_Venom_meta_model_pycaret_binary_values,pred_score Type_of_Venom_Allergy_IGE_Venom_meta_model_pycaret_binary_values,pred_label Type_of_Food_Allergy_Other_meta_model_pycaret_binary_values,pred_label Type_of_Food_Allergy_Cereals_&_Seeds_meta_model_pycaret_binary_values
0,0,-1.102905,1.08534,-0.990184,0.92062,-0.89522,1.032566,-0.41457,0.213286,-0.728638,...,0,1.0,0,0.94,0,1.0,0,1.0,0,0
1,1,-0.843507,0.82089,-0.849093,0.760614,-0.822458,0.957897,-0.547661,0.325485,-0.965944,...,0,0.93,0,1.0,0,1.0,0,1.0,0,0
2,2,-0.925698,0.906453,-0.781579,0.711893,-0.919611,1.058947,-0.352484,0.16451,-0.425268,...,0,0.66,0,0.86,0,1.0,0,0.99,0,0
3,3,-0.877881,0.86555,-0.825042,0.75494,-0.798731,0.934011,-0.354227,0.167034,-0.571669,...,0,1.0,0,0.97,0,1.0,0,1.0,0,0
4,4,0.172201,-0.183142,0.080547,-0.156115,0.706231,-0.651334,-0.223131,0.039624,-0.038391,...,0,1.0,0,1.0,0,1.0,0,1.0,0,0


In [158]:
target_columns = ['Type_of_Food_Allergy_Cereals_&_Seeds', 'Type_of_Food_Allergy_Other_Legumes', 'Allergy_Present', 'Type_of_Food_Allergy_Mammalian_Milk', 'Type_of_Food_Allergy_Other',
                  'Type_of_Respiratory_Allergy_IGE_Mite_Cockroach', 'Venom_Allergy', 'Type_of_Respiratory_Allergy_ARIA', 'Type_of_Respiratory_Allergy_IGE_Pollen_Gram',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Herb', 'Food_Allergy', 'Type_of_Food_Allergy_Oral_Syndrom','Type_of_Food_Allergy_Tree_Nuts', 'Severe_Allergy',
                  'Type_of_Food_Allergy_Aromatics', 'Type_of_Venom_Allergy_IGE_Venom', 'Type_of_Venom_Allergy_ATCD_Venom', 'Type_of_Respiratory_Allergy_CONJ', 'Type_of_Food_Allergy_Peanut',
                  'Type_of_Food_Allergy_Egg', 'Type_of_Food_Allergy_Fish', 'Type_of_Respiratory_Allergy_GINA', 'Respiratory_Allergy', 'Type_of_Food_Allergy_TPO',
                  'Type_of_Respiratory_Allergy_IGE_Pollen_Tree', 'Type_of_Food_Allergy_Fruits_and_Vegetables', 'Type_of_Respiratory_Allergy_IGE_Molds_Yeast',
                  'Type_of_Respiratory_Allergy_IGE_Dander_Animals', 'Type_of_Food_Allergy_Shellfish']

for elem in target_columns:
    original_validation_data[elem]=  merged_df['pred_label '+ elem + '_meta_model_pycaret_binary_values']

In [159]:
original_validation_data.head(30)

Unnamed: 0,trustii_id,Patient_ID,Chip_Code,Chip_Type,Chip_Image_Name,Age,Gender,Blood_Month_sample,French_Residence_Department,French_Region,...,Type_of_Food_Allergy_Egg,Type_of_Food_Allergy_Fish,Type_of_Respiratory_Allergy_GINA,Respiratory_Allergy,Type_of_Food_Allergy_TPO,Type_of_Respiratory_Allergy_IGE_Pollen_Tree,Type_of_Food_Allergy_Fruits_and_Vegetables,Type_of_Respiratory_Allergy_IGE_Molds_Yeast,Type_of_Respiratory_Allergy_IGE_Dander_Animals,Type_of_Food_Allergy_Shellfish
0,1,PMP0156,22 262C 3858,ISAC_V2,,8.0,1.0,6.0,deptBBB,regionJ,...,0,0,1,1,0,1,0,0,1,0
1,4,PCR0234,02AHX0DC,ALEX,02AHX0DC.bmp,14.0,1.0,7.0,deptL,regionD,...,0,0,0,1,0,0,0,0,1,0
2,5,PCR0532,02AUN372,ALEX,02AUN372.png,32.0,0.0,10.0,deptUUU,regionF,...,0,0,1,1,0,1,0,1,1,0
3,7,GJH0147,EKF3830_4,ISAC_V2,EKF3830_4_2200444337_2023_2_17_11_58_24.bmp,65.0,1.0,8.0,deptQ,regionF,...,0,0,0,0,0,0,0,0,0,0
4,8,TXV0009,881204001164,ISAC_V1,1G20027_2_881204001164_2012_4_25_18_32_58.bmp,5.0,0.0,4.0,deptII,regionC,...,0,0,0,0,0,0,0,0,0,0
5,9,PCR0118,02AFA752,ALEX,,49.0,0.0,1.0,deptXXX,regionI,...,0,0,0,1,0,0,0,0,0,0
6,10,QVW0214,AB02627_3,ISAC_V1,,6.0,1.0,2.0,deptY,regionD,...,0,0,1,1,0,1,0,1,1,0
7,15,TXV0157,881602013302,ISAC_V1,BAF4027_4_881602013302_2016_2_23_16_38_11.bmp,13.0,1.0,2.0,deptRR,regionB,...,0,0,1,1,0,0,0,0,0,0
8,18,WQW0190,223112546,ISAC_V2,END0E30_1_223112546_2023_1_3_16_20_19.bmp,12.0,0.0,11.0,deptOO,regionL,...,0,0,0,0,0,0,0,0,0,0
9,23,TXV0282,881903001372,ISAC_V1,CXG1527_3_881903001372_2019_3_14_3_51_59.bmp,8.0,0.0,3.0,deptEE,regionC,...,0,0,0,1,0,0,0,0,0,0


In [160]:
original_validation_data.to_csv('Submission_Pytorch_resampling_KStrat_fold_meta_imblearn_classification_2.csv', index=False, encoding='UTF-8')