In [1]:
import os
import sys
# Put the parent of the current working directory on the import path so the
# `src.*` packages can be imported from this notebook.
sys.path.append(os.path.abspath(os.pardir))

import pandas as pd
from sklearn.model_selection import train_test_split
import wandb

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr

# Single seed shared by every torch random source configured in this cell.
seed_value = 42

# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Seed the global torch RNG and the RNGs of all CUDA devices.
torch.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)

# Stand-alone generator, seeded the same way; manual_seed() returns the generator itself.
generator = torch.Generator().manual_seed(seed_value)

from functools import partial

from src.Trainer.Trainer import Trainer
from src.Models.Classifiers import *
from src.Trainer.Model_class import Model_class
from src.Trainer.Loss_class import Loss_class

from src.Models.Autoencoders import *
from src.Models.JoinedModel import JoinedModel

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# Second argument given to Autoencoder(...) and part of the autoencoder run name
# (presumably the hidden-layer width — confirm in src.Models.Autoencoders).
HIDDEN_PARAM = 512
# Third argument given to Autoencoder(...) and part of both run names
# (presumably the size of the latent representation — confirm in src.Models.Autoencoders).
LATENT_REPR = 5

# Batch size used by every DataLoader built in this notebook and forwarded to the trainer.
BATCH_SIZE = 1024

In [3]:
# Load the full table; the 'Machine failure' column is later split off as the target.
df = pd.read_csv('../data/df_to_enc.csv')

In [4]:
def prepare_data_for_test(X_data, y_data, test_ratio):
    """Hold out a stratified test set.

    Args:
        X_data: Feature table to split.
        y_data: Labels aligned with ``X_data``; also used for stratification.
        test_ratio: Value forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = dict(
        test_size = test_ratio,
        stratify = y_data,
        shuffle = True,
        random_state = 42,
    )
    features_train, features_test, labels_train, labels_test = train_test_split(
        X_data, y_data, **split_options
    )
    return features_train, features_test, labels_train, labels_test

In [5]:
def prepare_data_for_enc(X_data, y_data, autoenc_requared):
    """Carve out the rows used to train the autoencoder.

    ``autoenc_requared`` rows are drawn (stratified on ``y_data``) for the
    autoencoder; the remaining rows and their labels are returned for the
    classifier stage. The autoencoder rows are then split 90/10 into a train
    and a test part (labels are not needed there).

    Args:
        X_data: Feature table to split.
        y_data: Labels aligned with ``X_data``.
        autoenc_requared: Number of rows reserved for the autoencoder; must be
            smaller than ``len(X_data)``.

    Returns:
        Tuple ``(X_train, X_test, X_to_clas, y_to_clas)``: autoencoder train
        features, autoencoder test features, and the leftover features/labels.

    Raises:
        ValueError: If ``autoenc_requared`` is not smaller than ``len(X_data)``.
    """
    # Check that amount rows for enc less than length of data
    if autoenc_requared >= len(X_data):
        raise ValueError("The number of rows for autoencoder more than amount of X_train data")

    # Pass the absolute row count instead of a float ratio: a float train_size is
    # multiplied by the sample count and floored, so the count -> ratio -> count
    # round trip can lose a row (e.g. 29 / 100 * 100 == 28.999999999999996 -> 28).
    X_to_enc, X_to_clas, _, y_to_clas = train_test_split(X_data,
                                                         y_data,
                                                         shuffle = True,
                                                         stratify = y_data,
                                                         random_state = 42,
                                                         train_size = int(autoenc_requared))

    X_train, X_test = train_test_split(X_to_enc,
                                       shuffle = True,
                                       random_state = 42,
                                       train_size = 0.9)

    return X_train, X_test, X_to_clas, y_to_clas

In [6]:
def prepare_data_for_classif(X_data, y_data, classif_requared):
    """Draw the labelled rows used to train the classifier.

    Args:
        X_data: Feature table to sample from.
        y_data: Labels aligned with ``X_data``; also used for stratification.
        classif_requared: Number of labelled rows to keep; must be smaller
            than ``len(X_data)``.

    Returns:
        Tuple ``(X_train, y_train)`` with ``classif_requared`` rows; the
        remaining rows are discarded.

    Raises:
        ValueError: If ``classif_requared`` is not smaller than ``len(X_data)``.
    """
    # Prepare dataset for classifier
    if classif_requared >= len(X_data):
        raise ValueError("The number of rows for classifier more than amount of X_train data")

    # Pass the absolute row count instead of a float ratio: a float train_size is
    # multiplied by the sample count and floored, so the count -> ratio -> count
    # round trip can lose a row (e.g. 29 / 100 * 100 == 28.999999999999996 -> 28).
    X_train, _, y_train, _ = train_test_split(X_data,
                                              y_data,
                                              shuffle = True,
                                              stratify = y_data,
                                              random_state = 42,
                                              train_size = int(classif_requared))

    return X_train, y_train

In [7]:
def make_dataloader(*data, encoder_data = False):
    """Wrap one or more pandas objects in a shuffling ``DataLoader``.

    Args:
        *data: One object, or several that are concatenated column-wise
            (``axis = 1``) before being wrapped.
        encoder_data: When equal to ``False`` the data is wrapped in
            ``TableDatasetDF``; otherwise in ``EncoderDataset``.

    Returns:
        A ``DataLoader`` with ``BATCH_SIZE`` batches, shuffling enabled and the
        notebook-level ``generator`` as its random source.
    """
    # Several inputs are glued together side by side; a single input is used as is.
    table = pd.concat(list(data), axis = 1) if len(data) > 1 else data[0]

    # The equality test against False is kept deliberately: any flag value that
    # does not compare equal to False selects the encoder dataset.
    dataset_cls = TableDatasetDF if encoder_data == False else EncoderDataset

    return DataLoader(
        dataset_cls(table),
        batch_size=BATCH_SIZE,
        shuffle=True,
        generator=generator
    )

In [8]:
def prepare_data(X_data, y_data, test_ratio, autoenc_requared, classif_requared):
    """Split the data for every training stage and wrap each part in a loader.

    The data is split in three steps: a held-out test set, then the rows for
    the autoencoder, then the labelled rows for the classifier taken from what
    is left.

    Args:
        X_data: Full feature table.
        y_data: Labels aligned with ``X_data``.
        test_ratio: Share of the data held out for classifier testing.
        autoenc_requared: Number of rows reserved for the autoencoder.
        classif_requared: Number of labelled rows given to the classifier.

    Returns:
        Tuple ``(train_dl, test_dl, enc_train_dl, enc_test_dl)``.
    """
    X_remaining, X_holdout, y_remaining, y_holdout = prepare_data_for_test(
        X_data, y_data, test_ratio
    )
    X_enc_fit, X_enc_eval, X_pool, y_pool = prepare_data_for_enc(
        X_remaining, y_remaining, autoenc_requared
    )
    X_labelled, y_labelled = prepare_data_for_classif(X_pool, y_pool, classif_requared)

    # Classifier loaders carry features and labels together.
    classifier_test_loader = make_dataloader(X_holdout, y_holdout)
    classifier_train_loader = make_dataloader(X_labelled, y_labelled)

    # Autoencoder loaders carry features only.
    encoder_train_loader = make_dataloader(X_enc_fit, encoder_data=True)
    encoder_test_loader = make_dataloader(X_enc_eval, encoder_data=True)

    return (classifier_train_loader,
            classifier_test_loader,
            encoder_train_loader,
            encoder_test_loader)

In [9]:
# train_dl,\
# test_dl, enc_train_dl, enc_test_dl = prepare_data(df.drop(columns = ['Machine failure']),
#                                                   df['Machine failure'], 0.2, 6000, 300)

In [10]:
def _run_training(train_loader, test_loader, loss, model, num_epoch, run_name, log, model_dir):
    """Build a Trainer with the settings shared by both stages and train `model`.

    Args:
        train_loader: Loader passed to the Trainer as training data.
        test_loader: Loader passed to the Trainer as evaluation data.
        loss: Loss wrapper handed to the Trainer.
        model: Network wrapped by ``Model_class`` through ``model_params``.
        num_epoch: Number of epochs placed in the learning parameters.
        run_name: wandb run name.
        log: Value of the Trainer's ``log`` flag.
        model_dir: Directory passed to the Trainer as ``model_dir``.

    Returns:
        The Trainer after ``train_model`` has returned.
    """
    try:
        trainer = Trainer(train_loader,
                          test_loader,
                          loss,
                          model_factory=partial(Model_class),
                          optimizer_factory=partial(torch.optim.AdamW),
                          scheduler_factory=partial(lr.ExponentialLR),
                          model_params=dict(model=model, device=device),
                          optimizer_params=dict(weight_decay=1e-3, lr=1e-2),
                          scheduler_params=dict(gamma=0.95),
                          log=log,
                          wandb_init_params=dict(
                              name=run_name,
                              project="Internship_project",
                              dir = '../logs/'
                          ),
                          model_dir=model_dir,
                          saving_model=False
                          )
        trainer.train_model(dict(batch_size=BATCH_SIZE, num_epoch=num_epoch))
    finally:
        # Close the wandb run even when training raises, so a failed run does
        # not stay open in the kernel and leak into the next one.
        wandb.finish()
    return trainer


def train_cycle(df, list_amount = (50, 100, 500, 1000, 1999), test_ratio = 0.2, autoenc_requared = 6000):
    """Train an autoencoder and a joined encoder+classifier for several label budgets.

    For every entry of ``list_amount`` the data is re-split, a fresh
    autoencoder is trained for 30 epochs, and its encoder is combined with a
    new classifier that is trained for 20 epochs on that many labelled rows.

    Args:
        df: Table containing the features and a 'Machine failure' target column.
        list_amount: Iterable of labelled-row counts to try, one run per entry.
        test_ratio: Share of ``df`` held out as the classifier test set.
        autoenc_requared: Number of rows reserved for autoencoder training.

    Returns:
        None. Results are reported by the Trainer / wandb.
    """
    features = df.drop(columns = ['Machine failure'])
    target = df['Machine failure']

    for labels_amount in list_amount:
        ############################################################
        # PREPARE DATA
        ############################################################
        train_dl, test_dl, enc_train_dl, enc_test_dl = prepare_data(features,
                                                                    target,
                                                                    test_ratio,
                                                                    autoenc_requared,
                                                                    labels_amount)

        ############################################################
        # TRAIN AUTOENCODER
        ############################################################
        # NOTE(review): relies on the dataset exposing a 2-D `.data` attribute.
        autoencoder = Autoencoder(enc_train_dl.dataset.data.shape[1], HIDDEN_PARAM, LATENT_REPR)
        encoder_trainer = _run_training(
            enc_train_dl,
            enc_test_dl,
            Encoder_loss(nn.MSELoss()),
            autoencoder,
            num_epoch=30,
            run_name=f'Autoencoder_simple_HidParam-{HIDDEN_PARAM}_Latent-{LATENT_REPR}',
            log=False,
            model_dir='../logs/nn_models/autoencoder/',
        )

        ############################################################
        # TRAIN CLASSIFIER
        ############################################################
        # The classifier is created only after the autoencoder has been trained,
        # which keeps the order of random draws the same as before.
        classifier = Simple_classifier(train_dl.dataset.data.shape[1], 50)
        jm = JoinedModel(encoder_trainer.model.model.encoder, classifier)

        _run_training(
            train_dl,
            test_dl,
            Loss_class(FocalLoss(gamma=3)),
            jm,
            num_epoch=20,
            run_name=f'JM_NumLab-{labels_amount}_LatDim-{LATENT_REPR}',
            log=True,
            model_dir='../logs/nn_models/joined_models/',
        )

In [11]:
# Run the whole sweep with the default label budgets.
# NOTE(review): the output recorded below stops with OSError [WinError 1314]
# (a required privilege is not held by the client) during the first joined-model
# run, with a state-dict path pointing into the wandb run directory. Presumably
# a symlink is being created on Windows without the needed privilege — confirm
# in Trainer / wandb before re-running.
train_cycle(df)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdmitrii_fomin[0m ([33mdmitrii_fomin_uga[0m). Use [1m`wandb login --relogin`[0m to force relogin


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:02<00:00,  2.35it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]


Epoch: 1 of 30, 0.043 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 91.34it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 121.21it/s]


Epoch: 2 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 74.00it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 120.85it/s]


Epoch: 3 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 80.88it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]


Epoch: 4 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 80.31it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 169.54it/s]


Epoch: 5 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 100.18it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 116.03it/s]


Epoch: 6 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 104.31it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 77.94it/s]


Epoch: 7 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 91.93it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 186.24it/s]


Epoch: 8 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 76.41it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 120.33it/s]


Epoch: 9 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 82.03it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 118.43it/s]


Epoch: 10 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 79.55it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 191.11it/s]


Epoch: 11 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 66.83it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 61.62it/s]


Epoch: 12 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 43.46it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 60.65it/s]


Epoch: 13 of 30, 0.003 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 67.22it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 119.69it/s]


Epoch: 14 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 73.17it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 97.57it/s]


Epoch: 15 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 82.05it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 113.45it/s]


Epoch: 16 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 81.99it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]


Epoch: 17 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 80.62it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 118.94it/s]


Epoch: 18 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 78.30it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]


Epoch: 19 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 81.80it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 123.58it/s]


Epoch: 20 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 82.62it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 102.75it/s]


Epoch: 21 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 101.07it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 89.94it/s]


Epoch: 22 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 82.20it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 124.39it/s]


Epoch: 23 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 83.68it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 153.62it/s]


Epoch: 24 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 91.26it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 96.10it/s]


Epoch: 25 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 84.05it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 122.24it/s]


Epoch: 26 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 56.90it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]


Epoch: 27 of 30, 0.002 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 90.52it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 95.87it/s]


Epoch: 28 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 92.92it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<00:00, 123.29it/s]


Epoch: 29 of 30, 0.001 min


I'm studying hard now🧐, don't disturb!: 100%|██████████| 6/6 [00:00<00:00, 68.59it/s]
Let's see how good I am...: 100%|██████████| 1/1 [00:00<?, ?it/s]

Epoch: 30 of 30, 0.002 min





0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
lr,██▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
test_loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,29.0
lr,0.00215
test_loss,0.01815
train_loss,0.02045


I'm studying hard now🧐, don't disturb!: 100%|██████████| 1/1 [00:00<00:00, 83.93it/s]
Let's see how good I am...: 100%|██████████| 2/2 [00:00<00:00, 41.66it/s]


OSError: [WinError 1314] Клиент не обладает требуемыми правами: 'd:\\Github\\Internship-project\\logs\\nn_models\\joined_models\\JM_NumLab-50_LatDim-5_state_dict.pth' -> '../logs/wandb\\run-20230529_204538-6f06bu0w\\files\\nn_models\\joined_models\\JM_NumLab-50_LatDim-5_state_dict.pth'