In [1]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('src'), '..')))

import pandas as pd
from sklearn.model_selection import train_test_split
import wandb

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr
from src.models.classifiers import *
from src.models.gev_nn import *
from src.models.autoencoders import *
from src.trainer.trainer import TrainerClassifier, Model_class

random_seed = 42
np.random.seed(random_seed)


torch.backends.cudnn.deterministic = True

from functools import partial

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
seeds = np.random.choice(100, 10, replace=False)
seeds

array([83, 53, 70, 45, 44, 39, 22, 80, 10,  0])

In [5]:
for seed in seeds:
    generator = torch.Generator()
    generator.manual_seed(int(seed))

    df = pd.read_csv('../data/prepared_data.csv')

    X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Machine failure']),
                                                 df['Machine failure'],
                                                 shuffle=True,
                                                 stratify=df['Machine failure'], random_state=seed,
                                                 train_size=0.7)

    BATCH_SIZE = 2048
    NUM_FEATURES = X_train.shape[1]

    df_train = pd.concat([X_train, y_train], axis = 1)
    df_test = pd.concat([X_test, y_test], axis = 1)

    train_dataset = ClassifierDataset(df_train)
    val_dataset = ClassifierDataset(df_test)

    train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True,
    generator=generator
    )

    val_dl = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE, 
        shuffle=True,
        generator=generator
    )

    INIT_PARAM = 256
    encoder = nn.Sequential(
                nn.Linear(NUM_FEATURES, INIT_PARAM),
                nn.BatchNorm1d(INIT_PARAM),
                nn.ReLU(),

                nn.Linear(INIT_PARAM, int(INIT_PARAM/2)),
                nn.BatchNorm1d(int(INIT_PARAM/2)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/2), int(INIT_PARAM/4)),
                nn.BatchNorm1d(int(INIT_PARAM/4)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/4), int(INIT_PARAM/8)),
                nn.BatchNorm1d(int(INIT_PARAM/8)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/8), int(INIT_PARAM/16)),
                nn.BatchNorm1d(int(INIT_PARAM/16)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/16), int(INIT_PARAM/32)),
                nn.BatchNorm1d(int(INIT_PARAM/32)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/32), int(INIT_PARAM/64)),
                nn.BatchNorm1d(int(INIT_PARAM/64)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/64), int(INIT_PARAM/64)),
                nn.BatchNorm1d(int(INIT_PARAM/64)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/64), 8)
            )

    INIT_PARAM = 256
    weighted_model = nn.Sequential(

                nn.Linear(NUM_FEATURES, INIT_PARAM),
                nn.BatchNorm1d(INIT_PARAM),
                nn.ReLU(),

                nn.Linear(INIT_PARAM, int(INIT_PARAM/2)),
                nn.BatchNorm1d(int(INIT_PARAM/2)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/2), int(INIT_PARAM/4)),
                nn.BatchNorm1d(int(INIT_PARAM/4)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/4), int(INIT_PARAM/2)),
                nn.BatchNorm1d(int(INIT_PARAM/2)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/2), int(INIT_PARAM)),
                nn.BatchNorm1d(int(INIT_PARAM)),
                nn.ReLU(),

                nn.Linear(INIT_PARAM, NUM_FEATURES)
            )

    INIT_PARAM = 128
    main_classifier = nn.Sequential(

                nn.Linear(NUM_FEATURES, INIT_PARAM),
                nn.BatchNorm1d(INIT_PARAM),
                nn.ReLU(),

                nn.Linear(INIT_PARAM, int(INIT_PARAM/2)),
                nn.BatchNorm1d(int(INIT_PARAM/2)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/2), int(INIT_PARAM/4)),
                nn.BatchNorm1d(int(INIT_PARAM/4)),
                nn.ReLU(),

                nn.Linear(int(INIT_PARAM/4), 1)
            )

    model = GevNN(encoder, weighted_model, main_classifier)

    loss_func = GevLoss(nn.BCELoss())
    loss = LossWrapper(loss_func, gev = True)
    model_factory = partial(Model_class)
    optimizer_factory = partial(torch.optim.AdamW)
    scheduler_factory = partial(lr.ExponentialLR)

    model_params = dict(model=model,
                        device=device)

    optimizer_params = dict(weight_decay=1e-3, lr=1e-2)
    scheduler_params = dict(gamma=0.95)

    learning_params = dict(batch_size=BATCH_SIZE, num_epoch=40)

    wandb_init_params = dict(
        name=f'GEV_NN_{INIT_PARAM}_{BATCH_SIZE}',
        project="Internship_project",
        dir = '../logs/'
    )

    additional_params = dict(loss = loss_func,
                        p = 0,
                        sampling = 'None',
                        batch_size = BATCH_SIZE,
                        init_parameters = INIT_PARAM,
                        features_amount = df.shape[1],
                        random_seed = seed,
                        Is_Gev_NN = '+')

    trainer = TrainerClassifier(train_dl,
                  val_dl,
                  loss,
                  model_factory=model_factory,
                  optimizer_factory=optimizer_factory,
                  scheduler_factory=scheduler_factory,
                  model_params=model_params,
                  optimizer_params=optimizer_params,
                  scheduler_params=scheduler_params,
                  additional_params=additional_params,
                  log=True,
                  wandb_init_params=wandb_init_params,
                  model_dir='../logs/nn_models/classifier/',
                  saving_model=False
                  )

    trainer.train_model(learning_params)
    wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdmitrii_fomin[0m ([33mdmitrii_fomin_uga[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.060034…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▅▅▆▅▆▆▇▆▇▅▅▆▆▇▇▅▆▇▇███▆▇▇▇████▇█▆▇█▇▁▆▇▆
test_auc_score,▁▁▁▅▅▆▇▇▇█▇▇█▆█▇█▇▇████▇▇▇▇▇▆█▆▇▇▇▆▇▇▇▆▇
test_f1_score,▁▁▂▂▃▄▅▆▇▅▅▆▆▇█▅▇▇▇█▇█▆▇▇▇▇███▇█▆▇██▂▆█▆
test_f2_score,▃▄▄▄▅▅▆▆▇▅▅▆▆▇▇▅▇▇▇███▆▇▇▇▇███▇█▆▇█▇▁▆▇▆
test_fpr,▁▁▁▁▁▁▂▃▃▄▄▄▃▃▃▄▄▃▃▂▂▂▄▃▃▃▂▂▃▃▃▃▄▃▃▃█▅▃▄
test_loss,█▆▃▂▁▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▁▃▂▂▂▄▃▂▃
test_tpr,▁▁▁▁▂▃▄▅▇▆▇▆▆▆▇▆▇▆▇▆▆▇▇▆▇▆▆▆▇▇▆▇▇▇▇█████
train_accuracy,▁▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇██████████████

0,1
epoch,39.0
lr,0.00129
test_accuracy,0.96781
test_auc_score,0.94265
test_f1_score,0.97062
test_f2_score,0.96867
test_fpr,0.02495
test_loss,1.14575
test_tpr,0.77549
train_accuracy,0.99432




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016673349766666946, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.060034…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▇█▆▆▁▁▄▅▇▃▆▆▆▇█▇█▅▅▅▅▂▆▆▅▆▆▆▆▄▇▆▅▄▄▆▄▅▆▄
test_auc_score,▁▆▅▆▆▆▇▇▇▇██████████████▇▇▇▇▇▇██▇▇███▇▇█
test_f1_score,▁▂▁▃▂▃▄▆█▆▇▇▆██▇█▅▅▆▆▃▇▆▅▆▆▆▆▄█▆▅▅▅▇▅▆▇▅
test_f2_score,▅▆▄▅▁▂▄▅▇▄▆▆▆▇█▇█▅▅▅▅▂▆▆▅▆▆▆▆▄▇▆▅▅▄▆▄▅▆▄
test_fpr,▁▁▂▃▇▇▆▆▄█▆▆▅▅▄▄▄▅▅▆▆▆▆▅▅▅▄▅▅▅▅▅▅▆▇▆▆▆▆▇
test_loss,█▆▂▁▂▂▂▂▁▁▁▁▁▁▁▃▂▃▃▃▂▃▂▂▂▂▂▂▂▃▂▂▃▃▃▃▃▃▃▃
test_tpr,▁▁▂▃▅▆▆▇▆███▇▇▆▅▆▅▅▆▇▆▇▆▅▅▅▆▆▅▇▆▆▆▆▇▆▆▇▇
train_accuracy,▁▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇▇█▇████████

0,1
epoch,39.0
lr,0.00129
test_accuracy,0.95835
test_auc_score,0.93155
test_f1_score,0.96029
test_f2_score,0.95904
test_fpr,0.02516
test_loss,1.1805
test_tpr,0.47318
train_accuracy,0.99393




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01672061584999985, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.060027…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▁▂▁▂▂▄▄▅▄▆▆▅▆▅▅▅▅▅▅▆▇▇█▆▆▇▆▆██▇▇▇▇▆▆▆▆▆▅
test_auc_score,▁▄▄▆▄▃▁▄▇▇▇▇██▇██▇▇███▇▇▇██▇██▇▇▇▇█▇▇█▇▇
test_f1_score,▁▂▁▂▃▅▆▆▆▆▇▇▇▆▆▆▇▆▆▆▇▇█▇▇▇▇▇██▇▇▇▇▇▆▇▆▆▆
test_f2_score,▁▂▁▂▂▄▅▆▅▆▆▆▇▆▅▆▆▅▅▆▇▇█▇▇▇▇▆██▇▇▇▇▇▆▇▆▆▆
test_fpr,▁▁▁▁▂▃▆▃▇▄▄█▃▅▃▄▆▂▂▂▃▃▃▄▃▂▂▂▂▃▂▂▂▂▃▂▂▂▃▃
test_loss,█▅▃▁▁▁▂▁▁▁▁▁▁▁▂▂▁▂▂▁▁▁▁▁▁▁▁▂▁▁▂▂▂▂▁▂▂▂▂▃
test_tpr,▁▁▁▁▂▄▆▆▇▆▆█▆▆▅▆▇▅▅▅▇▇▇▇▇▆▆▆▇█▆▆▆▆▆▆▆▅▅▆
train_accuracy,▁▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████

0,1
epoch,39.0
lr,0.00129
test_accuracy,0.97823
test_auc_score,0.94642
test_f1_score,0.97523
test_f2_score,0.97675
test_fpr,0.0034
test_loss,1.20319
test_tpr,0.46324
train_accuracy,0.9911




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01668326683333324, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.059987…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,█▇▁▇▅█▇▆▇▇▆▅▅▇▄▆▅▅▆▅▄▅▅▅▅▅▆▆▆▅▄▆▅▃▅▅▄▄▅▄
test_auc_score,▃▄▁▃▄▅▇▆▇█▇▇▆█▆▇▇▆▇▇▆▇▆▇▇▆▇▆▆▆▆▅▆▅▅▅▆▆▇▇
test_f1_score,▇▆▁▆▅▇▇▆▇█▆▅▅▇▅▆▆▅▆▆▅▅▅▆▆▆▆▆▆▅▄▆▆▄▅▆▅▅▅▄
test_f2_score,█▇▁▇▅██▇▇█▆▅▆▇▅▆▅▅▆▆▄▅▅▅▅▆▆▆▆▅▄▆▆▃▅▆▄▅▅▄
test_fpr,▁▃█▂▄▂▃▄▃▃▅▆▅▃▆▅▅▆▅▅▆▆▆▆▅▅▄▅▅▅▆▄▅▇▅▅▆▆▆▆
test_loss,▇▅▄▂▂▁▁▁▂▁▂▂▂▁▃▂▃▃▄▄▄▃▄▄▄▄▃▃▃▄▆▅▅▆▅▅▆█▆▆
test_tpr,▁▁▂▁▁▁▄▄▅█▇▇▅▆▇▇▇▆▇▇████▇█▇▇▆▆▆▅▆▆▆▇▇▇█▇
train_accuracy,▁▆▇▇▇▇▇▇▇▇▇▇▇███████████████████████████

0,1
epoch,39.0
lr,0.00129
test_accuracy,0.91383
test_auc_score,0.88694
test_f1_score,0.93245
test_f2_score,0.91954
test_fpr,0.07565
test_loss,1.43866
test_tpr,0.61962
train_accuracy,0.99309




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01671167073333303, max=1.0)…