In [15]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('src'), '..')))

import pandas as pd
from sklearn.model_selection import train_test_split
import wandb

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr
from src.models.classifiers import *
from src.trainer.trainer import TrainerClassifier, Model_class

seed_value = 42
torch.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
generator = torch.Generator()
generator.manual_seed(seed_value)
torch.backends.cudnn.deterministic = True

from functools import partial

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
BATCH_SIZE = 1024
INIT_PARAM = 512

In [17]:
df = pd.read_csv('../data/prepared_data.csv')

In [18]:
X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Machine failure']),
                                                 df['Machine failure'],
                                                 shuffle=True,
                                                 stratify=df['Machine failure'], random_state=42,
                                                 train_size=0.7)

In [19]:
from src.sampling_methods.sampler import DataSampler

In [22]:
sampler = DataSampler()

In [24]:
sampler.ROS(X_train, y_train)

array([[3.037e+02, 3.119e+02, 1.332e+03, ..., 0.000e+00, 0.000e+00,
        1.000e+00],
       [3.023e+02, 3.109e+02, 1.710e+03, ..., 0.000e+00, 1.000e+00,
        1.000e+00],
       [3.003e+02, 3.103e+02, 1.362e+03, ..., 1.000e+00, 0.000e+00,
        1.000e+00],
       ...,
       [2.985e+02, 3.095e+02, 1.385e+03, ..., 1.000e+00, 0.000e+00,
        1.000e+00],
       [3.022e+02, 3.113e+02, 1.510e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [3.011e+02, 3.100e+02, 1.433e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00]])

In [5]:
df_train = pd.concat([X_train, y_train], axis = 1)
df_test = pd.concat([X_test, y_test], axis = 1)

In [6]:
train_dataset = ClassifierDataset(df_train)
val_dataset = ClassifierDataset(df_test)

In [7]:
train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True,
    generator=generator
)

val_dl = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True,
    generator=generator
)

In [8]:
model = BaselineClassifier(X_train.shape[1], INIT_PARAM)

In [9]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

709842

In [11]:
loss = LossWrapper(nn.CrossEntropyLoss())
model_factory = partial(Model_class)
optimizer_factory = partial(torch.optim.AdamW)
scheduler_factory = partial(lr.ExponentialLR)

model_params = dict(model=model,
                    device=device)

optimizer_params = dict(weight_decay=1e-3, lr=1e-2)
scheduler_params = dict(gamma=0.90)

learning_params = dict(batch_size=BATCH_SIZE, num_epoch=40)

wandb_init_params = dict(
    name=f'BL_{INIT_PARAM}_{BATCH_SIZE}',
    project="Internship_project",
    dir = '../logs/'
)

In [12]:
trainer = TrainerClassifier(train_dl,
                  val_dl,
                  loss,
                  model_factory=model_factory,
                  optimizer_factory=optimizer_factory,
                  scheduler_factory=scheduler_factory,
                  model_params=model_params,
                  optimizer_params=optimizer_params,
                  scheduler_params=scheduler_params,
                  log=True,
                  wandb_init_params=wandb_init_params,
                  model_dir='../logs/nn_models/classifier/',
                  saving_model=False
                  )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdmitrii_fomin[0m ([33mdmitrii_fomin_uga[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
trainer.train_model(learning_params)
wandb.finish()

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█▇▇▆▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▃▅▇████████████████████████████████████
test_auc_score,▃▅█▅▅▁▆▇▅▃▆▆▆▄▇▇▇▇▆▅▆▅▃▆▅▄▅▅▄▄▄▃▅▅▃▄▄▅▅▃
test_f1_score,▁▂▃▅▆▇▆▆▇▆▇▇▇▇▇▇▇▇█▇█▇▇███▇▇████████▇███
test_fpr,█▆▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss,██▇▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_tpr,█▇▇▆▄▂▂▂▂▁▁▄▂▁▂▂▂▃▄▃▅▄▃▆▄▅▃▄▅▅▄▄▅▅▄▄▃▅▅▅
train_accuracy,▁▇▇▇▇▇▇▇████████████████████████████████
train_auc_score,▁▄▆▇▇████▇██▇▇██▇█▇▇█▇█▇████▇███▇█▇█▇███

0,1
epoch,39.0
lr,0.00015
test_accuracy,0.98338
test_auc_score,0.86515
test_f1_score,0.86618
test_fpr,0.00689
test_loss,0.33515
test_tpr,0.70541
train_accuracy,0.9906
train_auc_score,0.90665
