In [14]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('src'), '..')))

import pandas as pd
from sklearn.model_selection import train_test_split
import wandb

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr
from src.models.classifiers import *
from src.trainer.trainer import TrainerClassifier, Model_class

random_seed = 42
torch.manual_seed(random_seed)
generator = torch.Generator()
generator.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True

from functools import partial

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [15]:
BATCH_SIZE = 1024
INIT_PARAM = 512

In [16]:
df = pd.read_csv('../data/prepared_data.csv')

In [17]:
X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Machine failure']),
                                                 df['Machine failure'],
                                                 shuffle=True,
                                                 stratify=df['Machine failure'], random_state=random_seed,
                                                 train_size=0.7)

In [18]:
df_train = pd.concat([X_train, y_train], axis = 1)
df_test = pd.concat([X_test, y_test], axis = 1)

In [19]:
train_dataset = ClassifierDataset(df_train)
val_dataset = ClassifierDataset(df_test)

In [20]:
train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True,
    generator=generator
)

val_dl = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True,
    generator=generator
)

In [21]:
model = BaselineClassifier(X_train.shape[1], INIT_PARAM)

In [22]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

709842

In [23]:
# loss_func = FocalLoss(gamma=2)
loss_func = nn.CrossEntropyLoss()
loss = LossWrapper(loss_func)
model_factory = partial(Model_class)
optimizer_factory = partial(torch.optim.AdamW)
scheduler_factory = partial(lr.ExponentialLR)

model_params = dict(model=model,
                    device=device)

optimizer_params = dict(weight_decay=1e-3, lr=1e-2)
scheduler_params = dict(gamma=0.90)

learning_params = dict(batch_size=BATCH_SIZE, num_epoch=40)

wandb_init_params = dict(
    name=f'BL_{INIT_PARAM}_{BATCH_SIZE}',
    project="Internship_project",
    dir = '../logs/'
)

In [24]:
additional_params = dict(loss = loss_func,
                        p = 0,
                        sampling = 'None',
                        batch_size = BATCH_SIZE,
                        init_parammeters = INIT_PARAM,
                        features_amount = df.shape[1],
                        random_seed = random_seed)

In [25]:
trainer = TrainerClassifier(train_dl,
                  val_dl,
                  loss,
                  model_factory=model_factory,
                  optimizer_factory=optimizer_factory,
                  scheduler_factory=scheduler_factory,
                  model_params=model_params,
                  optimizer_params=optimizer_params,
                  scheduler_params=scheduler_params,
                  additional_params=additional_params,
                  log=True,
                  wandb_init_params=wandb_init_params,
                  model_dir='../logs/nn_models/classifier/',
                  saving_model=False
                  )

In [26]:
trainer.train_model(learning_params)
wandb.finish()

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█▇▇▆▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▆██████████████████████████████████████
test_auc_score,▁█▇▅▅▅▂▅▆▅▆▆▇▇▆▇▆▇▇▇▆▆▆▇▇▆▆▆▆▇▇▆▆▇▇▇▇▇▇▇
test_f1_score,▁▅▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇███▇██████████████████
test_fpr,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_tpr,█▇▅▃▄▃▂▃▂▁▃▃▃▂▂▂▂▂▄▃▃▂▃▃▄▃▃▃▃▃▄▃▃▃▄▄▄▄▄▃
train_accuracy,▁▇██████████████████████████████████████
train_auc_score,▁▆▆▇▇▄▆▆▇▇▇██▇▇█▇▇██▇█▇▇█▇▇▇▇▇▇▇▇██▇▇███

0,1
epoch,39.0
lr,0.00015
test_accuracy,0.97662
test_auc_score,0.87947
test_f1_score,0.79382
test_fpr,0.00829
test_loss,0.33814
test_tpr,0.53528
train_accuracy,0.98759
train_auc_score,0.91231
