In [3]:
import random
from collections import defaultdict
from copy import deepcopy

import numpy as np
import torch
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, RandomSampler
from tqdm import tqdm_notebook as tqdm

from cnn1d.config import Config
from cnn1d.dataset import CustomDataset, get_df
from cnn1d.train import get_recall, train_model

import warnings
warnings.filterwarnings("ignore")

In [4]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN to avoid
    run-to-run algorithm changes.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU rather than only the current device; PyTorch
    # documents this call as silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different algorithm from run to run,
    # which undermines the deterministic flag above, so switch it off.
    torch.backends.cudnn.benchmark = False
seed_everything()

# Load the full training set. The fold loop below indexes both objects with
# integer index arrays, so they are presumably NumPy arrays — TODO confirm
# against get_df.
X_train, y_train = get_df(Config.path_train)

FOLDS = 5
# NOTE(review): KFold is neither shuffled nor stratified here; if the rows are
# ordered by class the folds may be unbalanced — confirm this is intended.
kfolds = KFold(n_splits=FOLDS)

# Results keyed by dropout value; each list receives one entry per fold.
result = defaultdict(lambda: {'recall': [], 'cm': [], 'epochs_loss': []})

# A sweep over dropout values (np.arange(0.1, 0.8, 0.05)) was tried earlier;
# only this single value is evaluated now.
drpt = 0.5

# Not used in this cell; kept because later cells may rely on these names.
drpt_res = dict()
k = 0
errors = defaultdict(lambda: defaultdict(lambda: 0))

# Materialise the splits so tqdm knows the total number of folds.
splits = kfolds.split(X_train, y_train)
t = tqdm(list(splits))
for train_index, valid_index in t:
    X_tr, X_val = X_train[train_index], X_train[valid_index]
    y_tr, y_val = y_train[train_index], y_train[valid_index]

    # Oversample the training fold only (the validation fold stays untouched):
    # every class is requested at twice the size of the largest class.
    labels, counts = np.unique(y_tr, return_counts=True)
    _max = np.max(counts) * 2
    sampling_strategy = {label: _max for label in labels}
    smote = SMOTE(random_state=42, sampling_strategy=sampling_strategy, n_jobs=-1)
    X_tr, y_tr = smote.fit_resample(X_tr, y_tr)

    # The last two CustomDataset arguments are project-specific; presumably a
    # mode label and a train/augment flag — TODO confirm in cnn1d.dataset.
    dataset_train = CustomDataset(X_tr, y_tr, 'Train', True)
    dataset_val = CustomDataset(X_val, y_val, 'Val', False)

    # Training batches are drawn in random order via the sampler, which is why
    # shuffle stays False; validation batches keep their original order so the
    # predictions line up with y_val.
    dataloader_train = DataLoader(dataset_train, batch_size=128, sampler=RandomSampler(dataset_train),
                                  shuffle=False, num_workers=0, pin_memory=True)
    dataloader_val = DataLoader(dataset_val, batch_size=128, shuffle=False, num_workers=0, pin_memory=True)

    model, epochs_loss = train_model(drpt, dataloader_train, dataloader_val)

    recall, y_pred_valid = get_recall(model, dataloader_val)
    cm = confusion_matrix(y_true=y_val, y_pred=y_pred_valid)
    result[drpt]['recall'].append(recall)
    result[drpt]['cm'].append(cm)
    result[drpt]['epochs_loss'].append(epochs_loss)

    # Rebuilt in fixed order after training; not consumed in this cell —
    # presumably for inspection in a later cell (TODO confirm, else remove).
    dataloader_train = DataLoader(dataset_train, batch_size=128, shuffle=False, num_workers=0, pin_memory=True)

    # Show the running mean recall over the folds finished so far. The postfix
    # used to receive the raw list of per-fold tensors despite its name.
    fold_recalls = [float(r) for r in result[drpt]['recall']]
    t.set_postfix(avg_recall=float(np.mean(fold_recalls)))
    # Per-fold recalls recorded from an earlier run:
    # 0.9576, 0.9545, 0.9596, 0.9545, 0.9513

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s]


KeyboardInterrupt: 