<a href="https://colab.research.google.com/github/niobeus/snn_classification/blob/main/SNN_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Datasets

In [6]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.2.2-py2.py3-none-any.whl (80 kB)
[?25l[K     |████                            | 10 kB 17.7 MB/s eta 0:00:01[K     |████████▏                       | 20 kB 6.3 MB/s eta 0:00:01[K     |████████████▏                   | 30 kB 4.5 MB/s eta 0:00:01[K     |████████████████▎               | 40 kB 4.4 MB/s eta 0:00:01[K     |████████████████████▎           | 51 kB 2.2 MB/s eta 0:00:01[K     |████████████████████████▍       | 61 kB 2.4 MB/s eta 0:00:01[K     |████████████████████████████▍   | 71 kB 2.5 MB/s eta 0:00:01[K     |████████████████████████████████| 80 kB 2.2 MB/s 
Installing collected packages: category-encoders
Successfully installed category-encoders-2.2.2


In [7]:
from category_encoders import LeaveOneOutEncoder
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import roc_auc_score

import os

import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.utils.data import Dataset

  import pandas.util.testing as tm


In [8]:
def load_dataset(URL, target_name, cat_features, test_size=0.2, val_size=0.1):
  df = pd.read_csv(URL)
  target = df.pop(target_name)

  X, X_test, y, y_test = train_test_split(df, target, test_size=test_size)
  
  class_to_int = {c: i for i, c in enumerate(y.unique())}                                                                                                               
  y_int = [class_to_int[v] for v in y]                                                                                                                            
  y_test_int = [class_to_int[v] for v in y_test] 

  # encode categorical features
  cat_encoder = LeaveOneOutEncoder()
  cat_encoder.fit(X, y_int)
  X = cat_encoder.transform(X)

  X_test = cat_encoder.transform(X_test)

  X = X.values.astype('float32')
  X_test = X_test.values.astype('float32')
  y = np.array(y_int)
  y_test = np.array(y_test_int)

  if val_size:
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_size)
    return X_train, X_test, X_val, y_train, y_test, y_val

  return X, X_test, y, y_test

In [9]:
# datasets information
datasets = {
    'adult' : {
        'URL' : 'https://docs.google.com/uc?id=10eFO2rVlsQBUffn0b7UCAp28n0mkLCy7&export=download',
        'target_name' : '<=50K',
        'cat_features' : ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
        },

    'airlines' : {
        'URL' : 'https://www.openml.org/data/get_csv/66526/phpvcoG8S',
        'target_name' : 'Delay',
        'cat_features' : ['Airline', 'Flight', 'AirportFrom', 'AirportTo', 'DayOfWeek']
    },

    'albert' : {
        'URL' : 'https://www.openml.org/data/get_csv/19335520/file7b53746cbda2.arff',
        'target_name' : 'class',
        'cat_features' : []
        },

    'bank' : {
        'URL' : 'https://www.openml.org/data/get_csv/1586218/phpkIxskf',
        'target_name' : 'Class',
        'cat_features' : []
    },

    'blastchar' : {
        'URL' : 'https://vk.com/doc166590718_613866185',
        'target_name' : 'Churn',
        'cat_features' : ['gender', 'Partner', 'Dependents', 'PhoneService', 
                          'MultipleLines', 'InternetService', 'OnlineSecurity', 
                          'OnlineBackup', 'DeviceProtection', 'TechSupport', 
                          'StreamingTV', 'StreamingMovies', 'Contract', 
                          'PaperlessBilling', 'PaymentMethod']
    },

    'jasmine' : {
        'URL' : 'https://www.openml.org/data/get_csv/19335516/file79b563a1a18.arff',
        'target_name' : 'class',
        'cat_features' : []
    },

    # this dataset is not in public access
    # 'philippine' : {
        # 'URL' : 'http://www.causality.inf.ethz.ch/AutoML/philippine.zip',
        # 'target_name' : 'Delay',
        # 'cat_features' : ['Airline', 'Flight', 'AirportFrom', 'AirportTo', 'DayOfWeek']
    # },

    'shrutime' : {
        'URL' : 'https://vk.com/doc166590718_613869835',
        'target_name' : 'Exited',
        'cat_features' : ['Surname', 'Geography', 'Gender']
    },

    'spambase' : {
        'URL' : 'https://vk.com/doc166590718_613870389',
        'target_name' : 'class',
        'cat_features' : []
    },

    'QSARbio' : {
        'URL' : 'https://vk.com/doc166590718_613865746',
        'target_name' : 'class',
        'cat_features' : []
    }
}

#SNN Classifier

In [10]:
# handlers for dataloader
class DataSet(Dataset):
  def __init__(self, x, y):
    self.x = x
    self.y = y

  def __len__(self):
    return len(self.x)

  def __getitem__(self, i):
    return torch.tensor(self.x[i]), torch.tensor(self.y[i])

class Sampler:
  def __init__(self, X, y):
    self.dataset = DataSet(X, y)
  def sample(self, batch_size):
    n = len(self.dataset)
    idxs = torch.randperm(n)
    for i in range(0, n, batch_size):
      yield self.dataset[idxs[i: i + batch_size]]

In [11]:
def layer(input, output):
  return nn.Sequential(nn.Linear(input, output), nn.SELU(), nn.AlphaDropout(p=0.2))

In [12]:
class MLP(nn.Module):
  """
  Main model for SNN classifier.

  Parameters:
  ----------------------
  n_input : int
    The input size.
  n_hidden : list
    This list describe hidden layers of SNN. 
    The len of list is the number of hidden layers.
    Each element shows how many neurons are contained in the corresponding layer.
  n_output : int
    The output size.
      
  """
  def __init__(self, n_input, n_hidden, n_output):
    super(MLP, self).__init__()
    self.n_output = n_output
    n_hidden = [n_input] + n_hidden
    layers = [layer(n_hidden[i], n_hidden[i+1]) for i in range(len(n_hidden) - 1)]
    layers.append(nn.Sequential(nn.Linear(n_hidden[-1], n_output)))
    
    self.model = nn.Sequential(*layers)
    
  def forward(self, x):
    x = self.model(x)
    return x

#Оптимизация гиперпараметров с помощью Optuna

In [13]:
! pip install optuna

Collecting optuna
  Downloading optuna-2.9.1-py3-none-any.whl (302 kB)
[?25l[K     |█                               | 10 kB 17.0 MB/s eta 0:00:01[K     |██▏                             | 20 kB 6.2 MB/s eta 0:00:01[K     |███▎                            | 30 kB 4.6 MB/s eta 0:00:01[K     |████▎                           | 40 kB 4.4 MB/s eta 0:00:01[K     |█████▍                          | 51 kB 2.2 MB/s eta 0:00:01[K     |██████▌                         | 61 kB 2.4 MB/s eta 0:00:01[K     |███████▋                        | 71 kB 2.5 MB/s eta 0:00:01[K     |████████▋                       | 81 kB 2.8 MB/s eta 0:00:01[K     |█████████▊                      | 92 kB 2.9 MB/s eta 0:00:01[K     |██████████▉                     | 102 kB 2.3 MB/s eta 0:00:01[K     |████████████                    | 112 kB 2.3 MB/s eta 0:00:01[K     |█████████████                   | 122 kB 2.3 MB/s eta 0:00:01[K     |██████████████                  | 133 kB 2.3 MB/s eta 0:00:01[K    

In [14]:
import optuna
from optuna.trial import TrialState

In [15]:
DEVICE = torch.device("cuda")
BATCHSIZE = 128
DIR = os.getcwd()
EPOCHS = 10
LOSS = nn.CrossEntropyLoss()

dataset_info = datasets['adult']
X_train, X_test, X_valid, y_train, y_test, y_valid = load_dataset(dataset_info['URL'], dataset_info['target_name'], dataset_info['cat_features'], val_size=0.2)

CLASSES = len(set(y_train))

In [20]:
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_normal_(tensor, gain=nn.init.calculate_gain('selu'))
        torch.nn.init.zero_(m.bias)

In [21]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 5)
    layers = []
 
    in_features = X_train.shape[1]
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 4, in_features)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.SELU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.05, 0.5)
        layers.append(nn.AlphaDropout(p))

        in_features = out_features
    layers.append(nn.Linear(in_features, CLASSES))
    # layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [22]:
def print_roc_auc_score(X, y, model):
  X = torch.tensor(X)
  X = X.to(DEVICE)
  print('ROC-AUC Score is: ', roc_auc_score(y_test, nn.Softmax(dim=1)(model(X)).argmax(1).to('cpu')))

In [23]:
def objective(trial):

    # Generate the model.
    model = define_model(trial).to(DEVICE)
    # And init it
    model.apply(weights_init)

    # Generate the optimizers
    # We optimize the learning rate and optimizers
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the dataset
    train_sampler = Sampler(X_train, y_train)
    valid_sampler = Sampler(X_valid, y_valid)

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for data,target in train_sampler.sample(BATCHSIZE):

            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = LOSS(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            
            # Uncomment this, if you want to minimize loss function
            # 
            # for data,target in valid_sampler.sample(BATCHSIZE):
            #     data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            #     output = model(data)
            #     loss = LOSS(output, target)
            # 

            # Uncomment this, if you want to maximize auc roc 
            # 
            out = model(torch.tensor(X_valid).to(DEVICE))
            y_pred = nn.Softmax(dim=1)(out).argmax(1).to('cpu')
            roc_auc = roc_auc_score(y_valid, y_pred)
            # 


        trial.report(roc_auc, epoch)
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return roc_auc

In [24]:
study = optuna.create_study(
    # case for auc roc
    direction="maximize",
    # Successive Halving is a bandit-based algorithm to identify the best one among multiple configurations
    pruner=optuna.pruners.SuccessiveHalvingPruner(min_early_stopping_rate=3),
    # Sampler using TPE (Tree-structured Parzen Estimator) algorithm.
    sampler=optuna.samplers.TPESampler(seed=42)
)

# We start the optimization
study.optimize(objective, n_trials=10)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

[32m[I 2021-10-01 16:23:45,965][0m A new study created in memory with name: no-name-4d2ddf02-e4c8-41c2-9914-5d3e2da7d4ed[0m
[32m[I 2021-10-01 16:23:49,598][0m Trial 0 finished with value: 0.5 and parameters: {'n_layers': 2, 'n_units_l0': 14, 'dropout_l0': 0.3793972738151323, 'n_units_l1': 10, 'dropout_l1': 0.12020838819909643, 'optimizer': 'SGD', 'lr': 0.002537815508265664}. Best is trial 0 with value: 0.5.[0m
[32m[I 2021-10-01 16:23:54,234][0m Trial 1 finished with value: 0.5032076984763432 and parameters: {'n_layers': 4, 'n_units_l0': 4, 'dropout_l0': 0.48645943347289744, 'n_units_l1': 4, 'dropout_l1': 0.4245991883601898, 'n_units_l2': 4, 'dropout_l2': 0.14555259980522428, 'n_units_l3': 4, 'dropout_l3': 0.13182123524319528, 'optimizer': 'SGD', 'lr': 0.0005342937261279777}. Best is trial 1 with value: 0.5032076984763432.[0m
[32m[I 2021-10-01 16:23:58,226][0m Trial 2 finished with value: 0.5008019246190858 and parameters: {'n_layers': 2, 'n_units_l0': 10, 'dropout_l0': 0.112

Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  0
  Number of complete trials:  10
Best trial:
  Value:  0.5216744258893562
  Params: 
    n_layers: 5
    n_units_l0: 9
    dropout_l0: 0.10381741067223577
    n_units_l1: 8
    dropout_l1: 0.3923532718776038
    n_units_l2: 6
    dropout_l2: 0.39693523097955247
    n_units_l3: 5
    dropout_l3: 0.28522977322189735
    n_units_l4: 4
    dropout_l4: 0.061438607034842836
    optimizer: SGD
    lr: 0.00018089390092767128


#Тестирование модели

In [25]:
class NNClassifier:
  """
  The SNN model with scikit-learn interface.

  Parameters:
  -----------------------------
  is_swats : bool
  SWATS allows you to change optimizer in learning process

  swats_n_epochs : int
  At what epoch SWATS should be applied

  """
  def __init__(self, MLP, batch_size, max_epochs, loss, optimizer, is_swats=True, swats_n_epochs=10, lr=0.1, device='cuda'):
    self.MLP = MLP.to(device)
    self.sampler = Sampler
    self.batch_size = batch_size
    self.optimizer = optimizer(self.MLP.parameters(), lr=lr)
    self.is_swats = is_swats
    self.swats_n_epochs = swats_n_epochs
    self.lr = lr
    self.loss = loss
    self.device = device
    self.max_epochs = max_epochs
  
  def fit(self, X_train, y_train):
    self.MLP.train()
    for i in range(self.max_epochs):
      sum_loss = 0

      if self.is_swats and i == (self.max_epochs - self.swats_n_epochs):
        self.optimizer = torch.optim.AdamW(self.MLP.parameters())

      # for g in self.optimizer.param_groups:
      #   g['lr'] = g['lr'] / 1.5
      
      for X,y in self.sampler(X_train, y_train).sample(self.batch_size):
        X, y = X.to(self.device), y.to(self.device)

        pred = self.MLP.forward(X)
        loss = self.loss(pred, y)
        sum_loss += loss.item()
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
      
      print('Epoch: {} Train loss: {:.5f}'.format(i, sum_loss / (len(y_train) /self.batch_size)))
      
  def predict(self, X):
    self.MLP.eval()
    with torch.no_grad():
      return self.predict_proba(X).argmax(1)
    
  def predict_proba(self, X):
    self.MLP.eval()
    with torch.no_grad():
      X = torch.tensor(X)
      X = X.to(self.device)
      return nn.Softmax(dim=1)(self.MLP.forward(X))

  def score(self, X_test, y_test):
    self.MLP.eval()
    size = len(y_test)
    test_loss, correct = 0, 0

    with torch.no_grad():
      for X,y in self.sampler(X_test, y_test).sample(self.batch_size):
        X, y = X.to(self.device), y.to(self.device)
        pred = self.MLP.forward(X)
        test_loss += self.loss(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= len(y_test) / self.batch_size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [26]:
n_input = X_train.shape[1]
n_output = len(set(y_train))
n_hidden = [6, 5, 5]
print(n_input, n_output)
print(n_hidden)

14 2
[6, 5, 5]


In [27]:
mlp = MLP(n_input=n_input, 
          n_hidden=n_hidden, 
          n_output=n_output)

mlp.apply(weights_init)

batch_size = 64
epochs = 12
    
net = NNClassifier(mlp, 
                   batch_size=batch_size, 
                   optimizer=torch.optim.Adam,
                   is_swats=False,
                   swats_n_epochs=10,
                   lr=0.0015, 
                   loss=nn.CrossEntropyLoss(),
                   device='cpu', 
                   max_epochs=epochs)

In [28]:
# %%time
%%script false --no-raise-error
net.fit(X_train,y_train)

In [29]:
%%script false --no-raise-error
roc_auc_score(y_test, net.predict(X_test).to('cpu'))

In [30]:
%%script false --no-raise-error
net.score(X_test, y_test)