#### Load Data

In [1]:
import pycaret

In [2]:
pycaret.__version__

'2.3.1'

In [3]:
import numpy as np

In [4]:
from pycaret.datasets import get_data
index = get_data('index')

Unnamed: 0,Dataset,Data Types,Default Task,Target Variable 1,Target Variable 2,# Instances,# Attributes,Missing Values
0,anomaly,Multivariate,Anomaly Detection,,,1000,10,N
1,france,Multivariate,Association Rule Mining,InvoiceNo,Description,8557,8,N
2,germany,Multivariate,Association Rule Mining,InvoiceNo,Description,9495,8,N
3,bank,Multivariate,Classification (Binary),deposit,,45211,17,N
4,blood,Multivariate,Classification (Binary),Class,,748,5,N
5,cancer,Multivariate,Classification (Binary),Class,,683,10,N
6,credit,Multivariate,Classification (Binary),default,,24000,24,N
7,diabetes,Multivariate,Classification (Binary),Class variable,,768,9,N
8,electrical_grid,Multivariate,Classification (Binary),stabf,,10000,14,N
9,employee,Multivariate,Classification (Binary),left,,14999,10,N


In [5]:
data = get_data('electrical_grid')

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [6]:
type(data)

pandas.core.frame.DataFrame

In [7]:
target = "stabf"

In [8]:
data[target].value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [9]:
data[target] = data[target].replace({"unstable":1, "stable":0}).astype(np.int64)

In [10]:
data[target].value_counts()

1    6380
0    3620
Name: stabf, dtype: int64

#### Pycaret Setup

In [11]:
from pycaret.classification import *
clf1 = setup(data = data, 
            target = target,
            train_size = 0.8,
            fold = 5,
            session_id = 123,
            log_experiment = True, 
            experiment_name = 'electrical_grid_1', 
            silent = True)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,stabf
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(10000, 13)"
5,Missing Values,False
6,Numeric Features,12
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


#### Building a Random Forest Model

In [12]:
rf_model = create_model("rf")

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9244,0.9796,0.9667,0.9189,0.9422,0.8331,0.8353
1,0.9275,0.9793,0.9549,0.933,0.9438,0.8417,0.8422
2,0.9225,0.981,0.9608,0.9211,0.9406,0.8294,0.8309
3,0.9081,0.9738,0.9461,0.913,0.9293,0.7983,0.7993
4,0.9044,0.9738,0.9471,0.9071,0.9267,0.7894,0.7909
Mean,0.9174,0.9775,0.9551,0.9186,0.9365,0.8184,0.8197
SD,0.0093,0.0031,0.0079,0.0087,0.0071,0.0206,0.0206


#### Building a Neural Network with SKORCH (PyTorch)

In [13]:
import torch.nn as nn

from skorch import NeuralNetClassifier
from sklearn.pipeline import Pipeline
from skorch.helper import DataFrameTransformer

In [14]:
class Net(nn.Module):
    def __init__(self, num_inputs=12, num_units_d1=200, num_units_d2=100):
        super(Net, self).__init__()

        self.dense0 = nn.Linear(num_inputs, num_units_d1)
        self.nonlin = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(num_units_d1, num_units_d2)
        self.output = nn.Linear(num_units_d2, 2)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = self.nonlin(self.dense1(X))
        X = self.softmax(self.output(X))
        return X
    
### Including this as it throws an error of dtypes for target
class customNLLLoss(nn.Module):
    
    criterion = nn.NLLLoss()

    def __init__(self):
        super().__init__()

    def forward(self, logits, target):
        return self.criterion(logits, target.long())

In [15]:
net = NeuralNetClassifier(
    module=Net,
    criterion=customNLLLoss, ### Including this as it throws an error of dtypes for target
    max_epochs=30,
    lr=0.1,
    batch_size=32,
    train_split=None
)

# Reference: https://github.com/pycaret/pycaret/issues/700#issuecomment-879700610
nn_pipe = Pipeline(
    [
        ("transform", DataFrameTransformer()),
        ("net", net),
    ]
)

Passing the Neural network pipeline to the `create_mopel` function trains the model on the data 

In [16]:
skorch_model = create_model(nn_pipe)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8125,0.8845,0.8696,0.8416,0.8554,0.5891,0.5898
1,0.8231,0.8947,0.9039,0.8329,0.8669,0.6045,0.609
2,0.8181,0.8765,0.9128,0.8219,0.865,0.5888,0.5965
3,0.8119,0.8763,0.9403,0.8,0.8645,0.5635,0.5835
4,0.7906,0.8407,0.9373,0.7793,0.851,0.5086,0.5338
Mean,0.8112,0.8745,0.9128,0.8151,0.8606,0.5709,0.5825
SD,0.0111,0.0182,0.0257,0.0227,0.0062,0.0338,0.0258


In [17]:
best_model = compare_models(include=[skorch_model, rf_model], sort="AUC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
1,Random Forest Classifier,0.9174,0.9775,0.9551,0.9186,0.9365,0.8184,0.8197,0.34
0,NeuralNetClassifier,0.7996,0.8668,0.9161,0.8046,0.8538,0.5375,0.5624,2.772


###### Accessing Model History
We can access the losses, accuracies etc. that are available from the SKORCH model

In [18]:
history = skorch_model["net"].history

In [19]:
history[:, 'train_loss']

[-0.6610610520839691,
 -0.7196382769346237,
 -0.7426587610244751,
 -0.7538452484607696,
 -0.7612256941795349,
 -0.7650839459896087,
 -0.7662127826213837,
 -0.7704247462749482,
 -0.7771787281036378,
 -0.7834100489616393,
 -0.7827098631858825,
 -0.7803924105167389,
 -0.775869559764862,
 -0.7804064474105835,
 -0.7812140264511108,
 -0.7859221305847168,
 -0.7843049790859222,
 -0.7848344831466675,
 -0.7868650348186493,
 -0.7809707291126251,
 -0.7899959633350372,
 -0.7903886778354645,
 -0.7927532296180725,
 -0.7950916113853455,
 -0.7951122634410858,
 -0.7985119786262512,
 -0.7946390352249145,
 -0.7893673963546753,
 -0.7903198027610778,
 -0.7956815068721771]

#### Tuning Neural Network

In [20]:
import torch.optim as optim

In [21]:
skorch_model.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'transform', 'net', 'transform__float_dtype', 'transform__int_dtype', 'transform__treat_int_as_categorical', 'net__module', 'net__criterion', 'net__optimizer', 'net__lr', 'net__max_epochs', 'net__batch_size', 'net__iterator_train', 'net__iterator_valid', 'net__dataset', 'net__train_split', 'net__callbacks', 'net__predict_nonlinearity', 'net__warm_start', 'net__verbose', 'net__device', 'net__classes', 'net__callbacks__epoch_timer', 'net__callbacks__train_loss', 'net__callbacks__train_loss__name', 'net__callbacks__train_loss__lower_is_better', 'net__callbacks__train_loss__on_train', 'net__callbacks__valid_loss', 'net__callbacks__valid_loss__name', 'net__callbacks__valid_loss__lower_is_better', 'net__callbacks__valid_loss__on_train', 'net__callbacks__valid_acc', 'net__callbacks__valid_acc__scoring', 'net__callbacks__valid_acc__lower_is_better', 'net__callbacks__valid_acc__on_train', 'net__callbacks__valid_acc__name', 'net__callbacks__valid_acc__tar

In [22]:
net.get_params().keys()

dict_keys(['module', 'criterion', 'optimizer', 'lr', 'max_epochs', 'batch_size', 'iterator_train', 'iterator_valid', 'dataset', 'train_split', 'callbacks', 'predict_nonlinearity', 'warm_start', 'verbose', 'device', 'classes', 'callbacks__epoch_timer', 'callbacks__train_loss', 'callbacks__train_loss__name', 'callbacks__train_loss__lower_is_better', 'callbacks__train_loss__on_train', 'callbacks__valid_loss', 'callbacks__valid_loss__name', 'callbacks__valid_loss__lower_is_better', 'callbacks__valid_loss__on_train', 'callbacks__valid_acc', 'callbacks__valid_acc__scoring', 'callbacks__valid_acc__lower_is_better', 'callbacks__valid_acc__on_train', 'callbacks__valid_acc__name', 'callbacks__valid_acc__target_extractor', 'callbacks__valid_acc__use_caching', 'callbacks__print_log', 'callbacks__print_log__keys_ignored', 'callbacks__print_log__sink', 'callbacks__print_log__tablefmt', 'callbacks__print_log__floatfmt', 'callbacks__print_log__stralign'])

In [23]:
custom_grid = {
	'net__max_epochs':[20, 30],
	'net__lr': [0.01, 0.05, 0.1],
	'net__module__num_units_d1': [50, 100, 150, 200],
	'net__module__num_units_d2': [50, 100, 150, 200],
	'net__optimizer': [optim.Adam, optim.SGD, optim.RMSprop]}

In [24]:
tuned_skorch_model = tune_model(skorch_model, custom_grid=custom_grid)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7875,0.8505,0.902,0.7931,0.844,0.515,0.5256
1,0.8044,0.87,0.9157,0.8045,0.8565,0.5533,0.5649
2,0.8025,0.8687,0.9314,0.7945,0.8575,0.5422,0.5608
3,0.7812,0.8491,0.8296,0.828,0.8288,0.526,0.526
4,0.7712,0.8399,0.9569,0.7521,0.8422,0.4473,0.4928
Mean,0.7894,0.8556,0.9071,0.7944,0.8458,0.5168,0.534
SD,0.0126,0.0118,0.0429,0.0246,0.0106,0.0372,0.0265


In [25]:
best_model = compare_models(include=[tuned_skorch_model, skorch_model, rf_model], sort="AUC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
2,Random Forest Classifier,0.9174,0.9775,0.9551,0.9186,0.9365,0.8184,0.8197,0.378
1,NeuralNetClassifier,0.8106,0.8797,0.9338,0.8027,0.8629,0.5623,0.5817,2.338
0,NeuralNetClassifier,0.786,0.8521,0.905,0.7907,0.8437,0.5091,0.5218,1.47
