In [30]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [32]:
import torch
from torch import nn
from torch.nn.utils import weight_norm
import torch.optim as optim
import torch.nn.functional as F

from skorch import NeuralNetClassifier, NeuralNet
from skorch.callbacks import EpochScoring
from skorch.callbacks import LRScheduler, EarlyStopping
from torch.optim.lr_scheduler import ReduceLROnPlateau


import warnings
warnings.filterwarnings("ignore")

#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
torch.manual_seed(0)

feature_dictionary_size= 360
num_features = 75

def residual_block(in_features, out_features, p_drop, non_linear = nn.ReLU(), *args, **kwargs):
    return nn.Sequential(
        nn.Dropout(p = p_drop),
        weight_norm(nn.Linear(in_features, out_features)),
        non_linear
    )

class TPSResidual(nn.Module):
    def __init__(self, num_class = 9, dropout = 0.3, linear_nodes=32, linear_out=16, emb_output=4, num_block=3):
        super().__init__()
        self.num_block = num_block
        self.final_module_list = nn.ModuleList()

        self.embedding = nn.Embedding(feature_dictionary_size, emb_output)
        self.flatten = nn.Flatten()

        self.linear = weight_norm(nn.Linear(emb_output * num_features, linear_nodes ))

        for res_num in range(self.num_block):
            self.non_linear = nn.ELU() if res_num %2 else nn.ReLU()
            self.lin_out = linear_out if res_num == (self.num_block - 1) else linear_nodes
            self.final_module_list.append(residual_block(emb_output * num_features + (res_num + 1) * linear_nodes, 
                                self.lin_out, dropout, self.non_linear))
        self.out = nn.Linear(linear_out, num_class)

        # non-linearity - activation function
        self.selu = nn.SELU()
        self.dropout = nn.Dropout(p = dropout)

    def forward(self, x):
        x = torch.tensor(x).to(torch.int64)

        # Embedding
        e = self.embedding(x)
        e = self.flatten(e)

        h1 = self.dropout(e)
        h1 = self.linear(h1)
        h1 = self.selu(h1)

        ri = torch.cat((e, h1), 1)
        for res_num in range(self.num_block):
            rx = self.final_module_list[res_num](ri)
            ri = torch.cat((ri, rx), 1)
        
        return F.softmax(self.out(rx), dim =-1)

lr_scheduler = LRScheduler(policy = ReduceLROnPlateau, monitor = 'valid_loss', mode = 'min', patience = 3, factor = 0.1, verbose = True)
early_stopping = EarlyStopping(monitor='valid_loss', patience = 10, threshold = 0.0001, threshold_mode='rel', lower_is_better=True)

net = NeuralNetClassifier(TPSResidual, device = device, lr = 0.001, max_epochs = 50, callbacks = [lr_scheduler, early_stopping])

In [15]:
df = pd.read_csv("../../data/tabular/train_fold.csv")

In [16]:
df.head()

Unnamed: 0,id,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,target,kfold
0,0,0,0,6,1,0,0,0,0,7,...,0,0,0,0,0,2,0,0,Class_6,0.0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,Class_6,1.0
2,2,0,0,0,0,0,1,0,3,0,...,0,0,0,1,0,0,0,0,Class_2,0.0
3,3,0,0,7,0,1,5,2,2,0,...,4,0,2,2,0,4,3,0,Class_8,1.0
4,4,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Class_2,3.0


In [26]:
kfold = 0

In [28]:
X_train

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74
1,0,0,0,0,0,0,0,0,0,0,...,0,2,0,0,0,0,0,0,1,0
3,0,0,7,0,1,5,2,2,0,1,...,0,0,4,0,2,2,0,4,3,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,15,0,0,0,1,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
6,0,1,2,1,0,2,0,0,0,2,...,0,1,2,0,2,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199994,0,2,1,1,2,0,0,1,0,6,...,3,1,60,2,0,0,7,6,0,0
199996,0,2,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
199997,1,2,0,0,0,2,0,1,8,4,...,4,1,0,1,1,1,0,1,0,0
199998,0,0,2,0,2,1,0,0,3,1,...,0,0,0,3,2,1,0,0,1,0


In [33]:
train_df = df[df['kfold'] != kfold]
valid_df = df[df['kfold']==kfold]
lencoder = LabelEncoder()

X_train = train_df.drop(['id','target','kfold'],axis=1).values.astype('float32')
y_train = lencoder.fit_transform(train_df['target']).astype('int64')

net.fit(X_train, y_train)

  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m2.0569[0m       [32m0.2907[0m        [35m1.9227[0m  40.2589


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=TPSResidual(
    (final_module_list): ModuleList(
      (0): Sequential(
        (0): Dropout(p=0.3, inplace=False)
        (1): Linear(in_features=332, out_features=32, bias=True)
        (2): ReLU()
      )
      (1): Sequential(
        (0): Dropout(p=0.3, inplace=False)
        (1): Linear(in_features=364, out_features=32, bias=True)
        (2): ELU(alpha=1.0)
      )
      (2): Sequential(
        (0): Dropout(p=0.3, inplace=False)
        (1): Linear(in_features=396, out_features=16, bias=True)
        (2): ReLU()
      )
    )
    (embedding): Embedding(360, 4)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear): Linear(in_features=300, out_features=32, bias=True)
    (non_linear): ReLU()
    (out): Linear(in_features=16, out_features=9, bias=True)
    (selu): SELU()
    (dropout): Dropout(p=0.3, inplace=False)
  ),
)