# University of Liverpool - Ion Switching 

"University of Liverpool - Ion Switching" Kaggle competition! In this competition, contestants are challenged to predict the number of open ion channels based on electrophysiological signals from human cells. This is an important problem because potential solutions can have far-reaching impacts. From human diseases to how climate change affects plants, faster detection of ion channels could greatly accelerate solutions to major world problems.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold, GroupShuffleSplit, LeaveOneGroupOut
import gc
from fastai import *
from fastai.tabular import *
from pytorch_toolbelt import losses as L

In [2]:
# Directory holding the cleaned ("without drift") competition CSVs.
# Edit this single constant to point at your local copy of the data.
CLEAN_DATA_DIR = "/home/shanmugam/fastai/ion/data-without-drift"

train = pd.read_csv(f"{CLEAN_DATA_DIR}/train_clean.csv")
test = pd.read_csv(f"{CLEAN_DATA_DIR}/test_clean.csv")

In [3]:
# Sample submission file. `time` is read as a string so that its values can be
# written back unchanged when the submission CSVs are built later on.
# NOTE(review): absolute local path — adjust to your environment.
ss = pd.read_csv("/home/shanmugam/fastai/ion/sample_submission.csv", dtype={'time':str})

In [4]:
# Tag every row with the frame it came from so train and test can be
# concatenated now and separated again later: 0 = train rows, 2 = test rows.
for frame, origin_flag in ((train, 0), (test, 2)):
    frame['filter'] = origin_flag

In [5]:
# Stack train on top of test with a fresh 0..n-1 index; test rows get NaN
# for `open_channels` because that column only exists in the training frame.
ts1 = pd.concat([train, test], axis=0, sort=False, ignore_index=True)

In [6]:
# Preview the combined frame (train rows have filter == 0, test rows filter == 2).
ts1

Unnamed: 0,time,signal,open_channels,filter
0,0.0001,-2.7600,0.0,0
1,0.0002,-2.8557,0.0,0
2,0.0003,-2.4074,0.0,0
3,0.0004,-3.1404,0.0,0
4,0.0005,-3.1525,0.0,0
...,...,...,...,...
6999995,699.9996,-2.9092,,2
6999996,699.9997,-2.7422,,2
6999997,699.9998,-2.8285,,2
6999998,699.9999,-2.9092,,2


## Feature Engineering

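A new feature is created with `pd.cut`. `np.linspace` generates 15 evenly spaced bin edges over the time range (0 to 700), which define 14 bins; each row is then labelled with the index (0–13) of the bin its `time` value falls into.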

In [11]:
# 15 evenly spaced edges between 0 and 700 -> 14 bins, each 50 time units wide.
np.linspace(0.0000, 700., num=14 + 1)

array([  0.,  50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 550., 600., 650., 700.])

In [8]:
# Preview of the binning: each row's `time` is mapped to a bin index 0..13.
# Unlike the assignment cell that follows, this preview omits `include_lowest=True`,
# so a time of exactly 0 would fall outside the first bin here.
pd.cut(ts1['time'], bins=np.linspace(0.0000, 700., num=14 + 1), labels=list(range(14))).astype(int)

0           0
1           0
2           0
3           0
4           0
           ..
6999995    13
6999996    13
6999997    13
6999998    13
6999999    13
Name: time, Length: 7000000, dtype: int64

In [11]:
# `time2`: index (0..13) of the 50-unit-wide time bin each row falls into.
# include_lowest=True makes the first bin closed on the left as well.
time2_edges = np.linspace(0.0000, 700., num=14 + 1)
ts1['time2'] = pd.cut(
    ts1['time'],
    bins=time2_edges,
    labels=list(range(14)),
    include_lowest=True,
).astype(int)

In [15]:
# Sanity check: count the rows that landed in each `time2` bin.
ts1['time2'].value_counts()

13    500000
12    500000
11    500000
10    500000
9     500000
8     500000
7     500000
6     500000
5     500000
4     500000
3     500000
2     500000
1     500000
0     500000
Name: time2, dtype: int64

In [9]:
np.random.seed(321)

# `group`: split the time axis into 14 * 125 = 1750 equal-width windows and label
# each row with its window index. These windows later become the model's sequences
# and the grouping key for cross-validation.
n_groups = 14*125
group_edges = np.linspace(0.0000, 700., num=n_groups + 1)
ts1['group'] = pd.cut(
    ts1['time'],
    bins=group_edges,
    labels=list(range(n_groups)),
    include_lowest=True,
).astype(int)

In [10]:
# Sanity check: count the rows in each window (`group`).
ts1['group'].value_counts()

1749    4000
586     4000
575     4000
576     4000
577     4000
        ... 
1170    4000
1171    4000
1172    4000
1173    4000
0       4000
Name: group, Length: 1750, dtype: int64

In [15]:
np.random.seed(321)

# Training-only views (filter == 0) used to build the cross-validation splits.
is_train_row = ts1['filter'] == 0
y = ts1.loc[is_train_row, 'open_channels']
group = ts1.loc[is_train_row, 'group']
X = ts1.loc[is_train_row, 'signal']

In [16]:
np.random.seed(321)

# 5-fold split that keeps all rows of a window (`group`) in the same fold.
# Each element of `splits` is a (train_row_positions, val_row_positions) pair.
skf = GroupKFold(n_splits=5)
splits = list(skf.split(X, y, group))

In [17]:
# Model inputs = every column that is not bookkeeping, target, or raw/binned time.
non_feature_cols = ('index', 'filter', 'group', 'open_channels', 'time', 'time2')
use_cols = [name for name in ts1.columns if name not in non_feature_cols]

In [30]:
# Show which columns ended up as model inputs.
use_cols

['signal']

In [18]:
# Replace missing feature values with that column's mean.
# The mean is taken over the combined train + test rows.
feature_means = ts1[use_cols].mean()
ts1[use_cols] = ts1[use_cols].fillna(feature_means)

In [19]:
# Accumulators for per-row class scores: one row per sample, one column per
# class (11 output classes, matching the model's output_size).
n_train_rows = int((ts1['filter'] == 0).sum())
n_test_rows = int((ts1['filter'] == 2).sum())
val_preds_all = np.zeros((n_train_rows, 11))
test_preds_all = np.zeros((n_test_rows, 11))

In [20]:
# Time stamps and window ids of the training rows only.
train_row_mask = ts1['filter'] == 0
times = ts1.loc[train_row_mask, 'time']
groups = ts1.loc[train_row_mask, 'group']

In [21]:
# Convert the row-level CV splits into window-level splits: for each fold keep
# the unique window ids present in its train part and in its validation part.
new_splits = [
    [np.unique(groups[train_rows]), np.unique(groups[val_rows])]
    for train_rows, val_rows in splits
]

In [22]:
# Preview: for every training window, the 2-D array of its feature values
# (one row per sample, one column per entry of `use_cols`).
ts1[ts1['filter']==0].groupby('group').apply(lambda x: x[use_cols].values)

group
0       [[-2.76], [-2.8557], [-2.4074], [-3.1404], [-3...
1       [[-2.7315], [-2.966], [-2.905], [-2.713], [-2....
2       [[-2.7916], [-2.7168], [-2.9530000000000003], ...
3       [[-2.6076], [-2.7435], [-2.798], [-3.069], [-2...
4       [[-2.8839], [-2.9509], [-3.0634], [-3.188], [-...
                              ...                        
1245    [[3.9097650003934232], [3.5378961686331207], [...
1246    [[3.3394226830619003], [4.49855394039967], [5....
1247    [[5.48969729903853], [6.9973286257316065], [5....
1248    [[4.369009785352983], [5.399041161614716], [5....
1249    [[1.3412809287908494], [3.3961123348033384], [...
Length: 1250, dtype: object

In [26]:
def _stack_windows(frame, columns):
    """Return an array with one entry per `group`, each holding that group's `columns` values."""
    per_window = frame.groupby('group').apply(lambda x: x[columns].values)
    return np.array(list(per_window))


# One stacked array each for the training features, the test features and the
# training targets.
# NOTE: `test` is rebound here from the loaded DataFrame to a NumPy array.
train_rows = ts1[ts1['filter']==0]
test_rows = ts1[ts1['filter']==2]
trainval = _stack_windows(train_rows, use_cols)
test = _stack_windows(test_rows, use_cols)
trainval_y = _stack_windows(train_rows, ['open_channels'])

In [27]:
# Shape check before transposing: (windows, samples per window, features).
trainval.shape

(1250, 4000, 1)

In [28]:
# Reorder to batch x channels x length (B x C x L) by swapping the last two axes.
# NOTE: re-running this cell swaps the axes back again.
trainval = np.swapaxes(trainval, 1, 2)
test = np.swapaxes(test, 1, 2)

In [29]:
# Shape check after transposing: (windows, features, samples per window).
trainval.shape

(1250, 1, 4000)

In [17]:
# Drop the trailing singleton axis of the targets -> (windows, window length),
# and create all-zero placeholder labels of matching length for the test windows.
n_train_windows, window_len = trainval_y.shape[:2]
trainval_y = trainval_y.reshape((n_train_windows, window_len))
test_y = np.zeros((test.shape[0], window_len))

In [18]:
#Credits https://www.kaggle.com/pradeeppathak9/gamma-log-facies-type-prediction

class EarlyStopping:
    """Patience-based early stopping that checkpoints the best weights seen so far.

    Call the instance once per evaluation with the current score and the model.
    When the score improves on the best one by more than `delta`, the model's
    state dict is saved to `checkpoint_path` and the patience counter resets.
    Otherwise the counter grows, and the call returns True once it reaches
    `patience`.

    Args:
        patience: number of consecutive non-improving calls before stopping.
        delta: margin by which a score must beat the best score to count.
        checkpoint_path: file the best state dict is written to.
        is_maximize: True if larger scores are better, False if smaller are.
    """

    def __init__(self, patience=5, delta=0, checkpoint_path='checkpoint.pt', is_maximize=True):
        self.patience = patience
        self.delta = delta
        self.checkpoint_path = checkpoint_path
        self.counter = 0
        self.best_score = None
        self.is_maximize = is_maximize

    def load_best_weights(self, model):
        """Load the checkpointed state dict into `model` (in place)."""
        best_state = torch.load(self.checkpoint_path)
        model.load_state_dict(best_state)

    def _is_improvement(self, score):
        """Return True if `score` beats the best score so far by more than `delta`."""
        if self.best_score is None:
            return True
        if self.is_maximize:
            return score > self.best_score + self.delta
        return score < self.best_score - self.delta

    def __call__(self, score, model):
        """Record `score`; return True when training should stop, else False."""
        if self._is_improvement(score):
            torch.save(model.state_dict(), self.checkpoint_path)
            self.best_score = score
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience

    
class Seq2SeqRnn(nn.Module):
    """GRU sequence model with a per-timestep fully connected head.

    The input is a batch of sequences laid out as (batch, channels, length).
    It is permuted to (batch, length, channels), run through a GRU, and every
    timestep is then passed through optional hidden linear layers (ReLU +
    dropout) and a final linear layer producing `output_size` scores.

    Args:
        input_size: number of input channels (features per timestep).
        seq_len: sequence length; stored on the instance but not used in `forward`.
        hidden_size: GRU hidden size per direction.
        output_size: number of scores produced per timestep.
        num_layers: number of stacked GRU layers.
        bidirectional: whether the GRU runs in both directions.
        dropout: dropout probability used in the head (after the GRU output and
            after every hidden layer). The GRU's own inter-layer dropout is
            fixed at 0.5 and is not controlled by this argument.
        hidden_layers: widths of the hidden linear layers; pass None or an empty
            list to connect the GRU output directly to the output layer.
    """

    def __init__(self, input_size, seq_len, hidden_size, output_size, num_layers=1, bidirectional=False, dropout=.3,
            hidden_layers = [100, 200]):

        super().__init__()
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                          bidirectional=bidirectional, batch_first=True, dropout=0.5)

        # Width of the GRU output per timestep (both directions are concatenated).
        rnn_out_size = hidden_size*2 if bidirectional else hidden_size

        if hidden_layers and len(hidden_layers):
            first_layer = nn.Linear(rnn_out_size, hidden_layers[0])
            self.hidden_layers = nn.ModuleList(
                [first_layer] + [nn.Linear(hidden_layers[i], hidden_layers[i+1]) for i in range(len(hidden_layers) - 1)]
            )
            for layer in self.hidden_layers:
                nn.init.kaiming_normal_(layer.weight.data)

            # Not used in `forward`; kept so the parameter set (and therefore
            # saved state dicts) stays the same.
            self.intermediate_layer = nn.Linear(hidden_layers[-1], self.input_size)

            self.output_layer = nn.Linear(hidden_layers[-1], output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        else:
            # No hidden layers: the head maps the GRU output straight to the scores.
            # (The original referenced an undefined name `hidden_siz` here.)
            self.hidden_layers = nn.ModuleList()
            self.intermediate_layer = nn.Linear(rnn_out_size, self.input_size)
            self.output_layer = nn.Linear(rnn_out_size, output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)

        self.activation_fn = torch.relu
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a (batch, channels, length) tensor to (batch, length, output_size) scores."""
        # The GRU is batch_first and expects (batch, length, channels).
        x = x.permute(0,2,1)
        outputs, hidden = self.rnn(x)

        x = self.dropout(self.activation_fn(outputs))
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)

        x = self.output_layer(x)

        return x


class IonDataset(Dataset):
    """Dataset of signal windows and their per-timestep labels.

    Args:
        data: indexable collection of signal windows; the flip augmentation
            reverses dim 1, so each item is expected to be a 2-D tensor
            (assumed channels x length — confirm against the caller).
        labels: indexable collection of per-timestep label arrays (NumPy).
        training: stored on the instance; not read by this class.
        transform: stored on the instance; not read by this class.
        flip: probability of reversing a window (and its labels) in time.
        noise_level: used both as the probability of applying multiplicative
            noise and as the half-width of the uniform noise range around 1.
        class_split: probability of applying the external `class_split` helper.
    """

    def __init__(self, data, labels, training=True, transform=None, flip=0.5, noise_level=0, class_split=0.0):
        self.data = data
        self.labels = labels
        self.transform = transform
        self.training = training
        self.flip = flip
        self.noise_level = noise_level
        self.class_split = class_split

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return `[data, labels]` for item `idx`, with labels cast to int."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        data = self.data[idx]
        labels = self.labels[idx]
        if np.random.rand() < self.class_split:
            # NOTE(review): a module-level `class_split(data, labels)` helper is not
            # defined in the visible notebook; this branch needs it to exist.
            data, labels = class_split(data, labels)
        if np.random.rand() < self.noise_level:
            # One random scale factor per timestep, sized from the window itself
            # (the original hard-coded 10000, which cannot broadcast against
            # windows of any other length).
            scale = torch.FloatTensor(data.shape[-1]).uniform_(1-self.noise_level, 1+self.noise_level)
            data = data * scale
        if np.random.rand() < self.flip:
            data = torch.flip(data, dims=[1])
            labels = np.flip(labels, axis=0).copy().astype(int)

        return [data, labels.astype(int)]

In [19]:
# Convert the stacked window arrays to float32 tensors for the model.
trainval = torch.as_tensor(trainval, dtype=torch.float32)
test = torch.as_tensor(test, dtype=torch.float32)

In [24]:
from fastai.text import *

The raw windows are wrapped in a dataset and passed into dataloaders, which are then combined into a fastai `DataBunch`.

A `Learner` is then created, which ties together the loss function, optimizer, data, model architecture, and metrics.

A cyclical learning rate is used to help find a good learning rate. fastai's `fit_one_cycle` drives the PyTorch training loop: it takes care of the epochs, the weight updates, and the cyclical learning-rate schedule.

In [23]:
# Single hold-out run. The fold is chosen explicitly so the cell also works on a
# fresh kernel (the original relied on `train_index` / `val_index` left over from
# running the K-fold loop further down).
index = 0
train_index, val_index = new_splits[index]

batchsize = 16
train_dataset = IonDataset(trainval[train_index],  trainval_y[train_index], flip=False, noise_level=0.0, class_split=0.0)
train_dataloader = DataLoader(train_dataset, batchsize, shuffle=True, num_workers=8, pin_memory=True)

valid_dataset = IonDataset(trainval[val_index],  trainval_y[val_index], flip=False)
valid_dataloader = DataLoader(valid_dataset, batchsize, shuffle=False, num_workers=4, pin_memory=True)

# Test windows carry all-zero placeholder labels (`test_y`); only the signal is used.
test_dataset = IonDataset(test,  test_y, flip=False, noise_level=0.0, class_split=0.0)
test_dataloader = DataLoader(test_dataset, batchsize, shuffle=False, num_workers=8, pin_memory=True)

# Per-row class scores for this run: 2,000,000 test rows x 11 classes.
test_preds_iter = np.zeros((2000000, 11))
it = 0

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = Seq2SeqRnn(input_size=trainval.shape[1], seq_len=4000, hidden_size=128, output_size=11, num_layers=2,
                   hidden_layers=[64,64,64], bidirectional=True).to(device)

tab_db = DataBunch(train_dataloader, valid_dataloader, test_dl=test_dataloader)
learn = Learner(tab_db, model, metrics=accuracy)
learn.loss_func = L.FocalLoss()

# Warm-up: five epochs of one-cycle training without explicit weight decay.
learn.fit_one_cycle(5, max_lr=1e-03)

epoch,train_loss,valid_loss,accuracy,time
0,1.828502,1.199452,0.258593,00:40
1,1.159508,0.614697,0.413528,00:38
2,0.81803,0.481437,0.565702,00:38
3,0.653011,0.422544,0.627596,00:38
4,0.586374,0.413832,0.63218,00:38


Adding weight decay (which is closely related to L2 regularization).

In [24]:
# Continue training the same learner: a 20-epoch one-cycle schedule with weight decay 0.3.
# The trailing comment is a log row pasted from an earlier run
# (presumably epoch, train_loss, valid_loss, accuracy, time).
# NOTE(review): the saved output below lists only 10 epochs for this 20-epoch call.
learn.fit_one_cycle(20,max_lr=1e-03,wd=0.3) #4 	0.168433 	0.088413 	0.952356 	00:37

epoch,train_loss,valid_loss,accuracy,time
0,0.559647,0.405109,0.636881,00:38
1,0.54306,0.379792,0.674035,00:38
2,0.502991,0.33892,0.772469,00:37
3,0.441368,0.270398,0.839622,00:38
4,0.366526,0.187199,0.862911,00:38
5,0.290662,0.137177,0.899491,00:38
6,0.23359,0.109309,0.93797,00:38
7,0.19196,0.089051,0.948371,00:38
8,0.16718,0.081339,0.954287,00:38
9,0.14817,0.075588,0.957227,00:38


In [28]:
# Five more epochs on the same learner with a stronger weight decay of 0.5.
learn.fit_one_cycle(5,max_lr=1e-03,wd=0.5)

epoch,train_loss,valid_loss,accuracy,time
0,0.099925,0.063498,0.958258,00:38
1,0.099063,0.062657,0.959556,00:38
2,0.097159,0.060038,0.960665,00:38
3,0.09459,0.059969,0.961173,00:38
4,0.093994,0.059637,0.961197,00:38


In [29]:
# Five more epochs with the weight decay lowered to 0.2.
# The trailing comment is a log row pasted from an earlier run
# (presumably epoch, train_loss, valid_loss, accuracy, time).
learn.fit_one_cycle(5,max_lr=1e-03,wd=0.2) #4 	0.105642 	0.062846 	0.959638 	00:37

epoch,train_loss,valid_loss,accuracy,time
0,0.095697,0.060092,0.960504,00:38
1,0.097109,0.059292,0.960677,00:38
2,0.09484,0.058318,0.961158,00:38
3,0.093028,0.057904,0.961332,00:38
4,0.09083,0.057838,0.961458,00:38


In [121]:
# A longer 20-epoch cycle at the same peak learning rate with weight decay 0.2.
# NOTE(review): the saved output below lists only 10 epochs for this 20-epoch call.
learn.fit_one_cycle(20,max_lr=1e-03,wd=0.2)

epoch,train_loss,valid_loss,accuracy,time
0,0.077504,0.052646,0.965893,00:37
1,0.075643,0.052545,0.965881,00:37
2,0.075885,0.052111,0.966223,00:37
3,0.074461,0.052644,0.965718,00:37
4,0.072923,0.051417,0.96574,00:37
5,0.071123,0.050809,0.965617,00:37
6,0.069802,0.050763,0.965054,00:37
7,0.068603,0.049812,0.96599,00:38
8,0.067997,0.049176,0.966812,00:37
9,0.066466,0.049494,0.96666,00:38


In [126]:
# Fine-tune: five epochs with a 10x smaller peak learning rate (1e-4) and weight decay 0.4.
# NOTE(review): the saved output below lists 10 epochs although this call requests 5,
# so it appears to come from a different invocation.
learn.fit_one_cycle(5,max_lr=1e-04,wd=0.4)

epoch,train_loss,valid_loss,accuracy,time
0,0.058972,0.047453,0.967437,00:37
1,0.060869,0.047358,0.967298,00:38
2,0.059651,0.047538,0.966973,00:37
3,0.060599,0.048216,0.966734,00:38
4,0.062677,0.047507,0.966986,00:37
5,0.108158,0.054906,0.964487,00:37
6,0.077356,0.048419,0.966872,00:37
7,0.066978,0.047832,0.966939,00:38
8,0.062759,0.047731,0.967206,00:38
9,0.062792,0.047389,0.967272,00:38


In [129]:
# Final polish: five epochs at a very small peak learning rate (1e-5), weight decay 0.4.
learn.fit_one_cycle(5,max_lr=1e-05,wd=0.4)

epoch,train_loss,valid_loss,accuracy,time
0,0.060139,0.046635,0.967425,00:40
1,0.059982,0.046715,0.967444,00:37
2,0.059452,0.046654,0.96744,00:37
3,0.058777,0.046662,0.967441,00:37
4,0.058645,0.046662,0.967451,00:37


In [30]:
# Five epochs at half the original peak learning rate (1e-3 / 2 = 5e-4), weight decay 0.2.
# NOTE(review): this cell's execution count (30) is lower than that of the cells
# above it, so the cells were not run in the order they appear.
learn.fit_one_cycle(5,max_lr=1e-03/2,wd=0.2)

epoch,train_loss,valid_loss,accuracy,time
0,0.088762,0.05793,0.961434,00:38
1,0.089389,0.057414,0.961286,00:37
2,0.088169,0.057249,0.96143,00:38
3,0.089261,0.056769,0.961456,00:38
4,0.088912,0.056954,0.961461,00:47


In [132]:
# Checkpoint the current learner state under the name 'model938'.
learn.save('model938')

In [31]:
# Rebuild the DataBunch from the existing dataloaders.
# NOTE(review): `tab_db` is not read again before the K-fold cell reassigns it;
# the prediction cell that follows iterates `test_dataloader` directly.
tab_db = DataBunch(train_dataloader,valid_dataloader,test_dl=test_dataloader)

In [90]:
import sys

In [127]:
# Predict the test set with the single model trained above and write a submission.

# Fold id used in the output file names; the original read a leftover `index`
# from the K-fold loop, which does not exist on a fresh kernel.
index = 0

# Make inference deterministic: switch dropout off explicitly instead of relying
# on whatever mode the last training call left the model in.
model.eval()

pred_list = []
with torch.no_grad():
    for x, _ in test_dataloader:  # test labels are placeholders and are ignored
        x = x.to(device)

        predictions = model(x[:,:trainval.shape[1],:])
        # Flatten (batch, length, classes) -> (batch * length, classes): one row per test sample.
        predictions_ = predictions.view(-1, predictions.shape[-1])

        pred_list.append(F.softmax(predictions_, dim=1).cpu().numpy())
test_preds = np.vstack(pred_list)

test_preds_iter += test_preds
test_preds_all += test_preds

os.makedirs("./predictions/test", exist_ok=True)
np.save('./predictions/test/gru_clean_fold_{}_iter_{}_raw.npy'.format(index, it), arr=test_preds_iter)
np.save('./predictions/test/gru_clean_fold_{}_raw.npy'.format(index), arr=test_preds_all)

# Renormalise the accumulated scores to per-row probabilities, then take the
# most likely class as the predicted number of open channels.
test_preds_all = test_preds_all/np.sum(test_preds_all, axis=1)[:, None]
test_pred_frame = pd.DataFrame({'time': ss['time'].astype(str),
                                'open_channels': np.argmax(test_preds_all, axis=1)})
test_pred_frame.to_csv("./sub_gru.csv", index=False)
                

In [124]:
# Shape check: one row of class probabilities per test sample.
test_preds.shape

(2000000, 11)

This submission achieved a public leaderboard score of **93.6** accuracy.

Next, the same setup is trained with K-fold cross-validation; everything else is the same as before.

In [21]:
# K-fold ensemble: train one model per fold, predict the test set with each,
# and average the per-class probabilities across folds.

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
batchsize = 16

# Start from an empty accumulator so the average contains only these folds,
# not the scores left behind by the single-model cell or a previous run of this cell.
test_preds_all = np.zeros_like(test_preds_all)

for index, (train_index, val_index) in enumerate(new_splits):
    print("Fold : {}".format(index))

    train_dataset = IonDataset(trainval[train_index],  trainval_y[train_index], flip=False, noise_level=0.0, class_split=0.0)
    train_dataloader = DataLoader(train_dataset, batchsize, shuffle=True, num_workers=8, pin_memory=True)

    valid_dataset = IonDataset(trainval[val_index],  trainval_y[val_index], flip=False)
    valid_dataloader = DataLoader(valid_dataset, batchsize, shuffle=False, num_workers=4, pin_memory=True)

    test_dataset = IonDataset(test,  test_y, flip=False, noise_level=0.0, class_split=0.0)
    test_dataloader = DataLoader(test_dataset, batchsize, shuffle=False, num_workers=8, pin_memory=True)
    test_preds_iter = np.zeros((2000000, 11))

    # Single repeat per fold; raise the range to average several runs per fold.
    for it in range(1):
        model = Seq2SeqRnn(input_size=trainval.shape[1], seq_len=4000, hidden_size=64, output_size=11, num_layers=2,
                           hidden_layers=[64,64,64], bidirectional=True).to(device)
        # Pass the test loader by keyword: the third positional parameter of
        # DataBunch is `fix_dl`, not `test_dl`.
        tab_db = DataBunch(train_dataloader, valid_dataloader, test_dl=test_dataloader)
        learn = Learner(tab_db, model, metrics=accuracy)
        learn.loss_func = L.FocalLoss()
        learn.fit_one_cycle(75, max_lr=1e-03, wd=0.3)
        learn.fit_one_cycle(5, max_lr=1e-05, wd=0.4)

        # Switch dropout off explicitly before predicting.
        model.eval()
        pred_list = []
        with torch.no_grad():
            for x, _ in test_dataloader:  # test labels are placeholders and are ignored
                x = x.to(device)

                predictions = model(x[:,:trainval.shape[1],:])
                # Flatten (batch, length, classes) -> (batch * length, classes).
                predictions_ = predictions.view(-1, predictions.shape[-1])

                pred_list.append(F.softmax(predictions_, dim=1).cpu().numpy())
        test_preds = np.vstack(pred_list)

        test_preds_iter += test_preds
        test_preds_all += test_preds

# Renormalise the summed fold probabilities and pick the most likely class per row.
test_preds_all = test_preds_all/np.sum(test_preds_all, axis=1)[:, None]
test_pred_frame = pd.DataFrame({'time': ss['time'].astype(str),
                                'open_channels': np.argmax(test_preds_all, axis=1)})
test_pred_frame.to_csv("./gru_preds_f.csv", index=False)

Fold : 0


epoch,train_loss,valid_loss,accuracy,time
0,1.800372,1.537654,0.07467,00:40
1,1.564714,1.150961,0.37585,00:37
2,1.309197,0.853187,0.21526,00:38
3,1.110981,0.745089,0.420607,00:37
4,0.959301,0.643196,0.525761,00:37
5,0.82207,0.522439,0.579138,00:37
6,0.695421,0.447077,0.649214,00:37
7,0.592935,0.397174,0.701082,00:37
8,0.511972,0.339566,0.748374,00:37
9,0.440347,0.282752,0.789178,00:37


epoch,train_loss,valid_loss,accuracy,time
0,0.056674,0.048306,0.966307,00:37
1,0.058176,0.048337,0.966304,00:37
2,0.060025,0.04833,0.96631,00:37
3,0.060173,0.048315,0.966295,00:37
4,0.059632,0.048303,0.966291,00:37


Fold : 1


epoch,train_loss,valid_loss,accuracy,time
0,2.229596,1.844389,0.029044,00:38
1,1.936204,1.570099,0.055949,00:37
2,1.669754,1.204423,0.110272,00:37
3,1.389547,0.877489,0.351609,00:37
4,1.143318,0.677635,0.62356,00:37
5,0.945871,0.549111,0.654419,00:37
6,0.790118,0.467756,0.664961,00:37
7,0.667228,0.406013,0.681354,00:37
8,0.5651,0.352248,0.727535,00:37
9,0.490268,0.300493,0.794018,00:37


epoch,train_loss,valid_loss,accuracy,time
0,0.060788,0.044396,0.9691,00:37
1,0.05985,0.044398,0.969085,00:37
2,0.059128,0.044402,0.969137,00:37
3,0.059073,0.044407,0.969067,00:37
4,0.059233,0.044406,0.969083,00:37


Fold : 2


epoch,train_loss,valid_loss,accuracy,time
0,1.949904,1.669874,0.015267,00:37
1,1.680364,1.298646,0.079328,00:37
2,1.400155,0.90079,0.334868,00:37
3,1.160022,0.730737,0.335544,00:37
4,0.992547,0.638931,0.497178,00:37
5,0.869206,0.548889,0.558817,00:37
6,0.754637,0.477314,0.653046,00:37
7,0.643802,0.385188,0.708576,00:37
8,0.547274,0.332317,0.75389,00:37
9,0.467647,0.282572,0.791383,00:37


epoch,train_loss,valid_loss,accuracy,time
0,0.060285,0.045804,0.968059,00:37
1,0.059219,0.045802,0.968094,00:37
2,0.058288,0.045811,0.968101,00:37
3,0.058701,0.045796,0.968118,00:37
4,0.060305,0.045793,0.96811,00:37


Fold : 3


epoch,train_loss,valid_loss,accuracy,time
0,2.114261,1.758155,0.366148,00:37
1,1.841714,1.485049,0.305515,00:37
2,1.590619,1.151063,0.244123,00:37
3,1.335566,0.857526,0.253997,00:37
4,1.125165,0.705303,0.501842,00:37
5,0.948132,0.572477,0.586977,00:37
6,0.783835,0.463284,0.593499,00:37
7,0.653329,0.405501,0.665221,00:37
8,0.558347,0.346628,0.732759,00:37
9,0.478151,0.288473,0.790135,00:37


epoch,train_loss,valid_loss,accuracy,time
0,0.059977,0.046825,0.967453,00:37
1,0.058216,0.046829,0.967452,00:37
2,0.05921,0.046844,0.967453,00:37
3,0.059045,0.046837,0.967451,00:37
4,0.059425,0.04683,0.967449,00:37


Fold : 4


epoch,train_loss,valid_loss,accuracy,time
0,2.025953,1.811958,0.054616,00:37
1,1.827508,1.566131,0.313023,00:37
2,1.577731,1.117879,0.350683,00:37
3,1.287897,0.789196,0.418521,00:37
4,1.06457,0.637428,0.629691,00:37
5,0.893298,0.52031,0.670931,00:37
6,0.743724,0.433586,0.646102,00:37
7,0.620338,0.375509,0.702091,00:37
8,0.523538,0.326683,0.753695,00:37
9,0.446661,0.272638,0.795117,00:37


epoch,train_loss,valid_loss,accuracy,time
0,0.060503,0.047016,0.967406,00:37
1,0.060938,0.046982,0.967391,00:37
2,0.059739,0.047028,0.967422,00:37
3,0.061194,0.047009,0.967407,00:37
4,0.06069,0.047017,0.967403,00:37


This submission achieved a public leaderboard score of **93.8** accuracy.