In [1]:
!pip install torchcontrib

Collecting torchcontrib
  Downloading torchcontrib-0.0.2.tar.gz (11 kB)
Building wheels for collected packages: torchcontrib
  Building wheel for torchcontrib (setup.py) ... [?25l- \ done
[?25h  Created wheel for torchcontrib: filename=torchcontrib-0.0.2-py3-none-any.whl size=7532 sha256=b0429249aa34b8862f7a8d7c6c053774776c720c6e24813896a6890246666ac2
  Stored in directory: /root/.cache/pip/wheels/91/58/d0/f03811c3e34e1f14031294b5f30d8693689972af874d1225b8
Successfully built torchcontrib
Installing collected packages: torchcontrib
Successfully installed torchcontrib-0.0.2


In [2]:
import os
os.system('pip install pytorch_toolbelt')
import gc

import math, copy, time
import matplotlib.pyplot as plt
import seaborn
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold, GroupShuffleSplit, LeaveOneGroupOut
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchaudio import transforms, functional
from torch.autograd import Variable
from pytorch_toolbelt import losses as L
from torchcontrib.optim import SWA
import torchcontrib
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

tqdm.pandas()
seaborn.set_context(context="talk")
%matplotlib inline

  from pandas import Panel


# Data preprocessing 

In [3]:
# read data
def read_data():
    """Load the train/test signals, attach the RFC probabilities and load the sample submission.

    Returns:
        (train, test, sub): three DataFrames. `train` and `test` gain eleven
        columns `proba_0` .. `proba_10` copied from the pre-computed
        probability arrays; `sub` is the sample submission file.
    """
    signal_dtypes = {'time': np.float32, 'signal': np.float32}
    train = pd.read_csv('/kaggle/input/clean-kalman/train_clean_kalman.csv',
                        dtype={**signal_dtypes, 'open_channels': np.int32})
    test = pd.read_csv('/kaggle/input/clean-kalman/test_clean_kalman.csv',
                       dtype=dict(signal_dtypes))

    #from https://www.kaggle.com/sggpls/wavenet-with-shifted-rfc-proba and
    # https://www.kaggle.com/c/liverpool-ion-switching/discussion/144645
    Y_train_proba = np.load("/kaggle/input/ion-shifted-rfc-proba/Y_train_proba.npy")
    Y_test_proba = np.load("/kaggle/input/ion-shifted-rfc-proba/Y_test_proba.npy")

    # One feature column per probability column, for both frames.
    for frame, proba in ((train, Y_train_proba), (test, Y_test_proba)):
        for i in range(11):
            frame[f"proba_{i}"] = proba[:, i]

    sub = pd.read_csv('/kaggle/input/liverpool-ion-switching/sample_submission.csv',
                      dtype={'time': np.float32})
    return train, test, sub

# create batches of 4000 observations
def batching(df, batch_size):
    """Assign every row to a consecutive group of `batch_size` rows.

    Rows are bucketed by `df.index // batch_size`; buckets are numbered in
    order of first appearance (`sort=False`) and stored in a new `group`
    column as uint16.

    Args:
        df: DataFrame to label; modified in place.
        batch_size: number of index positions per group.

    Returns:
        The same DataFrame object, with the `group` column added.
    """
    # GroupBy.ngroup() is the documented way to get a per-row group number; the
    # previous `.agg(['ngroup'])` passed a non-reducing function to agg().
    df['group'] = df.groupby(df.index // batch_size, sort=False).ngroup().values
    df['group'] = df['group'].astype(np.uint16)
    return df

# normalize the data (standard scaler). We can also try other scalers for a better score!
def normalize(train, test):
    """Standardise the `signal` column of both frames using the training statistics.

    The mean and standard deviation are taken from `train.signal` only and
    applied to both frames. Both frames are modified in place and returned.
    """
    mu, sigma = train.signal.mean(), train.signal.std()
    for frame in (train, test):
        frame['signal'] = (frame.signal - mu) / sigma
    return train, test

# get lead and lags features
def lag_with_pct_change(df, windows):
    """Add lagged and leading copies of `signal`, computed within each `group`.

    For every window `w` two columns are added: `signal_shift_pos_w` (the
    value `w` rows earlier) and `signal_shift_neg_w` (the value `w` rows
    later). Positions that fall outside the group are filled with 0.

    Returns the same DataFrame, modified in place.
    """
    for window in windows:
        for prefix, offset in (('signal_shift_pos_', window),
                               ('signal_shift_neg_', -1 * window)):
            shifted = df.groupby('group')['signal'].shift(offset)
            df[prefix + str(window)] = shifted.fillna(0)
    return df

# main module to run feature engineering. Here you may want to try adding other features and check whether your score improves :).
def run_feat_engineering(df, batch_size):
    """Run the feature pipeline on one frame: grouping, lead/lag features, squared signal.

    Args:
        df: frame with a `signal` column; modified in place by the helpers.
        batch_size: rows per group, forwarded to `batching`.

    Returns:
        The frame with `group`, the six shift columns (windows 1, 2, 3 in
        both directions) and `signal_2` added.
    """
    grouped = batching(df, batch_size=batch_size)
    # leads and lags for windows 1, 2, 3 -> six extra features
    enriched = lag_with_pct_change(grouped, [1, 2, 3])
    # squared signal
    enriched['signal_2'] = enriched['signal'] ** 2
    return enriched

# fillna with the mean and select features for training
def feature_selection(train, test):
    """Pick the model input columns and fill non-finite values with the feature mean.

    Every `train` column except `index`, `group`, `open_channels` and `time`
    is treated as a feature. Infinities are first turned into NaN, then each
    feature's NaNs are replaced by the mean of that feature over train and
    test combined.

    Returns:
        (train, test, features): cleaned copies of both frames and the list
        of feature column names.
    """
    excluded = ('index', 'group', 'open_channels', 'time')
    features = [col for col in train.columns if col not in excluded]

    # replace() returns new frames, so the caller's frames are left untouched.
    train, test = (frame.replace([np.inf, -np.inf], np.nan) for frame in (train, test))

    for feature in features:
        fill_value = pd.concat([train[feature], test[feature]], axis=0).mean()
        for frame in (train, test):
            frame[feature] = frame[feature].fillna(fill_value)
    return train, test, features


def split(GROUP_BATCH_SIZE=4000, SPLITS=5):
    """Load the data, build features and return group-wise arrays plus CV folds.

    Args:
        GROUP_BATCH_SIZE: rows per group (one group becomes one sequence).
        SPLITS: number of GroupKFold folds.

    Returns:
        (train, test, train_tr, new_splits) where `train` / `test` are arrays
        holding one feature matrix per group, `train_tr` holds the matching
        `open_channels` targets as float32, and `new_splits` is a list of
        `[train_group_ids, valid_group_ids, valid_row_indices]` per fold.
    """
    print('Reading Data Started...')
    train, test, sample_submission = read_data()
    train, test = normalize(train, test)
    print('Reading and Normalizing Data Completed')
    print('Creating Features')
    print('Feature Engineering Started...')
    train = run_feat_engineering(train, batch_size=GROUP_BATCH_SIZE)
    test = run_feat_engineering(test, batch_size=GROUP_BATCH_SIZE)
    train, test, features = feature_selection(train, test)
    print(train.head())
    print('Feature Engineering Completed...')

    target_cols = ['open_channels']
    group = train['group']
    folds = GroupKFold(n_splits=SPLITS).split(train, train[target_cols], group)
    # Translate row-level fold indices into the group ids they cover; the raw
    # validation row indices are kept as the third element.
    new_splits = [
        [np.unique(group[fit_rows]), np.unique(group[valid_rows]), valid_rows]
        for fit_rows, valid_rows in folds
    ]

    print(train.head(), train.shape)
    # One array entry per group, in group order.
    train_tr = np.array(list(train.groupby('group').apply(lambda x: x[target_cols].values))).astype(np.float32)
    train = np.array(list(train.groupby('group').apply(lambda x: x[features].values)))
    test = np.array(list(test.groupby('group').apply(lambda x: x[features].values)))
    print(train.shape, test.shape, train_tr.shape)
    return train, test, train_tr, new_splits

# Dataset class 

In [4]:
class IronDataset(Dataset):
    """Dataset yielding one (sequence, labels) pair per index.

    `data[idx]` is converted to a float tensor and `labels[idx]` to a long
    tensor. The remaining constructor arguments (`training`, `transform`,
    `seq_len`, `flip`, `noise_level`, `class_split`) are stored on the
    instance but not used by `__getitem__`.
    """

    def __init__(self, data, labels, training=True, transform=None, seq_len=4000, flip=0.5, noise_level=0, class_split=0.0):
        self.data = data
        self.labels = labels
        self.transform = transform
        self.training = training
        self.flip = flip
        self.noise_level = noise_level
        self.class_split = class_split
        self.seq_len = seq_len

    def __len__(self):
        """Number of items, i.e. `len(data)`."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return `{'data': float tensor, 'labels': long tensor}` for item `idx`."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # torch.tensor() already yields tensors with requires_grad=False, so the
        # deprecated autograd.Variable wrapper is not needed.
        data = torch.tensor(self.data[idx], dtype=torch.float)
        labels = torch.tensor(self.labels[idx], dtype=torch.long)
        return {
            'data': data,
            'labels': labels,
        }

# Model class 

In [5]:
class Sequence_model(nn.Module):
    """Encoder-only sequence model: input embedding -> encoder -> generator."""

    def __init__(self, encoder, src_encode, generator):
        super().__init__()
        self.encoder = encoder
        self.src_encode = src_encode
        self.generator = generator

    def forward(self, src):
        """Encode `src` (no mask) and pass the result through the generator."""
        encoded = self.encode(src, src_mask=None)
        return self.generator(encoded)

    def encode(self, src, src_mask=None):
        """Embed `src` and run the encoder on it; `src_mask` is accepted but not forwarded."""
        embedded = self.src_encode(src)
        return self.encoder(embedded)
    
    
class Generator(nn.Module):
    """Output head: a single linear projection from `d_model` to `vocab` scores.

    No softmax is applied here; `forward` returns the raw projection.
    """

    def __init__(self, d_model, vocab):
        super().__init__()
        self.proj = nn.Linear(d_model, vocab)

    def forward(self, x):
        scores = self.proj(x)
        return scores

## WaveBlock 

In [6]:
class Wave_Block(nn.Module):
    """Stack of gated dilated 1-D convolutions with an accumulated residual.

    A 1x1 convolution first maps `in_channels` to `out_channels`. Then, for
    each dilation 1, 2, 4, ... (`dilation_rates` of them), a tanh "filter"
    convolution is multiplied by a sigmoid "gate" convolution, passed through
    a 1x1 convolution, and added to the running residual. Padding equals the
    dilation with kernel size 3, so the sequence length is preserved.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels used by every layer and by the output.
        dilation_rates: number of gated layers (dilations are 2**0 .. 2**(n-1)).
    """

    def __init__(self,in_channels,out_channels,dilation_rates):
        super(Wave_Block,self).__init__()
        self.num_rates = dilation_rates
        self.convs = nn.ModuleList()
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        # convs[0] is the input projection; convs[i+1] follows gated layer i.
        self.convs.append(nn.Conv1d(in_channels,out_channels,kernel_size=1))
        dilation_rates = [2**i for i in range(dilation_rates)]
        for dilation_rate in dilation_rates:
            self.filter_convs.append(nn.Conv1d(out_channels,out_channels,kernel_size=3,padding=dilation_rate,dilation=dilation_rate))
            self.gate_convs.append(nn.Conv1d(out_channels,out_channels,kernel_size=3,padding=dilation_rate,dilation=dilation_rate))
            self.convs.append(nn.Conv1d(out_channels,out_channels,kernel_size=1))

    def forward(self,x):
        """Return the sum of the projected input and every gated layer's output."""
        x = self.convs[0](x)
        res = x
        for i in range(self.num_rates):
            # torch.tanh / torch.sigmoid replace the legacy F.tanh / F.sigmoid aliases.
            x = torch.tanh(self.filter_convs[i](x)) * torch.sigmoid(self.gate_convs[i](x))
            x = self.convs[i+1](x)
            # Out-of-place add so tensors needed by autograd are not overwritten.
            res = torch.add(res, x)
        return res
    

## Encoder 

In [7]:
def clones(module, N):
    "Return an nn.ModuleList holding N independent deep copies of `module`."
    copies = nn.ModuleList()
    for _ in range(N):
        copies.append(copy.deepcopy(module))
    return copies
class Encoder(nn.Module):
    "Stack of N copies of `layer`, followed by a final LayerNorm."

    def __init__(self, layer, N):
        super().__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)

    def forward(self, x, mask=None):
        "Feed x (and mask) through every layer in order, then normalise the result."
        hidden = x
        for block in self.layers:
            hidden = block(hidden, mask)
        return self.norm(hidden)
class LayerNorm(nn.Module):
    "Layer normalisation over the last dimension with a learnable gain (a_2) and bias (b_2)."

    def __init__(self, features, eps=1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mu = x.mean(-1, keepdim=True)
        sigma = x.std(-1, keepdim=True)
        # eps is added to the standard deviation itself (not under a square root).
        scaled = self.a_2 * (x - mu)
        return scaled / (sigma + self.eps) + self.b_2
class SublayerConnection(nn.Module):
    """
    Pre-norm residual wrapper: x + dropout(sublayer(norm(x))).
    The normalisation is applied before the sublayer rather than after it.
    """

    def __init__(self, size, dropout):
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Wrap `sublayer` (a callable that keeps the feature size) in a residual connection."
        normed = self.norm(x)
        branch = self.dropout(sublayer(normed))
        return x + branch
class EncoderLayer(nn.Module):
    "One encoder layer: self-attention then feed-forward, each inside a SublayerConnection."

    def __init__(self, size, self_attn, feed_forward, dropout):
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, x, mask):
        "Apply the self-attention sublayer, then the feed-forward sublayer."
        def attend(hidden):
            # query, key and value are all the same tensor
            return self.self_attn(hidden, hidden, hidden, mask)

        attn_sublayer, ff_sublayer = self.sublayer
        attended = attn_sublayer(x, attend)
        return ff_sublayer(attended, self.feed_forward)

In [8]:
def attention(query, key, value, mask=None, dropout=None):
    """Scaled dot-product attention.

    Scores are `query @ key^T / sqrt(d_k)` with `d_k` the last dimension of
    `query`. Positions where `mask == 0` are set to -1e9 before the softmax.
    `dropout`, when given, is applied to the attention weights.

    Returns:
        (weighted values, attention weights)
    """
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    weights = F.softmax(scores, dim=-1)
    if dropout is not None:
        weights = dropout(weights)
    context = torch.matmul(weights, value)
    return context, weights
class MultiHeadedAttention(nn.Module):
    """Multi-head attention with `h` heads of size `d_model // h`.

    Holds four `d_model x d_model` linear layers: the first three project
    query, key and value; the last one projects the concatenated heads. The
    most recent attention weights are kept in `self.attn`.
    """

    def __init__(self, h, d_model, dropout=0.1):
        "Take in the number of heads and the model size."
        super().__init__()
        assert d_model % h == 0
        # d_v is assumed to equal d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        "Project the inputs, attend per head, merge the heads and project the result."
        if mask is not None:
            # Extra head axis so the same mask applies to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        def split_heads(projection, tensor):
            # (batch, seq, d_model) -> (batch, h, seq, d_k)
            return projection(tensor).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)

        # 1) Linear projections, one layer each for query, key and value.
        query = split_heads(self.linears[0], query)
        key = split_heads(self.linears[1], key)
        value = split_heads(self.linears[2], value)

        # 2) Attention over all heads at once.
        x, self.attn = attention(query, key, value, mask=mask, dropout=self.dropout)

        # 3) Merge the heads back into d_model and apply the final linear.
        merged = x.transpose(1, 2).contiguous()
        merged = merged.view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](merged)
class PositionwiseFeedForward(nn.Module):
    "Two-layer feed-forward block: Linear(d_model, d_ff) -> ReLU -> dropout -> Linear(d_ff, d_model)."

    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        hidden = F.relu(self.w_1(x))
        hidden = self.dropout(hidden)
        return self.w_2(hidden)

## Input Layer

In [9]:
class Numrical_input(nn.Module):
    """Input layer for numeric features: a linear map from `vocab` inputs to `d_model`,
    with the output multiplied by sqrt(d_model)."""

    def __init__(self, d_model, vocab):
        super().__init__()
        self.lut = nn.Linear(vocab, d_model)
        self.d_model = d_model

    def forward(self, x):
        projected = self.lut(x)
        scale = math.sqrt(self.d_model)
        return projected * scale

In [10]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to the input, then apply dropout.

    Args:
        d_model: feature size of the input (odd sizes are supported).
        dropout: dropout probability applied after the addition.
        max_len: number of positions pre-computed; inputs must not be longer.
    """

    def __init__(self, d_model, dropout, max_len=4000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine columns.
        pe[:, 1::2] = torch.cos(angles)[:, :d_model // 2]
        pe = pe.unsqueeze(0)
        # A buffer moves with .to()/.cuda() and is saved in the state dict, but is not trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return dropout(x + pe[:, :x.size(1)]); dimension 1 of `x` indexes positions."""
        # Buffers never require grad, so the deprecated Variable wrapper is unnecessary.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

## Full Model 

In [11]:
def make_model(src_vocab, tgt_vocab, N=3, 
               d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Build a Sequence_model from hyperparameters.

    Args:
        src_vocab: number of input features per position (input size of the
            Numrical_input linear layer).
        tgt_vocab: number of output scores per position (Generator output size).
        N: number of encoder layers.
        d_model: model width.
        d_ff: hidden width of the position-wise feed-forward block.
        h: number of attention heads.
        dropout: dropout probability used by the attention, feed-forward,
            positional-encoding and sublayer modules.

    Returns:
        The model, with every parameter of more than one dimension
        re-initialised with Xavier/Glorot uniform.
    """
    c = copy.deepcopy
    # Pass `dropout` through; previously the attention module always used its own default.
    attn = MultiHeadedAttention(h, d_model, dropout)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = Sequence_model(Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        nn.Sequential(Numrical_input(d_model, src_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code. 
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            # In-place variant; the un-suffixed nn.init.xavier_uniform is deprecated.
            nn.init.xavier_uniform_(p)
    return model

In [12]:
# Small example model.
# make_model(src_vocab, tgt_vocab): 19 input features per position, 14 output scores per position.
# NOTE(review): the F1 computation in run() uses labels 0..10 (11 classes), so 14 outputs
# looks larger than needed — confirm this is intentional.
tmp_model = make_model(19, 14)
tmp_model

  app.launch_new_instance()


Sequence_model(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0): Linear(in_features=512, out_features=512, bias=True)
            (1): Linear(in_features=512, out_features=512, bias=True)
            (2): Linear(in_features=512, out_features=512, bias=True)
            (3): Linear(in_features=512, out_features=512, bias=True)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (sublayer): ModuleList(
          (0): SublayerConnection(
            (norm): LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): SublayerConnection(
            (norm): LayerNorm()

# Engine 

In [13]:
class NoamOpt:
    "Optimizer wrapper that sets the learning rate from a warmup/decay schedule on every step."

    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0

    def step(self):
        "Advance the step counter, write the new rate into every param group, then step the optimizer."
        self._step += 1
        new_rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = new_rate
        self._rate = new_rate
        self.optimizer.step()

    def rate(self, step = None):
        "Learning rate for `step` (defaults to the current step): factor * model_size^-0.5 * min(step^-0.5, step * warmup^-1.5)."
        current = self._step if step is None else step
        decay_term = current ** (-0.5)
        warmup_term = current * self.warmup ** (-1.5)
        return self.factor * \
            (self.model_size ** (-0.5) *
            min(decay_term, warmup_term))
        
def get_std_opt(model):
    """Build the standard NoamOpt (factor 2, 4000 warmup steps) around Adam for `model`.

    The model width is read from the first module of the model's input
    embedding stack. Sequence_model stores that stack as `src_encode`; the
    older attribute name `src_embed` is still accepted as a fallback.
    """
    # The previous code read only `model.src_embed`, which Sequence_model does not define.
    embed = getattr(model, 'src_encode', None)
    if embed is None:
        embed = model.src_embed
    return NoamOpt(embed[0].d_model, 2, 4000,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

In [14]:
class EarlyStopping:
    """Track the best score, checkpoint the model on improvement, and signal when to stop.

    Args:
        patience: number of consecutive non-improving calls before stopping.
        delta: margin by which a score must beat the best one to count as an improvement.
        checkpoint_path: file the model's state dict is saved to on improvement.
        is_maximize: True if higher scores are better, False if lower are better.
    """

    def __init__(self, patience=5, delta=0, checkpoint_path='checkpoint.pt', is_maximize=True):
        self.patience = patience
        self.delta = delta
        self.checkpoint_path = checkpoint_path
        self.counter = 0
        self.best_score = None
        self.is_maximize = is_maximize

    def load_best_weights(self, model):
        """Load the state dict stored at `checkpoint_path` into `model`."""
        model.load_state_dict(torch.load(self.checkpoint_path))

    def __call__(self, score, model):
        """Record `score`; return True once `patience` non-improving calls have accumulated."""
        if self.best_score is None:
            improved = True
        elif self.is_maximize:
            improved = score > self.best_score + self.delta
        else:
            improved = score < self.best_score - self.delta

        if improved:
            torch.save(model.state_dict(), self.checkpoint_path)
            self.best_score, self.counter = score, 0
            return False

        self.counter += 1
        if self.counter >= self.patience:
            return True
        return False

In [15]:
def loss_fn(outputs, targets):
    """Focal loss (pytorch_toolbelt, default settings) between `outputs` and `targets`."""
    criterion = L.FocalLoss()
    return criterion(outputs, targets)

In [16]:
def train_fn(data_loader, model, optimizer):
  """Train `model` for one pass over `data_loader`.

  Args:
    data_loader: yields dicts with 'data' and 'labels' tensors.
    model: the network to train; its output is flattened to (-1, n_outputs).
    optimizer: a NoamOpt-style wrapper (exposes `.optimizer` and `.step()`).

  Returns:
    (train_losses, train_true, train_preds): the per-batch loss values, all
    flattened labels, and all flattened model outputs (detached), the latter
    two as tensors on `device`.
  """
  model.train()

  train_losses = []
  # Seeding with empty tensors keeps the dtype/device of the result the same
  # even when the loader is empty (torch.cat skips 1-D empty tensors).
  true_chunks = [torch.LongTensor([]).to(device)]
  pred_chunks = [torch.Tensor([]).to(device)]
  for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):

    d['data'] = d['data'].to(device,dtype=torch.float)
    d['labels'] = d['labels'].to(device,dtype= torch.long)
    optimizer.optimizer.zero_grad()

    predictions = model(d['data'])
    predictions_ = predictions.reshape(-1, predictions.shape[-1])
    y_ = d['labels'].view(-1)

    loss = loss_fn(predictions_, y_)
    loss.backward()
    optimizer.step()

    train_losses.append(loss.item())

    true_chunks.append(y_)
    # Detach before storing: keeping the raw outputs would keep every batch's
    # autograd graph (and its activations) alive for the whole epoch.
    pred_chunks.append(predictions_.detach())

  # Concatenate once at the end instead of re-copying the growing tensor per batch.
  train_true = torch.cat(true_chunks, 0)
  train_preds = torch.cat(pred_chunks, 0)

  return train_losses,train_true,train_preds

In [17]:
def eval_fn(data_loader, model):
  """Evaluate `model` on `data_loader` without gradient tracking.

  Returns:
    (valid_losses, val_true, val_preds): the per-batch loss values, all
    flattened labels, and all flattened model outputs, the latter two as
    tensors on `device`.
  """
  model.eval()

  valid_losses = []
  val_true = torch.LongTensor([]).to(device)
  val_preds = torch.Tensor([]).to(device)

  with torch.no_grad():
    for bi, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
      batch['data'] = batch['data'].to(device, dtype=torch.float)
      batch['labels'] = batch['labels'].to(device, dtype=torch.long)

      logits = model(batch['data'])
      # Collapse all leading dimensions so each row is one position's scores.
      flat_logits = logits.reshape(-1, logits.shape[-1])
      flat_labels = batch['labels'].view(-1)

      batch_loss = loss_fn(flat_logits, flat_labels)
      valid_losses.append(batch_loss.item())

      val_true = torch.cat([val_true, flat_labels], 0)
      val_preds = torch.cat([val_preds, flat_logits], 0)

  return valid_losses, val_true, val_preds

In [18]:
def predict(data_loader, model):
  """Run `model` over `data_loader` and collect softmax probabilities.

  Returns:
    A list with one NumPy array per batch; each array holds the softmax over
    the last dimension of the flattened model output.
  """
  model.eval()

  pred_list = []
  with torch.no_grad():
    for bi, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
      batch['data'] = batch['data'].to(device, dtype=torch.float)

      logits = model(batch['data'])
      flat_logits = logits.reshape(-1, logits.shape[-1])

      probabilities = F.softmax(flat_logits, dim=1)
      pred_list.append(probabilities.cpu().numpy())

  return pred_list

In [19]:
def run(model,index,train_index,val_index):
    """Train and validate `model` on one cross-validation fold.

    Uses the notebook-level `train`, `train_tr`, `GROUP_BATCH_SIZE`,
    `NNBATCHSIZE`, `EPOCHS`, `flip` and `noise`. The best state dict (by
    validation macro-F1) is checkpointed under ./models/, and the model holds
    those best weights when the function returns.

    Args:
        model: network to train (must expose `src_encode[0].d_model`).
        index: fold number, used in log lines and in the checkpoint file name.
        train_index: indices into `train` / `train_tr` used for training.
        val_index: indices into `train` / `train_tr` used for validation.

    Returns:
        (avg_train_losses, avg_valid_losses): mean loss per epoch.
    """
    def macro_f1(true, scores):
        # argmax over the score axis gives the predicted label; F1 is averaged over labels 0..10.
        return f1_score(true.cpu().detach().numpy(), scores.cpu().detach().numpy().argmax(1),
                        labels=list(range(11)), average='macro')

    it = 0  # iteration tag that is part of the checkpoint file name
    checkpoint_path = "./models/gru_clean_checkpoint_fold_{}_iter_{}.pt".format(index, it)

    print("Fold : {}".format(index))
    # Do not depend on a separate cell having created the checkpoint directory.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    train_dataset = IronDataset(train[train_index], train_tr[train_index], seq_len=GROUP_BATCH_SIZE, flip=flip, noise_level=noise)
    train_dataloader = DataLoader(train_dataset, NNBATCHSIZE, shuffle=True,num_workers = 16)

    valid_dataset = IronDataset(train[val_index], train_tr[val_index], seq_len=GROUP_BATCH_SIZE, flip=False)
    valid_dataloader = DataLoader(valid_dataset, NNBATCHSIZE, shuffle=False)

    early_stopping = EarlyStopping(patience=5, is_maximize=True, checkpoint_path=checkpoint_path)

    optimizer =  NoamOpt(model.src_encode[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    avg_train_losses, avg_valid_losses = [], []

    for epoch in range(EPOCHS):
        start_time = time.time()
        print('**********************************')
        print("Folder : {} Epoch : {}".format(index, epoch))
        train_losses,train_true,train_preds=train_fn(train_dataloader, model, optimizer)
        train_loss = np.average(train_losses)
        avg_train_losses.append(train_loss)
        train_score = macro_f1(train_true, train_preds)

        print( "train_loss: {:0.6f}, train_f1: {:0.6f}".format(train_loss, train_score))

        valid_losses, val_true ,val_preds = eval_fn(valid_dataloader, model)
        valid_loss = np.average(valid_losses)
        avg_valid_losses.append(valid_loss)
        val_score = macro_f1(val_true, val_preds)
        print( "valid_loss: {:0.6f}, valid_f1: {:0.6f}".format(valid_loss, val_score))
        if early_stopping(val_score, model):
            print("Early Stopping...")
            print("Best Val Score: {:0.6f}".format(early_stopping.best_score))
            break

        print("--- %s seconds ---" % (time.time() - start_time))
        # NOTE(review): the best checkpoint is reloaded after every epoch, so a
        # non-improving epoch's weights are discarded before the next epoch.
        # Kept as in the original — confirm this rollback is intended.
        early_stopping.load_best_weights(model)

    # Restore the best weights on the early-stop path as well (the `break`
    # above skips the in-loop reload). Skipped if no checkpoint was written.
    if early_stopping.best_score is not None:
        early_stopping.load_best_weights(model)
    return avg_train_losses,avg_valid_losses

## Training 

In [20]:
# config 
import random
EPOCHS = 20              # maximum epochs per fold
NNBATCHSIZE = 2          # sequences per DataLoader batch
GROUP_BATCH_SIZE = 4000  # passed to the datasets as seq_len
LR = 0.001
SPLITS = 5
batchsize = 128
flip = False
noise = False
# Use CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 1234
# Seed every random number generator the notebook touches.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# Deterministic cuDNN kernels, with the auto-tuner (a source of run-to-run variation) disabled.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [21]:
# Load the data, build the features, and get the per-group arrays and CV folds.
# Called with split()'s own defaults (4000-row groups, 5 folds), not the config constants.
train, test, train_tr, new_splits = split()

Reading Data Started...
Reading and Normalizing Data Completed
Creating Features
Feature Engineering Started...
     time    signal  open_channels   proba_0   proba_1   proba_2   proba_3  \
0  0.0001 -1.148772              0  0.966731  0.028343  0.004812  0.000114   
1  0.0002 -1.184075              0  0.996045  0.003466  0.000426  0.000063   
2  0.0003 -1.012891              0  0.976313  0.018989  0.004677  0.000021   
3  0.0004 -1.298012              0  0.996002  0.003625  0.000326  0.000046   
4  0.0005 -1.303999              0  0.997465  0.002335  0.000158  0.000042   

   proba_4  proba_5  proba_6  ...  proba_9  proba_10  group  \
0      0.0      0.0      0.0  ...      0.0       0.0      0   
1      0.0      0.0      0.0  ...      0.0       0.0      0   
2      0.0      0.0      0.0  ...      0.0       0.0      0   
3      0.0      0.0      0.0  ...      0.0       0.0      0   
4      0.0      0.0      0.0  ...      0.0       0.0      0   

   signal_shift_pos_1  signal_shift_neg_

In [22]:
!mkdir models 

In [23]:
# Train a fresh model on every fold; run() writes one checkpoint per fold.
for index, (train_index, val_index, _) in enumerate(new_splits):
    model = make_model(19, 14)
    # Use the configured device so this cell agrees with train_fn / eval_fn / predict.
    model = model.to(device)
    run(model,index,train_index,val_index)

  app.launch_new_instance()


Fold : 0
**********************************
Folder : 0 Epoch : 0


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.283503, train_f1: 0.833060


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.078952, valid_f1: 0.930326
--- 181.5971348285675 seconds ---
**********************************
Folder : 0 Epoch : 1


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.071285, train_f1: 0.930262


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.075972, valid_f1: 0.935743
--- 183.02260732650757 seconds ---
**********************************
Folder : 0 Epoch : 2


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.059058, train_f1: 0.932678


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.063351, valid_f1: 0.936240
--- 183.60982251167297 seconds ---
**********************************
Folder : 0 Epoch : 3


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.052295, train_f1: 0.936521


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.054617, valid_f1: 0.936260
--- 184.154940366745 seconds ---
**********************************
Folder : 0 Epoch : 4


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.049895, train_f1: 0.936594


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.048993, valid_f1: 0.936927
--- 183.91814470291138 seconds ---
**********************************
Folder : 0 Epoch : 5


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.048592, train_f1: 0.936801


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.048941, valid_f1: 0.937211
--- 184.25727224349976 seconds ---
**********************************
Folder : 0 Epoch : 6


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.046885, train_f1: 0.937198


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.047470, valid_f1: 0.937851
--- 183.95649933815002 seconds ---
**********************************
Folder : 0 Epoch : 7


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.046546, train_f1: 0.937187


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.050238, valid_f1: 0.934933
--- 184.0982358455658 seconds ---
**********************************
Folder : 0 Epoch : 8


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.046298, train_f1: 0.937359


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.046936, valid_f1: 0.937917
--- 184.25945496559143 seconds ---
**********************************
Folder : 0 Epoch : 9


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.045664, train_f1: 0.937574


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.048341, valid_f1: 0.936869
--- 184.4871654510498 seconds ---
**********************************
Folder : 0 Epoch : 10


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.044785, train_f1: 0.937669


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.046138, valid_f1: 0.937774
--- 184.52624535560608 seconds ---
**********************************
Folder : 0 Epoch : 19


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.044847, train_f1: 0.937674


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


train_loss: 0.052519, train_f1: 0.936449


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


train_loss: 0.049869, train_f1: 0.936670


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.044022, valid_f1: 0.939297
--- 182.81961274147034 seconds ---
**********************************
Folder : 1 Epoch : 10


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


valid_loss: 0.044483, valid_f1: 0.939068
Early Stopping...
Best Val Score: 0.939297
Fold : 2
**********************************
Folder : 2 Epoch : 0


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.077216, train_f1: 0.929416


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.057656, valid_f1: 0.936676
--- 185.4622540473938 seconds ---
**********************************
Folder : 2 Epoch : 3


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.060965, train_f1: 0.935835


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


train_loss: 0.049286, train_f1: 0.937202


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.045947, valid_f1: 0.937062
--- 184.3993353843689 seconds ---
**********************************
Folder : 2 Epoch : 11


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.048380, train_f1: 0.937338


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.045880, valid_f1: 0.936955
--- 184.3420147895813 seconds ---
**********************************
Folder : 2 Epoch : 12


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.083900, train_f1: 0.926948


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


train_loss: 0.050254, train_f1: 0.937213


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.049343, valid_f1: 0.937342
--- 183.76602911949158 seconds ---
**********************************
Folder : 3 Epoch : 10


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))


train_loss: 0.312870, train_f1: 0.812023


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


train_loss: 0.049163, train_f1: 0.937032


HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


valid_loss: 0.047281, valid_f1: 0.937064
Early Stopping...
Best Val Score: 0.937644


# Make prediction 

In [24]:
class IronDataset_test(Dataset):
    """Label-free dataset yielding one sequence per index.

    `data[idx]` is converted to a float tensor. The remaining constructor
    arguments (`training`, `transform`, `seq_len`, `flip`, `noise_level`,
    `class_split`) are stored on the instance but not used by `__getitem__`.
    """

    def __init__(self, data, training=True, transform=None, seq_len=4000, flip=0.5, noise_level=0, class_split=0.0):
        self.data = data
        self.transform = transform
        self.training = training
        self.flip = flip
        self.noise_level = noise_level
        self.class_split = class_split
        self.seq_len = seq_len

    def __len__(self):
        """Number of items, i.e. `len(data)`."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return `{'data': float tensor}` for item `idx`."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # torch.tensor() already yields a tensor with requires_grad=False, so the
        # deprecated autograd.Variable wrapper is not needed.
        data = torch.tensor(self.data[idx], dtype=torch.float)
        return {
            'data': data,
        }

In [25]:
# Predict the test set with every fold's best checkpoint.
all_pred = []
device = torch.device('cuda')
# The dataset and loader do not depend on the fold, so build them once.
test_dataset = IronDataset_test(test, seq_len=GROUP_BATCH_SIZE, flip=flip, noise_level=noise)
# shuffle must be False: predictions are later averaged across folds and written
# out row by row, so every pass has to see the test data in the same order.
test_dataloader = DataLoader(test_dataset, NNBATCHSIZE, shuffle=False,num_workers = 16)
for i in range(len(new_splits)):
    model = make_model(19, 14)
    model.load_state_dict(torch.load(f'../working/models/gru_clean_checkpoint_fold_{i}_iter_0.pt', map_location=device))
    model =model.to(device)
    pred = predict(test_dataloader,model )
    # Stack the per-batch probability arrays into a single array.
    pred = np.vstack(pred)
    all_pred.append(pred)

  app.launch_new_instance()


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




In [26]:
# Sample submission; only its `time` column is used when writing the final file.
ss = pd.read_csv('../input/liverpool-ion-switching/sample_submission.csv')

In [27]:
# Average the per-fold probabilities. Start from a copy so all_pred[0] is not
# modified in place and re-running this cell gives the same result.
mean_proba = all_pred[0].copy()
for fold_pred in all_pred[1:]:
    mean_proba += fold_pred
mean_proba = mean_proba / len(all_pred)
# The predicted class is the column with the highest averaged probability.
pred = np.argmax(mean_proba, axis=1)
test_pred_frame = pd.DataFrame({'time': ss['time'].astype(str),
                                'open_channels': pred})
test_pred_frame.to_csv("./submission.csv", index=False)