In [None]:
# Install the keras-transformer fork from source; it provides the
# `keras_transformer` package imported in the Model section.
# NOTE(review): the clone is not pinned to a commit, and `git clone` will
# complain on a re-run if the `keras-transformer` directory already exists.
!git clone https://github.com/jose-carmona/keras-transformer.git
!cd keras-transformer && pip install .

In [None]:
import numpy as np 
import pandas as pd

import os
# Print the full path of every file found under the Kaggle input mount.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Locations of the train / test CSVs from the "data-without-drift" dataset.
train_path = "/kaggle/input/data-without-drift/train_clean.csv"
test_path = "/kaggle/input/data-without-drift/test_clean.csv"

# Read both files, train first, then test.
train_data, test_data = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# Batch index 7 (rows [7*bs, 8*bs)) is still not clean, so it is left out of
# the training data.
bs = 500000
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same rows
# and the same (non-reset) index labels.
train_data = pd.concat([train_data[0:bs*7], train_data[bs*8:]])

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype covering their range.

    Columns whose dtype is one of int16/32/64 or float16/32/64 are inspected;
    every other column is left untouched. Integer columns are narrowed to the
    smallest of int8/int16/int32/int64 whose range contains the column's min
    and max (bounds inclusive). Float columns are narrowed to float16 or
    float32 on the same criterion and otherwise stored as float64.

    NOTE: floats are narrowed by *range* only, so values can lose precision
    when they end up as float16 / float32.

    Args:
        df: DataFrame to shrink. It is modified in place.
        verbose: When true, print the final memory usage and the relative
            reduction. When false, nothing is printed.

    Returns:
        The same ``df`` object, after downcasting.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        is_int = str(col_type)[:3] == 'int'
        if is_int:
            candidates = (np.int8, np.int16, np.int32, np.int64)
            limits_of = np.iinfo
        else:
            candidates = (np.float16, np.float32)
            limits_of = np.finfo

        # Integer columns with no usable range (e.g. empty, min/max are NaN)
        # keep their dtype; float columns fall back to float64.
        target = None if is_int else np.float64
        for candidate in candidates:
            limits = limits_of(candidate)
            # Inclusive bounds: a column whose max is exactly 127 fits in int8.
            if limits.min <= c_min and c_max <= limits.max:
                target = candidate
                break

        if target is not None:
            df[col] = df[col].astype(target)

    if verbose:
        end_mem = df.memory_usage().sum() / 1024**2
        print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
        print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

### Params

In [None]:
# NOTE(review): not referenced by the other visible cells; the model below is
# constructed with `shift` in this position instead.
max_open_channels = 10
# Number of lagged copies created per column; each sample is a window of shift+1 steps.
shift = 30
# Size of the last axis when the feature columns are reshaped into model input.
n_features = 1
# Fraction of rows kept for training by the first train / hold-out split.
train_size = 0.2
# Fraction of the held-out rows that is actually used to compute the score.
eval_size = 0.01
# Number of training epochs passed to the model.
epochs = 8

### Features

In [None]:
# Column names in window order: largest lag first, the un-shifted column last.
features = [f'signal_{lag}' for lag in range(shift, 0, -1)] + ['signal']
result = [f'open_channels_{lag}' for lag in range(shift, 0, -1)] + ['open_channels']

In [None]:
def create_shift_columns(df, column, shift=5):
    """Add lagged copies of ``column`` to ``df`` and return a NaN-free copy.

    For every lag from 1 to ``shift`` a column named ``<column>_<lag>`` is
    written into ``df`` itself, holding ``column`` shifted down by ``lag``
    rows. The returned frame is ``df`` with every NaN (in any column)
    replaced by 0; ``df`` keeps its NaNs.
    """
    base = f'{column}'
    lag = 1
    while lag <= shift:
        lagged_name = f'{base}_{lag}'
        df[lagged_name] = df[base].shift(lag)
        lag += 1

    filled = df.fillna(0)
    return filled

def extract_columns(df, columns, shape):
    """Return the values of ``columns`` from ``df`` as an array reshaped to ``shape``."""
    selected = df[columns]
    raw_values = selected.values
    return raw_values.reshape(shape)

### Model

In [None]:
from keras import backend as K

# Clear the Keras backend session before building the model, so a re-run of
# the notebook does not build on state left over from a previous model.
K.clear_session()

In [None]:
from keras.layers import Input, Dense, Add, BatchNormalization, Dropout
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.losses import CategoricalCrossentropy
from keras_transformer.transformer import TransformerACT, TransformerBlock
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

import matplotlib.pyplot as plt

# train_size = 0.005 / eval_size = 0.05
# H = 1 / depth = 16 / BS = 32 / dense = 64 --> Score = 0.8430171235798919
# H = 4 / depth = 16 / BS = 32 / dense = 64 --> Score = 0.784785769989308
# H = 1 / depth = 32 / BS = 32 / dense = 64 --> Score = 0.8533075081800677
# H = 1 / depth = 48 / BS = 32 / dense = 64 --> Score = 0.8464452560468306
# H = 1 / depth = 32 / BS = 32 / dense = 32 --> Score = 0.7489237855413848
# H = 1 / depth = 32 / BS = 32 / dense = 96 --> Score = 0.7958475746068415
# BatchNormalization / lr = 0.0015 --> Score = 0.7999503052690716
# BatchNormalization / lr = 0.01 --> Score = 0.7472128169138309
# optimizers = SGD --> X

# shift = 20 (BN) --> Score = 0.8027335987295007
# shift = 30 (BN) --> Score = 0.8224195152711268
# shift = 40 (BN) --> Score = 0.7339714910308296
# shift = 30 --> Score = 0.8747070792176976 *BEST*


class TransformerModel():
    """Per-position classifier built from a keras-transformer block applied repeatedly.

    The single ``max_open_channels`` argument sizes two things at once: the
    input sequence length (``max_open_channels + 1`` positions) and the width
    of the softmax output (``max_open_channels + 1`` classes). All other
    hyper-parameters are fixed in ``__init__``.

    Args:
        n_features: Number of values per sequence position.
        max_open_channels: Largest class index; see above for how it is used.
        epochs: Number of epochs used by ``fit``.
    """

    def __init__(self, n_features, max_open_channels, epochs):
        self.type = 'Transformer'
        self.n_features = n_features
        self.shape = (max_open_channels+1, n_features)
        self.max_open_channels = max_open_channels

        self.lr = 0.0015
        self.epochs = epochs
        self.batch_size = 32
        self.train_verbose = 1

        self.num_heads = 1
        self.dense_units = 64
        self.transformer_depth = 32
        self.transformer_dropout: float = 0.1

        # Keep only the best model (by validation accuracy) on disk. The
        # deprecated ``period=1`` argument is omitted: checking after every
        # epoch is already the default behaviour.
        self.checkpoint = ModelCheckpoint(self.type + ".hdf5",
                                          monitor='val_accuracy',
                                          verbose=1,
                                          save_best_only=True,
                                          mode='auto')

    def create_model(self):
        """Build the network and store it in ``self.model``."""
        inp = Input(shape = self.shape)

        # Project every position to ``dense_units`` values before the transformer.
        next_step_input = inp
        next_step_input = Dense(self.dense_units)(next_step_input)

        act_layer = TransformerACT(name='adaptive_computation_time')
        transformer_block = TransformerBlock(name = 'transformer',
                                             num_heads = self.num_heads,
                                             residual_dropout = self.transformer_dropout,
                                             attention_dropout = self.transformer_dropout,
                                             # Allow bi-directional attention
                                             use_masking = False)

        # The same block and ACT layer instances are applied on every
        # iteration, so the depth adds repetitions rather than new layers.
        # NOTE(review): act_output is presumably the ACT-weighted accumulation
        # of the block outputs -- confirm against keras-transformer.
        act_output = next_step_input
        for i in range(self.transformer_depth):
            next_step_input = transformer_block(next_step_input)
            next_step_input, act_output = act_layer(next_step_input)

        act_layer.finalize()
        next_step_input = act_output

        # Disabled experiment (the BatchNormalization runs in the log above scored lower).
        #-- next_step_input = BatchNormalization()(next_step_input)
        #-- next_step_input = Dropout(0.2)(next_step_input)

        # One softmax over the classes for every sequence position.
        out = Dense(self.max_open_channels+1, activation = 'softmax', name = 'out')(next_step_input)

        self.model = Model(inputs = inp, outputs = out)

    def compile_model(self):
        """Compile ``self.model`` with Adam and categorical cross-entropy."""
        # ``learning_rate`` is the current argument name; ``lr`` is a deprecated alias.
        opt = Adam(learning_rate = self.lr)
        # opt = SGD(lr = self.lr)
        self.model.compile(loss = CategoricalCrossentropy(), optimizer = opt, metrics = ['accuracy'])

    def create_and_compile(self):
        """Build and then compile the model, printing progress messages."""
        print('Create Model...')
        self.create_model()
        print('Compile Model...')
        self.compile_model()

    def print(self):
        """Print the Keras model summary."""
        # ``summary()`` prints by itself and returns None, so wrapping it in
        # print() would emit a stray "None" line.
        self.model.summary()

    def fit(self, X, y):
        """Train the model, keeping the training history in ``self.history``.

        ``y`` holds integer class labels and is one-hot encoded here to
        ``max_open_channels + 1`` classes. The data is split once more with
        ``train_test_split`` defaults to obtain the validation set monitored
        by the checkpoint callback.
        """
        y = to_categorical(y, num_classes = self.max_open_channels+1)
        X_train, X_valid, y_train, y_valid = train_test_split(X, y)
        self.history = self.model.fit(X_train,
                                      y_train,
                                      validation_data = (X_valid,y_valid),
                                      epochs = self.epochs,
                                      batch_size = self.batch_size,
                                      callbacks = [self.checkpoint],
                                      verbose = self.train_verbose)

    def predict(self, X):
        """Return, for each sample, the predicted class at the last sequence position."""
        preds = np.argmax(self.model.predict(X), axis=-1)
        # Take the last position of the window directly instead of indexing
        # with a module-level ``shift`` variable; for a window of shift+1
        # positions the last index is ``shift``, so the result is the same.
        return preds[:, -1]

    def load_weights(self):
        """Load the weights stored in the checkpoint file written during ``fit``."""
        self.model.load_weights(self.type + ".hdf5")

    def plot(self):
        """Plot training and validation accuracy and loss per epoch, side by side."""
        plt.figure(figsize=(20,5))

        # summarize history for accuracy
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['accuracy'])
        plt.plot(self.history.history['val_accuracy'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')

        # summarize history for loss
        plt.subplot(1, 2, 2)
        plt.plot(self.history.history['loss'])
        plt.plot(self.history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')

        plt.show()

In [None]:
# Build and compile the model.
# NOTE(review): `shift` is passed in the `max_open_channels` position, so the
# input has shift+1 positions (matching X) and the softmax has shift+1
# outputs; the `max_open_channels` constant from the Params cell is not used.
m = TransformerModel(n_features, shift, epochs)
m.create_and_compile()

### Fit Model

In [None]:
!date

# Add `shift` lagged copies of the signal and of the label, then downcast dtypes.
# NOTE(review): the lags are computed over the whole concatenated frame, so the
# first rows after the removed batch take their lagged values from the rows of
# the preceding batch.
train_data = create_shift_columns(train_data, 'signal', shift)
train_data = create_shift_columns(train_data, 'open_channels', shift)
train_data = reduce_mem_usage(train_data)

# X: windows of shape (samples, shift+1, n_features), largest lag first, current value last.
X = extract_columns(train_data, features, (-1,shift+1,n_features))
# Current-step labels; used to stratify the train / hold-out split.
cl = train_data['open_channels'].values
# y: labels for every window position, shape (samples, shift+1, 1), in the same order as X.
y = extract_columns(train_data, result, (-1,shift+1,1))

In [None]:
from sklearn.model_selection import train_test_split

# Keep a `train_size` fraction of the rows for training, stratified on the
# current-step label; the rest is held out and used by the Eval section.
# A fixed random_state keeps the partition identical across runs, so scores
# from different hyper-parameter settings are computed on comparable data.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify = cl, test_size = 1-train_size, random_state = 42)

In [None]:
# Train on the training split; `fit` one-hot encodes y and holds out its own
# validation subset for the checkpoint callback.
m.fit(X_train, y_train)

In [None]:
# Plot the per-epoch training and validation accuracy and loss.
m.plot()

### Eval

In [None]:
!date

# Load the weights from the checkpoint file written during training (the
# callback keeps only the epoch with the best validation accuracy).
m.load_weights()

In [None]:
from sklearn.metrics import f1_score

# Score on an `eval_size` fraction of the held-out rows. The subsample is drawn
# with a fixed random_state so the reported score does not change between runs
# for reasons unrelated to the model.
X1, _, y1, _ = train_test_split(X_valid, y_valid, test_size = 1-eval_size, random_state = 42)
# Compare the label at the last window position with the model's prediction for it.
print('Score =', f1_score(y1[:,shift], m.predict(X1) ,average='macro'))

In [None]:
import gc

# Drop the training frame and every array derived from it, then ask the
# garbage collector to reclaim the memory before the test features are built.
del train_data, X, y, X_train, X_valid, y_train, y_valid, X1, y1
gc.collect()

### Predict

In [None]:
!date

# Build the lagged signal columns for the test set (only `signal` is lagged
# here), then downcast dtypes.
test_data = create_shift_columns(test_data, 'signal', shift)
test_data = reduce_mem_usage(test_data)

# t: model input windows of shape (rows, shift+1, n_features).
t = extract_columns(test_data, features, (-1,shift+1,n_features))
# r: predicted class for each row; written into the submission file below.
r = m.predict(t)

### Submission

In [None]:
!date

# Start from the sample submission and overwrite its predictions with ours.
submission = pd.read_csv('/kaggle/input/liverpool-ion-switching/sample_submission.csv')
submission = submission.assign(open_channels = r)

# Quick look at the distribution of predicted classes.
submission['open_channels'].hist()
plt.show()

# Floats are written with four decimals; the index is not saved.
submission.to_csv('submission.csv', index = False, float_format='%0.4f')

!date