In [1]:
import tensorflow as tf

# Cap TensorFlow's usage of the first GPU so the device can be shared.
GPU_MEMORY_LIMIT_MB = 1024 * 5

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Virtual devices must be configured before the GPU is initialised;
        # re-running this cell on a live kernel ends up here instead of crashing.
        print(err)
else:
    # No GPU visible: nothing to configure (the original indexed gpus[0]
    # unconditionally and raised IndexError on CPU-only machines).
    print("No GPU found; running on CPU.")

print("Num GPUs Available: ", len(gpus))

Num GPUs Available:  1


In [2]:
import numpy as np
import pandas as pd

from utils.Block import Model, Block
from utils.train import train

import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import make_scorer
from sklearn import ensemble, linear_model
from xgboost.sklearn import XGBClassifier

from catboost import Pool, CatBoostClassifier

def _load_stacked_features(split):
    """Load the stacked feature matrix for one data split.

    Reads ``./data/420_X_<split>.csv`` (which carries the ``id`` column) and
    the headerless prediction files of models 510, 520 and 522 for the same
    split, concatenates them column-wise and indexes the result by ``id``.

    Parameters
    ----------
    split : str
        One of ``'submit'``, ``'train'`` or ``'test'`` (used in the file names).

    Returns
    -------
    pandas.DataFrame
        One row per sample, indexed by ``id``.

    Raises
    ------
    ValueError
        If the part files do not all have the same number of rows; a
        column-wise concat would otherwise silently pad with NaN.
    """
    frames = [pd.read_csv(f'./data/420_X_{split}.csv')]
    frames += [
        pd.read_csv(f'./data/{model_id}_X_{split}_preds.csv', header=None)
        for model_id in (510, 520, 522)
    ]

    row_counts = [len(frame) for frame in frames]
    if len(set(row_counts)) != 1:
        raise ValueError(
            f"Row count mismatch between feature files for split '{split}': {row_counts}"
        )

    return pd.concat(frames, axis=1).set_index('id')


X_submit = _load_stacked_features('submit')
X_train = _load_stacked_features('train')
X_test = _load_stacked_features('test')

# y_submit only supplies the submission ids (it is read with index_col='id').
y_submit = pd.read_csv('./data/004_test.csv', index_col='id')
y_train = pd.read_csv('./data/410_y_train.csv', index_col='id')
y_test = pd.read_csv('./data/410_y_test.csv', index_col='id')

# One-hot encode both splits against the SAME sorted class list so that the
# column order (and count) is identical even if a class is missing from one
# split. When every class is present in both splits this is a no-op compared
# with calling get_dummies on each split independently.
label_classes = np.sort(
    pd.concat([y_train['label'], y_test['label']]).dropna().unique()
)
y_train = pd.get_dummies(y_train['label']).reindex(columns=label_classes, fill_value=0)
y_test = pd.get_dummies(y_test['label']).reindex(columns=label_classes, fill_value=0)

# Drop the pandas wrappers; downstream cells work on plain arrays.
X_submit = X_submit.values
X_train = X_train.values
X_test = X_test.values
y_train = np.squeeze(y_train.values)
y_test = np.squeeze(y_test.values)

X_train.shape

(80000, 440)

In [3]:
# Inspect the one-hot encoded training labels (still a NumPy array here).
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

In [4]:
def _as_feature_tensor(features):
    """Return ``features`` as a float64 tensor with a trailing channel axis.

    A rank-2 input ``(samples, features)`` becomes ``(samples, features, 1)``,
    the layout the Conv1D layers consume. Inputs that already carry the extra
    axis are returned unchanged, so re-running this cell does not keep adding
    dimensions (the original unconditional ``expand_dims`` did).
    """
    tensor = tf.convert_to_tensor(features, dtype='float64')
    if len(tensor.shape) == 2:
        tensor = tf.expand_dims(tensor, 2)
    return tensor


X_train = _as_feature_tensor(X_train)
y_train = tf.convert_to_tensor(y_train, dtype='float64')
X_test = _as_feature_tensor(X_test)
y_test = tf.convert_to_tensor(y_test, dtype='float64')
X_submit = _as_feature_tensor(X_submit)

In [5]:
# Inspect the labels again after conversion to a float64 tensor.
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [9]:
class ResidualBlock(tf.keras.Model):
    '''
    Residual layer for time series model.

    Main path: Conv1D(kernel 1) -> BN -> ReLU -> Conv1D(kernel_size, 'same')
    -> BN -> ReLU. Skip path: a kernel-1 Conv1D applied to the block input.
    Both paths are summed, batch-normalised and passed through a final ReLU.

    Parameters
    ----------
    filters : sequence of 3 ints
        Output channels of the first conv, the second conv and the skip conv
        (in that order). The second and third values feed the same ``Add``
        layer, so they need to be compatible (normally equal).
    kernel_size : int
        Kernel width of the second convolution.

    Ref: https://medium.com/the-artificial-impostor/notes-understanding-tensorflow-part-3-7f6633fcc7c7, 
         https://link.springer.com/content/pdf/10.1007%2Fs10618-019-00619-1.pdf
    '''

    # A tuple default avoids sharing one mutable list between instances.
    def __init__(self, filters=(64, 64, 64), kernel_size=3):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(filters[0], kernel_size=1)
        self.conv2 = tf.keras.layers.Conv1D(filters[1], kernel_size=kernel_size, padding='same')
        self.conv3_skip = tf.keras.layers.Conv1D(filters[2], kernel_size=1)

        self.batchnorm1 = tf.keras.layers.BatchNormalization()
        self.batchnorm2 = tf.keras.layers.BatchNormalization()
        self.batchnorm3 = tf.keras.layers.BatchNormalization()

        self.add = tf.keras.layers.Add()

    def call(self, X, training=None):
        '''
        Apply the residual block to ``X``.

        ``training`` is forwarded to every BatchNormalization layer so that
        batch statistics are used while training and moving averages at
        inference. Leaving it as ``None`` defers to Keras' own default, which
        is what happened before this argument existed.
        '''
        save_X = X

        X = self.conv1(X)
        X = self.batchnorm1(X, training=training)
        X = tf.nn.relu(X)

        X = self.conv2(X)
        X = self.batchnorm2(X, training=training)
        X = tf.nn.relu(X)

        # Project the input with a kernel-1 conv before adding it to the main path.
        X = self.add([X, self.conv3_skip(save_X)])
        X = self.batchnorm3(X, training=training)
        X = tf.nn.relu(X)

        return X

In [10]:
class Model(tf.keras.Model):
    '''
    Stacking network: ResidualBlock -> Flatten -> Block(256) -> Dense.

    NOTE: this class shadows the ``Model`` imported from ``utils.Block`` in
    the imports cell; after this cell runs, ``Model`` refers to this class.

    Parameters
    ----------
    n_classes : int
        Number of units of the output layer (default 13, the value that was
        previously hard-coded).
    '''

    def __init__(self, n_classes=13):
        super().__init__()
        self.block1 = ResidualBlock([8, 8, 8], 32)
        self.flat = tf.keras.layers.Flatten()
        # Block comes from utils.Block; its internals are not visible here.
        self.fc1  = Block(256)
        # No activation: the layer emits raw scores (softmax is applied later
        # when the submission is built).
        self.out = tf.keras.layers.Dense(n_classes, dtype='float64')

    def call(self, X):
        '''Run the forward pass and return the output layer's raw scores.'''
        X = self.block1(X)
        X = self.flat(X)
        X = self.fc1(X)
        X = self.out(X)
        return X

In [11]:
# NOTE(review): same display as the earlier tensor inspection cell; y_train
# has not been reassigned in between, so this cell looks redundant.
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [12]:
# Instantiate the Model class defined in this notebook (it shadows the Model
# imported from utils.Block).
model = Model()

In [13]:
# Fit with the project helper utils.train.train; the returned model replaces
# `model`. NOTE(review): epochs / early-stopping criteria live inside the
# helper and are not visible here - confirm before changing batch_size.
model = train(model, X_train, y_train, X_test, y_test, batch_size=2048)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch: 0 	 training_losses: 1.6414969581045034 	 testing_losses: 1.720705143039493
Epoch: 20 	 training_losses: 0.9358884954736737 	 testing_losses: 1.2290583422782038
Epoch: 40 	 training_losses: 0.754500843426197 	 testing_losses: 1.2308605025576922
early stopping


In [14]:
# Persist the trained weights under a checkpoint prefix named after the model.
model_name = '623_stack_nn'
checkpoint_prefix = f'M_336/checkpoints/{model_name}/model'
model.save_weights(checkpoint_prefix)

# Predict Submission

In [15]:
# Run the model on the held-out and submission features and keep the raw
# output scores as NumPy arrays (softmax is applied in a later cell).
X_test_pred, X_submit_pred = (
    np.squeeze(np.array(model(features)))
    for features in (X_test, X_submit)
)

X_test_pred.shape

(20000, 13)

In [16]:
# Convert the raw submission scores to per-class probabilities and label the
# columns class0 ... class12.
submit_probs = tf.nn.softmax(X_submit_pred).numpy()
cols = [f'class{i}' for i in range(13)]

df_submit = pd.DataFrame.from_records(submit_probs)
df_submit.columns = cols
df_submit.head()

Unnamed: 0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
0,0.002913,0.000784,0.163588,0.031841,0.05364,1.45487e-05,0.002004,0.009546,0.001562,0.005432,0.696661,0.000431,0.031582
1,1.5e-05,0.004859,0.769855,0.095586,0.063292,2.9837e-07,0.007175,0.022311,0.002237,0.011204,0.021714,5e-05,0.001702
2,0.001407,0.017295,0.108494,0.758484,0.006962,8.577958e-07,0.012168,0.004268,0.046176,0.005111,0.029042,0.000144,0.010447
3,0.000786,0.051103,0.225741,0.230985,0.303194,1.774857e-05,0.00869,0.095057,0.005118,0.016379,0.046884,0.001966,0.014079
4,0.001428,0.000793,0.014073,0.009252,0.012324,3.587268e-06,0.000238,8.7e-05,0.000505,0.000565,0.956289,0.000312,0.00413


In [17]:
# Attach the submission ids to the probability columns, matching rows by
# position. Selecting `cols` and resetting the index first makes this cell
# safe to re-run: on a second run df_submit is already indexed by id, and a
# plain concat would then align the two frames on unrelated index values.
class_probs = df_submit[cols].reset_index(drop=True)

# A length mismatch would otherwise be padded with NaN silently by concat.
assert len(class_probs) == len(y_submit), (
    f"{len(class_probs)} predictions for {len(y_submit)} submission ids"
)

df_submit = pd.concat([
    y_submit.reset_index(),
    class_probs
], axis=1).set_index('id')

df_submit

Unnamed: 0_level_0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
151807,0.002913,0.000784,0.163588,0.031841,0.053640,1.454870e-05,0.002004,9.546311e-03,0.001562,0.005432,0.696661,0.000431,0.031582
118131,0.000015,0.004859,0.769855,0.095586,0.063292,2.983700e-07,0.007175,2.231107e-02,0.002237,0.011204,0.021714,0.000050,0.001702
110921,0.001407,0.017295,0.108494,0.758484,0.006962,8.577958e-07,0.012168,4.267542e-03,0.046176,0.005111,0.029042,0.000144,0.010447
105149,0.000786,0.051103,0.225741,0.230985,0.303194,1.774857e-05,0.008690,9.505670e-02,0.005118,0.016379,0.046884,0.001966,0.014079
143868,0.001428,0.000793,0.014073,0.009252,0.012324,3.587268e-06,0.000238,8.715323e-05,0.000505,0.000565,0.956289,0.000312,0.004130
...,...,...,...,...,...,...,...,...,...,...,...,...,...
146316,0.005228,0.007074,0.087281,0.599973,0.184585,1.637012e-07,0.002016,9.135180e-03,0.008317,0.012874,0.025766,0.000465,0.057285
121816,0.002411,0.000124,0.069908,0.020502,0.545830,2.362115e-09,0.000192,6.125157e-07,0.000076,0.000017,0.360189,0.000011,0.000738
106217,0.000075,0.015843,0.033067,0.132337,0.768338,1.408737e-07,0.003799,7.219953e-04,0.003845,0.002537,0.037502,0.000218,0.001715
103515,0.013956,0.003484,0.107968,0.093555,0.291676,7.317501e-07,0.002702,2.692701e-04,0.014063,0.007449,0.055135,0.001405,0.408337


In [18]:
# Write the submission; the id index is written as the first CSV column.
df_submit.to_csv('010_submit.csv')