In [1]:
import tensorflow as tf

# Cap TensorFlow's memory use on the first GPU so the card can be shared.
# `memory_limit` is expressed in MB, i.e. 1024 * 5 = 5 GB.
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024 * 5)]
        )
    except RuntimeError as err:
        # Virtual devices can only be configured before the GPU has been
        # initialised; re-running this cell in a live kernel ends up here.
        print(err)

print("Num GPUs Available: ", len(gpus))

Num GPUs Available:  1


In [2]:
import numpy as np
import pandas as pd

from utils.Block import Model, Block
from utils.train import train

import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import make_scorer
from sklearn import ensemble, linear_model
from xgboost.sklearn import XGBClassifier

from catboost import Pool, CatBoostClassifier

def load_stacked_features(split):
    '''
    Load every feature file of one data split and stack them column-wise.

    Reads, from ./data:
      * 430_X_<split>.csv              (keeps its `id` column),
      * 441_X_<split>.csv              (its `id` column is dropped),
      * 510/520/522_X_<split>_preds.csv (header-less; presumably the
        predictions of earlier models - TODO confirm).
    The frames are concatenated side by side (rows are matched by position)
    and the result is indexed by `id`.

    Args:
        split: file-name suffix of the split: 'train', 'test' or 'submit'.

    Returns:
        pd.DataFrame indexed by `id`.
    '''
    frames = [
        pd.read_csv('./data/430_X_{}.csv'.format(split)),
        pd.read_csv('./data/441_X_{}.csv'.format(split)).drop('id', axis=1),
    ]
    for stage in ('510', '520', '522'):
        frames.append(
            pd.read_csv('./data/{}_X_{}_preds.csv'.format(stage, split), header=None)
        )
    return pd.concat(frames, axis=1).set_index('id')


X_submit = load_stacked_features('submit')
X_train = load_stacked_features('train')
X_test = load_stacked_features('test')

y_submit = pd.read_csv('./data/004_test.csv', index_col='id')
y_train = pd.read_csv('./data/410_y_train.csv', index_col='id')
y_test = pd.read_csv('./data/410_y_test.csv', index_col='id')

# Features, labels and submission ids are paired purely by row position once
# they are converted to arrays below, so make sure the ids really line up.
for split_name, features, targets in [
    ('submit', X_submit, y_submit),
    ('train', X_train, y_train),
    ('test', X_test, y_test),
]:
    assert features.index.equals(targets.index), (
        'ids of the {} features and targets are not aligned'.format(split_name)
    )

# One-hot encode both label sets against the same class list so that train
# and test always have identical columns, even if a class is absent from one.
label_classes = np.union1d(y_train['label'].dropna(), y_test['label'].dropna())
y_train = pd.get_dummies(y_train['label'], dtype=np.uint8).reindex(
    columns=label_classes, fill_value=0
)
y_test = pd.get_dummies(y_test['label'], dtype=np.uint8).reindex(
    columns=label_classes, fill_value=0
)

# From here on everything is a plain NumPy array (the `id` index is dropped).
X_submit = X_submit.values
X_train = X_train.values
X_test = X_test.values
y_train = np.squeeze(y_train.values)
y_test = np.squeeze(y_test.values)

X_train.shape

(80000, 565)

In [3]:
# Inspect the one-hot encoded training labels (still a NumPy array here).
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

In [4]:
def _as_conv_input(features):
    '''Return `features` as a float64 tensor with an extra axis inserted at position 2.'''
    as_tensor = tf.convert_to_tensor(features, dtype='float64')
    return tf.expand_dims(as_tensor, axis=2)


def _as_label_tensor(labels):
    '''Return `labels` as a float64 tensor (shape unchanged).'''
    return tf.convert_to_tensor(labels, dtype='float64')


# NOTE(review): the names are rebound in place, so running this cell twice
# adds a second trailing axis to the feature tensors.
# NOTE(review): the training log warns about float64 inputs being autocast by
# layers; `tf.keras.backend.set_floatx('float64')` is the fix it suggests.
X_train = _as_conv_input(X_train)
y_train = _as_label_tensor(y_train)
X_test = _as_conv_input(X_test)
y_test = _as_label_tensor(y_test)
X_submit = _as_conv_input(X_submit)

In [5]:
# Inspect the training labels again, now as a float64 tf.Tensor.
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [6]:
class ResidualBlock(tf.keras.Model):
    '''
    Residual 1-D convolution block for a time series model.

    Main path : Conv1D(kernel 1) -> BatchNorm -> ReLU
                -> Conv1D(kernel_size, 'same' padding) -> BatchNorm -> ReLU
    Skip path : Conv1D(kernel 1) applied to the block input.
    The two paths are added, batch-normalised and passed through a final ReLU.

    Ref: https://medium.com/the-artificial-impostor/notes-understanding-tensorflow-part-3-7f6633fcc7c7, 
         https://link.springer.com/content/pdf/10.1007%2Fs10618-019-00619-1.pdf
    '''
    
    def __init__(self, filters=(64, 64, 64), kernel_size=3):
        '''
        Args:
            filters: sequence of three ints - the number of filters of the
                first convolution, the second convolution and the skip
                convolution. filters[1] and filters[2] feed the addition, so
                they are expected to be compatible (normally equal).
            kernel_size: kernel width of the second convolution.
        '''
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(filters[0], kernel_size=1)
        self.conv2 = tf.keras.layers.Conv1D(filters[1], kernel_size=kernel_size, padding='same')
        # 1x1 convolution that projects the input for the skip connection.
        self.conv3_skip = tf.keras.layers.Conv1D(filters[2], kernel_size=1)
        
        self.batchnorm1 = tf.keras.layers.BatchNormalization()
        self.batchnorm2 = tf.keras.layers.BatchNormalization()
        self.batchnorm3 = tf.keras.layers.BatchNormalization()
        
        self.add = tf.keras.layers.Add()
        
    def call(self, X, training=None):
        '''
        Apply the residual block.

        Args:
            X: input tensor accepted by Conv1D (presumably
                (batch, steps, channels) - TODO confirm against callers).
            training: forwarded to every BatchNormalization layer so the
                caller can choose between batch statistics (True) and moving
                averages (False). None keeps the Keras default behaviour,
                which is what the block did before this argument existed.

        Returns:
            The activated sum of the main path and the skip path.
        '''
        
        # Keep the untouched input for the skip connection.
        save_X = X
        
        X = self.conv1(X)
        X = self.batchnorm1(X, training=training)
        X = tf.nn.relu(X)
        
        X = self.conv2(X)
        X = self.batchnorm2(X, training=training)
        X = tf.nn.relu(X)
        
        X = self.add([X, self.conv3_skip(save_X)])
        X = self.batchnorm3(X, training=training)
        X = tf.nn.relu(X)
        
        return X

In [7]:
class Model(tf.keras.Model):
    '''
    Stacking network: one residual convolution block, a flatten step, a
    `Block(128)` layer and a linear output layer.

    NOTE: this class shadows the `Model` name imported from `utils.Block`
    at the top of the notebook; after this cell runs, `Model` refers to this
    class.
    '''

    def __init__(self, num_classes=13):
        '''
        Args:
            num_classes: number of output units (one per class). Defaults to
                13, the number of label columns in this notebook.
        '''
        super().__init__()
        self.block1 = ResidualBlock([8, 8, 8], 32)
        self.flat = tf.keras.layers.Flatten()
        # `Block` comes from utils.Block; presumably a dense block with 128
        # units - TODO confirm.
        self.fc1  = Block(128)
        # No activation: the layer outputs raw scores, softmax is applied
        # later when the submission probabilities are built.
        self.out = tf.keras.layers.Dense(num_classes, dtype='float64')
        
    def call(self, X):
        '''Run the layers in sequence and return the raw (pre-softmax) class scores.'''
        X = self.block1(X)
        X = self.flat(X)
        X = self.fc1(X)
        X = self.out(X)
        return X

In [8]:
# Re-display the label tensor before training (unchanged since the float64 conversion).
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [9]:
# Instantiate the network defined in this notebook (not the `Model` imported from utils.Block).
model = Model()

In [10]:
# Fit the network with the project's `train` helper; it returns the model,
# which is rebound to `model`. The log below shows it stops early.
model = train(
    model,
    X_train, y_train,
    X_test, y_test,
    batch_size=5000,
    lr=0.01,
)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch: 0 	 training_losses: 3.4146093259024886 	 testing_losses: 3.364652561243749
Epoch: 20 	 training_losses: 1.2577342704985865 	 testing_losses: 1.2548203760760015
Epoch: 40 	 training_losses: 1.227949239836795 	 testing_losses: 1.2361729639974885
Epoch: 60 	 training_losses: 1.2170447401415887 	 testing_losses: 1.2309669378038344
Epoch: 80 	 training_losses: 1.2099274334371604 	 testing_losses: 1.2309196330325844
Epoch: 100 	 training_losses: 1.2100856028449813 	 testing_losses: 1.235131096873407
early stopping


In [None]:
# Persist the trained weights under a checkpoint prefix named after this model.
model_name = '623_stack_nn'
checkpoint_prefix = 'M_336/checkpoints/' + model_name + '/model'
model.save_weights(checkpoint_prefix)

# Predict Submission

In [13]:
def _predict_array(inputs):
    '''Run `model` on `inputs` and return its output as a squeezed NumPy array.'''
    raw_output = model(inputs)
    return np.squeeze(np.array(raw_output))


X_test_pred = _predict_array(X_test)
X_submit_pred = _predict_array(X_submit)

# Sanity check: one row per test sample, one column per class.
X_test_pred.shape

(20000, 13)

In [15]:
# Convert the raw model outputs for the submission set into probabilities.
submit_probs = tf.nn.softmax(X_submit_pred).numpy()

# One column per class, named class0 ... class12.
cols = ['class' + str(class_idx) for class_idx in range(13)]

df_submit = pd.DataFrame.from_records(submit_probs)
df_submit.columns = cols

df_submit.head()

Unnamed: 0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
0,0.00747,0.007442,0.160487,0.023385,0.048526,5.9e-05,0.009071,0.006161,0.009937,0.006545,0.696164,0.002307,0.022446
1,0.002819,0.02725,0.549443,0.255405,0.067093,3e-06,0.009243,0.006681,0.006988,0.007077,0.062307,0.002286,0.003404
2,0.008203,0.02944,0.1012,0.734709,0.013121,1e-06,0.013086,0.003601,0.02132,0.006894,0.050909,0.001703,0.015813
3,0.007321,0.098349,0.170118,0.336388,0.196407,1.3e-05,0.012196,0.026994,0.027323,0.018648,0.087134,0.007791,0.011317
4,0.002402,0.001579,0.015773,0.003498,0.01084,7e-06,0.00143,0.001963,0.002015,0.001467,0.951169,0.00054,0.007318


In [16]:
# Put the submission ids next to the probabilities (rows are matched by
# position) and use `id` as the index.
submit_ids = y_submit.reset_index()
df_submit = pd.concat([submit_ids, df_submit], axis=1).set_index('id')

df_submit

Unnamed: 0_level_0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
151807,0.007470,0.007442,0.160487,0.023385,0.048526,5.892949e-05,0.009071,0.006161,0.009937,0.006545,0.696164,0.002307,0.022446
118131,0.002819,0.027250,0.549443,0.255405,0.067093,3.175621e-06,0.009243,0.006681,0.006988,0.007077,0.062307,0.002286,0.003404
110921,0.008203,0.029440,0.101200,0.734709,0.013121,1.461303e-06,0.013086,0.003601,0.021320,0.006894,0.050909,0.001703,0.015813
105149,0.007321,0.098349,0.170118,0.336388,0.196407,1.306586e-05,0.012196,0.026994,0.027323,0.018648,0.087134,0.007791,0.011317
143868,0.002402,0.001579,0.015773,0.003498,0.010840,6.667874e-06,0.001430,0.001963,0.002015,0.001467,0.951169,0.000540,0.007318
...,...,...,...,...,...,...,...,...,...,...,...,...,...
146316,0.005603,0.015884,0.084298,0.724209,0.088018,3.106997e-06,0.008461,0.004257,0.015850,0.007089,0.021938,0.001814,0.022576
121816,0.000678,0.001743,0.144539,0.039718,0.303191,8.135428e-08,0.000754,0.000481,0.000435,0.001270,0.502950,0.000283,0.003957
106217,0.001405,0.027324,0.027470,0.110047,0.761581,1.371465e-06,0.001361,0.011685,0.004393,0.006127,0.041779,0.002743,0.004084
103515,0.010315,0.007023,0.097104,0.106760,0.286495,1.863471e-05,0.009029,0.000315,0.020649,0.006500,0.109453,0.001182,0.345156


In [17]:
# Write the submission file; the `id` index is written as the first column.
df_submit.to_csv('007_submit.csv')