In [1]:
import tensorflow as tf

# Cap how much memory TensorFlow may claim on the first GPU.
# `memory_limit` is expressed in megabytes, so this is a 7 GB cap.
GPU_MEMORY_LIMIT_MB = 1024*7

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Virtual devices can only be configured before the GPU has been
        # initialised; re-running this cell in a live kernel lands here.
        print(err)

# Reuse the list fetched above; on a CPU-only machine this reports 0
# instead of failing on gpus[0].
print("Num GPUs Available: ", len(gpus))

Num GPUs Available:  1


In [2]:
import numpy as np
import pandas as pd

from utils.Block import Model, Block
from utils.train import train

import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import make_scorer
from sklearn import ensemble, linear_model
from xgboost.sklearn import XGBClassifier

from catboost import Pool, CatBoostClassifier

def _load_features(split):
    """Load the feature matrix for one data split, indexed by ``id``.

    Reads ``./data/420_X_<split>.csv`` and the header-less
    ``./data/510_X_<split>_preds.csv`` and joins them column-wise.

    Args:
        split: one of ``'submit'``, ``'train'`` or ``'test'``.

    Returns:
        A DataFrame indexed by the ``id`` column.
    """
    # NOTE(review): the two files are joined on their default row index,
    # so they are assumed to list rows in the same order -- confirm.
    features = pd.concat([
        pd.read_csv('./data/420_X_' + split + '.csv'),
        pd.read_csv('./data/510_X_' + split + '_preds.csv', header=None),
    ], axis=1)
    return features.set_index('id')


X_submit = _load_features('submit')
X_train  = _load_features('train')
X_test   = _load_features('test')


# y_submit is loaded from the raw test file and indexed by id.
y_submit = pd.read_csv('./data/004_test.csv', index_col='id')
y_train = pd.read_csv('./data/420_y_train.csv', index_col='id')
y_test = pd.read_csv('./data/420_y_test.csv', index_col='id')


# One-hot encode the labels. The test labels are aligned to the training
# columns so both matrices have the same width and column order even if a
# class happens to be absent from the test split.
y_train = pd.get_dummies(y_train['label'])
y_test = (
    pd.get_dummies(y_test['label'])
    .reindex(columns=y_train.columns, fill_value=0)
    .astype(y_train.values.dtype)
)

# Drop the pandas wrappers; from here on everything is a plain NumPy array.
X_submit = X_submit.values
X_train = X_train.values
X_test = X_test.values
y_train = np.squeeze(y_train.values)
y_test = np.squeeze(y_test.values)

X_train.shape

(80000, 362)

In [3]:
# Peek at the one-hot encoded training labels (a NumPy array at this point).
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

In [4]:
def _as_model_input(features):
    """Convert a feature matrix to a float64 tensor with a trailing axis.

    A 2-D input gains a size-1 axis at position 2, giving the 3-D input
    that the Conv1D layers of the model consume. An input that is not
    2-D (for example because this cell has already been run) is returned
    without a further axis, so re-running the cell is harmless.

    Args:
        features: array-like or tensor of features.

    Returns:
        A float64 ``tf.Tensor``.
    """
    tensor = tf.convert_to_tensor(features, dtype='float64')
    if len(tensor.shape) == 2:
        tensor = tf.expand_dims(tensor, 2)
    return tensor


X_train = _as_model_input(X_train)
y_train = tf.convert_to_tensor(y_train, dtype='float64')
X_test  = _as_model_input(X_test)
y_test  = tf.convert_to_tensor(y_test, dtype='float64')
X_submit = _as_model_input(X_submit)

In [5]:
# Confirm the labels are now a float64 tensor.
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [6]:
class ResidualBlock(tf.keras.Model):
    '''
    Residual layer for time series model.

    The main path is two Conv1D -> BatchNormalization -> ReLU stages
    (a width-1 convolution followed by a width-`kernel_size` one, both
    causally padded). A width-1 Conv1D projection of the input is added
    to the main path, and the sum goes through a final
    BatchNormalization -> ReLU.

    Args:
        filters: three channel counts, used in order for the first
            convolution, the second convolution and the skip projection.
            The second and third entries feed the addition, so their
            outputs must have compatible shapes.
        kernel_size: kernel width of the second convolution.

    Ref: https://medium.com/the-artificial-impostor/notes-understanding-tensorflow-part-3-7f6633fcc7c7, 
         https://link.springer.com/content/pdf/10.1007%2Fs10618-019-00619-1.pdf
    '''
    
    # A tuple default avoids sharing one mutable list between instances;
    # callers may still pass a list.
    def __init__(self, filters=(64, 64, 64), kernel_size=3):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(filters[0], kernel_size=1, padding='causal')
        self.conv2 = tf.keras.layers.Conv1D(filters[1], kernel_size=kernel_size, padding='causal')
        self.conv3_skip = tf.keras.layers.Conv1D(filters[2], kernel_size=1, padding='same')
        
        self.batchnorm1 = tf.keras.layers.BatchNormalization()
        self.batchnorm2 = tf.keras.layers.BatchNormalization()
        self.batchnorm3 = tf.keras.layers.BatchNormalization()
        
        self.add = tf.keras.layers.Add()
        
    def call(self, X, training=None):
        '''
        Apply the residual block to X.

        Args:
            X: input tensor for the Conv1D layers.
            training: optional flag forwarded to the BatchNormalization
                layers. Leaving it as None keeps Keras' own resolution
                of training vs. inference mode, as before.

        Returns:
            The activated sum of the main path and the skip projection.
        '''
        
        # Keep the untouched input for the skip connection.
        save_X = X
        
        X = self.conv1(X)
        X = self.batchnorm1(X, training=training)
        X = tf.nn.relu(X)
        
        X = self.conv2(X)
        X = self.batchnorm2(X, training=training)
        X = tf.nn.relu(X)
        
        # Project the input to the main path's channel count, then add.
        X = self.add([X, self.conv3_skip(save_X)])
        X = self.batchnorm3(X, training=training)
        X = tf.nn.relu(X)
        
        return X

In [7]:
class Model(tf.keras.Model):
    '''
    Stacking network: one ResidualBlock, flatten, a Block(32), and a
    linear Dense output layer.

    The output layer has no activation, so `call` returns raw scores;
    softmax is applied separately when predictions are turned into
    probabilities.

    NOTE: this class shadows the `Model` imported from `utils.Block`
    once this cell has run.

    Args:
        n_classes: number of output units (defaults to the 13 classes
            used in this notebook).
    '''
    def __init__(self, n_classes=13):
        super().__init__()
        self.block1 = ResidualBlock([16, 8, 8], 32)
        self.flat = tf.keras.layers.Flatten()
        # Block comes from utils.Block -- presumably a dense block of
        # width 32; verify against that module.
        self.fc1  = Block(32)
        self.out = tf.keras.layers.Dense(n_classes, dtype='float64')
        
    def call(self, X):
        '''Run X through the residual block, flatten, Block and output layer.'''
        X = self.block1(X)
        X = self.flat(X)
        X = self.fc1(X)
        X = self.out(X)
        return X

In [8]:
# Re-display the label tensor before building and training the model.
y_train

<tf.Tensor: id=4, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [9]:
# Instantiate the network. When cells run in order, `Model` here is the
# class defined in this notebook, not the one imported from utils.Block.
model = Model()

In [10]:
# Fit the model with the train() helper imported from utils.train.
# NOTE(review): train() is not visible in this notebook; batch_size and lr
# are presumably the mini-batch size and learning rate -- confirm.
# `model` is rebound to whatever train() returns, so re-running this cell
# passes the previously returned object back in.
model = train(model, X_train, y_train, X_test, y_test, batch_size=5000, lr=0.001)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch: 0 	 training_losses: 1.797694574077228 	 testing_losses: 1.7797307982143658
Epoch: 20 	 training_losses: 1.3693819525396527 	 testing_losses: 1.3707923295835567
Epoch: 40 	 training_losses: 1.3332922274605385 	 testing_losses: 1.3506877125977856
Epoch: 60 	 training_losses: 1.319100071322303 	 testing_losses: 1.3484745628202746
Epoch: 80 	 training_losses: 1.3042385191988752 	 testing_losses: 1.3506853066961761
early stopping


In [12]:
# Store the model's weights under this experiment's checkpoint prefix.
model_name = '621_stack_nn'
model.save_weights(''.join(['M_336/checkpoints/', model_name, '/model']))

# Predict Submission

In [13]:
# Run the model on the held-out and submission inputs. Each result is
# copied into a NumPy array and then stripped of any size-1 axes.
X_test_pred = np.array(model(X_test))
X_test_pred = np.squeeze(X_test_pred)

X_submit_pred = np.array(model(X_submit))
X_submit_pred = np.squeeze(X_submit_pred)

X_test_pred.shape

(20000, 13)

In [14]:
# Turn the raw model outputs for the submission set into per-class
# probabilities (softmax over the last axis).
submit_probs = tf.nn.softmax(X_submit_pred).numpy()
df_submit = pd.DataFrame.from_records(submit_probs)

# One label per probability column. The count is taken from the frame
# itself so it always matches the model's output width.
cols = ['class'+str(i) for i in range(df_submit.shape[1])]

df_submit.columns = cols
df_submit.head()

Unnamed: 0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
0,0.047228,0.013356,0.217787,0.193598,0.081389,2.7e-05,0.014487,0.056235,0.014428,0.007649,0.315274,0.00486,0.033682
1,0.003169,0.016689,0.336033,0.493583,0.099343,3e-06,0.003218,0.022196,0.003294,0.002439,0.017738,0.000969,0.001325
2,0.007505,0.045805,0.249439,0.567025,0.015138,0.000426,0.022931,0.008063,0.030113,0.009485,0.030294,0.002458,0.011316
3,0.008496,0.098438,0.241219,0.254036,0.131018,0.000401,0.012611,0.134592,0.01265,0.017318,0.072497,0.014126,0.0026
4,0.003903,0.009389,0.103867,0.030968,0.226976,2e-06,0.003461,0.008484,0.003665,0.005484,0.589723,0.00381,0.01027


In [15]:
# Attach the submission ids to the class probabilities.
# Selecting `cols` and resetting the index makes this cell safe to re-run:
# a second run starts from the same probability columns as the first.
submit_ids = y_submit.reset_index()
class_probs = df_submit[cols].reset_index(drop=True)

# Rows are matched purely by position, so a length mismatch would
# otherwise show up only as silent NaN rows in the output.
# NOTE(review): the ids come from ./data/004_test.csv, not from the
# feature file -- confirm both list rows in the same order.
if len(submit_ids) != len(class_probs):
    raise ValueError(
        'id count (%d) does not match prediction count (%d)'
        % (len(submit_ids), len(class_probs))
    )

df_submit = pd.concat([
    submit_ids,
    class_probs
], axis=1)

df_submit = df_submit.set_index('id')
df_submit

Unnamed: 0_level_0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
151807,0.047228,0.013356,0.217787,0.193598,0.081389,2.736982e-05,0.014487,0.056235,0.014428,0.007649,0.315274,0.004860,0.033682
118131,0.003169,0.016689,0.336033,0.493583,0.099343,2.812383e-06,0.003218,0.022196,0.003294,0.002439,0.017738,0.000969,0.001325
110921,0.007505,0.045805,0.249439,0.567025,0.015138,4.263303e-04,0.022931,0.008063,0.030113,0.009485,0.030294,0.002458,0.011316
105149,0.008496,0.098438,0.241219,0.254036,0.131018,4.005186e-04,0.012611,0.134592,0.012650,0.017318,0.072497,0.014126,0.002600
143868,0.003903,0.009389,0.103867,0.030968,0.226976,1.898504e-06,0.003461,0.008484,0.003665,0.005484,0.589723,0.003810,0.010270
...,...,...,...,...,...,...,...,...,...,...,...,...,...
146316,0.009113,0.039865,0.199072,0.402397,0.167257,2.188314e-04,0.023544,0.005639,0.045632,0.017598,0.032225,0.004367,0.053072
121816,0.000870,0.001354,0.398294,0.045998,0.439797,1.133270e-09,0.000503,0.000964,0.000456,0.000433,0.108553,0.000157,0.002621
106217,0.000099,0.036782,0.009516,0.101348,0.811699,3.241779e-05,0.001200,0.003254,0.003389,0.013585,0.013678,0.004415,0.001002
103515,0.028187,0.011836,0.241232,0.180302,0.251149,2.426629e-05,0.018618,0.006435,0.030647,0.009383,0.072243,0.003095,0.146850


In [16]:
# Write the submission table (id index plus the class probability columns)
# to a CSV file in the working directory.
df_submit.to_csv('006_submit.csv')