In [13]:
import tensorflow as tf

# Upper bound, in MB, on the memory TensorFlow may allocate on the first GPU.
GPU_MEMORY_LIMIT_MB = 1024 * 5

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Virtual devices can only be configured before the GPU has been
        # initialised, so re-running this cell in a live kernel ends up here.
        print("GPU memory limit not applied: ", err)

# Reuse the list fetched above instead of querying the devices a second time.
print("Num GPUs Available: ", len(gpus))

Num GPUs Available:  1


In [14]:
import numpy as np
import pandas as pd

from utils.Block import Model, Block
from utils.train import train

import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import make_scorer
from sklearn import ensemble, linear_model
from xgboost.sklearn import XGBClassifier

from catboost import Pool, CatBoostClassifier

def load_features(split):
    """Load the feature table for one split.

    Reads './data/410_X_<split>.csv' and './data/510_X_<split>_preds.csv'
    (the latter has no header row), joins them column-wise and indexes the
    result by the 'id' column.

    Args:
        split: one of 'submit', 'train' or 'test'.

    Returns:
        pandas.DataFrame indexed by 'id'.
    """
    base_features = pd.read_csv('./data/410_X_{}.csv'.format(split))
    stacked_preds = pd.read_csv('./data/510_X_{}_preds.csv'.format(split), header=None)
    return pd.concat([base_features, stacked_preds], axis=1).set_index('id')


X_submit = load_features('submit')
X_train = load_features('train')
X_test = load_features('test')

# y_submit is used further down to attach ids to the submission frame.
y_submit = pd.read_csv('./data/004_test.csv', index_col='id')
y_train = pd.read_csv('./data/410_y_train.csv', index_col='id')
y_test = pd.read_csv('./data/410_y_test.csv', index_col='id')

# One-hot encode the labels. The test matrix is re-laid-out on the training
# columns so both splits always share the same class order and width, even if
# a class happens to be absent from one of them.
y_train = pd.get_dummies(y_train['label'])
y_test = pd.get_dummies(y_test['label']).reindex(columns=y_train.columns, fill_value=0)

# Cheap invariants: one label row per feature row, same feature width everywhere.
assert len(X_train) == len(y_train), "train features/labels differ in length"
assert len(X_test) == len(y_test), "test features/labels differ in length"
assert X_train.shape[1] == X_test.shape[1] == X_submit.shape[1], "feature widths differ"

X_submit = X_submit.values
X_train = X_train.values
X_test = X_test.values
y_train = np.squeeze(y_train.values)
y_test = np.squeeze(y_test.values)

X_train.shape

(80000, 332)

In [15]:
# Peek at the one-hot label matrix produced by pd.get_dummies.
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

In [16]:
def _as_float64_tensor(array, add_trailing_axis=False):
    """Convert `array` to a float64 tensor, optionally inserting a new axis at position 2."""
    tensor = tf.convert_to_tensor(array, dtype='float64')
    if add_trailing_axis:
        tensor = tf.expand_dims(tensor, 2)
    return tensor


# Feature matrices get the extra axis; label matrices are converted as they are.
X_train, X_test, X_submit = (
    _as_float64_tensor(features, add_trailing_axis=True)
    for features in (X_train, X_test, X_submit)
)
y_train, y_test = (_as_float64_tensor(labels) for labels in (y_train, y_test))

In [17]:
# Check the labels after conversion to a float64 tensor.
y_train

<tf.Tensor: id=1091911, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [18]:
class ResidualBlock(tf.keras.Model):
    '''
    Residual block for a 1D convolutional time series model.

    Main path: Conv1D(kernel_size=1) -> BatchNorm -> ReLU ->
    Conv1D(kernel_size, padding='same') -> BatchNorm -> ReLU.
    Skip path: the block input projected by a Conv1D with kernel_size=1.
    The two paths are added and the sum goes through BatchNorm -> ReLU.

    Args:
        filters: sequence of three output channel counts, used for the first
            convolution, the second convolution and the skip projection, in
            that order. The second and third feed the same Add layer, so
            their outputs must have compatible shapes.
        kernel_size: kernel width of the second convolution.

    Ref: https://medium.com/the-artificial-impostor/notes-understanding-tensorflow-part-3-7f6633fcc7c7, 
         https://link.springer.com/content/pdf/10.1007%2Fs10618-019-00619-1.pdf
    '''
    
    # A tuple default avoids sharing one mutable list between all instances.
    def __init__(self, filters=(64, 64, 64), kernel_size=3):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(filters[0], kernel_size=1)
        self.conv2 = tf.keras.layers.Conv1D(filters[1], kernel_size=kernel_size, padding='same')
        self.conv3_skip = tf.keras.layers.Conv1D(filters[2], kernel_size=1)
        
        self.batchnorm1 = tf.keras.layers.BatchNormalization()
        self.batchnorm2 = tf.keras.layers.BatchNormalization()
        self.batchnorm3 = tf.keras.layers.BatchNormalization()
        
        self.add = tf.keras.layers.Add()
        
    def call(self, X, training=None):
        '''
        Apply the block to X.

        Args:
            X: input tensor accepted by Conv1D.
            training: forwarded to the BatchNormalization layers, which behave
                differently in training and inference. None (the default)
                leaves the choice to Keras, as when the argument is omitted.

        Returns:
            The activated sum of the main path and the projected skip path.
        '''
        
        # Keep the untouched input for the skip connection.
        save_X = X
        
        X = self.conv1(X)
        X = self.batchnorm1(X, training=training)
        X = tf.nn.relu(X)
        
        X = self.conv2(X)
        X = self.batchnorm2(X, training=training)
        X = tf.nn.relu(X)
        
        # Project the input with a 1-wide convolution before adding it back.
        X = self.add([X, self.conv3_skip(save_X)])
        X = self.batchnorm3(X, training=training)
        X = tf.nn.relu(X)
        
        return X

In [27]:
class Model(tf.keras.Model):
    '''
    Stacking network: two residual convolution blocks, a flatten step, two
    `Block(64)` layers and a final Dense layer that emits one raw score per
    class (no softmax is applied here).

    NOTE(review): this definition replaces the `Model` name imported from
    utils.Block in the imports cell. `Block` still comes from utils.Block and
    its internals are not visible in this notebook.

    Args:
        n_classes: width of the output layer. Defaults to 13.
    '''

    def __init__(self, n_classes=13):
        super().__init__()
        # ResidualBlock(filters, kernel_size): 4 channels each, kernel widths 64 and 32.
        self.block1 = ResidualBlock([4, 4, 4], 64)
        self.block2 = ResidualBlock([4, 4, 4], 32)
        self.flat = tf.keras.layers.Flatten()
        self.fc1  = Block(64)
        self.fc2  = Block(64)
        self.out = tf.keras.layers.Dense(n_classes, dtype='float64')
        
    def call(self, X):
        '''Run X through the layers in order and return the output layer's scores.'''
        X = self.block1(X)
        X = self.block2(X)
        X = self.flat(X)
        X = self.fc1(X)
        X = self.fc2(X)
        X = self.out(X)
        return X

In [28]:
# NOTE(review): repeats the earlier inspection of y_train; the cells in between only
# define classes, so the tensor is unchanged. Candidate for removal.
y_train

<tf.Tensor: id=1091911, shape=(80000, 13), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [29]:
# Instantiate the network defined in the Model class above.
model = Model()

In [30]:
# `train` is imported from utils.train; its return value replaces `model`.
# NOTE(review): the recorded run of this cell ended in a GPU out-of-memory error
# while allocating a [5000, 332, 4] tensor. TODO confirm whether batch_size=5000
# or memory held by earlier models in the same kernel is the cause.
model = train(model, X_train, y_train, X_test, y_test, batch_size=5000)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



ResourceExhaustedError: OOM when allocating tensor with shape[5000,332,4] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:BiasAdd] name: model_3/residual_block_6/conv1d_18/BiasAdd/

In [28]:
# Store the model weights under a checkpoint prefix named after the model.
model_name = '610_stack_nn'
checkpoint_prefix = '/'.join(['M_336', 'checkpoints', model_name, 'model'])
model.save_weights(checkpoint_prefix)

# Predict Submission

In [11]:
def _as_squeezed_array(outputs):
    """Convert model outputs to a NumPy array with all size-1 axes removed."""
    return np.squeeze(np.array(outputs))


# Raw model outputs for the held-out split and for the submission rows.
X_test_pred = _as_squeezed_array(model(X_test))
X_submit_pred = _as_squeezed_array(model(X_submit))

X_test_pred.shape

(20000, 13)

In [12]:
# Turn the raw submission outputs into per-class probabilities.
submit_probabilities = tf.nn.softmax(X_submit_pred).numpy()

# Column labels class0 .. class12, one per output unit.
cols = ['class' + str(class_idx) for class_idx in range(13)]

df_submit = pd.DataFrame.from_records(submit_probabilities, columns=cols)
df_submit.head()

Unnamed: 0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
0,0.00553,0.017018,0.194206,0.108872,0.134826,0.0003163015,0.004019,0.002715,0.005599,0.008661,0.334522,0.007391,0.176324
1,0.000257,0.011931,0.475083,0.29719,0.053025,4.676128e-06,0.004643,0.086381,0.006704,0.010666,0.044764,0.002092,0.007259
2,0.001272,0.020573,0.260378,0.566276,0.007327,3.032361e-06,0.023211,0.002836,0.033851,0.009986,0.055066,0.000711,0.018509
3,0.000794,0.029563,0.281463,0.201157,0.353089,1.93182e-05,0.003307,0.012233,0.004631,0.004232,0.077967,0.002857,0.028688
4,0.000685,0.003632,0.144441,0.03165,0.114033,2.39802e-07,0.001304,2.4e-05,0.003656,0.000616,0.682896,0.000202,0.016859


In [17]:
# The concat below pairs rows by position; a length mismatch would silently
# pad the shorter side with NaN instead of failing.
assert len(df_submit) == len(y_submit), "number of predictions differs from number of submission ids"

# reset_index(drop=True) is a no-op on the first run. On a re-run, when df_submit
# is already indexed by 'id', it restores the positional index so the rows still
# line up with y_submit.reset_index().
df_submit = pd.concat([
    y_submit.reset_index(),
    df_submit.reset_index(drop=True),
], axis=1)

df_submit = df_submit.set_index('id')
df_submit

Unnamed: 0_level_0,class0,class1,class2,class3,class4,class5,class6,class7,class8,class9,class10,class11,class12
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
151807,0.014811,0.012477,0.211700,0.101431,0.114950,7.378615e-06,0.003703,0.024584,0.004041,0.005654,0.351503,0.003975,0.151164
118131,0.000258,0.015453,0.405367,0.340845,0.083052,4.245976e-07,0.004999,0.068152,0.011348,0.013953,0.049724,0.000234,0.006617
110921,0.001428,0.018323,0.221510,0.687187,0.007387,4.925802e-07,0.004762,0.003744,0.012540,0.001526,0.019810,0.000331,0.021452
105149,0.000448,0.041217,0.289852,0.154545,0.340828,5.808531e-07,0.001026,0.051054,0.002478,0.002529,0.099349,0.001727,0.014948
143868,0.004046,0.004195,0.113199,0.063066,0.215284,4.050218e-06,0.001415,0.000359,0.002075,0.000393,0.583034,0.001678,0.011251
...,...,...,...,...,...,...,...,...,...,...,...,...,...
146316,0.004989,0.027021,0.180711,0.304243,0.302709,3.391095e-07,0.007697,0.030366,0.020216,0.010280,0.026087,0.000294,0.085388
121816,0.003338,0.030038,0.198639,0.098427,0.366782,1.087257e-05,0.004343,0.000250,0.002856,0.002260,0.285694,0.000349,0.007013
106217,0.001167,0.016636,0.019767,0.130547,0.731146,9.241584e-07,0.002916,0.000458,0.006342,0.008682,0.075869,0.000110,0.006358
103515,0.026274,0.022292,0.204729,0.193230,0.275964,6.213573e-07,0.003710,0.001423,0.018218,0.004550,0.059175,0.002238,0.188195


In [18]:
# Write the submission file; the 'id' index is saved as the first column.
df_submit.to_csv('005_submit.csv')