In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.impute import SimpleImputer, KNNImputer

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils import class_weight

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from utils import get_data, plot_cm_keras, filter_columns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height in inches) for every plot in this notebook.
matplotlib.rcParams['figure.figsize'] = [15, 15]

2021-12-09 15:03:34.207620: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def define_model(input_shape, num_classes=19, learning_rate=0.001):
    """Build and compile a CNN classifier for square, single-channel inputs.

    Architecture: three Conv2D(64, "valid" padding, ReLU) stages with kernel
    sizes 5x5, 5x5 and 3x3, each followed by 2x2 max pooling, then
    Flatten -> Dense(1024) -> Dense(256) -> softmax.
    The model summary is printed as a side effect.

    Args:
        input_shape: Side length of the square input; the network expects
            tensors of shape (input_shape, input_shape, 1).
        num_classes: Number of softmax output units (previously hard-coded
            to 19, which remains the default).
        learning_rate: Learning rate passed to the Adam optimizer
            (previously hard-coded to 0.001, which remains the default).

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy loss,
        Adam with AMSGrad, accuracy metric).
    """
    ip = Input(shape=(input_shape, input_shape, 1), name="input")

    # Convolutional feature extractor: conv -> pool, three times.
    x = Conv2D(64, (5, 5), padding="valid", activation="relu")(ip)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, (5, 5), padding="valid", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, (3, 3), padding="valid", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Flatten()(x)

    # Fully connected classification head.
    x = Dense(units=1024, activation="relu")(x)
    x = Dense(units=256, activation="relu")(x)
    op = Dense(units=num_classes, activation="softmax")(x)
    model = Model(inputs=ip, outputs=op, name="full_model")
    model.summary()

    optimizer = Adam(learning_rate=learning_rate, amsgrad=True)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

In [3]:
# Load the labelled data and separate the feature columns from the target.
XY_train, X_test, inverse_target_map = get_data(min_size=None, min_size_test=None, nan_thresh=40, fill_nan=None)
train_columns = list(XY_train.columns)
train_columns.remove("TARGET_NUM")

Y_train = XY_train["TARGET_NUM"].values

# NOTE(review): every scaler/imputer below is fitted on the full data set,
# i.e. before the train/validation split performed in a later cell, so
# validation rows influence the fitted statistics — confirm this is intended.

# Step 1: rescale observed values into [2, 10]. This keeps 1 and 0 free to
# act as sentinels in the next two steps.
min_max_scaler = MinMaxScaler(feature_range=(2, 10))
X_train_minmax = min_max_scaler.fit(XY_train[train_columns])  # fit() returns the scaler itself
x_train = X_train_minmax.transform(XY_train[train_columns])

# Step 2: replace missing values (NaN) with the constant 1.
imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=1)
imp_train = imp.fit(x_train)  # fit() returns the imputer itself
x_train_full = imp_train.transform(x_train)

x_train_full_df = pd.DataFrame(x_train_full, columns=train_columns, index=XY_train.index)

# Step 3: within each column, any value that occurs at most `min_size` times
# is replaced with 0.
min_size = 150

for c in x_train_full_df.columns:
    # Per-row count of how often that row's value appears in column c.
    occurrences = x_train_full_df.groupby(c)[c].transform('size')
    # Use .loc rather than chained indexing (df[c][mask] = 0): the chained
    # form triggers SettingWithCopyWarning and does not modify the frame at
    # all when pandas Copy-on-Write is enabled.
    x_train_full_df.loc[occurrences <= min_size, c] = 0

# Step 4: standardize every column (zero mean, unit variance).
stand_scaler = StandardScaler()
X_train_stand = stand_scaler.fit(x_train_full_df[train_columns])  # fit() returns the scaler itself
x_train_stand = X_train_stand.transform(x_train_full_df[train_columns])

In [4]:
# Sanity check: display the shape of the standardized feature matrix.
x_train_stand.shape

(1760089, 43)

In [5]:
# Drop the intermediates that are no longer needed so their memory can be
# reclaimed before the (much larger) image tensor is built.
del X_test, x_train, x_train_full, x_train_full_df, XY_train

In [6]:
# Turn every feature vector v (length d) into a (d, d, 1) single-channel
# "image" holding the outer product v[j] * v[k]. A single broadcast multiply
# over the whole matrix yields the same values as calling np.outer per row,
# without creating one small Python-level array per sample.
x_train_image = (x_train_stand[:, :, np.newaxis] * x_train_stand[:, np.newaxis, :])[..., np.newaxis]

In [7]:
# Ensure the images are one contiguous ndarray. np.asarray converts a list
# exactly like np.array, but returns the input unchanged (no second full
# copy in memory) when it is already an ndarray, e.g. if this cell is re-run.
x_train_image = np.asarray(x_train_image)

In [None]:
# One-hot encode the integer class labels for categorical cross-entropy.
y_train = to_categorical(Y_train)
# Hold out 30% for validation. The split is stratified on the integer labels
# and seeded, matching the split used in the "Mask columns" section, so that
# class proportions are preserved and results are reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(x_train_image, y_train, test_size=0.30, stratify=Y_train,
                                                    random_state=42)

In [None]:
# The images are square, so axis 1 of the batch gives the side length the
# network input needs.
model = define_model(X_train.shape[1])

# Train for 150 epochs in mini-batches of 100, evaluating on the held-out
# split after every epoch; `history` keeps the per-epoch metrics.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=100,
    epochs=150,
    verbose=1,
    validation_data=(X_test, y_test),
)

## Mask columns

In [12]:
def define_model(input_shape, num_classes=19, learning_rate=0.001):
    """Build and compile a pooling-free CNN classifier for small square inputs.

    NOTE: this redefines (and therefore shadows) the ``define_model`` declared
    earlier in the notebook; cells executed after this one get this variant.

    Architecture: three Conv2D(64, 3x3, "valid" padding, ReLU) layers with no
    pooling in between, then Flatten -> Dense(1024) -> Dense(256) -> softmax.
    For the 9x9 input recorded in this notebook's model summary, the
    convolutions alone shrink the feature maps to 7x7, 5x5 and 3x3.
    NOTE(review): pooling is presumably omitted because the input is already
    that small — confirm.
    The model summary is printed as a side effect.

    Args:
        input_shape: Side length of the square input; the network expects
            tensors of shape (input_shape, input_shape, 1).
        num_classes: Number of softmax output units (previously hard-coded
            to 19, which remains the default).
        learning_rate: Learning rate passed to the Adam optimizer
            (previously hard-coded to 0.001, which remains the default).

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy loss,
        Adam with AMSGrad, accuracy metric).
    """
    ip = Input(shape=(input_shape, input_shape, 1), name="input")

    # Convolutional feature extractor (no pooling stages).
    x = Conv2D(64, (3, 3), padding="valid", activation="relu")(ip)
    x = Conv2D(64, (3, 3), padding="valid", activation="relu")(x)
    x = Conv2D(64, (3, 3), padding="valid", activation="relu")(x)
    x = Flatten()(x)

    # Fully connected classification head.
    x = Dense(units=1024, activation="relu")(x)
    x = Dense(units=256, activation="relu")(x)
    op = Dense(units=num_classes, activation="softmax")(x)
    model = Model(inputs=ip, outputs=op, name="full_model")
    model.summary()

    optimizer = Adam(learning_rate=learning_rate, amsgrad=True)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

In [8]:
# Reload the data with nan_thresh=10 and separate features from the target
# column.
XY_train, X_test_ori, inverse_target_map = get_data(min_size=None, min_size_test=None, nan_thresh=10, fill_nan=None)
train_columns = list(XY_train.columns)
train_columns.remove("TARGET_NUM")

y_train_ori = XY_train["TARGET_NUM"].values
x_train = XY_train[train_columns].values

# 1) Rescale observed values into [0, 10].
min_max_scaler = MinMaxScaler(feature_range=(0, 10))
x_train = min_max_scaler.fit_transform(x_train)

# 2) Fill missing entries with -1, a value below the scaled range.
imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-1)
x_train = imp.fit_transform(x_train)

# (A RobustScaler step is intentionally left disabled here.)

# 3) Standardize every column.
stand_scaler = StandardScaler()
x_train = stand_scaler.fit_transform(x_train)

# 4) Apply the project's column filter from utils.
x_train = filter_columns(x_train)

In [9]:
# Sanity check: display the shape of the feature matrix after column filtering.
x_train.shape

(3696242, 9)

In [10]:
# Drop the reference to the raw DataFrame; features and labels were already
# extracted into x_train and y_train_ori.
del XY_train

In [11]:
# This cell rebinds `x_train` to its own output, so running it twice would
# feed the images back in as if they were feature vectors. Fail fast instead.
x_train = np.asarray(x_train)
if x_train.ndim != 2:
    raise ValueError(
        "x_train must be a 2-D (samples, features) matrix; got shape "
        f"{x_train.shape}. Re-run the preprocessing cell before this one."
    )

# Turn every feature vector v (length d) into a (d, d, 1) single-channel
# "image" holding the outer product v[j] * v[k]. A single broadcast multiply
# yields the same values as calling np.outer per row, without creating one
# small Python-level array per sample.
x_train = (x_train[:, :, np.newaxis] * x_train[:, np.newaxis, :])[..., np.newaxis]

# One-hot encode the labels, then hold out 30% for validation with a
# stratified, seeded split.
y_train = to_categorical(y_train_ori)
X_train, X_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.3, stratify=y_train_ori,
                                                    random_state=42)

In [None]:
# Build the network for the square input images (side length = first
# dimension of a single sample).
model = define_model(X_train[0].shape[0])
# Train for 150 epochs in mini-batches of 100, evaluating on the held-out
# split after every epoch; `history` keeps the per-epoch metrics.
history = model.fit(X_train,
                    y_train,
                    batch_size=100,
                    epochs=150,
                    verbose=1,
                    validation_data=(X_test, y_test))
# `plot_cm` is neither defined nor imported in this notebook, so the original
# call raised NameError only after the whole training run had finished. The
# helper actually imported from utils is `plot_cm_keras`.
# NOTE(review): confirm plot_cm_keras accepts (model, X, y) in this order.
plot_cm_keras(model, X_test, y_test)

2021-12-09 15:11:53.526863: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-12-09 15:11:53.570879: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:05:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.91GiB deviceMemoryBandwidth: 451.17GiB/s
2021-12-09 15:11:53.571359: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-12-09 15:11:53.611938: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-12-09 15:11:53.636835: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-12-09 15:11:53.642535: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so

Model: "full_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 9, 9, 1)]         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 7, 7, 64)          640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 5, 5, 64)          36928     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dense (Dense)                (None, 1024)              590848    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2

2021-12-09 15:11:56.299159: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-12-09 15:11:56.711042: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7


Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
  919/25874 [>.............................] - ETA: 2:59 - loss: 2.1843 - accuracy: 0.2944