- Try a simple DNN for classification prediction
- Use the 64f data at first
- Also try with/without standardizing the data (maybe for NN the results are different)

In [11]:
# !pip install tensorflow
# !pip install scikeras

In [4]:
import tensorflow as tf
print(tf. __version__)

2.10.1


In [5]:
# Neural networks

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
# from tensorflow.keras.layers import Add, Input, Dense, Dropout, BatchNormalization, Embedding, Flatten, Concatenate, Reshape, Conv1D, AveragePooling1D, Multiply, MaxPool1D, Activation
from tensorflow.keras.regularizers import l1, l2
from scikeras.wrappers import KerasClassifier
from tensorflow_addons.layers import WeightNormalization

In [6]:
import pandas as pd 
import numpy as np
import sklearn
import pickle
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier, StackingClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, RandomizedSearchCV, cross_val_score

In [7]:
def make_submission(preds):
    assert len(preds) == 5000
    
    # Read labels
    with open('test_labels_sorted.npy', 'rb') as f:
        test_labels = np.load(f)
    len(test_labels)
    
    submission = pd.DataFrame(columns=['id', 'class'])
    for label, pred in zip(test_labels, preds):
        submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
    return submission

In [8]:
def plot_confusion_matrix(y_val_from_train, y_pred_from_train):
    fig, ax = plt.subplots(figsize=(8, 5))
    cmp = ConfusionMatrixDisplay(confusion_matrix(y_val_from_train, y_pred_from_train))
    cmp.plot(ax=ax)
    plt.show();

In [9]:
# Load X_train
with open('X_train_75_std.npy', 'rb') as f:
    X_train = np.load(f)
print(X_train.shape)
X_train

(9000, 75)


array([[ 0.66868409,  0.16353656,  0.63277468, ..., -0.68600237,
        -0.75071056, -0.6719989 ],
       [ 0.37202545,  0.27579237,  0.65904756, ...,  0.06479231,
        -0.48439794,  0.56611352],
       [-1.67047552, -1.2187742 , -2.37040642, ..., -0.65838631,
        -0.38095167, -0.1542973 ],
       ...,
       [-0.24270722, -0.14209669, -0.32285351, ...,  0.37591901,
        -0.37936549,  0.58761686],
       [ 0.38630325,  0.31266521,  0.37767149, ...,  0.83458681,
         0.3138927 ,  0.79569199],
       [ 0.33077964, -0.0822815 , -0.51143673, ..., -1.1650511 ,
        -0.410986  ,  0.44873132]])

In [10]:
# Load y_train
with open('y_train.npy', 'rb') as f:
    y_train = np.load(f)
print(y_train.shape)
y_train

(9000,)


array([ 7, 13,  6, ...,  3,  3, 18], dtype=int64)

In [11]:
# Load X_test
with open('X_test_75_std.npy', 'rb') as f:
    X_test = np.load(f)
print(X_test.shape)
X_test

(5000, 75)


array([[-0.59806336,  0.6264903 , -0.15256881, ...,  1.17376188,
         0.23880484,  0.48877589],
       [-0.47352949,  0.94441404, -0.23499654, ..., -0.41222519,
        -0.40989637, -0.65814035],
       [-0.64168924, -1.56209892, -1.06875674, ...,  0.3385773 ,
        -0.56342902,  0.74796695],
       ...,
       [ 1.00103725, -0.3371114 ,  0.37115316, ..., -1.16055904,
        -0.46010399, -0.62722221],
       [-0.98752639, -1.20156778, -1.41423994, ..., -0.81406819,
        -0.78249592,  1.12406601],
       [-0.21177287, -0.85742305, -0.54931118, ...,  0.02750228,
        -0.76809252, -1.25225668]])

## Make small changes to y_train

In [12]:
y_train -= 1

In [13]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=20)

In [14]:
print(y_train.shape)

(9000, 20)


## Let's Standard Scale X_train and X_test

In [127]:
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()

In [128]:
# print(X_train)
# print()
# X_train = scaler.fit_transform(X_train)
# print(X_train)

[[-8.81665000e-01  1.04447100e+00  1.59010940e-01 ...  7.64136797e-02
   2.23461397e-02  1.29471645e+01]
 [-1.10552300e+00  1.12647200e+00  1.74547752e-01 ...  1.13979068e-01
   2.85805084e-02  1.38184886e+01]
 [-2.64679000e+00  3.47160000e-02 -1.61696002e+00 ...  7.77954261e-02
   3.10021816e-02  1.33114980e+01]
 ...
 [-1.56939900e+00  8.21211000e-01 -4.06112460e-01 ...  1.29546035e-01
   3.10393141e-02  1.38336216e+01]
 [-1.09474900e+00  1.15340700e+00  8.15228667e-03 ...  1.52495097e-01
   4.72684630e-02  1.39800549e+01]
 [-1.13664700e+00  8.64905000e-01 -5.17633651e-01 ...  5.24448737e-02
   3.02990790e-02  1.37358806e+01]]

[[ 0.66868409  0.16353656  0.63277468 ... -0.68600237 -0.75071056
  -0.6719989 ]
 [ 0.37202545  0.27579237  0.65904756 ...  0.06479231 -0.48439794
   0.56611352]
 [-1.67047552 -1.2187742  -2.37040642 ... -0.65838631 -0.38095167
  -0.1542973 ]
 ...
 [-0.24270722 -0.14209669 -0.32285351 ...  0.37591901 -0.37936549
   0.58761686]
 [ 0.38630325  0.31266521  0.37767

In [129]:
# print(X_test)
# print()
# X_test = scaler.transform(X_test)
# print(X_test)

[[-1.83755000e+00  1.38265100e+00 -3.05412347e-01 ...  1.69465441e-01
   4.55106589e-02  1.37640621e+01]
 [-1.74357700e+00  1.61488900e+00 -3.54157081e-01 ...  9.01118934e-02
   3.03245873e-02  1.29569175e+01]
 [-1.87047000e+00 -2.16077000e-01 -8.47212227e-01 ...  1.27677673e-01
   2.67303936e-02  1.39464683e+01]
 ...
 [-6.30872000e-01  6.78756000e-01  4.29758667e-03 ...  5.26696304e-02
   2.91492287e-02  1.29786762e+01]
 [-2.13143800e+00  4.72850000e-02 -1.05151830e+00 ...  7.00060125e-02
   2.16020454e-02  1.42111488e+01]
 [-1.54605600e+00  2.98677000e-01 -5.40031205e-01 ...  1.12113293e-01
   2.19392284e-02  1.25388069e+01]]

[[-0.59806336  0.6264903  -0.15256881 ...  1.17376188  0.23880484
   0.48877589]
 [-0.47352949  0.94441404 -0.23499654 ... -0.41222519 -0.40989637
  -0.65814035]
 [-0.64168924 -1.56209892 -1.06875674 ...  0.3385773  -0.56342902
   0.74796695]
 ...
 [ 1.00103725 -0.3371114   0.37115316 ... -1.16055904 -0.46010399
  -0.62722221]
 [-0.98752639 -1.20156778 -1.41423

In [130]:
# # Save X_train
# with open('X_train_75_std.npy', 'wb') as f:
#     np.save(f, X_train)
    
# with open('X_test_75_std.npy', 'wb') as f:
#     np.save(f, X_test)

## Neural Network

In [17]:
input_shape=75
depth=1
output_dim=20
batch_mod=2
num_neurons=16
drop_rate=0.1


def create_neural_network():
    inputs = tf.keras.layers.Input(shape=(input_shape,))
    batch1 = tf.keras.layers.BatchNormalization()(inputs)
    hidden1 = tf.keras.layers.Dense(num_neurons, activation='sigmoid')(batch1)
    dropout1 = tf.keras.layers.Dropout(drop_rate)(hidden1)
    hidden2 = tf.keras.layers.Dense(int(num_neurons/2), activation='sigmoid')(dropout1)
    
    added_layer = tf.keras.layers.Concatenate()([batch1] + [hidden2])
    [batch1].append(added_layer)
    b1 = tf.keras.layers.BatchNormalization()(added_layer)

    h1 = tf.keras.layers.Dense(num_neurons, activation='sigmoid')(b1)
    d1 = tf.keras.layers.Dropout(drop_rate)(h1)
    h2 = tf.keras.layers.Dense(int(num_neurons/2), activation='sigmoid')(d1)
    d2 = tf.keras.layers.Dropout(drop_rate)(h2)
    h3 = tf.keras.layers.Dense(int(num_neurons/2), activation='sigmoid')(d2)
    d3 = tf.keras.layers.Dropout(drop_rate)(h3)
    h4 = tf.keras.layers.Dense(int(num_neurons/2), activation='sigmoid')(d3)
    c1 = tf.keras.layers.Concatenate()([batch1] + [h4])
    output = tf.keras.layers.Dense(output_dim, activation='softmax')(c1)
    
    model = tf.keras.Model(inputs=inputs, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

- Adam, epochs 300, bs 128, val_loss=0.18
- Adamax, epochs 300, bs 128, val_loss=0.19

In [18]:
estimator_neural_net = KerasClassifier(build_fn=create_neural_network)
estimator_neural_net.fit(X_train, y_train, validation_split=0.05, epochs=300, batch_size=128) # val loss 0.1864

Epoch 1/300




Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300

KeyboardInterrupt: 

### Train on full dataset

In [138]:
estimator_neural_net = KerasClassifier(build_fn=create_neural_network)
estimator_neural_net.fit(X_train, y_train, epochs=300, batch_size=128)

Epoch 1/300




Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 

In [139]:
preds = create_neural_network.predict(X_test)
preds



array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [140]:
preds_as_array = np.argmax(preds, axis=1)
preds_as_array += 1
preds_as_array

array([3, 4, 5, ..., 4, 5, 1], dtype=int64)

In [141]:
sub = make_submission(preds_as_array)
sub

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

Unnamed: 0,id,class
0,10001,3
1,10002,4
2,10004,5
3,10008,20
4,10009,17
...,...,...
4995,23986,19
4996,23991,12
4997,23992,4
4998,23998,5


In [142]:
sub.to_csv('submission_keras_1depth_300e_128bs_64f_std.csv', index=False)