In [28]:
import numpy as np
import pandas as pd
import pickle
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import CategoricalCrossentropy

In [2]:
def load_X_file(train_test):
    """Load the flattened feature matrix for one data split as a DataFrame.

    On the first call for a split the raw array is read from
    ``data/X_train.npy`` / ``data/X_test.npy``, every sample is flattened into
    a single row, and the resulting DataFrame is pickled to
    ``X_train_df.txt`` / ``X_test_df.txt`` in the working directory. Later
    calls read that pickle instead of rebuilding the frame.

    Parameters
    ----------
    train_test : str
        Which split to load: ``"train"`` or ``"test"``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, one column per flattened element.

    Raises
    ------
    ValueError
        If ``train_test`` is neither ``"train"`` nor ``"test"``.
    """
    if train_test == "train":
        npy_file = "data/X_train.npy"
        df_file = "X_train_df.txt"
    elif train_test == "test":
        npy_file = "data/X_test.npy"
        df_file = "X_test_df.txt"
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(f"Invalid type: {train_test}")

    if not os.path.exists(df_file):
        npy = np.load(npy_file)
        df = pd.DataFrame([x.flatten() for x in npy])
        # Write to a temporary name and rename afterwards, so an interrupted
        # dump never leaves a truncated cache file for the next call to load.
        tmp_file = df_file + ".tmp"
        with open(tmp_file, "wb") as fid:
            pickle.dump(df, fid)
        os.replace(tmp_file, df_file)
    else:
        # NOTE: unpickling executes code stored in the file; only load cache
        # files that this notebook produced itself.
        with open(df_file, "rb") as fid:
            df = pickle.load(fid, encoding="bytes")

    return df
    

In [3]:
# Training features: built from data/X_train.npy on first use, then read back from the pickle cache.
X_train_df = load_X_file("train")

In [4]:
# Test features: built from data/X_test.npy on first use, then read back from the pickle cache.
X_test_df = load_X_file("test")

In [5]:
# Preview the training features: one row per sample, one column per flattened value.
X_train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3062,3063,3064,3065,3066,3067,3068,3069,3070,3071
0,75,78,80,74,76,78,83,84,83,101,...,83,101,102,94,98,101,90,90,97,89
1,69,73,73,75,78,78,75,77,77,77,...,73,78,78,79,103,103,100,101,104,96
2,72,72,72,78,78,78,78,78,74,114,...,74,71,73,72,82,83,80,95,97,91
3,68,67,67,75,72,73,89,81,81,141,...,79,68,69,72,76,73,74,95,87,87
4,73,73,75,74,73,75,75,72,70,111,...,99,81,78,87,66,64,68,71,71,73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39204,12,11,12,11,11,13,13,11,13,13,...,13,12,11,13,10,10,14,11,11,12
39205,11,9,10,10,9,11,10,9,11,11,...,11,10,9,12,11,10,13,11,11,13
39206,10,10,12,12,12,13,13,11,13,11,...,10,10,10,12,11,11,13,11,11,13
39207,13,12,15,13,13,16,13,12,13,10,...,17,13,12,14,11,9,11,11,10,11


In [6]:
# Preview the test features: same column layout as the training frame.
X_test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3062,3063,3064,3065,3066,3067,3068,3069,3070,3071
0,116,139,174,116,137,171,118,138,172,117,...,164,115,138,167,117,138,167,114,140,170
1,59,70,61,87,80,63,92,81,63,96,...,58,62,59,62,73,67,70,75,70,70
2,52,40,38,51,39,37,50,38,37,50,...,34,41,40,38,44,43,43,47,46,44
3,132,125,87,100,92,60,82,73,34,96,...,152,235,239,196,255,251,217,255,240,180
4,37,34,30,42,38,32,46,40,36,44,...,35,46,41,38,48,43,40,49,45,42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12625,193,180,112,121,131,83,83,94,67,71,...,65,112,101,87,143,104,87,89,58,52
12626,22,21,21,25,24,24,27,28,28,32,...,41,45,41,40,46,42,39,47,43,40
12627,24,27,34,23,25,32,24,25,32,25,...,29,18,21,30,19,22,32,18,22,31
12628,47,58,73,49,53,67,59,57,63,35,...,37,28,32,40,36,37,40,29,27,22


In [7]:
# Standardize the features. The scaler is fitted on the training rows only and
# that same fitted scaler is then applied to both splits.
scaler = StandardScaler()
scaler_fit = scaler.fit(X_train_df)  # NOTE(review): `scaler_fit` is not read again in the visible cells
# NOTE(review): both names are rebound to the transformed data, so re-running
# this cell fits the scaler on already-transformed values; rerun the load
# cells first if the original values are needed.
X_train_df = scaler.transform(X_train_df) 
X_test_df = scaler.transform(X_test_df)

In [8]:
# Training labels; the shape is displayed to check the number of rows and label columns.
# NOTE(review): the previews below suggest one-hot rows (a single 1 per row) — confirm.
y_train = np.load("data/y_train.npy")
y_train.shape

(39209, 43)

In [9]:
# Wrap the label array in a DataFrame so it can be previewed and later split alongside the features.
y_train_df = pd.DataFrame(y_train)
y_train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Last rows of the label frame, for comparison with the head() preview.
y_train_df.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
39204,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
39205,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
39206,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
39207,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
39208,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [19]:
# Fully connected classifier: 3072 inputs -> 1800 -> 450 -> 43-way softmax.
nn = Sequential([
    Dense(1800, input_dim=3072, activation='relu'),
    Dense(450, activation='relu'),
    Dense(43, activation='softmax'),
])
nn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 1800)              5531400   
_________________________________________________________________
dense_4 (Dense)              (None, 450)               810450    
_________________________________________________________________
dense_5 (Dense)              (None, 43)                19393     
Total params: 6,361,243
Trainable params: 6,361,243
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Compile the model: categorical cross-entropy loss, the "adam" optimizer,
# and accuracy plus mean squared error reported as metrics.
nn.compile(loss=CategoricalCrossentropy(), optimizer="adam", metrics=["accuracy","mse"])

In [25]:
# Hold out 10% of the training rows for validation. A fixed seed keeps the
# split, and therefore every validation metric reported below, reproducible
# across kernel restarts.
SPLIT_SEED = 42
# The double underscore in `y_train__df_1` is kept because the training cell reads that name.
X_train_df_1, X_val_df, y_train__df_1, y_val_df = train_test_split(
    X_train_df, y_train_df, test_size=0.1, random_state=SPLIT_SEED
)

In [30]:
# Train for 100 epochs, evaluating on the held-out validation split after each epoch.
# `fit` returns the training history (per-epoch loss and metric values), not a
# model; the trained network is still `nn`.
history = nn.fit(X_train_df_1, y_train__df_1, validation_data=(X_val_df, y_val_df), epochs=100)
model = history  # backward-compatible alias for cells that still read `model`

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100


Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
