In [196]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

In [197]:
def read_csv_data(filename):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    filename : path or file-like object
        Passed straight through to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents, using ``pd.read_csv`` defaults.
    """
    frame = pd.read_csv(filename)
    return frame

In [198]:
def create_model(lr, n_features=75, n_classes=9):
    """Build and compile the fully connected softmax classifier.

    The network is three ReLU hidden layers (512 -> 256 -> 128 units)
    followed by a softmax output layer, compiled with Adam and a mean
    squared error loss.

    Parameters
    ----------
    lr : float
        Learning rate handed to the Adam optimizer.
    n_features : int, optional
        Width of the input vector. Defaults to 75, the value that was
        previously hard-coded.
    n_classes : int, optional
        Number of softmax outputs. Defaults to 9, the value that was
        previously hard-coded.

    Returns
    -------
    tensorflow.keras.Model
        The compiled model, ready for ``fit``.
    """
    inputs = Input(shape=(n_features,))
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(n_classes, activation='softmax')(x)

    model = Model(inputs=[inputs], outputs=[outputs])
    # `learning_rate` is the supported keyword; `lr` is only a deprecated
    # alias and is rejected by newer Keras releases.
    # NOTE(review): MSE on softmax outputs is kept to preserve behaviour;
    # categorical cross-entropy is the usual loss for one-hot targets - confirm.
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss=MeanSquaredError(),
        metrics=['mean_squared_error'],
    )

    return model

In [199]:
def split_xy(X):
    """Separate the 'target' column from the rest of the frame.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame containing a 'target' column.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is a copy of ``X``
        without the 'target' column and ``labels`` is ``X['target']``.
        ``X`` itself is not modified.
    """
    labels = X['target']
    features = X.drop(columns=['target'])
    return features, labels

In [200]:
def create_y(y):
    """One-hot encode the labels 'Class_1' .. 'Class_9'.

    Parameters
    ----------
    y : pandas.Series
        Label values; anything exposing ``to_list()`` works.

    Returns
    -------
    pandas.DataFrame
        Shape ``(len(y), 9)`` with float64 columns ``0..8`` and a fresh
        default index. Column ``k`` is 1.0 where the label is
        ``'Class_{k+1}'`` and 0.0 elsewhere. A label outside the nine known
        classes prints an error message and leaves its row all zeros.
    """
    class_names = [f'Class_{k}' for k in range(1, 10)]
    column_of = {name: col for col, name in enumerate(class_names)}

    labels = y.to_list()
    # A pre-allocated float matrix gives every column the same dtype, even
    # for classes that never occur, and keeps the 9-column shape when the
    # input is empty.
    encoded = np.zeros((len(labels), len(class_names)), dtype=float)

    for row, label in enumerate(labels):
        col = column_of.get(label)
        if col is None:
            # Unknown label: report it and keep the row as all zeros.
            print("ERROR! TARGET IS OUT OF RANGE!")
            continue
        encoded[row, col] = 1.0

    return pd.DataFrame(encoded)

# TRAIN

In [201]:
# READ TRAIN DATA
# Load 'train.csv' (path relative to the working directory) into `df`.
# Later cells read its 'target' column as the label and drop its 'id'
# column before feeding the remaining columns to the model.
df = read_csv_data('train.csv')

In [202]:
# CREATE TF MODEL
# Build and compile the network with a learning rate of 1e-4.
tf_model = create_model(lr=0.0001)

In [203]:
# SPLIT X AND Y
# X keeps every column except 'target' (including 'id'); Y is the label column.
X, Y = split_xy(df)
# SPLIT Y TARGETS
# One-hot encode the class labels into a 9-column frame.
y = create_y(Y)
# SPLIT TRAIN TEST
# Hold out 20% of the rows. A fixed random_state makes the shuffle, and
# therefore every downstream score, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

In [204]:
# Train for 100 epochs on the training split. The 'id' column is dropped so
# it is not used as a feature. No validation data is passed, so only the
# training loss/metric is reported per epoch.
tf_model.fit(X_train.drop(columns='id').to_numpy(), y_train.to_numpy(), epochs=100, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fd4e276cee0>

# TEST

In [210]:
# Run the trained model on the held-out split. 'id' is dropped so the inputs
# have the same columns that were used for fitting. Each output row is the
# softmax layer's 9 values.
y_pred = tf_model.predict(X_test.drop(columns='id').to_numpy())

In [211]:
# Display the held-out predictions computed in the previous cell.
y_pred

array([[4.0484443e-03, 1.5169465e-04, 3.0666724e-04, ..., 2.2396706e-03,
        4.1414669e-04, 9.5324540e-01],
       [3.5982473e-05, 2.5462414e-06, 1.1818248e-05, ..., 2.1607836e-04,
        9.9949038e-01, 4.1290608e-08],
       [2.5153359e-02, 6.8791315e-04, 6.5722503e-03, ..., 8.6617917e-02,
        3.6268643e-01, 4.8134792e-01],
       ...,
       [2.6888377e-04, 5.5210869e-04, 1.1621396e-04, ..., 7.8838208e-04,
        9.9777669e-01, 2.3823201e-05],
       [4.8108824e-02, 7.1038622e-01, 1.2764469e-01, ..., 1.4608941e-02,
        5.0230283e-02, 1.0400582e-02],
       [9.8523488e-03, 4.7943933e-09, 2.9518254e-05, ..., 4.2492265e-04,
        1.3301775e-02, 9.7605306e-01]], dtype=float32)

In [212]:
# Score the held-out predictions against the one-hot labels.
# NOTE(review): r2_score is a regression metric, while y_test is one-hot and
# y_pred holds softmax outputs; accuracy on the argmax or a log-loss metric
# would likely be more informative for this classifier - confirm.
r2_score(y_test, y_pred)

-0.30672040151740293

# MY PREDICTION

In [213]:
# READ TEST DATA
# NOTE: this rebinds `df` (previously the training frame) and `y_pred`
# (previously the held-out predictions); cells above will see the new values
# if they are re-run out of order.
df = read_csv_data('test.csv')
# Predict on every test row; 'id' is dropped to match the columns used in fit.
y_pred = tf_model.predict(df.drop(columns='id').to_numpy())

In [214]:
# CREATE SUBMISSION FILE
# Keep the ids under a name that does not shadow the builtin `id()`.
sample_ids = df['id'].to_list()

# CREATE DICTIONARY FOR DATAFRAME
# 'id' first, then one 'Class_k' column per prediction column
# (k = 1 .. number of columns in y_pred).
dataframe_dictionary = {
    'id': sample_ids,
    **{f'Class_{k + 1}': y_pred[:, k] for k in range(y_pred.shape[1])},
}

# CREATE DATAFRAME
submission_df = pd.DataFrame(dataframe_dictionary)
# index=False keeps the row index out of the CSV.
submission_df.to_csv('submission.csv', index=False)