In [97]:
#### Setup ####

from collections import Counter
from itertools import repeat

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from scipy import stats

from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, RFE, chi2, SelectKBest
from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression
from sklearn.metrics import balanced_accuracy_score, classification_report
from sklearn.model_selection import cross_validate, GridSearchCV, KFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer
from sklearn.svm import SVC
from sklearn.svm import SVR, OneClassSVM

from imblearn.over_sampling import SMOTE

import keras
import lightgbm as lgb
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.utils import np_utils

# Five hex color codes. Not referenced in the cells visible here;
# presumably a shared palette for the plotting libraries imported above.
vColors = [
    "#049DD9",
    "#03A64A",
    "#F2AC29",
    "#F2CA80",
    "#F22929",
]

In [125]:
#### Preprocessing

# Import
# Feature tables are loaded without their "id" column so that the identifier
# is not treated as a feature.

X_train = pd.read_csv("X_train.csv").drop("id", axis=1)

# Only the "y" column is kept; labels are cast to strings.
y_train = pd.read_csv("y_train.csv")
y_train = y_train['y'].astype(str)

X_test = pd.read_csv("X_test.csv").drop("id", axis=1)

# Scaling
# The scaler is fitted on the training features ONLY. The test features are
# then transformed with the training mean/std so both sets live in the same
# feature space. Re-fitting on the test set (fit_transform) would standardize
# it with different statistics than the ones the model was trained on.

scaler = StandardScaler()

X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns)

X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns)


In [127]:
#### Upsampling

# SMOTE balances the classes by adding synthetic rows to the under-represented
# ones. A fixed random_state makes the synthetic rows (and everything trained
# on them) reproducible across kernel restarts.
# NOTE(review): this cell rebinds X_train / y_train, so the un-resampled
# training data is no longer reachable without re-running the preprocessing cell.
oversample = SMOTE(
    sampling_strategy = "auto",
    k_neighbors = 5,
    n_jobs = -1,
    random_state = 42)

X_train, y_train = oversample.fit_resample(X_train, y_train)

# Sanity check: per-class counts after resampling.
counter = Counter(y_train)
print(counter)

X_train.shape

Counter({'1': 3600, '0': 3600, '2': 3600})


(10800, 1000)

In [148]:
# Metrics reported during training; the `name` values are the keys later read
# from `history.history` (validation variants are prefixed with "val_").
METRICS = [
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]

# Fully connected network: input -> 10 -> 5 -> 3-way softmax.
model = Sequential()
model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(5, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.summary()

model.compile(loss= "categorical_crossentropy" , optimizer='adam', metrics= METRICS)

# Keras takes `validation_split` from the LAST rows of the arrays, before any
# shuffling. The training data here comes out of SMOTE, which appends its
# synthetic rows after the original ones, so an unshuffled tail would not be a
# representative validation set. Shuffle rows (with a fixed seed) first.
# NOTE(review): synthetic rows can still be interpolated from rows that end up
# in the training part, so validation scores are likely optimistic.
shuffled_idx = np.random.RandomState(42).permutation(len(X_train))
X_fit = np.asarray(X_train)[shuffled_idx]
y_fit = np_utils.to_categorical(y_train)[shuffled_idx]

history = model.fit(X_fit, y_fit, epochs=200, batch_size=32, verbose=1, validation_split=0.3)

# Learning curves: AUC on the left, loss on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

axes[0].plot(history.history['auc'])
axes[0].plot(history.history['val_auc'])
axes[0].set_title('AUC per epoch')
axes[0].set_xlabel('epoch')
axes[0].set_ylabel('AUC')
axes[0].legend(['train auc', 'validation auc'], loc='lower right')

axes[1].plot(history.history['loss'])
axes[1].plot(history.history['val_loss'])
axes[1].set_title('Loss per epoch')
axes[1].set_xlabel('epoch')
axes[1].set_ylabel('categorical cross-entropy')
axes[1].legend(['train loss', 'validation loss'], loc='upper right')

# plt.show() renders under the inline notebook backend; Figure.show() expects a
# GUI backend and only emits a warning there.
plt.show()


Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_78 (Dense)             (None, 10)                10010     
_________________________________________________________________
dense_79 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_80 (Dense)             (None, 3)                 18        
Total params: 10,083
Trainable params: 10,083
Non-trainable params: 0
_________________________________________________________________
Train on 7559 samples, validate on 3241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Ep

In [114]:
# Predict Test Data

# NOTE(review): this continues training the already-fitted `model` from the
# training cell for 100 more epochs rather than starting from fresh weights;
# confirm that is intended.
# NOTE(review): validation_split holds out the last 20% of the rows as given
# (Keras does not shuffle before splitting), so those rows are not trained on.
model.fit(X_train, np_utils.to_categorical(y_train),epochs=100, batch_size=30, validation_split = 0.2)

# Predicted class = index of the largest softmax output.
preds = model.predict(X_test).argmax(axis = -1)

# Build ids 0..n-1 from the number of predictions instead of a hard-coded 4100,
# so the frame is always consistent with the test set that was actually loaded.
# NOTE(review): assumes the ids dropped from X_test.csv were 0..n-1 in file
# order; verify against the raw file.
dfResults = pd.DataFrame({"id": list(range(len(preds))), "y": preds})

dfResults.to_csv("Results.csv", sep=',', index=False)
