##Importing the libraries##

In [0]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

##Loading the training data

In [0]:
data = pd.read_csv('data_train.csv')

##Peek into the dataset##

In [0]:
data.head()

Unnamed: 0,feature-1,feature-2,feature-3,feature-4,feature-5,feature-6,feature-7,feature-8,feature-9,feature-10,label
0,20.306201,7.246964,30.746074,70.738222,82.089549,44.537815,56.908667,17.261529,27.450978,42.425691,7
1,26.752616,16.39676,30.277686,39.271342,80.597014,31.302522,76.580799,50.305849,63.13725,29.51448,3
2,23.448827,14.291499,31.549011,57.42432,86.56717,37.394956,78.454333,44.758231,59.215678,34.578621,4
3,21.998386,10.850202,32.920712,65.819695,86.56717,39.07563,74.004684,50.59121,58.431376,38.060054,3
4,26.188554,10.364373,31.381732,63.53736,98.507465,36.554622,66.042155,52.787542,75.294116,40.01876,6


##Checking if there exists any NaN value in the dataset

In [0]:
data.isnull().sum()

feature-1     0
feature-2     0
feature-3     0
feature-4     0
feature-5     0
feature-6     0
feature-7     0
feature-8     0
feature-9     0
feature-10    0
label         0
dtype: int64

##Overview of the dataset##

In [0]:
data.describe()

Unnamed: 0,feature-1,feature-2,feature-3,feature-4,feature-5,feature-6,feature-7,feature-8,feature-9,feature-10,label
count,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0,6374.0
mean,23.166521,12.426431,30.589465,57.230358,83.959795,37.104464,73.845752,49.324234,61.621946,35.661977,3.465955
std,6.564205,3.80537,8.188267,10.730085,14.449739,4.825946,6.12152,12.434835,12.977496,5.084599,1.860678
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.889547,-3.921559,0.0,1.0
25%,21.917804,11.174091,30.946806,51.455617,85.074635,34.033614,73.302109,42.536299,56.470584,32.829086,2.0
50%,23.045928,12.226719,32.218131,57.712983,86.56717,36.554622,75.175644,50.566766,61.568621,35.353277,3.0
75%,24.335213,13.562751,33.38909,64.004197,86.56717,39.705884,76.580799,57.378338,68.62745,38.366644,5.0
max,100.0,100.0,100.0,100.0,100.0,60.504201,119.906328,103.1134,100.392151,57.71504,7.0


###Segregating the features from the 'label'

In [0]:
features = list(set(data.columns) - {'label'})
features

['feature-2',
 'feature-9',
 'feature-7',
 'feature-10',
 'feature-8',
 'feature-3',
 'feature-5',
 'feature-4',
 'feature-1',
 'feature-6']

##Data Preprocessing

Using Standard Scaler to scale the dataset so as to get better performance

In [0]:
scaler = StandardScaler()
scaler.fit(data[features])
scaled_data = pd.DataFrame(scaler.transform(data[features]))
scaled_data.columns = features
scaled_data = scaled_data.join(data['label'])
scaled_data['label'] = scaled_data['label'] - 1

In [0]:
scaled_data.head()

Unnamed: 0,feature-2,feature-9,feature-7,feature-10,feature-8,feature-3,feature-5,feature-4,feature-1,feature-6,label
0,-1.361201,-2.633301,-2.767027,1.33034,-2.578661,0.019127,-0.129441,1.258976,-0.435779,1.54041,6
1,1.043431,0.116773,0.446827,-1.209138,0.078947,-0.038079,-0.232741,-1.673838,0.546354,-1.202334,2
2,0.490153,-0.185433,0.752908,-0.213083,-0.367223,0.117195,0.180459,0.018078,0.04301,0.060199,3
3,-0.414244,-0.245873,0.025965,0.471672,0.101897,0.284728,0.180459,0.800554,-0.177969,0.408484,2
4,-0.541924,1.053612,-1.274881,0.856926,0.278538,0.096764,1.006856,0.587833,0.460417,-0.113943,5


Although it helps very little here, also Normalizing the dataset using Normalizer from sklearn.

In [0]:
scaler = Normalizer().fit(scaled_data[features])
scaled_data = pd.DataFrame(scaler.transform(scaled_data[features]))
scaled_data.columns = features
scaled_data = scaled_data.join(data['label'])
scaled_data['label'] = scaled_data['label'] - 1

In [0]:
scaled_data.head()

Unnamed: 0,feature-2,feature-9,feature-7,feature-10,feature-8,feature-3,feature-5,feature-4,feature-1,feature-6,label
0,-0.252648,-0.488758,-0.513578,0.24692,-0.478616,0.00355,-0.024025,0.233674,-0.080883,0.28591,6
1,0.384307,0.043009,0.164571,-0.445339,0.029077,-0.014025,-0.085721,-0.616493,0.201228,-0.442833,2
2,0.472996,-0.178942,0.726553,-0.205624,-0.354369,0.113092,0.174142,0.017445,0.041505,0.058091,3
3,-0.347793,-0.206431,0.0218,0.396009,0.085551,0.239053,0.15151,0.672132,-0.14942,0.342956,2
4,-0.232462,0.451955,-0.54687,0.367585,0.119481,0.041508,0.431899,0.252155,0.197499,-0.048877,5


Here're the labels.

In [0]:
set(scaled_data['label'])

{0, 1, 2, 3, 4, 5, 6}

##Setting up the Neural Network for predicting the class the player belongs to.

###Importing Keras libraries

In [0]:
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

###Defining the model

Using the 'tanh' and 'softmax' activation functions.

The model will be like :  tanh() --> tanh() --> tanh() -->tanh() --> tanh() --> softmax()

Also, will be using the 'Categorical Cross Entropy' loss function and Adam() optimizer.

In [0]:
def nn_model(input_dim):
    model = Sequential()
    model.add(Dense(200, input_dim=input_dim))
    model.add(Activation('tanh'))
    
    model.add(Dense(200))
    model.add(Activation('tanh'))
    
    model.add(Dense(200))
    model.add(Activation('tanh'))
    
    model.add(Dense(200))
    model.add(Activation('tanh'))
    
    model.add(Dense(200))
    model.add(Activation('tanh'))
    
    model.add(Dense(7))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer = Adam())
    return model

###Training the model

Training the model with the features and label by passing a batch of size 256 at a time, which goes on for 75 iterations.
Also, creating a Validation set with 20% data so as to prevent overfitting of data.

In [0]:
nn = nn_model(len(features))
nn.fit(scaled_data[features].values, np_utils.to_categorical(scaled_data.label.values), verbose=1, validation_split = 0.2, epochs = 75, batch_size=256)

Train on 5099 samples, validate on 1275 samples
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.History at 0x7f91b06f7dd8>

##Time for prediction!

###Loading the test dataset

Also, scaling the datset with Standard Scaler.

In [0]:
test = pd.read_csv('data_test.csv')

scaler = StandardScaler()
scaler.fit(test[features])
test_data = pd.DataFrame(scaler.transform(test[features]))
test_data.columns = features

###Predicting the result

In [0]:
result = nn.predict_classes(test_data) + 1
result

array([6, 2, 4, ..., 6, 2, 3])

In [0]:
test_data.head()

Unnamed: 0,feature-2,feature-9,feature-7,feature-10,feature-8,feature-3,feature-5,feature-4,feature-1,feature-6
0,-0.16477,0.817177,-1.333763,0.306915,2.008506,8.614379,-5.75785,2.102741,-3.751447,-0.563865
1,0.45519,1.340513,1.009032,0.285681,0.538641,0.423308,-3.195267,1.320126,-0.195867,0.801358
2,1.088629,0.263056,0.598016,-1.099599,0.497528,-0.012829,0.187342,-0.135302,-0.025931,-0.990498
3,-0.555615,0.355409,-1.292661,1.279876,-1.467037,0.003785,1.007368,0.050291,0.457734,0.374725
4,-0.137816,-0.137141,0.474711,0.464312,0.759993,-0.087596,0.187342,0.866456,-0.012859,0.673368


In [0]:
len(result)

3000

##Creating the output file for submission

In [0]:
import numpy as np
np.savetxt("submission.csv", result, fmt='%s')