##Importing the libraries##

In [0]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
import seaborn as sns

##**Loading the training data**##

In [0]:
data = pd.read_csv('train_data.csv')

##Peek into the dataset##

In [0]:
data.head()

Unnamed: 0,Avg_firing_dist,Avg_Srv_time,Avg_ping,Total_travel_dist,Avg_damage
0,264.22,19.06,96.79,6332.0,1788.88
1,366.66,24.42,96.67,6603.7,1750.92
2,259.54,16.37,97.5,6145.8,1838.12
3,183.23,13.52,97.31,6584.7,1869.24
4,299.71,19.74,97.82,6701.55,1818.88


##Checking if there exists any NaN value in the dataset##

In [0]:
data.isnull().sum()

Avg_firing_dist      0
Avg_Srv_time         0
Avg_ping             0
Total_travel_dist    0
Avg_damage           0
dtype: int64

##Overview of the dataset##

In [0]:
data.describe()

Unnamed: 0,Avg_firing_dist,Avg_Srv_time,Avg_ping,Total_travel_dist,Avg_damage
count,2000.0,2000.0,2000.0,2000.0,2000.0
mean,259.133945,16.745235,97.127825,6969.946375,1823.70634
std,97.717665,3.814559,0.540658,1394.220249,69.414587
min,38.54,8.11,95.3,2680.2,1706.16
25%,174.8125,12.96,96.76,5976.7,1764.36
50%,268.575,15.93,97.09,7096.75,1811.98
75%,341.375,20.45,97.49,8087.8375,1883.24
max,487.43,24.42,98.94,9520.2,1988.04


###Segregating the features from the 'label'

In [0]:
features = list(set(data.columns) - {'Avg_damage'})
features

['Avg_ping', 'Total_travel_dist', 'Avg_Srv_time', 'Avg_firing_dist']

##Data Preprocessing

Using Standard Scaler to scale the dataset so as to get better performance

In [0]:
scaler = StandardScaler()
scaler.fit(data[features])
scaled_data = pd.DataFrame(scaler.transform(data[features]))
scaled_data.columns = features
scaled_data = scaled_data.join(data['Avg_damage'])

In [0]:
scaled_data.head()

Unnamed: 0,Avg_ping,Total_travel_dist,Avg_Srv_time,Avg_firing_dist,Avg_damage
0,-0.624997,-0.457679,0.606975,0.052061,1788.88
1,-0.847004,-0.262755,2.01247,1.10065,1750.92
2,0.688546,-0.591264,-0.098394,0.004156,1838.12
3,0.337035,-0.276386,-0.845718,-0.776962,1869.24
4,1.280566,-0.192555,0.785284,0.415342,1818.88


Scaling the 'label' as well by a factor of 1000.

In [0]:
scaled_data['Avg_damage'] = scaled_data['Avg_damage']/1000

In [0]:
scaled_data.head()

Unnamed: 0,Avg_ping,Total_travel_dist,Avg_Srv_time,Avg_firing_dist,Avg_damage
0,-0.624997,-0.457679,0.606975,0.052061,1.78888
1,-0.847004,-0.262755,2.01247,1.10065,1.75092
2,0.688546,-0.591264,-0.098394,0.004156,1.83812
3,0.337035,-0.276386,-0.845718,-0.776962,1.86924
4,1.280566,-0.192555,0.785284,0.415342,1.81888


##Setting up the Neural Network for predicting the Average Damage

###Importing Keras libraries

In [0]:
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

###Defining the model

Using the 'ReLU - Rectified Linear Unit' and 'Linear' activation functions.

The model will be like :  relu() --> linear()

Also, will be using the Mean Squared Error loss function and RMSProp() optimizer.

In [0]:
def nn_model(input_dim):
    model = Sequential()
    model.add(Dense(10, input_dim=input_dim))
    model.add(Activation('relu'))
    
    model.add(Dense(1))
    model.add(Activation('linear'))

    model.compile(loss='mean_absolute_error', optimizer=Adam())
    return model

###Training the model

Training the model with the features and label by passing a batch of size 256 at a time, which goes on for 1000 iterations.
Also, creating a Validation set with 20% data so as to prevent overfitting of data.

In [0]:
nn = nn_model(len(features))
nn.fit(scaled_data[features], scaled_data['Avg_damage'], verbose=1, validation_split = 0.2, epochs=1000, batch_size=256)

Train on 1600 samples, validate on 400 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1

<keras.callbacks.History at 0x7fd8f8937128>

##Time for prediction!

###Loading the test dataset.

Also, scaling the datset with Standard Scaler.

In [0]:
test = pd.read_csv('test_data.csv')

scaler = StandardScaler()
scaler.fit(test[features])
test_data = pd.DataFrame(scaler.transform(test[features]))
test_data.columns = features

In [0]:
test_data.head()

Unnamed: 0,Avg_ping,Total_travel_dist,Avg_Srv_time,Avg_firing_dist
0,-0.633884,0.161292,0.533723,0.840465
1,0.099256,-1.569874,-0.823939,-0.158796
2,-1.442217,0.25275,-0.442176,0.29431
3,0.569217,0.793513,-0.917484,-0.903185
4,1.057977,0.690442,0.159544,-0.292301


###Predicting the result.

In [0]:
result = nn.predict(test_data) * 1000
result

array([[1762.2316],
       [1847.3726],
       [1792.4703],
       [1887.4484],
       [1823.2941],
       [1760.1783],
       [1745.0029],
       [1942.3347],
       [1723.3568],
       [1777.0928],
       [1781.4883],
       [1886.7306],
       [1875.561 ],
       [1745.2429],
       [1750.7045],
       [1734.6979],
       [1771.5371],
       [1838.7068],
       [1783.1173],
       [1882.4216],
       [1823.7384],
       [1949.121 ],
       [1760.2009],
       [1767.802 ],
       [1717.9905],
       [1835.9469],
       [1902.9534],
       [1912.2959],
       [1772.9922],
       [1801.7332],
       [1878.0509],
       [1860.1066],
       [1950.1287],
       [1829.1453],
       [1943.4237],
       [1850.1597],
       [1959.9006],
       [1817.5398],
       [1852.3729],
       [1776.087 ],
       [1719.5265],
       [1786.2865],
       [1896.1229],
       [1730.9076],
       [1896.5917],
       [1749.0459],
       [1747.9425],
       [1739.3842],
       [1804.0796],
       [1925.6002],


In [0]:
len(result)

200

##Creating the output file for submission

In [0]:
import numpy as np
np.savetxt("submission.csv", result, fmt='%s')