In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as  xgb
from sklearn import preprocessing

In [None]:
# List the files available in the input directory.
os.listdir('../input')

In [None]:
# Load the training set from the input directory.
train_csv_path = os.path.join('../input', 'train.csv')
data = pd.read_csv(train_csv_path)

In [None]:
# Preview the first five rows of the training data.
data.head(n=5)

In [None]:
# Print a per-column summary of the training frame.
data.info()

In [None]:
# Count missing values in each column.
data.isna().sum()

In [None]:
# Descriptive statistics for the training columns.
data.describe()

In [None]:
#sns.heatmap(data)

In [None]:
# Show the column labels of the training frame.
data.columns

In [None]:
# Columns passed to NormalizeData for min-max scaling.
cols = [
    # terrain
    'Elevation', 'Aspect', 'Slope',
    # distances
    'Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    # hillshade
    'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
    # wilderness area indicators
    'Wilderness_Area1', 'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4',
]

Shuffle the data:

In [None]:
# Shuffle the rows with a fixed seed so the row order (and therefore the
# train/validation split taken later via validation_split) is the same on
# every Restart & Run All.
SHUFFLE_SEED = 42
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)
data = data.reindex(shuffle_rng.permutation(data.index))

In [None]:
# Work on a deep copy so the raw training frame `data` stays untouched.
df = data.copy(deep=True)

Normalize entries:

In [None]:
def NormalizeData(dataset, cols, reference=None):
    """Min-max scale the given columns of ``dataset`` in place and return it.

    Each selected column is transformed as ``(x - min) / (max - min)``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Full dataset; its ``cols`` columns are overwritten with scaled values.
    cols : list of column labels, or a single label
        Columns to normalize.
    reference : pandas.DataFrame, optional
        Frame whose min/max are used for the scaling. Defaults to ``dataset``
        itself. Pass the training frame when scaling a test set so both go
        through the same transformation.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object, with ``cols`` rescaled.
    """
    # Accept a single label as well as a list of labels.
    if not pd.api.types.is_list_like(cols):
        cols = [cols]

    source = dataset if reference is None else reference
    col_min = source[cols].min()
    col_range = source[cols].max() - col_min
    # A constant column has range 0, which would turn every entry into NaN
    # (0/0). Dividing by 1 instead maps such a column to all zeros.
    col_range = col_range.replace(0, 1)

    dataset[cols] = (dataset[cols] - col_min) / col_range
    return dataset

# Rescale the selected feature columns of the working copy, then preview it.
df = NormalizeData(dataset=df, cols=cols)
df.head(n=5)

Create training data:
<br>Here we will eliminate Id and Cover_Type columns.

In [None]:
# Select the model inputs by name rather than by position, so a change in
# column order cannot silently let Id or the Cover_Type target into the features.
X_train = df.drop(columns=['Id', 'Cover_Type'])

Print the first 5 rows of X_train 

In [None]:
# Preview the first five rows of the training features.
X_train.head(n=5)

Create the one-hot training label matrix:
<br> - Copy the Cover_Type column from the <b>data</b> dataset
<br> - Apply scikit-learn's <b>LabelBinarizer</b> (<b>fit_transform</b>) to one-hot encode it

In [None]:
# Take an independent copy of the target column from the shuffled training data.
y = data.loc[:, 'Cover_Type'].copy()

In [None]:
# Fit the binarizer on the class labels, then encode them. `lb` is kept
# around so predictions can be mapped back to Cover_Type values later.
lb = preprocessing.LabelBinarizer()
lb.fit(y)
y = lb.transform(y)

In [None]:
# Spot-check one row of the encoded label matrix.
y[1000]

Create the model:
<br> Our model will consist of:
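<br> - Three hidden Dense layers (768, 384 and 128 units) with relu activation; the first takes the 54 input features and uses glorot_uniform initialization
<br> - Dropout (rate 0.2) after the second and third hidden layers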
<br> - One output layer with softmax activation
<br> - We will use Adam (learning rate 0.0003) as our optimizer and categorical_crossentropy for our loss
<br> - Accuracy will be our metric
<br> - We will train our model for 40 epochs with a batch_size of 16, holding out 30% of the data for validation

In [None]:
# Fully connected classifier: 54 inputs -> 768 -> 384 -> 128 -> 7-way softmax,
# with 20% dropout after the second and third hidden layers.
model = Sequential([
    Dense(768, input_dim=54, kernel_initializer='glorot_uniform', activation='relu'),
    Dense(384, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(7, activation='softmax'),
])

model.summary()

In [None]:
# Compile for multi-class classification on one-hot labels.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    metrics=['accuracy'],
)

In [None]:
#NAME = 'Forest 512_1024_d02_1024_d02_8_sgd_0.03'
#tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

In [None]:
# Train on the normalized features and one-hot labels, holding out 30% for validation.
model.fit(
    x=X_train,
    y=y,
    batch_size=16,
    epochs=40,
    validation_split=0.3,
)

In [None]:
# Load the test set and preview it.
test_csv_path = '../input/test.csv'
df_test = pd.read_csv(test_csv_path)
df_test.head(n=5)

In [None]:
# Scale the test features with the TRAINING min/max, not the test set's own.
# The model was fit on features scaled by the training statistics, so the test
# set must go through the same transformation for a raw value to mean the same
# thing at predict time. `data` still holds the raw training values, because
# only the copy `df` was normalized.
X_test = df_test.copy()
train_min = data[cols].min()
# Guard against a zero range (constant column), which would produce NaN.
train_range = (data[cols].max() - train_min).replace(0, 1)
X_test[cols] = (X_test[cols] - train_min) / train_range
X_test.head()

In [None]:
# Drop the identifier column so the test features match X_train, which has no Id.
# Rebinding (instead of inplace=True) with errors='ignore' keeps this cell safe
# to re-run. The redundant axis=1 is gone because `columns=` already names the axis.
X_test = X_test.drop(columns=['Id'], errors='ignore')

In [None]:
# Preview the first five rows of the test features.
X_test.head(n=5)

In [None]:
# Run the trained network on the test features.
preds = model.predict(x=X_test)

In [None]:
# Build the submission: the ids come from the first column of the test file,
# and the predictions are mapped back to Cover_Type values by the fitted binarizer.
test_ids = df_test.iloc[:, 0].values
predicted_cover = lb.inverse_transform(preds)
sub = pd.DataFrame({"Id": test_ids, "Cover_Type": predicted_cover})
sub.to_csv("submission.csv", index=False)

In [None]:
# Preview the first five rows of the submission.
sub.head(n=5)