In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition.kernel_pca import KernelPCA
from sklearn.metrics import classification_report
from sklearn.preprocessing import PolynomialFeatures
from sklearn import preprocessing
from sklearn import ensemble

In [2]:
trainData = pd.read_csv("../input/train.csv")
testData = pd.read_csv("../input/test.csv")

In [3]:
trainData.isnull().sum()

id               0
bone_length      0
rotting_flesh    0
hair_length      0
has_soul         0
color            0
type             0
dtype: int64

In [4]:
testData.isnull().sum()

id               0
bone_length      0
rotting_flesh    0
hair_length      0
has_soul         0
color            0
dtype: int64

In [5]:
np.unique(trainData[['type']].values)

array(['Ghost', 'Ghoul', 'Goblin'], dtype=object)

In [6]:
np.unique(trainData[['color']].values)

array(['black', 'blood', 'blue', 'clear', 'green', 'white'], dtype=object)

In [7]:
np.unique(testData[['color']].values)

array(['black', 'blood', 'blue', 'clear', 'green', 'white'], dtype=object)

In [8]:
trainData.head()

Unnamed: 0,id,bone_length,rotting_flesh,hair_length,has_soul,color,type
0,0,0.354512,0.350839,0.465761,0.781142,clear,Ghoul
1,1,0.57556,0.425868,0.531401,0.439899,green,Goblin
2,2,0.467875,0.35433,0.811616,0.791225,black,Ghoul
3,4,0.776652,0.508723,0.636766,0.884464,black,Ghoul
4,5,0.566117,0.875862,0.418594,0.636438,green,Ghost


In [9]:
lbl = preprocessing.LabelEncoder()
lbl.fit(list(trainData['color'].values)) 
trainData['color'] = lbl.transform(list(trainData['color'].values))

lbl = preprocessing.LabelEncoder()
lbl.fit(list(trainData['type'].values)) 
trainData['type'] = lbl.transform(list(trainData['type'].values))

In [10]:
trainData.head()

Unnamed: 0,id,bone_length,rotting_flesh,hair_length,has_soul,color,type
0,0,0.354512,0.350839,0.465761,0.781142,3,1
1,1,0.57556,0.425868,0.531401,0.439899,4,2
2,2,0.467875,0.35433,0.811616,0.791225,0,1
3,4,0.776652,0.508723,0.636766,0.884464,0,1
4,5,0.566117,0.875862,0.418594,0.636438,4,0


In [11]:
yTrain = trainData['type'].values
xTrain = trainData.drop(["id", "type"], axis=1)
xTrain.head()

Unnamed: 0,bone_length,rotting_flesh,hair_length,has_soul,color
0,0.354512,0.350839,0.465761,0.781142,3
1,0.57556,0.425868,0.531401,0.439899,4
2,0.467875,0.35433,0.811616,0.791225,0
3,0.776652,0.508723,0.636766,0.884464,0
4,0.566117,0.875862,0.418594,0.636438,4


In [12]:
model = ensemble.RandomForestClassifier(n_estimators=170)
model.fit(xTrain, yTrain)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=170, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [13]:
model.score(xTrain,yTrain)

1.0

In [14]:
lbl = preprocessing.LabelEncoder()
lbl.fit(list(testData['color'].values)) 
testData['color'] = lbl.transform(list(testData['color'].values))

In [15]:
testData.head()

Unnamed: 0,id,bone_length,rotting_flesh,hair_length,has_soul,color
0,3,0.471774,0.387937,0.706087,0.698537,0
1,6,0.427332,0.645024,0.565558,0.451462,5
2,9,0.549602,0.491931,0.660387,0.449809,0
3,10,0.638095,0.682867,0.471409,0.356924,5
4,13,0.361762,0.583997,0.377256,0.276364,0


In [16]:
yTest = testData['id'].values
xTest = testData.drop(["id"], axis=1)
xTest.head()

Unnamed: 0,bone_length,rotting_flesh,hair_length,has_soul,color
0,0.471774,0.387937,0.706087,0.698537,0
1,0.427332,0.645024,0.565558,0.451462,5
2,0.549602,0.491931,0.660387,0.449809,0
3,0.638095,0.682867,0.471409,0.356924,5
4,0.361762,0.583997,0.377256,0.276364,0


In [17]:
pred = model.predict(xTest)
my_submission = pd.DataFrame({'ID': yTest, 'y': pred})

In [18]:
predic = pd.read_csv('../input/sample_submission.csv')

In [19]:
my_submission_new = []
i = 0
for row in my_submission.iterrows():
    my = {}
    my['id'] = predic.id[i]
    if(row[1]['y'] ==0):
        my['type'] = 'Ghost'
    elif(row[1]['y'] ==1):
        my['type'] = 'Ghoul'
    else:
        my['type'] = 'Goblin'
    my_submission_new.append(my)
    i = i+1

In [20]:
df = pd.DataFrame(my_submission_new, columns=["id","type"])

In [21]:
df.to_csv('submission.csv', index=False)