In [56]:
from __future__ import division, print_function, unicode_literals

import numpy as np
import pandas as pd
import os

np.random.seed(42)

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize = 14)
mpl.rc('xtick', labelsize = 12)
mpl.rc('ytick', labelsize = 12)

PROJECT_ROOT_DIR = "."
CHAPTER_ID = "classification"

def save_fig(fig_id, tight_layout = True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi = 300)

In [57]:
# import dataset
dataset = pd.read_csv("el4233-2018-2019-02-klasifikasi-train.csv")
dataset = dataset.drop('No', axis = 1)
dataset.head()

Unnamed: 0,X0,X1,Y
0,0.684314,1.102777,0
1,0.218752,0.122695,1
2,0.034522,0.329218,1
3,-0.323019,1.479858,0
4,2.303018,-0.369103,1


In [58]:
# Preparing for data training
# x = dataset.iloc[:,0:3].values
# y = dataset.iloc[:, 3].values
y = np.array(dataset['Y'])
x = dataset.drop('Y', axis = 1)

# Divide data into training dan testing set
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

print(y_train)

(800, 2)
(800,)
(200, 2)
(200,)
[0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 0 0 0 1 1 0 0 1 1 0 1 1 1
 0 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 0 1 0 0 0 1 0 1 1 1 1 1 0 0 1 1 0 1 1
 0 1 1 1 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 0 1 0 0 0 0 1 1 1 1 1 1 1
 0 1 1 1 0 1 0 0 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0
 1 1 0 0 0 0 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 0 0 0 0 0 0 1 1 0 1 1 1 1 1 0 0
 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 0 1
 0 1 1 0 0 0 0 0 1 1 0 1 1 0 1 1 1 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1
 0 0 1 0 1 1 1 0 0 1 1 1 0 0 0 0 1 0 1 1 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 1 0
 1 0 1 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1 1 0 1 1 0 0 0
 1 0 1 0 1 0 0 0 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 0 0 1 1 1
 1 1 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 1 0 1 0 1 1 1 1
 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1 0 1 1 1 1 0 0 1 0 0 0 1 0 1 0 0 0 1 1
 0 1 0 0 0 0 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 0 1 1 1 0 1 1

In [59]:
# Training the algorithm
from sklearn.ensemble import RandomForestClassifier

randomforest = RandomForestClassifier(n_estimators = 20, random_state = 42)
randomforest.fit(x_train, y_train)
y_pred = randomforest.predict(x_test)

print(y_pred)
# print(y_pred.round())

[1 1 1 1 0 1 1 1 0 0 1 1 0 1 0 0 0 1 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 0 1
 0 0 1 0 0 1 1 1 1 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 1 1 0 1 1 0 0 1 0 0 0 0 0
 1 0 1 0 1 0 1 1 1 0 0 0 1 0 1 0 0 1 0 1 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1
 0 1 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 0 0
 0 0 0 1 0 1 0 1 0 1 1 1 1 0 1 1 1 0 0 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0
 0 0 1 1 1 1 0 1 1 0 0 0 0 1 0]


In [60]:
# Evaluating the data model
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

[[96  5]
 [ 6 93]]
              precision    recall  f1-score   support

           0       0.94      0.95      0.95       101
           1       0.95      0.94      0.94        99

   micro avg       0.94      0.94      0.94       200
   macro avg       0.95      0.94      0.94       200
weighted avg       0.95      0.94      0.94       200

0.945


In [61]:
from sklearn.model_selection import cross_validate

accuracy = cross_validate(regressor,x_train,y_train,cv=10)['test_score']
print('The accuracy is: ',sum(accuracy)/len(accuracy)*100,'%')

The accuracy is:  96.76072433192687 %


In [62]:
# Predict the data test
datatest = pd.read_csv('el4233-2018-2019-02-klasifikasi-test.csv')
data_test = datatest.drop('No', axis = 1)

data_test.head()

Unnamed: 0,X0,X1
0,1.030522,0.892364
1,1.318739,-0.49859
2,1.2101,0.498914
3,-0.644977,1.346281
4,0.042483,0.550473


In [63]:
x_prediction = data_test

x_prediction.shape

(1000, 2)

In [64]:
# predict the result
y_prediction = randomforest.predict(x_prediction)

print(y_prediction)
y_prediction.shape

[0 1 0 0 1 0 1 0 1 0 0 1 1 0 1 1 0 1 0 0 1 0 0 1 1 0 1 0 0 0 1 0 1 0 1 0 0
 1 1 1 0 1 0 1 0 0 1 1 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 1 1 0 0 1 1 0 1 1 0 0
 1 1 0 0 1 0 1 1 0 1 1 0 1 0 1 1 1 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 0
 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0
 0 0 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1
 1 1 0 0 1 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 1 1 1 0 0
 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 1 1 1 1 1 0 1 1 0 1 1 1
 1 1 0 0 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 0 1 1 1 0 0 1 0 0 1 1
 1 1 0 0 0 0 0 1 1 1 1 1 1 0 0 0 1 0 1 1 0 1 0 1 1 0 1 1 1 1 1 1 0 1 0 0 0
 1 1 1 0 1 1 1 1 0 0 1 1 0 0 0 1 1 0 1 1 0 1 0 1 1 1 1 0 1 0 1 1 0 0 0 1 1
 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1 1 1 1 1 1 0 0 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0
 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 1 0 1 1 0 1 1 1 1
 1 0 0 1 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 1 1 0 1 1
 1 0 1 1 0 1 1 0 0 1 0 0 

(1000,)

In [65]:
datatest['prediksi'] = pd.Series(y_prediction)
print(datatest)
datatest = datatest.iloc[:, 0:3]
datatest.to_csv('random_forest_result.csv')

       No        X0        X1  prediksi
0    1000  1.030522  0.892364         0
1    1001  1.318739 -0.498590         1
2    1002  1.210100  0.498914         0
3    1003 -0.644977  1.346281         0
4    1004  0.042483  0.550473         1
5    1005 -0.886007  0.899231         0
6    1006  0.808737 -0.280190         1
7    1007  0.998703  0.231264         0
8    1008  1.839213  0.062754         1
9    1009  0.414872  1.454819         0
10   1010  1.034116  0.681630         0
11   1011  0.689122 -0.417326         1
12   1012  0.091345  0.383053         1
13   1013 -1.213295  0.670943         0
14   1014  0.455126 -0.090785         1
15   1015  1.201385  0.000165         1
16   1016 -0.618245  0.707295         0
17   1017  1.794065 -0.164296         1
18   1018  1.306753  0.688382         0
19   1019  0.987622  0.308359         0
20   1020  2.042701  0.301285         1
21   1021  0.700648  1.395533         0
22   1022  1.064990  0.418018         0
23   1023  1.658129 -0.330109         1
