In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


In [2]:
def import_data():
    balance_data=pd.read_csv("data.csv",header=None)
    print(len(balance_data))
    print(balance_data.shape)
    print(balance_data.head())
    return balance_data

In [3]:
def split_dataset(balance_data):
    X=balance_data.values[:,1:5]
    Y=balance_data.values[:,0]
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=100)
    return X,Y,X_train,X_test,Y_train,Y_test

In [4]:
def train_using_gini(X_train,X_test,Y_train):
    clf_gini=DecisionTreeClassifier(criterion='gini',max_depth=3,random_state=100,min_samples_leaf=5)
    clf_gini.fit(X_train,Y_train)
    return clf_gini

In [5]:
def train_using_entropy(X_train,X_test,Y_train):
    clf_entropy=DecisionTreeClassifier(criterion='entropy',max_depth=3,random_state=100,min_samples_leaf=5)
    clf_entropy.fit(X_train,Y_train)
    return clf_entropy

In [6]:
def prediction(X_test,clf_object):
    Y_pred=clf_object.predict(X_test)
    print("Predicted values:\n",Y_pred)
    return Y_pred

In [7]:
def cal_accuracy(Y_test,Y_pred):
    print("Confusion Matrix:\n",confusion_matrix(Y_test,Y_pred))
    print("Accuracy Score:\n",accuracy_score(Y_test,Y_pred))
    print("Classification Report:\n",classification_report(Y_test,Y_pred))
    

In [8]:
data=import_data()

X,Y,X_train,X_test,Y_train,Y_test=split_dataset(data)

clf_gini=train_using_gini(X_train,X_test,Y_train)
clf_entropy=train_using_entropy(X_train,X_test,Y_train)

print("Results using gini index:\n")
y_pred_gini=prediction(X_test,clf_gini)
cal_accuracy(Y_test,y_pred_gini)

print("Results using entropy:\n")
y_pred_entropy=prediction(X_test,clf_entropy)
cal_accuracy(Y_test,y_pred_entropy)



625
(625, 5)
   0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Results using gini index:

Predicted values:
 ['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix:
 [[ 0  6  7]
 [ 0 67 18]
 [ 0 19 71]]
Accuracy Score:
 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
