In [125]:
import pandas as pd

In [126]:
import numpy as np

In [127]:
from sklearn.metrics import confusion_matrix

In [128]:
try:
    # scikit-learn >= 0.18 (sklearn.cross_validation was removed in 0.20)
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for very old scikit-learn installations
    from sklearn.cross_validation import train_test_split

In [129]:
from sklearn.tree import DecisionTreeClassifier

In [130]:
from sklearn.metrics import accuracy_score

In [131]:
from sklearn.metrics import classification_report

In [132]:
# Default location of the balance-scale data file (UCI ML repository).
BALANCE_SCALE_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data'


def import_data(source=BALANCE_SCALE_URL):
    """Load the balance-scale dataset and print a short summary of it.

    Parameters
    ----------
    source : str, path or file-like, optional
        Anything ``pandas.read_csv`` accepts. Defaults to the UCI URL, so
        calling ``import_data()`` with no arguments behaves as before.
        Passing a local path or an in-memory buffer lets the notebook run
        without network access.

    Returns
    -------
    pandas.DataFrame
        The file parsed as comma-separated values with no header row, so
        the columns are labelled with integers starting at 0.
    """
    balance_data = pd.read_csv(source, sep=',', header=None)
    print("Dataset Length:", len(balance_data))
    print("Dataset Shape:", balance_data.shape)
    print("Dataset:", balance_data.head())
    return balance_data

In [133]:
def splitdataset(bal_data):
    """Separate features from the target and make a train/test split.

    Column 0 of ``bal_data`` is used as the target; columns 1 through 4
    are used as the features. 30% of the rows are held out for testing,
    with a fixed ``random_state`` of 100.

    Returns a 6-tuple: ``(X, Y, X_train, X_test, Y_train, Y_test)``.
    """
    features = bal_data.values[:, 1:5]
    target = bal_data.values[:, 0]
    split_parts = train_test_split(
        features,
        target,
        test_size=0.3,
        random_state=100,
    )
    # Unpack explicitly so the return shape stays a flat 6-tuple.
    train_features, test_features, train_target, test_target = split_parts
    return features, target, train_features, test_features, train_target, test_target

In [134]:
def train_gini(X_train,Y_train):
    """Fit a decision tree that uses the Gini criterion.

    The tree is built with ``max_depth=3``, ``min_samples_leaf=5`` and
    ``random_state=100``. Returns the fitted classifier.
    """
    tree_params = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    gini_tree = DecisionTreeClassifier(**tree_params)
    gini_tree.fit(X_train, Y_train)
    return gini_tree
    

In [135]:
def train_entropy(X_train,Y_train):
    """Fit a decision tree that uses the entropy criterion.

    The tree is built with ``max_depth=3``, ``min_samples_leaf=5`` and
    ``random_state=100``. Returns the fitted classifier.
    """
    tree_params = {
        "criterion": "entropy",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    entropy_tree = DecisionTreeClassifier(**tree_params)
    entropy_tree.fit(X_train, Y_train)
    return entropy_tree

In [136]:
def prediction(X_test,clf_obj):
    """Predict labels for ``X_test`` with ``clf_obj`` and echo them.

    ``clf_obj`` only needs a ``predict`` method. The predictions are
    printed under a "Predicted values:" heading and then returned.
    """
    predicted_labels = clf_obj.predict(X_test)
    # One print call, newline-separated: same output as two prints.
    print("Predicted values:", predicted_labels, sep="\n")
    return predicted_labels

In [137]:
def cal_accuracy(Y_test,Y_pred):
    """Print evaluation metrics comparing ``Y_pred`` against ``Y_test``.

    Prints, in order: the confusion matrix, the accuracy score scaled to
    a percentage, and the classification report. Returns nothing.
    """
    print("Confusion Matrix:")
    matrix = confusion_matrix(Y_test, Y_pred)
    print(matrix)

    accuracy_pct = accuracy_score(Y_test, Y_pred) * 100
    print("Accuracy:", accuracy_pct)

    report = classification_report(Y_test, Y_pred)
    print("Classification Report", report)

In [138]:
def main():
    """Run the full experiment end to end.

    Loads the data, splits it, trains one tree per criterion, then
    prints predictions and metrics for each model on the test split.
    """
    print("Starts")
    dataset = import_data()
    # The full X / Y arrays are returned too but are not needed here.
    _, _, X_train, X_test, Y_train, Y_test = splitdataset(dataset)

    # Both models are trained before any evaluation output is produced.
    gini_model = train_gini(X_train, Y_train)
    entropy_model = train_entropy(X_train, Y_train)

    runs = (
        ("Result using Gini index:", gini_model),
        ("Result using Entropy", entropy_model),
    )
    for banner, model in runs:
        print(banner)
        predicted = prediction(X_test, model)
        cal_accuracy(Y_test, predicted)

    print("End")

In [139]:
# Entry point: run the experiment when this cell/script is executed
# directly, but not when the code is imported as a module.
if __name__ == "__main__":
    main()

Starts
Dataset Length: 625
Dataset Shape: (625, 5)
Dataset:    0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Result using Gini index:
Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix:
[[ 0  6  7]
 

  'precision', 'predicted', average, warn_for)
