In [38]:
# Importing the required packages 
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 

In [55]:
# Function importing Dataset 
def importdata(url=None):
    """Load the balance-scale dataset and print a short summary of it.

    Parameters
    ----------
    url : str or path-like, optional
        Location of the comma-separated, header-less data file. When
        omitted, the UCI Machine Learning Repository copy of the
        balance-scale dataset is downloaded, as before.

    Returns
    -------
    pandas.DataFrame
        The parsed data with integer column labels (no header row is read).
    """
    if url is None:
        # Default source: the UCI-hosted balance-scale data file.
        url = ('https://archive.ics.uci.edu/ml/machine-learning-'
               'databases/balance-scale/balance-scale.data')

    data = pd.read_csv(url, sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(data))
    print("Dataset Shape: ", data.shape)

    # Printing the first dataset observations
    print("Dataset: ", data.head())
    return data

In [56]:
# Function to split the dataset 
def splitdataset(data, test_size=0.3, random_state=100):
    """Separate features from the target and split into train and test sets.

    Column 0 of ``data`` is used as the target and columns 1 through 4 as
    the features.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset as returned by ``importdata``.
    test_size : float, optional
        Passed to ``train_test_split``; defaults to the original 0.3.
    random_state : int, optional
        Seed passed to ``train_test_split``; defaults to the original 100.

    Returns
    -------
    tuple
        ``(X, y, X_train, X_test, y_train, y_test)``.
    """
    values = data.values

    # Separating the target variable (column 0) from the features (columns 1-4)
    X = values[:, 1:5]
    y = values[:, 0]

    # Splitting the dataset into train and test partitions
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return X, y, X_train, X_test, y_train, y_test
      

In [57]:
# Function to perform training with the Gini index.
def train_using_gini(X_train, y_train):
    """Fit a decision tree that uses the Gini criterion and return it.

    The tree is built with ``random_state=100``, ``max_depth=3`` and
    ``min_samples_leaf=5``, then fitted on the given training data.
    """
    tree_settings = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }

    # fit() hands back the fitted estimator, so build and train in one step.
    return DecisionTreeClassifier(**tree_settings).fit(X_train, y_train)

In [58]:
# Function to perform training with entropy. 
def train_using_entropy(X_train, y_train):
    """Fit a decision tree that uses the entropy criterion and return it.

    The tree is built with ``random_state=100``, ``max_depth=3`` and
    ``min_samples_leaf=5``, then fitted on the given training data.
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy",
        random_state=100,
        max_depth=3,
        min_samples_leaf=5,
    )

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy


# Backward-compatible alias: the function was originally published under this
# misspelled name, so it is kept for existing callers.
tarin_using_entropy = train_using_entropy

In [59]:
# Function to make predictions with Gini Index
def prediction_gini(X_test, clf_gini):
    """Predict labels for ``X_test`` with ``clf_gini``, print and return them."""
    predicted = clf_gini.predict(X_test)

    # Header line followed by the predictions on their own line.
    print("Predicted values:", predicted, sep="\n")
    return predicted

In [60]:
# Function to make predictions with Information Gain
def prediction_entr(X_test, clf_entropy):
    """Predict labels for ``X_test`` with ``clf_entropy``, print and return them."""
    predicted = clf_entropy.predict(X_test)

    # Emit the header and then the predictions, one print call each.
    for item in ("Predicted values:", predicted):
        print(item)
    return predicted

In [61]:
# Function to calculate accuracy using Gini Index
def cal_accuracy_gini(y_test, y_pred_gini):
    """Print the confusion matrix, accuracy (as a percentage) and
    classification report for the Gini-based predictions.
    """
    matrix = confusion_matrix(y_test, y_pred_gini)
    print("Confusion Matrix: ", matrix)

    # accuracy_score is scaled by 100 so it is shown as a percentage.
    accuracy_pct = accuracy_score(y_test, y_pred_gini) * 100
    print("Accuracy : ", accuracy_pct)

    report = classification_report(y_test, y_pred_gini)
    print("Report : ", report)

In [62]:
# Function to calculate accuracy using entropy
def cal_accuracy_entr(y_test, y_pred_entr):
    """Print the confusion matrix, accuracy (as a percentage) and
    classification report for the entropy-based predictions.
    """
    # Each entry pairs the printed label with the metric that produces its value.
    metrics = (
        ("Confusion Matrix: ", confusion_matrix),
        # accuracy_score is scaled by 100 so it is shown as a percentage.
        ("Accuracy : ", lambda truth, pred: accuracy_score(truth, pred) * 100),
        ("Report : ", classification_report),
    )

    for label, compute in metrics:
        print(label, compute(y_test, y_pred_entr))

In [65]:
# Driver code 
def main():
    """Run the full workflow: load the data, split it, train both decision
    trees, then print predictions and evaluation metrics for each.
    """
    # Building phase: load, split, and train both classifiers up front.
    dataset = importdata()
    _, _, X_train, X_test, y_train, y_test = splitdataset(dataset)
    gini_model = train_using_gini(X_train, y_train)
    entropy_model = tarin_using_entropy(X_train, y_train)

    # Operational phase: for each model, announce it, predict, then evaluate.
    stages = (
        ("Results Using Gini Index:", gini_model, prediction_gini, cal_accuracy_gini),
        ("Results Using Entropy:", entropy_model, prediction_entr, cal_accuracy_entr),
    )
    for banner, model, predict, evaluate in stages:
        print(banner)
        predictions = predict(X_test, model)
        evaluate(y_test, predictions)

In [66]:
# Calling main function 
# Entry point: run the workflow when this module is executed as the main program.
if __name__ == "__main__":
    main()

Dataset Lenght:  625
Dataset Shape:  (625, 5)
Dataset:     0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Results Using Gini Index:
Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix:  [[ 0  6  7]
 [ 

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
