# Decision tree classification of tabla strokes from audio features

In [31]:
# Importing the required packages
import numpy as np 
import pandas as pd 
import scikitplot as skplt
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report

In [5]:
# Function to import the Audio features dataset 
def importdata(path='all_strokes.csv'):
    """Load the audio-feature dataset from a CSV file and print a short summary.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to ``'all_strokes.csv'`` (relative
        to the current working directory), the file name this function used
        to hard-code, so existing zero-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The table exactly as parsed by ``pandas.read_csv``.
    """
    my_data = pd.read_csv(path)

    # Row count and (rows, columns) shape of the loaded table.
    print("Dataset Lenght: ", len(my_data))
    print("Dataset Shape: ", my_data.shape)

    # First rows only, as a quick check that the columns parsed as expected.
    print("Dataset: ", my_data.head())
    return my_data

In [16]:
# Function to split the dataset into train and test set
def splitdataset(my_data, target="output", test_size=0.3, random_state=100):
    """Separate features from the label column and make a train/test split.

    Parameters
    ----------
    my_data : pandas.DataFrame
        Table holding the feature columns plus the label column.
    target : str, optional
        Name of the label column. Defaults to ``"output"``.
    test_size : float, optional
        Passed through to ``train_test_split``. Defaults to 0.3.
    random_state : int, optional
        Seed passed through to ``train_test_split``. Defaults to 100.

    Returns
    -------
    tuple
        ``(X, y, X_train, X_test, y_train, y_test)`` where ``X``/``y`` are the
        complete feature table and label column before splitting.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``my_data``.
    """
    y = my_data[target]

    # Remove the label by name instead of slicing off the last column: a
    # positional slice silently keeps the label among the features (and drops
    # a real feature) whenever the label is not the final column.
    X = my_data.drop(columns=[target])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    return X, y, X_train, X_test, y_train, y_test

In [7]:
# Function to perform training with giniIndex. 
def train_using_gini(X_train, X_test, y_train, max_depth=3,
                     min_samples_leaf=5, random_state=100):
    """Fit a decision tree that chooses splits by Gini impurity.

    Parameters
    ----------
    X_train : array-like
        Training features handed to ``fit``.
    X_test : object
        Unused. Kept in the signature so existing call sites keep working.
    y_train : array-like
        Training labels handed to ``fit``.
    max_depth : int or None, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 3.
        NOTE(review): a binary tree of depth 3 has at most 8 leaves, so it
        can emit at most 8 distinct labels; the confusion matrix captured in
        this notebook is 13x13 -- consider a larger depth.
    min_samples_leaf : int, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 5.
    random_state : int, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 100.

    Returns
    -------
    DecisionTreeClassifier
        The classifier after ``fit(X_train, y_train)``.
    """
    clf_gini = DecisionTreeClassifier(
        criterion="gini",
        random_state=random_state,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )

    clf_gini.fit(X_train, y_train)
    return clf_gini

In [8]:
# Function to perform training with entropy. 
def tarin_using_entropy(X_train, X_test, y_train, max_depth=3,
                        min_samples_leaf=5, random_state=100):
    """Fit a decision tree that chooses splits by entropy.

    The function name is misspelled ("tarin"); it is kept because existing
    callers use it. ``train_using_entropy`` is provided as a correctly
    spelled alias.

    Parameters
    ----------
    X_train : array-like
        Training features handed to ``fit``.
    X_test : object
        Unused. Kept in the signature so existing call sites keep working.
    y_train : array-like
        Training labels handed to ``fit``.
    max_depth : int or None, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 3.
        NOTE(review): a binary tree of depth 3 has at most 8 leaves, so it
        can emit at most 8 distinct labels -- consider a larger depth when
        there are more classes than that.
    min_samples_leaf : int, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 5.
    random_state : int, optional
        Forwarded to ``DecisionTreeClassifier``. Defaults to 100.

    Returns
    -------
    DecisionTreeClassifier
        The classifier after ``fit(X_train, y_train)``.
    """
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy",
        random_state=random_state,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )

    clf_entropy.fit(X_train, y_train)
    return clf_entropy


# Correctly spelled alias; the original name above stays for compatibility.
train_using_entropy = tarin_using_entropy

In [9]:
# Function to make predictions 
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with a fitted classifier and echo them.

    Works with any object exposing ``predict`` -- it is not tied to the Gini
    or the entropy model.

    Parameters
    ----------
    X_test : array-like
        Samples passed to ``clf_object.predict``.
    clf_object : object
        Fitted estimator providing a ``predict`` method.

    Returns
    -------
    object
        Whatever ``clf_object.predict(X_test)`` returned.
    """
    predicted = clf_object.predict(X_test)

    # Header and values land on separate lines, as with two print calls.
    print("Predicted values:", predicted, sep="\n")
    return predicted

In [10]:
# Function to calculate accuracy 
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy and classification report.

    Parameters
    ----------
    y_test : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, compared against ``y_test``.

    Returns
    -------
    None
        Results are only printed; nothing is returned.
    """
    # Each metric is computed immediately before it is printed, so an error
    # in a later metric still leaves the earlier ones on screen.
    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix: ", matrix)

    # accuracy_score result scaled by 100 before printing.
    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("Accuracy : ", accuracy_pct)

    report = classification_report(y_test, y_pred)
    print("Report : ", report)

In [30]:
# Driver code 
def main():
    """Run the whole pipeline: load, split, train both trees, score each.

    Both classifiers are trained before any prediction is made; each is then
    announced with its heading, used to predict on the test split, and scored
    against the test labels.
    """
    # Building phase
    data = importdata()
    _, _, X_train, X_test, y_train, y_test = splitdataset(data)
    fitted = (
        ("Results Using Gini Index:",
         train_using_gini(X_train, X_test, y_train)),
        ("Results Using Entropy:",
         tarin_using_entropy(X_train, X_test, y_train)),
    )

    # Operational phase: same predict-then-score sequence for each model.
    for heading, clf in fitted:
        print(heading)
        cal_accuracy(y_test, prediction(X_test, clf))

    # NOTE(review): the scikit-plot confusion-matrix calls that used to be
    # commented out here paired the full label column `y` with test-set
    # predictions; if plotting is re-enabled, compare against `y_test`.


In [32]:
# Entry point: run the full pipeline only when this code executes as the
# top-level program; the guard is false when the file is imported as a module.
if __name__=="__main__": 
    main() 

Dataset Lenght:  21361
Dataset Shape:  (21361, 63)
Dataset:     Mean_Mem20_ZeroCrossings_HopSize512_WinSize512_Sum_AudioCh0  \
0                                           0.048828             
1                                           0.048730             
2                                           0.047363             
3                                           0.045996             
4                                           0.044727             

   Mean_Mem20_Centroid_Power_powerFFT_WinHamming_HopSize512_WinSize512_Sum_AudioCh0  \
0                                           0.036029                                  
1                                           0.035697                                  
2                                           0.034925                                  
3                                           0.033907                                  
4                                           0.032867                                  

   Mean_Mem20_Rollo

Results Using Gini Index:
Predicted values:
['N' 'Ge' 'Na' ... 'N' 'Na' 'Ge']
Confusion Matrix:  [[481   5   0   3   0  84  71   0   0   4   0   0   0]
 [ 25 222  62  22   0 378   5   0   0   0   0   0   0]
 [  0   0 436   0   0 197   9   0   0   0   0   0   0]
 [  3   0   1 644   0  33   0   0   0   1   0   0   0]
 [ 32   0   0   0   0  96 177   0   0   0   0   0   0]
 [  3   0   8   0   0 575  39   0   0   0   0   0   0]
 [ 47   0   0   0   0  29 579   0   0   0   0   0   0]
 [  0   0   7   0   0 261   0   0   0   0   0   0   0]
 [  0   0   7   0   0 249   2   0   0   0   0   0   0]
 [  6   0   0   0   0 331  38   0   0   5   0   0   0]
 [ 28   0   0   0   0 155 484   0   0   0   0   0   0]
 [  0   0   5   0   0 123   0   0   0   0   0   0   0]
 [ 54   0   1   0   0 231 151   0   0   0   0   0   0]]
Accuracy :  45.90419722265564
Report :                precision    recall  f1-score   support

         Dha       0.71      0.74      0.72       648
        Dhin       0.98      0.31     

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
