In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler


In [2]:
data = pd.read_csv('D:/projects/pythonProject/voice.csv',header=0)
training, test =train_test_split(data,test_size=0.25)
training_y = training['label']
test_y = test['label']
training_x= training.drop(columns=['label'])
test_x= test.drop(columns=['label'])

#preprocessing 
le = preprocessing.LabelEncoder()   # converting the strings to values
le.fit(training_y)
training_y=le.transform(training_y)
test_y = le.transform(test_y)

scaler = StandardScaler()    #normalization        
scaler.fit(training_x)
training_x = scaler.transform(training_x)
test_x = scaler.transform(test_x)


# Logistic Regression

In [3]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
lr_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
lr_test = round(cv_scores.mean(),56)*100
print("Training Accuracy:"+str(lr_traning))
print("Test Accuracy:"+str(lr_test))

Training Accuracy:96.97
Test Accuracy:97.60422752353134


# KNN

In [4]:
from sklearn.neighbors import KNeighborsClassifier

knn_cv = KNeighborsClassifier(n_neighbors=1)
cv_scores = cross_val_score(knn_cv, training_x, training_y, cv=10,scoring='accuracy')
knn_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(knn_cv, test_x, test_y, cv=10,scoring='accuracy')
knn_test = round(cv_scores.mean(),56)*100
print("Training Accuracy:"+str(knn_traning))
print("Test Accuracy:"+str(knn_test))

Training Accuracy:97.3483
Test Accuracy:96.35585037325545


# Naive Bayes

In [5]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
nb_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
nb_test = round(cv_scores.mean(),6)*100
print("Training Accuracy:"+str(nb_traning))
print("Test Accuracy:"+str(nb_test))

Training Accuracy:88.0073
Test Accuracy:91.15729999999999


# Decesion Tree

In [6]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
dt_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
dt_test = round(cv_scores.mean(),6)*100
print("Training Accuracy:"+str(dt_traning))
print("Test Accuracy:"+str(dt_test))

Training Accuracy:96.1706
Test Accuracy:95.70840000000001


# Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier 
clf = RandomForestClassifier()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
rf_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
rf_test = round(cv_scores.mean(),6)*100
print("Training Accuracy:"+str(rf_traning))
print("Test Accuracy:"+str(rf_test))

  from numpy.core.umath_tests import inner1d


Training Accuracy:97.8538
Test Accuracy:96.71640000000001


# SVM

In [8]:
from sklearn.svm import SVC
clf = SVC()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
svm_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
svm_test = round(cv_scores.mean(),6)*100
print("Training Accuracy:"+str(svm_traning))
print("Test Accuracy:"+str(svm_test))

Training Accuracy:97.77080000000001
Test Accuracy:97.4824


# ANN

In [9]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
cv_scores = cross_val_score(clf, training_x, training_y, cv=10,scoring='accuracy')
ann_traning =round(cv_scores.mean(),6)*100
cv_scores = cross_val_score(clf, test_x, test_y, cv=10,scoring='accuracy')
ann_test = round(cv_scores.mean(),6)*100
print("Training Accuracy:"+str(ann_traning))
print("Test Accuracy:"+str(ann_test))



Training Accuracy:97.897
Test Accuracy:97.4841




# Accuracy Comparison

In [10]:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

 
objects = ('LR','','KNN','','NB','','RF','','D-TREE','','SVM','','ANN')
y_pos = np.arange(14)
performance = [97.727,lr_test,97.727,knn_test,89.394,nb_test,96.717,dt_test,97.601,rf_test,97.97,svm_test,98.358,ann_test]
plt.bar(y_pos, performance, align='center',color=('red','blue','red','blue','red','blue','red','blue','red','blue','red','blue',))
plt.xticks(y_pos, objects)
plt.ylabel('Accuracy(%)')
plt.xlabel('Classification Algorithms')
red_patch = mpatches.Patch(color='red', label='Original Accuracy')
blue_patch = mpatches.Patch(color='blue', label='Reproduced Accuracy')
plt.legend(handles=[red_patch,blue_patch]) 
plt.show()

<Figure size 640x480 with 1 Axes>

# Parameter tuning -Grid search

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


#SVM
svc = SVC()
parameters = {'kernel':('linear', 'rbf','poly'), 'C':[0.3, 0.9],'gamma':[0.01,0.09]}
clf = GridSearchCV(svc,parameters,cv=10,scoring='accuracy')
clf.fit(training_x,training_y)
gs_svm_training= round(clf.best_score_,6)*100
y_pred = clf.predict(test_x)
gs_svm_test = round(accuracy_score(test_y, y_pred),6)*100
print("Training Accuracy:"+str(gs_svm_training))
print("Test Accuracy:"+str(gs_svm_test))

Training Accuracy:97.8114
Test Accuracy:98.48479999999999


In [12]:
print(clf.best_estimator_)

SVC(C=0.9, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


In [13]:
#ANN 
mlp = MLPClassifier()
parameters = {'learning_rate_init':[0.001,0.004]}
clf = GridSearchCV(mlp,parameters,cv=10,scoring='accuracy')
clf.fit(training_x,training_y)
gs_svm_training= round(clf.best_score_,6)*100
y_pred = clf.predict(test_x)
gs_svm_test = round(accuracy_score(test_y, y_pred),6)*100
print("Training Accuracy:"+str(gs_svm_training))
print("Test Accuracy:"+str(gs_svm_test))



Training Accuracy:97.85350000000001
Test Accuracy:98.2323


In [14]:
%%time
print(clf.best_estimator_)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.004, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
Wall time: 0 ns


# Parameter Tuning - Randomized Search 

In [15]:
%%time
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


#SVM
svc = SVC()
parameters = {'kernel':('linear', 'rbf','poly'), 'C':[0.3, 0.9],'gamma':[0.01,0.09]}
clf = RandomizedSearchCV(svc,parameters,cv=10,scoring='accuracy')
clf.fit(training_x,training_y)
gs_svm_training= round(clf.best_score_,6)*100
y_pred = clf.predict(test_x)
gs_svm_test = round(accuracy_score(test_y, y_pred),6)*100
print("Training Accuracy:"+str(gs_svm_training))
print("Test Accuracy:"+str(gs_svm_test))

Training Accuracy:97.8114
Test Accuracy:98.48479999999999
Wall time: 10.8 s


In [17]:
%%time
#ANN 
mlp = MLPClassifier()
parameters = {'learning_rate_init':[0.001,0.004]}
clf = GridSearchCV(mlp,parameters,cv=10,scoring='accuracy')
clf.fit(training_x,training_y)
gs_svm_training= round(clf.best_score_,6)*100
y_pred = clf.predict(test_x)
gs_svm_test = round(accuracy_score(test_y, y_pred),6)*100
print("Training Accuracy:"+str(gs_svm_training))
print("Test Accuracy:"+str(gs_svm_test))



Training Accuracy:97.9798
Test Accuracy:98.1061
Wall time: 17.6 s
