# Classifiers

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
infogain_features=pd.read_csv('infogain_features.csv')

In [3]:
infogain_features

Unnamed: 0,age,MFCC-1,MFCC-3,MFCC-6,MFCC-11,MFCC-12,MFCC-10,1st-derivative,status
0,0,-0.129712,-0.218797,-0.297418,-0.182316,-0.113946,0.478073,0.845591,0
1,0,0.199122,0.130936,0.309148,0.124619,0.314070,0.151716,-0.001756,0
2,0,-0.040987,-0.314355,-0.042278,0.415858,-0.509541,0.155618,0.472397,0
3,0,0.188057,-0.344777,0.237518,-0.405171,0.207515,0.539237,-0.187704,0
4,0,-0.063104,-0.503092,0.244397,0.049041,-0.377004,0.497271,0.032773,0
...,...,...,...,...,...,...,...,...,...
1359,4,0.017790,-0.015766,-0.068605,0.367106,-0.123831,-0.230444,0.096462,1
1360,4,0.784509,-0.508726,0.243823,0.507744,-0.028612,-0.078936,-0.218072,1
1361,4,-0.452221,0.573179,0.073117,-0.049506,0.143463,0.230552,0.561805,1
1362,4,-0.057950,0.036295,0.132781,-0.044978,0.461731,-0.127805,0.130035,1


# SVM

In [79]:
# Sklearn modules & classes
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn import metrics

X = infogain_features.drop('status', axis=1)
y = infogain_features['status']
 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#The Support Vector Classifier (SVC)
svc = SVC(C=1,kernel='rbf')
 
# Fit the model
svc.fit(X_train_std, y_train)


# Make the predictions
y_predict = svc.predict(X_test_std)
 
# Measure the performance
print("Accuracy score %.3f" %metrics.accuracy_score(y_test, y_predict))

Accuracy score 0.780


# Naive Bayes Classifier

In [80]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()

model.fit(X_train, y_train)

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
    classification_report,
)

y_pred = model.predict(X_test)

accuray = accuracy_score(y_pred, y_test)
f1 = f1_score(y_pred, y_test, average="weighted")

print("Accuracy:", accuray)
print("F1 Score:", f1)

Accuracy: 0.748780487804878
F1 Score: 0.7498746978196968


# Random Forest

In [87]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint



X = infogain_features.drop('status', axis=1)
y = infogain_features['status']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

rf = RandomForestClassifier(n_estimators=190,max_depth=11,random_state=42)
rf.fit(X_train, y_train)

RandomForestClassifier
RandomForestClassifier()
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8058608058608059


# KNN

In [50]:
from sklearn.neighbors import KNeighborsClassifier

X = infogain_features.drop('status', axis=1)
y = infogain_features['status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn_classifier = KNeighborsClassifier(n_neighbors=5,metric='euclidean', weights='uniform')  # You can choose the value of k (number of neighbors) based on your preference or using cross-validation.

knn_classifier.fit(X_train, y_train)


KNeighborsClassifier
KNeighborsClassifier()
y_pred = knn_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.7948717948717948
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.78      0.80       144
           1       0.77      0.81      0.79       129

    accuracy                           0.79       273
   macro avg       0.79      0.80      0.79       273
weighted avg       0.80      0.79      0.80       273

Confusion Matrix:
 [[113  31]
 [ 25 104]]


# Gradient Boosting Classifier 

In [47]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

X = infogain_features.drop('status', axis=1)
y = infogain_features['status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an instance of the Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier(learning_rate= 0.1, max_depth= 3, n_estimators= 50)

# Fit the classifier to the training data
gb_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gb_classifier.predict(X_test)

# Evaluate the classifier's performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.7582417582417582
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.76      0.77       144
           1       0.74      0.75      0.75       129

    accuracy                           0.76       273
   macro avg       0.76      0.76      0.76       273
weighted avg       0.76      0.76      0.76       273

Confusion Matrix:
 [[110  34]
 [ 32  97]]
