In [1]:
# Julia Chancey
# Multimodal ML - hw 1 question 3
# Classification

In [5]:
# Imports: pandas for data loading; scikit-learn for splitting, classifiers, and metrics
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [8]:
# Read the CSV file 'data.csv' (path relative to the working directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer='data.csv')

In [7]:
def print_report(name, y_true, y_pred):
    """Print the confusion matrix and summary metrics for one classifier.

    Parameters
    ----------
    name : str
        Label printed as the report heading (followed by ": ").
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the classifier, aligned with ``y_true``.

    Returns
    -------
    None
        The report is written to stdout.
    """
    print(f"{name}: ")
    print(confusion_matrix(y_true, y_pred))
    print("Accuracy: ", accuracy_score(y_true, y_pred))
    print("Precision: ", precision_score(y_true, y_pred))
    print("Recall: ", recall_score(y_true, y_pred))
    print("F1-Score: ", f1_score(y_true, y_pred))


# One seed shared by the split and every seeded model so a re-run reproduces the same numbers
SEED = 42

# Preprocessing the data: select the feature columns and the target column
X = data[['ASR_Error', 'Intent_Error', 'Duration', 'Gender']]
y = data['Purchase']

# Splitting the data into training and testing sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Initializing the classifiers
lr = LogisticRegression(random_state=SEED)
svm = SVC(kernel='linear', random_state=SEED)
nb = GaussianNB()
rf = RandomForestClassifier(n_estimators=100, random_state=SEED)

# Display name -> model; insertion order determines the order of the printed reports
classifiers = {
    "Logistic Regression": lr,
    "SVM": svm,
    "Naive Bayes": nb,
    "Random Forest": rf,
}

# Fitting the classifiers on the training data
for model in classifiers.values():
    model.fit(X_train, y_train)

# Making predictions on the testing data
predictions = {name: model.predict(X_test) for name, model in classifiers.items()}

# Keep the individual prediction names available for any later cells that use them
lr_pred = predictions["Logistic Regression"]
svm_pred = predictions["SVM"]
nb_pred = predictions["Naive Bayes"]
rf_pred = predictions["Random Forest"]

# Evaluating the performance of the classifiers
for name, y_pred in predictions.items():
    print_report(name, y_test, y_pred)

Logistic Regression: 
[[10  0]
 [ 1  9]]
Accuracy:  0.95
Precision:  1.0
Recall:  0.9
F1-Score:  0.9473684210526316
SVM: 
[[10  0]
 [ 1  9]]
Accuracy:  0.95
Precision:  1.0
Recall:  0.9
F1-Score:  0.9473684210526316
Naive Bayes: 
[[10  0]
 [ 1  9]]
Accuracy:  0.95
Precision:  1.0
Recall:  0.9
F1-Score:  0.9473684210526316
Random Forest: 
[[10  0]
 [ 1  9]]
Accuracy:  0.95
Precision:  1.0
Recall:  0.9
F1-Score:  0.9473684210526316
