In [428]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from xgboost import XGBClassifier
import graphviz
from sklearn import tree
import matplotlib.pyplot as plt

In [2]:
# Read the ACME happiness-survey CSV (path is relative to the notebook's
# working directory) into a DataFrame; the cells below split it into the
# "Y" target column and the remaining feature columns.
data = pd.read_csv(filepath_or_buffer="../Data/ACME-HappinessSurvey2020.csv")

In [3]:
# Target vector: the "Y" column of the survey table.
# NOTE(review): presumably a binary unhappy/happy label (the tree plot later
# labels the classes "unhappy"/"happy") — confirm against the dataset.
y = data.loc[:, "Y"]

In [4]:
# Feature column names: every column of the survey table except the target "Y".
columns = data.columns.tolist()
columns.remove("Y")

In [5]:
# Feature matrix: all rows, restricted to the non-target columns listed in `columns`.
X = data.loc[:, columns]

In [219]:
# Human-readable labels for the survey questions; passed to tree.plot_tree as
# `feature_names` when the fitted decision tree is drawn.
# NOTE(review): these are matched to the columns of X purely by position —
# presumably one label per feature column, in column order. Confirm the count
# and order against the dataset before trusting the plot labels.
feature_names = [
    "order delivered on time",
    "contents were as expected",
    "I ordered everything I wanted to order",
    "I paid a good price",
    "I am satisfied with the courier",
    "the app is easy to order"
]

In [392]:
# Hold out 30% of the rows as a test set for the single-split evaluations of
# the decision tree and the SVM.
# random_state pins the shuffle: without it every run draws a different split,
# so the reported test accuracies cannot be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## Tree-based models (decision tree, AdaBoost, random forest)

In [532]:
# Entropy-criterion decision tree scored with 6-fold cross-validation on the
# full data set. min_samples_leaf=32 requires at least 32 samples per leaf.
# random_state makes the estimator deterministic: scikit-learn permutes the
# candidate features at every split, so equally good splits could otherwise
# be resolved differently from run to run.
dt_classifier = DecisionTreeClassifier(
    criterion="entropy", min_samples_leaf=32, random_state=42
)
val_scores_dt = cross_val_score(dt_classifier, X, y, scoring="accuracy", cv=6)
# Mean accuracy over the 6 folds (displayed as the cell output).
np.mean(val_scores_dt)

0.6507936507936507

In [536]:
# AdaBoost ensemble (100 estimators, learning rate 0.001) scored with 6-fold
# cross-validation on the full data set.
# The estimator is AdaBoost, not XGBoost, so it is named accordingly;
# random_state seeds the ensemble so the score is reproducible.
ada = AdaBoostClassifier(learning_rate=0.001, n_estimators=100, random_state=42)
val_scores_ada = cross_val_score(ada, X, y, scoring="accuracy", cv=6)

# Backward-compatible aliases: these objects were previously (misleadingly)
# bound to `xgb` / `val_scores_xgb`; keep those names alive for any cell that
# still refers to them.
xgb = ada
val_scores_xgb = val_scores_ada

# Mean accuracy over the 6 folds (displayed as the cell output).
np.mean(val_scores_ada)

0.6507936507936507

In [559]:
# Random forest (100 trees, at least 16 samples per leaf) scored with 6-fold
# cross-validation on the full data set.
# random_state seeds the bootstrap sampling and per-split feature selection;
# without it the cross-validated accuracy changes on every run.
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=16, random_state=42)
val_scores_rf = cross_val_score(rf, X, y, scoring="accuracy", cv=6)
# Mean accuracy over the 6 folds (displayed as the cell output).
np.mean(val_scores_rf)

0.5793650793650794

In [550]:
# Rebuild the decision tree with the same hyperparameters used for
# cross-validation and fit it on the training split only, so it can be
# evaluated on the held-out test rows and plotted.
# random_state makes the fitted tree deterministic for a given training split.
dt_classifier = DecisionTreeClassifier(
    criterion="entropy", min_samples_leaf=32, random_state=42
)
dt_classifier = dt_classifier.fit(X_train, y_train)

In [394]:
# Predicted class labels of the fitted decision tree for the held-out test rows.
y_preds = dt_classifier.predict(X_test)

In [395]:
# Test-set accuracy of the decision tree: share of held-out rows whose
# predicted label equals the true label.
acc_score = accuracy_score(y_true=y_test, y_pred=y_preds)

In [396]:
# Display the decision tree's test-set accuracy as the cell output.
acc_score

0.7631578947368421

In [397]:
# Draw the fitted decision tree on a large canvas and export it as a PNG.
fig, ax = plt.subplots(figsize=(25, 20))
_ = tree.plot_tree(
    dt_classifier,
    feature_names=feature_names,  # question labels, matched to columns by position
    # NOTE(review): assumes the fitted classes sort as (unhappy, happy),
    # e.g. 0 = unhappy and 1 = happy — confirm against the "Y" encoding.
    class_names=["unhappy", "happy"],
    filled=True,  # colour each node by its majority class
    ax=ax,
)

# Written to the notebook's working directory at 300 dpi.
fig.savefig("dt.png", dpi=300)

## SVM

In [372]:
# Support vector classifier with a degree-5 polynomial kernel, fitted on the
# training split; all other hyperparameters stay at their library defaults.
svm_model = SVC(kernel="poly", degree=5).fit(X_train, y_train)

In [373]:
# Predicted class labels of the fitted SVM for the held-out test rows.
# Note: this rebinds `y_preds`, replacing the decision tree's predictions.
y_preds = svm_model.predict(X_test)

In [374]:
# Test-set accuracy of the SVM: share of held-out rows whose predicted label
# equals the true label. Rebinds `acc_score` (previously the tree's accuracy).
acc_score = accuracy_score(y_true=y_test, y_pred=y_preds)

In [375]:
# Display the SVM's test-set accuracy as the cell output.
acc_score

0.5789473684210527