# Basic imports

In [None]:
import io
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import files

# Loading data

In [None]:
# Open the Colab file-upload widget. The result is indexed by file name in the
# next cell, and each value is decoded there as UTF-8 bytes.
f = files.upload()

In [None]:
# Decode the uploaded bytes and parse them as a semicolon-separated CSV.
raw_bytes = f['stress_classification.csv']
csv_text = raw_bytes.decode('utf-8')
df = pd.read_csv(io.StringIO(csv_text), sep=";")
# Bare expression so the notebook renders the loaded table.
df

# 1. Data exploration

In [None]:
# Display pandas' descriptive statistics for the loaded DataFrame.
df.describe()

In [None]:
# Per column: True if the column contains at least one missing value.
df.isna().any()

In [None]:
# Count the rows per distinct stress_level value (class balance check).
df["stress_level"].value_counts()

In [None]:
# Draw one histogram (with KDE overlay) per feature column, stacked vertically.
# The axes are paired with the feature columns directly rather than indexed by
# position in df.columns: the position-based index overruns the axes array
# whenever "stress_level" is not the last column.
feature_cols = [col for col in df.columns if col != "stress_level"]
f, ax = plt.subplots(len(feature_cols), 1, figsize=(8, 40))
# plt.subplots returns a bare Axes (not an array) when there is a single row;
# np.atleast_1d makes the iteration work in that case too.
for axis, col in zip(np.atleast_1d(ax), feature_cols):
    sns.histplot(data=df[col], kde=True, ax=axis)

In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns
# (the stress_level target is dropped first).
f, ax = plt.subplots(figsize=(8, 6))
feature_corr = df.drop(columns="stress_level").corr()
sns.heatmap(feature_corr, annot=True, ax=ax)

# 2. Data preprocessing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Separate the predictors from the stress_level target.
y = df["stress_level"]
X = df.drop(columns="stress_level")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Sanity-check the sizes of the four resulting pieces.
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)

In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)
for scaled in (X_train_scaled, X_test_scaled):
    print(scaled.shape)

# 3. KNN model training

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier configured to use 5 neighbours.
knn = KNeighborsClassifier(n_neighbors=5)

In [None]:
# Train KNN on the scaled training features and their labels.
knn.fit(X_train_scaled, y_train)

In [None]:
# Predict labels for the scaled test features with the fitted KNN model.
y_pred_knn = knn.predict(X_test_scaled)
# Show the shape of the prediction array as the cell output.
y_pred_knn.shape

# 4. KNN model evaluation

In [None]:
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

In [None]:
# Print scikit-learn's classification report for y_pred_knn against the test labels.
print(classification_report(y_test, y_pred_knn))

In [None]:
# Confusion matrix of y_pred_knn against the true test labels, drawn as an
# annotated heatmap ("g" format prints the counts without scientific notation).
f, ax = plt.subplots(figsize=(8, 6))
cm = confusion_matrix(y_test, y_pred_knn)
sns.heatmap(data=cm, annot=True, fmt="g", cmap="Blues", ax=ax)

In [None]:
# Overall accuracy and macro-averaged F1 for y_pred_knn on the test set.
knn_accuracy = accuracy_score(y_test, y_pred_knn)
print(f"Accuracy: {knn_accuracy}")
knn_macro_f1 = f1_score(y_test, y_pred_knn, average='macro')
print(f"F1 score: {knn_macro_f1}")

# 5. Decision Tree model training

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with default hyperparameters. The seed makes the fitted tree
# (and therefore the reported metrics and feature importances) reproducible
# across runs; 42 matches the seed used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)

In [None]:
# Train the decision tree on the same scaled training features and labels used for KNN.
dt.fit(X_train_scaled, y_train)

In [None]:
# Predict with the fitted decision tree `dt` (not `knn`), so that the
# evaluation cells that consume y_pred_dt actually measure the tree.
y_pred_dt = dt.predict(X_test_scaled)
# Show the shape of the prediction array as the cell output.
y_pred_dt.shape

# 6. Decision Tree model evaluation

In [None]:
# Print scikit-learn's classification report for y_pred_dt against the test labels.
print(classification_report(y_test, y_pred_dt))

In [None]:
# Confusion matrix of y_pred_dt against the true test labels, drawn as an
# annotated heatmap ("g" format prints the counts without scientific notation).
f, ax = plt.subplots(figsize=(8, 6))
cm = confusion_matrix(y_test, y_pred_dt)
sns.heatmap(data=cm, annot=True, fmt="g", cmap="Blues", ax=ax)

In [None]:
# Overall accuracy and macro-averaged F1 for y_pred_dt on the test set.
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print(f"Accuracy: {dt_accuracy}")
dt_macro_f1 = f1_score(y_test, y_pred_dt, average='macro')
print(f"F1 score: {dt_macro_f1}")

In [None]:
# Horizontal bar chart of the fitted tree's feature importances, labelled with
# the feature column names.
importances = pd.Series(dt.feature_importances_, index=X.columns)
importances.plot(kind="barh")