# Importation des bibliothèques

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

# Dataset Exploration 

In [None]:
# Explore rapidly the dataset

df = pd.read_csv("student_depression_dataset.csv")

df.head()
df.info()
df.isna().sum()
df.describe()

Our dataset contains 27 901 observations and 18 variables. It combines numerical  and categorial values, which demands an extraction for some columns before the modelisation. Our target is the column " Depression" defined by binear values.

In [None]:
# Prepare the dataset

target_col = "Depression" 

X = df.drop(columns=[target_col])
y = df[target_col]

#Delete numerical values
X = pd.get_dummies(X, drop_first=True)

# Train / Test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)


Certains columns have enacoded categorial variables.And SVM can not traite them,  So we delete them before doing the extraction.

# SVM

In [None]:
#SVM
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Decision Tree & Grid Search

In [None]:

#Decision Tree & Grid Search
dt = DecisionTreeClassifier(random_state=42)

param_grid_dt = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_depth': [2, 4, 6, 8, 10, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_dt = GridSearchCV(
    dt,
    param_grid_dt,
    cv=2,
    scoring='accuracy',
    refit=True,
    verbose=1
)

grid_dt.fit(X_train, y_train)

print("Best DT params:", grid_dt.best_params_)
print("Best DT CV score:", grid_dt.best_score_)


# Decision Tree  Evaluation 

In [19]:
y_pred_dt = grid_dt.predict(X_test)

print("Decision Tree – Test Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Decision Tree – F1:", f1_score(y_test, y_pred_dt, average='binary'))
print(classification_report(y_test, y_pred_dt))
print(confusion_matrix(y_test, y_pred_dt))


Decision Tree – Test Accuracy: 0.8249417667084752
Decision Tree – F1: 0.8546347269751525
              precision    recall  f1-score   support

           0       0.81      0.75      0.78      2313
           1       0.83      0.88      0.85      3268

    accuracy                           0.82      5581
   macro avg       0.82      0.81      0.82      5581
weighted avg       0.82      0.82      0.82      5581

[[1732  581]
 [ 396 2872]]


# SVM & Grid Search

In [None]:
svm = SVC(probability=True)

param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

grid_svm = GridSearchCV(
    svm,
    param_grid_svm,
    cv=2,
    scoring='accuracy',
    refit=True,
    verbose=1
)

grid_svm.fit(X_train_scaled, y_train)

print("Best SVM params:", grid_svm.best_params_)
print("Best SVM CV score:", grid_svm.best_score_)


Fitting 2 folds for each of 12 candidates, totalling 24 fits


# SVM Evaluation 

In [None]:
y_pred_svm = grid_svm.predict(X_test_scaled)

print("SVM – Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print("SVM – F1:", f1_score(y_test, y_pred_svm, average='binary'))
print(classification_report(y_test, y_pred_svm))
print(confusion_matrix(y_test, y_pred_svm))


# Bagging Decision Tree

In [None]:
bag_dt = BaggingClassifier(
    estimator=grid_dt.best_estimator_,
    n_estimators=20,
    max_samples=0.8,
    random_state=42
)

bag_dt.fit(X_train, y_train)

y_pred_bag_dt = bag_dt.predict(X_test)

print("Bagging DT – Accuracy:", accuracy_score(y_test, y_pred_bag_dt))
print("Bagging DT – F1:", f1_score(y_test, y_pred_bag_dt))


# Bagging SVM

In [41]:
bag_svm = BaggingClassifier(
    estimator=grid_svm.best_estimator_,
    n_estimators=20,
    max_samples=0.8,
    random_state=42
)

bag_svm.fit(X_train_scaled, y_train)

y_pred_bag_svm = bag_svm.predict(X_test_scaled)

print("Bagging SVM – Accuracy:", accuracy_score(y_test, y_pred_bag_svm))
print("Bagging SVM – F1:", f1_score(y_test, y_pred_bag_svm))


NameError: name 'grid_svm' is not defined

# Voting Classifier

In [None]:
voting = VotingClassifier(
    estimators=[
        ('bag_svm', bag_svm),
        ('bag_dt', bag_dt)
    ],
    voting='soft'
)

voting.fit(X_train_scaled, y_train)

y_pred_vote = voting.predict(X_test_scaled)

print("Voting – Accuracy:", accuracy_score(y_test, y_pred_vote))
print("Voting – F1:", f1_score(y_test, y_pred_vote))
print(confusion_matrix(y_test, y_pred_vote))
