<a href="https://colab.research.google.com/github/sasichintada/ml-workshop/blob/main/classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

####**Student Depression Dataset**

**Features**

    ID: Unique identifier for each student.
    Age: Age of the student.
    Gender: Gender (e.g., Male, Female).
    City: Geographic region
    CGPA: Grade Point Average or other academic scores.
    Sleep Duration: Average daily sleep duration.
    Profession:The occupation or job type of the individual
    Work Pressure:The level of stress or pressure related to one's work
    Academic Pressure:The stress of an individual feels due to academic performance or workload.
    Study Satisfaction:The individual's satisfaction with their study or learning experience.
    Job Satisfaction:The level of satisfaction an individual feels regarding their job or work environment.
    Dietary Habits:The typical eating patterns or food choices of an individual

In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [23]:
df = pd.read_csv('/content/Student Depression Dataset.csv')
print(df.head())

   id  Gender   Age           City Profession  Academic Pressure  \
0   2    Male  33.0  Visakhapatnam    Student                5.0   
1   8  Female  24.0      Bangalore    Student                2.0   
2  26    Male  31.0       Srinagar    Student                3.0   
3  30  Female  28.0       Varanasi    Student                3.0   
4  32  Female  25.0         Jaipur    Student                4.0   

   Work Pressure  CGPA  Study Satisfaction  Job Satisfaction  \
0            0.0  8.97                 2.0               0.0   
1            0.0  5.90                 5.0               0.0   
2            0.0  7.03                 5.0               0.0   
3            0.0  5.59                 2.0               0.0   
4            0.0  8.13                 3.0               0.0   

      Sleep Duration Dietary Habits   Degree  \
0          5-6 hours        Healthy  B.Pharm   
1          5-6 hours       Moderate      BSc   
2  Less than 5 hours        Healthy       BA   
3          7-8

In [24]:
print(df.isnull().sum())

id                                       0
Gender                                   0
Age                                      0
City                                     0
Profession                               0
Academic Pressure                        0
Work Pressure                            0
CGPA                                     0
Study Satisfaction                       0
Job Satisfaction                         0
Sleep Duration                           0
Dietary Habits                           0
Degree                                   0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         3
Family History of Mental Illness         0
Depression                               0
dtype: int64


In [26]:
df['Financial Stress'].dropna(inplace=True)

In [27]:
print(df.isnull().sum())

id                                       0
Gender                                   0
Age                                      0
City                                     0
Profession                               0
Academic Pressure                        0
Work Pressure                            0
CGPA                                     0
Study Satisfaction                       0
Job Satisfaction                         0
Sleep Duration                           0
Dietary Habits                           0
Degree                                   0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         0
Family History of Mental Illness         0
Depression                               0
dtype: int64


In [35]:
for i in range(len(df['Work/Study Hours'])):
    value = df['Work/Study Hours'][i]

    if isinstance(value, str) and '-' in value:
        start, end = value.split('-')
        df['Work/Study Hours'][i] = (int(start.strip()) + int(end.strip())) / 2
    elif isinstance(value, str):
        df['Work/Study Hours'][i] = int(value.split()[0])

In [46]:
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])

In [47]:
X = df.drop(columns=['id', 'Depression'])
y = df['Depression']

X = X.apply(pd.to_numeric, errors='coerce')

print(X.isna().sum())

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

Gender               0
Age                  0
Academic Pressure    0
Work Pressure        0
CGPA                 0
                    ..
Degree_ME            0
Degree_MHM           0
Degree_MSc           0
Degree_Others        0
Degree_PhD           0
Length: 104, dtype: int64


 'Family History of Mental Illness']. At least one non-missing value is needed for imputation with strategy='mean'.


In [48]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

**Logistic Regression Model**

In [51]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)

#Regression
accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
precision_logreg = precision_score(y_test, y_pred_logreg)
recall_logreg = recall_score(y_test, y_pred_logreg)
f1_logreg = f1_score(y_test, y_pred_logreg)

print(f"Accuracy: {accuracy_logreg}")
print(f"Precision: {precision_logreg}")
print(f"Recall: {recall_logreg}")
print(f"F1-Score: {f1_logreg}")


Accuracy: 0.7831929761691453
Precision: 0.7987625220978197
Recall: 0.8372452130945027
F1-Score: 0.8175512665862484


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**Naive Bayes Model**

In [54]:
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

#Naive Bayes
accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb)
recall_nb = recall_score(y_test, y_pred_nb)
f1_nb = f1_score(y_test, y_pred_nb)

print(f"Accuracy: {accuracy_nb}")
print(f"Precision: {precision_nb}")
print(f"Recall: {recall_nb}")
print(f"F1-Score: {f1_nb}")

Accuracy: 0.42089231320551873
Precision: 0.875
Recall: 0.0021618282890673254
F1-Score: 0.004313000616142945


**K-Nearest Neighbors Model**

In [53]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

#KNN
accuracy_knn = accuracy_score(y_test, y_pred_knn)
precision_knn = precision_score(y_test, y_pred_knn)
recall_knn = recall_score(y_test, y_pred_knn)
f1_knn = f1_score(y_test, y_pred_knn)

print(f"Accuracy: {accuracy_knn}")
print(f"Precision: {precision_knn}")
print(f"Recall: {recall_knn}")
print(f"F1-Score: {f1_knn}")

Accuracy: 0.7538075613689303
Precision: 0.7736347621843805
Recall: 0.8137739345274861
F1-Score: 0.7931968693558098


**Decision Tree Model**

In [55]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

#Decision Tree
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt)
recall_dt = recall_score(y_test, y_pred_dt)
f1_dt = f1_score(y_test, y_pred_dt)

print(f"Accuracy: {accuracy_dt}")
print(f"Precision: {precision_dt}")
print(f"Recall: {recall_dt}")
print(f"F1-Score: {f1_dt}")

Accuracy: 0.6930657588245834
Precision: 0.731130645650197
Recall: 0.7449042618900555
F1-Score: 0.7379531895364846


**Support Vector Classifier Model**

In [56]:
svc = SVC()
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)

#SVC
accuracy_svc = accuracy_score(y_test, y_pred_svc)
precision_svc = precision_score(y_test, y_pred_svc)
recall_svc = recall_score(y_test, y_pred_svc)
f1_svc = f1_score(y_test, y_pred_svc)

print(f"Accuracy: {accuracy_svc}")
print(f"Precision: {precision_svc}")
print(f"Recall: {recall_svc}")
print(f"F1-Score: {f1_svc}")

Accuracy: 0.7865973839813654
Precision: 0.7995317529997074
Recall: 0.8437306979617047
F1-Score: 0.8210368144252442
