In [None]:
import pandas as pd

# Load the raw dataset from the working directory.
df = pd.read_csv('Student Depression Dataset.csv')

# Encode the binary text columns as 0/1.
# NOTE: Series.map turns every value that is not a key of the mapping
# (other labels, different casing, blanks) into NaN instead of raising.
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})

yes_no_columns = ['Have you ever had suicidal thoughts ?', 'Family History of Mental Illness']
for col in yes_no_columns:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Drop every column that is still object-typed after the encoding above.
# Pass the column *labels* to drop(); the previous code handed over the whole
# sub-frame and only worked because iterating a DataFrame yields its labels.
columns_to_remove = df.select_dtypes(include=['object']).columns
df = df.drop(columns=columns_to_remove)

# Persist the remaining columns (without the index) for the modelling cells.
output_file = 'Processed_Student_Depression_Dataset.csv'
df.to_csv(output_file, index=False)

In [None]:
from sklearn.model_selection import train_test_split

# Reload the table written by the preprocessing cell.
dataset = pd.read_csv('Processed_Student_Depression_Dataset.csv')

# Features: every column except the first and the last.
# Target: the last column.
# NOTE(review): column 0 is presumably a row identifier - confirm against the CSV.
feature_frame = dataset.iloc[:, 1:-1]
target_column = dataset.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_column.to_numpy()

# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Sanity check of the split: show the first training row, then the training labels.
first_training_row = X_train[0]
print(first_training_row)
print(y_train)

[ 1.   31.    1.    0.    8.89  4.    0.    1.   10.    1.    1.  ]
[0 0 1 ... 1 1 0]


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so the test split does not influence them.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import numpy as np

# Replace NaNs with the per-column mean learned from the training split.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# 5-fold cross-validated grid search over C and gamma, scored by accuracy.
# SVC() uses its default kernel, which is 'rbf'.
classifier = SVC()
parameters = [{'C': [1000.0, 5000.0, 10000.0], 'gamma':[0.00005, 0.0005, 0.001]}]
gr = GridSearchCV(estimator=classifier, param_grid=parameters, scoring='accuracy', cv=5)
gr.fit(X_train, y_train)

params = gr.best_params_
print(params)

# GridSearchCV (refit=True by default) has already retrained the winning
# configuration on all of X_train, so reuse that model instead of building
# and fitting an identical SVC a second time.
classifier = gr.best_estimator_

# NOTE: the logistic-regression cell further down reassigns `classifier` and
# `y_pred`; evaluate these predictions before running it.
y_pred = classifier.predict(X_test)

{'C': 5000.0, 'gamma': 5e-05}


In [None]:
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
import numpy as np

# Mean-impute NaNs using statistics from the training split; the same fitted
# imputer is then applied to the test split.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean').fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)

# Fit a logistic-regression classifier and predict the held-out labels.
classifier = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted labels for the current `y_pred`.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Overall accuracy, printed as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f"{accuracy*100:.2f}%")

[[1863  463]
 [ 386 2869]]
84.79%
