In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [None]:
# Predict whether they will be diagnosed with mental health disorder
df = pd.read_csv('./02_Student_Mental_Health_2021-10-10.csv')

# Only select yes or no answers for diagnosis (rows whose code is below 3.0;
# rows with a missing Diagnosis fail the comparison and are dropped as well)
data = df.loc[df["Diagnosis"] < 3.0]

# Percentage of missing values per column, and the positions of every column
# that has at least one missing value. This is kept under its own name so the
# scan result is no longer overwritten by the hard-coded list below.
miss_data = data.isnull().sum() * 100 / len(data)
cols_with_missing = [position for position, pct in enumerate(miss_data) if pct != 0]

# Drop empty columns and first four columns that only have information
# about the time they took the quiz.
# NOTE(review): 6, 21 and 118 are presumably the columns flagged by the scan
# above -- compare against `cols_with_missing` to confirm.
drops = [0, 1, 2, 3, 6, 21, 118]
data = data.drop(data.columns[drops], axis=1)

# Place the target variable at the end of the data frame
col = data.pop("Diagnosis")
data = data.assign(Diagnosis=col)

In [None]:
# Split data
# Make train and test sets.
# The target is selected by column name instead of a hard-coded position
# (139), so features and target stay correct if the number of columns in
# the cleaned frame ever changes.
x = data.drop(columns=["Diagnosis"])
y = data["Diagnosis"]
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=500)

In [None]:
# Decision Tree Algorithms
# Both scores are printed explicitly: a notebook cell only displays its last
# bare expression, so the Random Forest score was previously computed and
# then silently discarded.
# NOTE(review): the AUC is computed from hard class predictions; scoring with
# predict_proba would give a ranking-based AUC -- confirm which is intended.

# Random Forest
model = RandomForestClassifier(n_estimators=50, oob_score=True, random_state=300)
model = model.fit(x_train, y_train)
pred = model.predict(x_test)
print("Random Forest ROC AUC:", roc_auc_score(y_test, pred))

# Gradient Boosting
model = GradientBoostingClassifier(n_estimators=2000, learning_rate=1, random_state=300)
model = model.fit(x_train, y_train)
pred = model.predict(x_test)
print("Gradient Boosting ROC AUC:", roc_auc_score(y_test, pred))

In [None]:
# Neural Network

# Standardize the features. The scaler is fitted on the training set only and
# the same transformation is then applied to the test set.
# x_train / x_test are deliberately rebound to their scaled versions, as before.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# random_state is fixed (matching the tree models above) so the weight
# initialisation, and therefore the reported scores, are reproducible.
model = MLPClassifier(
    activation='logistic',
    alpha=.01,
    hidden_layer_sizes=(10, 5),
    max_iter=2000,
    random_state=300,
)
model = model.fit(x_train, y_train)
pred = model.predict(x_test)
print(roc_auc_score(y_test, pred))
print(accuracy_score(y_test, pred))