In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
# Load the mental-health dataset from CSV into a DataFrame and preview the first rows.
data = pd.read_csv('dataset/mental_health_dataset.csv') # path is relative to the current working directory
print(data.head())

   User_ID  Age      Gender  Occupation    Country Mental_Health_Condition  \
0        1   36  Non-binary       Sales     Canada                      No   
1        2   34      Female   Education         UK                     Yes   
2        3   65  Non-binary       Sales        USA                     Yes   
3        4   34        Male       Other  Australia                      No   
4        5   22      Female  Healthcare     Canada                     Yes   

  Severity Consultation_History Stress_Level  Sleep_Hours  Work_Hours  \
0   Medium                  Yes       Medium          7.1          46   
1      NaN                   No          Low          7.5          47   
2     High                   No          Low          8.4          58   
3      Low                   No       Medium          9.8          30   
4      Low                   No       Medium          4.9          62   

   Physical_Activity_Hours  
0                        5  
1                        8  
2    

In [None]:
# Preview a one-hot encoded version of the frame.
# The result is not assigned to anything, so `data` itself is left unchanged by this cell.
pd.get_dummies(data)

In [None]:
# Getting input for variables (eventually to be replaced by a web app)
# usr_age = input('Enter your age: ')
# usr_gender = input('Enter your gender (M, F, NB, NA): ')
# usr_sleep = input('How many hours a night do you sleep? : ')
# usr_work = input('How many hours a week do you work? : ')
# usr_fitness = input('How many hours a week do you exercise? : ')
# usr_consult = input('Have you been consulted for a mental health condition in the past?: ')
# usr_stress = input('What is your stress level?: ')


In [None]:
# Scatter plot of weekly work hours against age, to eyeball any relationship between the two.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(data['Age'], data['Work_Hours'], alpha=0.5)  # semi-transparent markers so overlapping points stay visible
ax.set_title("Correlation between Age and Work Hours")
ax.set_xlabel("Age")
ax.set_ylabel("Work Hours")
ax.grid(True)
plt.show()

In [None]:
# Average nightly sleep per gender.
# Passing the raw columns to plt.bar draws one bar per row, all stacked on top of each other at
# the same x position, so only the tallest bar per gender is visible. Aggregate first so each
# bar carries a meaningful summary value.
sleep_by_gender = data.groupby('Gender')['Sleep_Hours'].mean()
plt.bar(sleep_by_gender.index, sleep_by_gender.values, color='blue')
plt.title("Average Sleep Hours by Gender")
plt.xlabel("Gender")
plt.ylabel("Average Sleep Hours")
plt.show()

In [4]:
# Registry of fitted LabelEncoder objects, keyed by column name (filled in by the encoding loop).
label_encoders = {}
# Columns that are integer-encoded by the encoding loop.
categorical_columns = ['Gender', 'Occupation', 'Country', 'Severity', 'Consultation_History', 'Stress_Level']

In [5]:
# Replace every categorical column with integer codes so the model can consume it as numbers.
# LabelEncoder numbers the classes in sorted order (alphabetical for strings), so the codes
# carry no ordinal meaning — e.g. Stress_Level is not encoded as Low < Medium < High.
# NOTE(review): `data` is overwritten in place, so re-running this cell re-fits each encoder on
# the already-encoded integers and the original label mapping is lost.
# NOTE(review): the preview output shows a NaN in Severity — confirm how missing values should
# be handled before encoding.
for column in categorical_columns: 
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le  # keep the fitted encoder for this column

In [6]:
# Encode the prediction target (Mental_Health_Condition) as integer class labels.
# Its encoder is kept in its own variable rather than in label_encoders.
target_encoder = LabelEncoder()
data['Mental_Health_Condition'] = target_encoder.fit_transform(data['Mental_Health_Condition'])

In [7]:
# Names of the input columns used as model features, and of the column being predicted.
features = ['Age', 'Gender', 'Stress_Level', 'Sleep_Hours', 'Work_Hours', 'Physical_Activity_Hours']
target = 'Mental_Health_Condition'

In [9]:
# Pull the feature columns and the target column out of the frame.
X = data[features] # X (features) and y (target) are the conventional names
y = data[target]

In [10]:
# Standardize each feature column to zero mean and unit standard deviation.
# fit_transform returns a NumPy array by default, so X is no longer a DataFrame after this cell.
# NOTE(review): the scaler is fit on all rows, before the train/test split, so the test rows
# contribute to the mean/std used for scaling — consider fitting on the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(X) 

In [11]:
# Hold out 30% of the rows for testing and train on the remaining 70%.
# The fixed random_state makes the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=13)


In [12]:
# Convert the train/test splits into PyTorch tensors.
# Features become float32; labels become long (int64), the dtype nn.CrossEntropyLoss
# expects when targets are class indices.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)  # .values: underlying array of the pandas object
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [15]:
# Sanity check: print the shape of each split, features first and then labels.
for split_tensor in (X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor):
    print(split_tensor.shape)

torch.Size([700, 6])
torch.Size([300, 6])
torch.Size([700])
torch.Size([300])


In [18]:
#MODEL CREATION
class MentalHealthModel(nn.Module):
    """Small feed-forward classifier: input_size -> 32 -> 16 -> 8 -> 2.

    The first three linear layers are each followed by a ReLU. The final layer
    returns two raw, unnormalized scores (logits), one per class, with no
    softmax applied — the form nn.CrossEntropyLoss expects.

    Args:
        input_size: number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # 4 fully connected linear layers
        self.fc1 = nn.Linear(input_size, 32) # expands input_size features to 32 neurons
        self.fc2 = nn.Linear(32, 16) # condenses 32 to 16
        self.fc3 = nn.Linear(16, 8) # condenses 16 neurons down to 8
        self.fc4 = nn.Linear(8, 2) # takes the 8 values from fc3 and outputs one logit per class

    #FORWARD PASS

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: float tensor whose last dimension equals input_size.

        Returns:
            Tensor with a last dimension of 2 holding the class logits.
        """
        x = torch.relu(self.fc1(x)) #NON-LINEAR LAYERS
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        # Output layer: must be fc4. Re-applying fc3 here would feed an 8-wide tensor into a
        # layer that expects 16 inputs and fail with a shape mismatch.
        x = self.fc4(x)
        return x

In [19]:
# Build the network and the training objects.
input_size = X_train_tensor.shape[1]  # size of dimension 1 of the training feature tensor
model = MentalHealthModel(input_size) # model initialized
criterion = nn.CrossEntropyLoss() # loss function: cross-entropy, takes the model's raw outputs and class-index targets
optimizer = optim.Adam(model.parameters(), lr=0.001) # Adam optimizer over all model parameters, learning rate 1e-3
