In [1]:
import pandas as pd

# Load the preprocessed dataset from the project's data directory.
data = pd.read_csv(filepath_or_buffer='../data/preprocessed_data.csv')

# Plain-text preview of the first five rows (5 is also head()'s default).
print(data.head(n=5))

   Age  Number of Children  Physical Activity Level  Employment Status  \
0   31                   2                        2                  0   
1   55                   1                        0                  1   
2   78                   1                        0                  1   
3   58                   3                        1                  0   
4   18                   0                        0                  0   

      Income  Alcohol Consumption  Dietary Habits  Sleep Patterns  \
0   26265.67                    1               1               1   
1   42710.36                    2               0               1   
2  125332.79                    0               0               2   
3    9992.78                    1               1               0   
4    8595.08                    0               1               1   

   History of Mental Illness  History of Substance Abuse  ...  \
0                          1                           0  ...   
1         

## Split the dataset into training and test sets

In [2]:
from sklearn.model_selection import train_test_split

# The label column is the target; every other column is used as a feature.
y = data['History of Mental Illness']
X = data.drop(columns='History of Mental Illness')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Verify the split

In [3]:
# Report the shape of each piece of the split, one line per piece.
shape_report = (
    ("Training features shape:", X_train),
    ("Testing features shape:", X_test),
    ("Training target shape:", y_train),
    ("Testing target shape:", y_test),
)
for caption, split_part in shape_report:
    print(caption, split_part.shape)

Training features shape: (331014, 20)
Testing features shape: (82754, 20)
Training target shape: (331014,)
Testing target shape: (82754,)


## Train a random forest model with balanced class weights

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Random forest with 'balanced' class weights; fit() returns the fitted estimator,
# so construction and training are chained.
model = RandomForestClassifier(class_weight='balanced', random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Compute the three evaluation artefacts, then print them.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.6642216690431882
Confusion Matrix:
 [[51817  5654]
 [22133  3150]]
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.90      0.79     57471
           1       0.36      0.12      0.18     25283

    accuracy                           0.66     82754
   macro avg       0.53      0.51      0.49     82754
weighted avg       0.60      0.66      0.60     82754



## Install the imbalanced-learn (`imblearn`) library

In [None]:
!pip install imbalanced-learn

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Resample the training split only with SMOTE; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Fit a random forest on the resampled data (fit() returns the fitted estimator).
model = RandomForestClassifier(random_state=42).fit(X_train_resampled, y_train_resampled)

# Predict labels for the original, un-resampled test rows.
y_pred = model.predict(X_test)

# Compute the three evaluation artefacts, then print them.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

In [7]:
!pip install tensorflow 

Defaulting to user installation because normal site-packages is not writeable


## Build and train the neural network

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout and
# batch shuffling are repeatable, in line with the random_state=42 used for the
# train/test split.
tf.keras.utils.set_random_seed(42)

# Step 1: Normalize the features
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 2: Build the Neural Network
model = Sequential()

# First hidden layer; input_dim declares the number of input features
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))  # 128 neurons, ReLU activation
model.add(Dropout(0.2))  # Dropout to prevent overfitting

# Second hidden layer
model.add(Dense(64, activation='relu'))  # 64 neurons, ReLU activation
model.add(Dropout(0.2))  # Dropout

# Output layer
model.add(Dense(1, activation='sigmoid'))  # 1 neuron, sigmoid activation for binary classification

# Step 3: Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the model
# 20% of the training rows are held out for validation; class 1 samples are
# weighted twice as heavily as class 0 samples in the loss.
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_split=0.2, class_weight={0: 1, 1: 2})

# Step 5: Make predictions
y_pred_prob = model.predict(X_test_scaled)
y_pred = (y_pred_prob > 0.5).astype(int)  # Convert probabilities to binary predictions

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout and
# batch shuffling are repeatable, in line with the random_state=42 used for the
# train/test split.
tf.keras.utils.set_random_seed(42)

# Step 1: Normalize the features
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 2: Build the Neural Network
model = Sequential()

# First hidden layer; input_dim declares the number of input features
model.add(Dense(256, input_dim=X_train_scaled.shape[1], activation='relu'))  # 256 neurons, ReLU activation
model.add(Dropout(0.3))  # 30% dropout

# Further hidden layers
model.add(Dense(128, activation='relu'))  # 128 neurons, ReLU activation
model.add(Dropout(0.3))  # 30% dropout

model.add(Dense(64, activation='relu'))  # 64 neurons, ReLU activation
model.add(Dropout(0.3))  # 30% dropout

# Output layer
model.add(Dense(1, activation='sigmoid'))  # 1 neuron, sigmoid activation for binary classification

# Step 3: Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the model with early stopping
# Training stops once val_loss has not improved for 5 epochs, and the weights
# from the best epoch are restored. Class 1 samples are weighted twice as
# heavily as class 0 samples in the loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train_scaled, y_train, epochs=30, batch_size=64, validation_split=0.2, 
                    class_weight={0: 1, 1: 2}, callbacks=[early_stopping])

# Step 5: Make predictions
y_pred_prob = model.predict(X_test_scaled)
y_pred = (y_pred_prob > 0.5).astype(int)  # Convert probabilities to binary predictions

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

## Save the trained model

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [9]:
# Minimal, untrained example network. It is bound to its own name so that running
# this cell does not rebind `model`.
example_model = Sequential()
example_model.add(Dense(64, input_shape=(20,), activation='relu'))  # Example input shape
example_model.add(Dense(1, activation='sigmoid'))

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define a minimal, untrained example network. It is bound to its own name so that
# running this cell does not rebind `model`.
example_model = Sequential()
example_model.add(Dense(64, input_shape=(20,), activation='relu'))  # Example input shape
example_model.add(Dense(1, activation='sigmoid'))

# Print model summary
example_model.summary()

In [12]:
# Display the input shape reported by the object currently bound to `model`.
model.input_shape

(None, 20)

In [11]:
import joblib

# Serialize the object currently bound to `model` into the models directory.
# NOTE(review): for Keras models the documented persistence API is `model.save(...)`;
# confirm that a joblib pickle is what the consumers of this file expect.
joblib.dump(value=model, filename='../models/mental_health_model.pkl')

['../models/mental_health_model.pkl']