In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = "mental_health_wearable_data.csv"
df = pd.read_csv(file_path)

# Data Analysis of Each Column
column_analysis = {}

df.head()

Unnamed: 0,Heart_Rate_BPM,Sleep_Duration_Hours,Physical_Activity_Steps,Mood_Rating,Mental_Health_Condition
0,98,7.425124,13760,5,1
1,111,9.457572,11455,9,0
2,88,4.037103,9174,8,1
3,74,8.864101,12612,1,1
4,102,9.690217,3923,9,0


In [2]:
for col in df.columns:
    column_analysis[col] = {
        "Data Type": df[col].dtype,
        "Unique Values": df[col].nunique(),
        "Missing Values": df[col].isna().sum(),
        "Mean": df[col].mean() if df[col].dtype in ['int64', 'float64'] else None,
        "Std Dev": df[col].std() if df[col].dtype in ['int64', 'float64'] else None,
        "Min": df[col].min() if df[col].dtype in ['int64', 'float64'] else None,
        "Max": df[col].max() if df[col].dtype in ['int64', 'float64'] else None,
        "Most Common Value": df[col].mode()[0] if not df[col].isna().all() else None
    }

# Convert to DataFrame for better readability
column_analysis_df = pd.DataFrame(column_analysis).T
print(column_analysis_df)

                        Data Type Unique Values Missing Values      Mean  \
Heart_Rate_BPM              int64            60              0    89.822   
Sleep_Duration_Hours      float64         10000              0  6.978495   
Physical_Activity_Steps     int64          7130              0  8061.812   
Mood_Rating                 int64             9              0    5.0651   
Mental_Health_Condition     int64             2              0    0.5157   

                            Std Dev       Min       Max Most Common Value  
Heart_Rate_BPM            17.309144        60       119               113  
Sleep_Duration_Hours       1.730834  4.000317  9.998831          4.000317  
Physical_Activity_Steps  4067.11812      1000     14998              1201  
Mood_Rating                2.562522         1         9                 5  
Mental_Health_Condition    0.499778         0         1                 1  


In [3]:
# Select features and target
X = df.drop(columns=['Mental_Health_Condition'])
y = df['Mental_Health_Condition']

# Normalize numerical features
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [4]:
X.head()

Unnamed: 0,Heart_Rate_BPM,Sleep_Duration_Hours,Physical_Activity_Steps,Mood_Rating
0,0.472491,0.258056,1.401108,-0.025406
1,1.223576,1.432374,0.83434,1.535634
2,-0.105268,-1.699492,0.273472,1.145374
3,-0.914129,1.089475,1.11883,-1.586446
4,0.703594,1.566792,-1.017679,1.535634


In [9]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Visualizing each feature against the target variable
feature_columns = ['Heart_Rate_BPM', 'Sleep_Duration_Hours', 'Physical_Activity_Steps', 'Mood_Rating']
target_column = 'Mental_Health_Condition'


# Build the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification output
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10  # Adjust based on performance
batch_size = 16
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

# Save the model
model.save("mental_health_classification_model.h5")

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.6159 - loss: 0.6418 - val_accuracy: 0.6715 - val_loss: 0.6031
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6547 - loss: 0.6115 - val_accuracy: 0.6700 - val_loss: 0.6037
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6642 - loss: 0.5993 - val_accuracy: 0.6690 - val_loss: 0.6050
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6718 - loss: 0.5954 - val_accuracy: 0.6725 - val_loss: 0.6034
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6685 - loss: 0.5979 - val_accuracy: 0.6720 - val_loss: 0.6028
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6676 - loss: 0.6027 - val_accuracy: 0.6740 - val_loss: 0.6041
Epoch 7/10
[1m500/500[0m [32m━━━━━━━



Test Accuracy: 0.6725


In [11]:
def predict_condition(input_data):
    input_df = pd.DataFrame([input_data], columns=X.columns)
    print(input_df)
    input_scaled = scaler.transform(input_df)
    print(input_scaled)
    prediction = model.predict(input_scaled)[0][0]
    print(prediction)
    return 'Likely Mental Health Condition' if prediction > 0.5 else 'Unlikely Mental Health Condition'

# Test with a specific row of data
test_row = {
    "Heart_Rate_BPM": 111,
    "Sleep_Duration_Hours": 9.45757234666566,
    "Physical_Activity_Steps": 11455,
    "Mood_Rating": 9
}
print(f"Predicted Condition for Custom Input: {predict_condition(test_row)}")

   Heart_Rate_BPM  Sleep_Duration_Hours  Physical_Activity_Steps  Mood_Rating
0             111              9.457572                    11455            9
[[1.22357631 1.43237371 0.83433959 1.53563437]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
0.0997673
Predicted Condition for Custom Input: Unlikely Mental Health Condition
