In [None]:
# !pip install gym

In [None]:
# !pip install stable-baselines3[extra]

In [None]:
# pip install --upgrade shimmy gymnasium

Collecting shimmy
  Using cached Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting gymnasium
  Using cached gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
Using cached Shimmy-2.0.0-py3-none-any.whl (30 kB)
Using cached gymnasium-1.1.1-py3-none-any.whl (965 kB)
Installing collected packages: gymnasium, shimmy
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 1.0.0
    Uninstalling gymnasium-1.0.0:
      Successfully uninstalled gymnasium-1.0.0
Successfully installed gymnasium-1.1.1 shimmy-2.0.0
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
stable-baselines3 2.5.0 requires gymnasium<1.1.0,>=0.29.1, but you have gymnasium 1.1.1 which is incompatible.


In [1]:
import gymnasium as gym
import gym
import numpy as np
import random
from gym import spaces
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("Mental_Health_Score_Dataframe.csv")

In [35]:
len(df)

128

In [3]:
df.head()

Unnamed: 0,Emotional_Score,Social_Media_Time_Score,Platform_Score,Usage_Pattern_Score,Impact_Score,Additional_Feature_Score,TFIDF_Score,Mental_Health_Score
0,84,13,20,18,27,38,0,200
1,56,13,10,2,0,35,0,116
2,97,23,19,18,61,34,0,252
3,114,13,19,11,54,63,0,274
4,87,13,19,18,25,10,0,172


In [4]:
class MentalHealthEnv(gym.Env):
    def __init__(self, df):
        super(MentalHealthEnv, self).__init__()

        #loading the dataset
        self.df = df.copy()
        self.current_index = 0


        #define state: 7 mental health scores (ranging between 0 to 300)
        self.observation_space = spaces.Box(low = 0, high = 300, shape = (7,), dtype = np.float32)

        # defining action space
        self.actions = [
            "Engage in Outdoor activities",
            "Reduce the social media usage",
            "Watch educational videos",
            "Increase mindfulness practicess like yoga",
            "Interact with positive communities",
            "Avoid stress-inducing content",
            "Follow a structured daily routine"
        ]

        self.action_space = spaces.Discrete(len(self.actions))

        #initiating state: random values between 50 and 250 (to avoid zero-starting issues)
        self.state = np.random.uniform(low = 50, high = 250, size = (7,))

        # define action effects (each action affects specific scores)
        self.action_effects = {
            "Engage in Outdoor activities": np.array([10, 5, 0, 8, -5, 7, 0]),
            "Reduce the social media usage": np.array([5, -15, 0, 0, 10, 0, 0]),
            "Watch educational videos": np.array([5, 0, 15, 7, -3, 10, 0]),
            "Increase mindfulness practicess like yoga": np.array([12, 0, 0, 5, -8, 15, 0]),
            "Interact with positive communities": np.array([6, 0, -8, 0, 12, 0, 0]),
            "Avoid stress-inducing content": np.array([6, 0, -8, 0, 12, 0, 0]),
            "Follow a structured daily routine": np.array([10, 0, 0, 0, 7, 8, 0])
        }
        self.state = self.get_state_from_df()


    def get_state_from_df(self):
        # extract the initial state from the dataframe based on the current index
        row = self.df.iloc[self.current_index]
        return np.array(row[:-1])
    
    def step(self, action):
        #apply an action and update mental health scores
        self.state = np.clip(self.state + self.action_effects[self.actions[action]], 0, 300)


        # reward: Improvement in mental health score 
        self.df.at[self.current_index, "Mental_Health_Score"] = np.sum(self.state)
        reward = self.df.at[self.current_index, "Mental_Health_Score"]

        #print(f"State Shape: {self.state.shape}")

        done = np.all(self.state >= 280) #stops if all the score are near 300
        
        return self.state, reward, done, {}

    def reset(self):
        """Move to the next user in the dataset or restart if finished."""
        self.current_index += 1  # Move to the next row
        if self.current_index >= len(self.df):
            self.current_index = 0  # Reset if all users are processed
        self.state = self.get_state_from_df()
        return self.state


In [5]:
env = MentalHealthEnv(df)

### Model Training

In [6]:
from stable_baselines3 import DQN, A2C



In [9]:
dqn_model = DQN("MlpPolicy", env, verbose=1)
dqn_model.learn(total_timesteps=5000)
dqn_model.save("mental_health_dqn_model")

# Train A2C
a2c_model = A2C("MlpPolicy", env, verbose=1)
a2c_model.learn(total_timesteps=5000)
a2c_model.save("mental_health_a2c_model")

Using cpu device




Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




------------------------------------
| time/                 |          |
|    fps                | 339      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.23    |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 5.62e+03 |
|    value_loss         | 2.35e+07 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 353      |
|    iterations         | 200      |
|    time_elapsed       | 2        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.3     |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 4.35e+03 |
|    value_loss         | 2.39e+07 |
-

In [36]:
def evaluate_model(model, env, num_episodes=len(df), max_steps=1):
    total_rewards = []
    action_counts = np.zeros(len(env.actions))
    user_suggestions = []

    for user_id in range(num_episodes):
        state = env.reset()
        done = False
        episode_reward = 0
        step_count = 0  # Track steps per episode
        actions_taken = []

        while not done and step_count < max_steps: 
            action, _ = model.predict(state)
            action_counts[action] += 1
            actions_taken.append(env.actions[action])
            state, reward, done, _ = env.step(action)
            episode_reward += reward
            step_count += 1

        total_rewards.append(episode_reward)
        user_suggestions.append((user_id + 1, actions_taken))

    return np.mean(total_rewards), np.std(total_rewards), action_counts, user_suggestions


# Load trained models
dqn_model = DQN.load("mental_health_dqn_model")
a2c_model = A2C.load("mental_health_a2c_model")

# Evaluate both models
dqn_mean, dqn_std, dqn_actions, dqn_suggestions = evaluate_model(dqn_model, env)
a2c_mean, a2c_std, a2c_actions, a2c_suggestions = evaluate_model(a2c_model, env)

In [37]:
dqn_mean

231.703125

In [38]:
dqn_std

34.81028030962082

In [39]:
a2c_mean

231.859375

In [40]:
a2c_std

36.340777366608094

In [43]:
print("\n🔵 DQN Model Suggestions:")
for user_id, actions in dqn_suggestions:
    print(f"User {user_id}: {actions}")

# print("\n🟢 A2C Model Suggestions:")
# for user_id, actions in a2c_suggestions:
#     print(f"User {user_id}: {actions}")


🔵 DQN Model Suggestions:
User 1: ['Increase mindfulness practicess like yoga']
User 2: ['Increase mindfulness practicess like yoga']
User 3: ['Increase mindfulness practicess like yoga']
User 4: ['Increase mindfulness practicess like yoga']
User 5: ['Watch educational videos']
User 6: ['Increase mindfulness practicess like yoga']
User 7: ['Increase mindfulness practicess like yoga']
User 8: ['Increase mindfulness practicess like yoga']
User 9: ['Increase mindfulness practicess like yoga']
User 10: ['Increase mindfulness practicess like yoga']
User 11: ['Increase mindfulness practicess like yoga']
User 12: ['Increase mindfulness practicess like yoga']
User 13: ['Increase mindfulness practicess like yoga']
User 14: ['Increase mindfulness practicess like yoga']
User 15: ['Increase mindfulness practicess like yoga']
User 16: ['Increase mindfulness practicess like yoga']
User 17: ['Increase mindfulness practicess like yoga']
User 18: ['Increase mindfulness practicess like yoga']
User 19: [