In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv(r'C:\Users\tejas\Github projects\RL - Mental health project\Reinforcement-Learning-for-Personalized-Digital-Well-Being\Structured survey data for RL implementation.csv')

In [3]:
df.shape

(128, 460)

In [4]:
state_columns = [col for col in df.columns if col != "Mental_Health_Score"]

In [5]:
states = df[state_columns].values

### Mental Health score 

In [6]:
# assigning weights based on the importance 
emotional_weights = {
    "Happiness" : 10, 
    "Anxiety": 3,
    "Jealousy": 4,
    "Loneliness": 5,
    "Inspiration": 8
}

social_media_time_weight = 2

tfidf_weights = 2

time_of_use_weights = {
    "Morning": 2,         # Positive: Starting the day with limited usage
    "Afternoon": 8,       # Neutral
    "Evening": 10,        # Negative: Could affect productivity
    "Late night": 3     # Highly negative: Disrupts sleep
}

platform_weights = {
    "Instagram": 4,      # Often linked to comparison and anxiety
    "Whatsapp": 6,       # Mostly neutral, but addictive
    "Youtube": 9,         # Can be educational or distracting
    "Twitter": 5,        # Can expose users to negativity
    "Facebook": 5,       # Has both positive and negative effects
    "LinkedIn": 10,        # More career-oriented, positive
    "Reddit": 7,         # Can be informative but also overwhelming
    "Snapchat": 4,       # Short-lived content, FOMO-inducing
    "Discord": 7          # Can be beneficial for communities
}

usage_pattern_weights = {
    "Watching videos": 9,   # Can be educational
    "Messaging": 7,        # Generally neutral, but can lead to distractions
    "Scrolling": 2,        # Mindless scrolling is highly negative
    "Posting": 5           # Can create pressure for validation
}

impact_weights = {
    "Has social media affected your sleep patterns?": 8,
    "Do you feel distracted during studies or work due to social media usage?": 10,
    "Do you feel a compulsive need to check your social media accounts frequently?": 7,
    "Have you been exposed to harmful or triggering content on social media?": 2
}

# New feature weight assignments
additional_feature_weights = {
    "Has social media contributed to your personal growth or skill development": 10,  # Positive effect
    "Do you feel pressire to maintain an ideal version of yourself on social media?": 2,  # Negative effect
    "Are you aware of how social media algorithms influence your feed?": 5,  # Can cause manipulation
    "Have your opinions on topics like politics, health or culture changed because of social media?": 5,  # Misinformation risk
    "Do you think there should be restrictions or guidelines on social media usage for teenagers?": 8  # Positive awareness
}


In [7]:
def calculate_mental_health_score(row):
    """ Computes a well-being score based on emotions, social media use, negative effects, and open-ended responses. """

    # 1️⃣ Emotional Score (Happiness, Anxiety, etc.)
    emotional_score = sum(row[feature] * weight for feature, weight in emotional_weights.items())

    # 2️⃣ Social Media Time Impact (Morning, Night, etc.)
    social_media_time_score = sum(row[feature] * weight for feature, weight in time_of_use_weights.items())

    # 3️⃣ Social Media Platform Impact (Instagram, WhatsApp, etc.)
    platform_score = sum(row[feature] * weight for feature, weight in platform_weights.items())

    # 4️⃣ Social Media Usage Pattern Impact (Scrolling, Posting, Messaging, etc.)
    usage_pattern_score = sum(row[feature] * weight for feature, weight in usage_pattern_weights.items())

    # 5️⃣ Negative Impact of Social Media (e.g., sleep issues, distractions)
    impact_score = sum(row[feature] * weight for feature, weight in impact_weights.items())

    # 6️⃣ New Contributing Features (Personal Growth, Pressure, etc.)
    additional_feature_score = sum(row[feature] * weight for feature, weight in additional_feature_weights.items())

    # 7️⃣ Open-Ended Response Impact (TF-IDF)
    tfidf_sum = row[[col for col in df.columns if "Specify_" in col]].sum() 
    tfidf_score = tfidf_sum * tfidf_weights if tfidf_sum > 0 else 0

    # Final Mental Health Score Calculation
    mhs = (emotional_score + social_media_time_score + platform_score +
           usage_pattern_score + impact_score + additional_feature_score + tfidf_score)

    return {
        "Emotional_Score": emotional_score,
        "Social_Media_Time_Score": social_media_time_score,
        "Platform_Score": platform_score,
        "Usage_Pattern_Score": usage_pattern_score,
        "Impact_Score": impact_score,
        "Additional_Feature_Score": additional_feature_score,
        "TFIDF_Score": tfidf_score,
        "Mental_Health_Score": mhs
    }


In [8]:
df["Mental_Health_Score"] = df.apply(calculate_mental_health_score, axis=1)
print(df[["Mental_Health_Score"]])

                                   Mental_Health_Score
0    {'Emotional_Score': 84.0, 'Social_Media_Time_S...
1    {'Emotional_Score': 56.0, 'Social_Media_Time_S...
2    {'Emotional_Score': 97.0, 'Social_Media_Time_S...
3    {'Emotional_Score': 114.0, 'Social_Media_Time_...
4    {'Emotional_Score': 87.0, 'Social_Media_Time_S...
..                                                 ...
123  {'Emotional_Score': 82.0, 'Social_Media_Time_S...
124  {'Emotional_Score': 96.0, 'Social_Media_Time_S...
125  {'Emotional_Score': 82.0, 'Social_Media_Time_S...
126  {'Emotional_Score': 65.0, 'Social_Media_Time_S...
127  {'Emotional_Score': 72.0, 'Social_Media_Time_S...

[128 rows x 1 columns]


In [9]:
# Apply function and convert the result into a separate DataFrame
mhs_df = df.apply(calculate_mental_health_score, axis=1, result_type="expand")

# Display the first few rows of the new DataFrame
mhs_df.head()


Unnamed: 0,Emotional_Score,Social_Media_Time_Score,Platform_Score,Usage_Pattern_Score,Impact_Score,Additional_Feature_Score,TFIDF_Score,Mental_Health_Score
0,84,13,20,18,27,38,0,200
1,56,13,10,2,0,35,0,116
2,97,23,19,18,61,34,0,252
3,114,13,19,11,54,63,0,274
4,87,13,19,18,25,10,0,172


In [10]:
positive_values = []
total_values = 0
for column in mhs_df.columns:
    for i in range(128):
        if mhs_df[column][i] > 0:
            positive_values.append(mhs_df[column][i])
            total_values +=1 
        else:
            total_values +=1            

print(len(positive_values))
print(total_values)

893
1024


In [11]:
mhs_df.to_csv("Mental_Health_Score_Dataframe.csv", index = False)