# 30 – Mobile Sensing & Mental Health: Data & EDA

In this notebook we:

1. Load a synthetic mobile sensing dataset that simulates:
   - Mobility (steps, distance, locations)
   - Social interaction (calls/texts)
   - Sleep and phone usage patterns
2. Explore its relationship to a weekly `depression_score` and `high_risk` label.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the project root that contains data/raw/.
# Precedence:
#   1. the RUNBOOK_BASE_DIR environment variable, when set and non-empty;
#   2. two directories above the working directory, when the notebook is run
#      from somewhere inside a path containing 'Speech_12052025';
#   3. the original fixed location used by this notebook.
DEFAULT_BASE_DIR = "/mnt/data/cardiff_ai_talk_runbook"
cwd = os.getcwd()
base_dir = os.environ.get("RUNBOOK_BASE_DIR") or (
    os.path.dirname(os.path.dirname(cwd)) if 'Speech_12052025' in cwd else DEFAULT_BASE_DIR
)
data_path = os.path.join(base_dir, "data", "raw", "mental_health_mobile_sensing_synthetic.csv")

# Fail early with an actionable message instead of a bare pandas error when
# the notebook is run from an unexpected directory or on another machine.
if not os.path.isfile(data_path):
    raise FileNotFoundError(
        f"Dataset not found at {data_path!r} (working directory: {cwd!r}). "
        "Set the RUNBOOK_BASE_DIR environment variable to the folder that contains data/raw/."
    )
df = pd.read_csv(data_path)

print("Shape:", df.shape)
df.head()

In [None]:
# Summary statistics of the loaded frame, as produced by DataFrame.describe()
summary_stats = df.describe()
summary_stats

In [None]:
# Histogram of the depression_score column, split into 20 bins
fig, ax = plt.subplots()
ax.hist(df["depression_score"], bins=20)
ax.set(
    xlabel="Depression score (0-27)",
    ylabel="Count",
    title="Distribution of weekly depression scores",
)
plt.show()

In [None]:
# Fraction of rows per high_risk value (normalize=True yields proportions, not counts)
high_risk_counts = df["high_risk"].value_counts(normalize=True)
print(high_risk_counts)

# Bar chart of those proportions, drawn on an explicit axes object
fig, ax = plt.subplots()
high_risk_counts.plot.bar(ax=ax)
ax.set_title("High risk vs low risk weeks")
ax.set_xlabel("High risk label")
ax.set_ylabel("Proportion")
plt.show()

In [None]:
# Scatter of avg_daily_steps (x) against depression_score (y);
# alpha=0.4 keeps overlapping points distinguishable
fig, ax = plt.subplots()
ax.scatter(df["avg_daily_steps"], df["depression_score"], alpha=0.4)
ax.set(
    xlabel="Average daily steps",
    ylabel="Depression score",
    title="Steps vs depression score",
)
plt.show()