In [19]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
from geopy.geocoders import Nominatim
import time
import plotly.express as px

In [20]:
# Load the survey data and discard every row that has a missing value in any
# column (the Coordinates column does not exist yet at this point).
df = pd.read_csv("Student Depression Dataset.csv").dropna()

In [21]:
# City name -> [latitude, longitude] in decimal degrees, looked up online.
# Keys must match the spelling used in df['City']; any city not listed here
# maps to NaN when this dict is applied with Series.map.
city_coords = {
    "Kalyan": [19.2403, 73.1305],
    "Srinagar": [34.0837, 74.7973],
    "Hyderabad": [17.3871, 78.4917],
    "Vasai-Virar": [19.3919, 72.8397],
    "Ludhiana": [30.9010, 75.8573],
    "Thane": [19.2183, 72.9781],
    "Kolkata": [22.5726, 88.3639],
    "Lucknow": [26.8467, 80.9462],
    "Agra": [27.1767, 78.0081],
    "Jaipur": [26.9124, 75.7873],
    "Patna": [25.5941, 85.1376],
    "Surat": [21.1702, 72.8311],
    "Bhopal": [23.2599, 77.4126],
    "Pune": [18.5204, 73.8567],
    "Visakhapatnam": [17.6868, 83.2185],
    "Ahmedabad": [23.0225, 72.5714],
    "Chennai": [13.0827, 80.2707],
    "Ghaziabad": [28.6692, 77.4538],
    "Meerut": [28.9845, 77.7064],
    "Mumbai": [19.0760, 72.8777],
    "Rajkot": [22.3039, 70.8022],
    "Bangalore": [12.9716, 77.5946],
    "Delhi": [28.7041, 77.1025],
    "Vadodara": [22.3072, 73.1812],
    "Varanasi": [25.3176, 82.9739],
    "Nagpur": [21.1458, 79.0882],
    "Indore": [22.7196, 75.8577],
    "Kanpur": [26.4499, 80.3319],
    "Nashik": [19.9975, 73.7898],
    "Faridabad": [28.4089, 77.3178]
}


In [22]:
# Attach [lat, lon] to every row; cities missing from city_coords become NaN.
df['Coordinates'] = df['City'].map(city_coords)

# Group by 'City' and calculate total and depressed counts
grouped = df.groupby('City')['Depression'].agg(
    Total_Count='size',
    Depressed_Count='sum'
).reset_index()

# Calculate depression percentage
grouped['Depression_Percentage'] = (grouped['Depressed_Count'] / grouped['Total_Count']) * 100

# Normalize Depression_Percentage to a 0-1 scale
grouped['Normalized_Depression'] = grouped['Depression_Percentage'] / 100

# Create a folium map centered around India
m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)

# Prepare heatmap data: one [lat, lon, weight] point per city.
# The coordinates are looked up from the city name of each `grouped` row so
# that a city's weight is always paired with that city's own location.
# (Zipping the per-row df['Coordinates'] against the per-city weights would
# pair unrelated entries and silently truncate to len(grouped).)
# Cities with no entry in city_coords cannot be placed on the map and are
# skipped.
heat_data = [
    [*city_coords[city], float(weight)]
    for city, weight in zip(grouped['City'], grouped['Normalized_Depression'])
    if city in city_coords
]

# Add HeatMap layer
HeatMap(heat_data, radius=15, blur=10, max_zoom=1).add_to(m)

# Save the map to an HTML file
m.save("depression_heatmap.html")

# Display the map in a Jupyter Notebook (optional)
m

In [23]:
# Show the per-city summary: row count, depressed count, and depression rate
# as a percentage and as a 0-1 fraction.
grouped

Unnamed: 0,City,Total_Count,Depressed_Count,Depression_Percentage,Normalized_Depression
0,3.0,1,1,100.0,1.0
1,Agra,1094,585,53.473492,0.534735
2,Ahmedabad,951,640,67.297581,0.672976
3,Bangalore,767,467,60.886571,0.608866
4,Bhavna,2,2,100.0,1.0
5,Bhopal,934,579,61.991435,0.619914
6,Chennai,885,528,59.661017,0.59661
7,City,2,1,50.0,0.5
8,Delhi,768,468,60.9375,0.609375
9,Faridabad,461,271,58.785249,0.587852


- The cities selected by our LASSO: [Bhavna, Kibara, ME, Mira, Nalyan, Nandini, Saanvi, Vaanya]
- The cities selected by LASSO each have only one or two entries in the dataset (e.g. Bhavna has 2 rows), so they will not generalize
- The brightest areas on the heatmap are Kalyan and the Meerut + Ghaziabad cluster

In [24]:
# Display the cleaned DataFrame, which now carries the Coordinates column.
# NOTE(review): this renders the whole frame; df.head() may be enough here.
df

Unnamed: 0,id,Gender,Age,City,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression,Coordinates
0,2,Male,33.0,Visakhapatnam,Student,5.0,0.0,8.97,2.0,0.0,5-6 hours,Healthy,B.Pharm,Yes,3.0,1.0,No,1,"[17.6868, 83.2185]"
1,8,Female,24.0,Bangalore,Student,2.0,0.0,5.90,5.0,0.0,5-6 hours,Moderate,BSc,No,3.0,2.0,Yes,0,"[12.9716, 77.5946]"
2,26,Male,31.0,Srinagar,Student,3.0,0.0,7.03,5.0,0.0,Less than 5 hours,Healthy,BA,No,9.0,1.0,Yes,0,"[34.0837, 74.7973]"
3,30,Female,28.0,Varanasi,Student,3.0,0.0,5.59,2.0,0.0,7-8 hours,Moderate,BCA,Yes,4.0,5.0,Yes,1,"[25.3176, 82.9739]"
4,32,Female,25.0,Jaipur,Student,4.0,0.0,8.13,3.0,0.0,5-6 hours,Moderate,M.Tech,Yes,1.0,1.0,No,0,"[26.9124, 75.7873]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27896,140685,Female,27.0,Surat,Student,5.0,0.0,5.75,5.0,0.0,5-6 hours,Unhealthy,Class 12,Yes,7.0,1.0,Yes,0,"[21.1702, 72.8311]"
27897,140686,Male,27.0,Ludhiana,Student,2.0,0.0,9.40,3.0,0.0,Less than 5 hours,Healthy,MSc,No,0.0,3.0,Yes,0,"[30.901, 75.8573]"
27898,140689,Male,31.0,Faridabad,Student,3.0,0.0,6.61,4.0,0.0,5-6 hours,Unhealthy,MD,No,12.0,2.0,No,0,"[28.4089, 77.3178]"
27899,140690,Female,18.0,Ludhiana,Student,5.0,0.0,6.88,2.0,0.0,Less than 5 hours,Healthy,Class 12,Yes,10.0,5.0,No,1,"[30.901, 75.8573]"


In [25]:
# Count rows per Profession value to see how many respondents are not students.
df["Profession"].value_counts()

Student                   27867
Architect                     8
Teacher                       6
Digital Marketer              3
Content Writer                2
Chef                          2
Doctor                        2
Pharmacist                    2
Civil Engineer                1
UX/UI Designer                1
Educational Consultant        1
Manager                       1
Lawyer                        1
Entrepreneur                  1
Name: Profession, dtype: int64

In [26]:
# List the rows whose Profession is anything other than "Student".
df[df["Profession"] != "Student"] 

Unnamed: 0,id,Gender,Age,City,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression,Coordinates
113,609,Male,21.0,Ahmedabad,Civil Engineer,5.0,0.0,7.83,1.0,0.0,5-6 hours,Healthy,MSc,Yes,11.0,3.0,Yes,1,"[23.0225, 72.5714]"
422,2105,Female,31.0,Vadodara,Architect,5.0,0.0,6.95,3.0,0.0,Less than 5 hours,Moderate,BSc,No,8.0,1.0,Yes,1,"[22.3072, 73.1812]"
1845,9483,Female,32.0,Kalyan,UX/UI Designer,3.0,0.0,9.6,2.0,0.0,7-8 hours,Moderate,PhD,Yes,5.0,3.0,No,1,"[19.2403, 73.1305]"
2249,11470,Female,28.0,Ahmedabad,Digital Marketer,5.0,0.0,9.72,3.0,0.0,More than 8 hours,Healthy,MA,Yes,10.0,5.0,Yes,1,"[23.0225, 72.5714]"
2440,12425,Male,25.0,Kalyan,Content Writer,5.0,0.0,8.5,2.0,0.0,5-6 hours,Moderate,B.Ed,Yes,0.0,2.0,Yes,1,"[19.2403, 73.1305]"
3220,16196,Male,33.0,Jaipur,Architect,2.0,0.0,7.13,4.0,0.0,More than 8 hours,Moderate,MSc,No,2.0,5.0,Yes,1,"[26.9124, 75.7873]"
3388,16959,Female,24.0,Mumbai,Architect,3.0,0.0,8.89,5.0,0.0,More than 8 hours,Unhealthy,MSc,Yes,3.0,5.0,Yes,1,"[19.076, 72.8777]"
3639,18291,Male,32.0,Agra,Educational Consultant,3.0,0.0,5.74,4.0,0.0,7-8 hours,Healthy,B.Ed,Yes,3.0,5.0,Yes,1,"[27.1767, 78.0081]"
3841,19226,Male,31.0,Rajkot,Teacher,3.0,0.0,7.48,5.0,0.0,5-6 hours,Unhealthy,MD,Yes,12.0,2.0,Yes,1,"[22.3039, 70.8022]"
5016,25193,Male,24.0,Kalyan,Teacher,4.0,0.0,9.66,5.0,0.0,7-8 hours,Moderate,BE,Yes,12.0,3.0,No,1,"[19.2403, 73.1305]"
