In [1]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
from geopy.geocoders import Nominatim
import time
import plotly.express as px


In [2]:
# Load the survey data and discard every row that has a missing value in any
# column. (No coordinate column exists yet; it is added further down.)
df = pd.read_csv("Student Depression Dataset.csv").dropna()

In [3]:
# City name -> [latitude, longitude], with values looked up online.
# Built from a (name, lat, lon) table; each value is a fresh two-element list.
city_coords = {
    name: [lat, lon]
    for name, lat, lon in (
        ("Kalyan", 19.2403, 73.1305),
        ("Srinagar", 34.0837, 74.7973),
        ("Hyderabad", 17.3871, 78.4917),
        ("Vasai-Virar", 19.3919, 72.8397),
        ("Ludhiana", 30.9010, 75.8573),
        ("Thane", 19.2183, 72.9781),
        ("Kolkata", 22.5726, 88.3639),
        ("Lucknow", 26.8467, 80.9462),
        ("Agra", 27.1767, 78.0081),
        ("Jaipur", 26.9124, 75.7873),
        ("Patna", 25.5941, 85.1376),
        ("Surat", 21.1702, 72.8311),
        ("Bhopal", 23.2599, 77.4126),
        ("Pune", 18.5204, 73.8567),
        ("Visakhapatnam", 17.6868, 83.2185),
        ("Ahmedabad", 23.0225, 72.5714),
        ("Chennai", 13.0827, 80.2707),
        ("Ghaziabad", 28.6692, 77.4538),
        ("Meerut", 28.9845, 77.7064),
        ("Mumbai", 19.0760, 72.8777),
        ("Rajkot", 22.3039, 70.8022),
        ("Bangalore", 12.9716, 77.5946),
        ("Delhi", 28.7041, 77.1025),
        ("Vadodara", 22.3072, 73.1812),
        ("Varanasi", 25.3176, 82.9739),
        ("Nagpur", 21.1458, 79.0882),
        ("Indore", 22.7196, 75.8577),
        ("Kanpur", 26.4499, 80.3319),
        ("Nashik", 19.9975, 73.7898),
        ("Faridabad", 28.4089, 77.3178),
    )
}


In [4]:
# Build a per-city depression heatmap over India.
#
# Reads `df` (needs 'City' and 'Depression' columns) and `city_coords`
# (city name -> [latitude, longitude]) from earlier cells.
# Produces `grouped` (per-city summary), `heat_data`, the folium map `m`,
# and writes "depression_heatmap.html".

# Map city names to coordinates; cities absent from city_coords become NaN
df['Coordinates'] = df['City'].map(city_coords)

# Group by 'City' and calculate total and depressed counts
# (summing assumes 'Depression' is a 0/1 indicator -- TODO confirm)
grouped = df.groupby('City')['Depression'].agg(
    Total_Count='size',
    Depressed_Count='sum'
).reset_index()

# Calculate depression percentage
grouped['Depression_Percentage'] = (grouped['Depressed_Count'] / grouped['Total_Count']) * 100

# Normalize Depression_Percentage to a 0-1 scale
grouped['Normalized_Depression'] = grouped['Depression_Percentage'] / 100

# Pair every per-city weight with that same city's coordinates. Zipping the
# row-level df['Coordinates'] against the city-level weights would match them
# by position only, attaching each weight to an unrelated row's location.
# Kept in a separate frame so the columns of `grouped` stay unchanged.
city_points = grouped.assign(Coordinates=grouped['City'].map(city_coords))

# Cities without a known location cannot be drawn; indexing their NaN
# coordinate would raise a TypeError, so drop them here.
city_points = city_points[city_points['Coordinates'].notna()]

# Create a folium map centered around India
m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)

# Prepare heatmap data: one [lat, lon, weight] triple per mapped city
heat_data = [
    [lat, lon, weight]
    for (lat, lon), weight in zip(city_points['Coordinates'], city_points['Normalized_Depression'])
]

# Add HeatMap layer
HeatMap(heat_data, radius=15, blur=10, max_zoom=1).add_to(m)

# Save the map to an HTML file
m.save("depression_heatmap.html")

# Display the map in a Jupyter Notebook (optional)
m

In [5]:
# Show the per-city summary: counts, depression percentage and its 0-1 scaled value
grouped

Unnamed: 0,City,Total_Count,Depressed_Count,Depression_Percentage,Normalized_Depression
0,3.0,1,1,100.0,1.0
1,Agra,1094,585,53.473492,0.534735
2,Ahmedabad,951,640,67.297581,0.672976
3,Bangalore,767,467,60.886571,0.608866
4,Bhavna,2,2,100.0,1.0
5,Bhopal,934,579,61.991435,0.619914
6,Chennai,885,528,59.661017,0.59661
7,City,2,1,50.0,0.5
8,Delhi,768,468,60.9375,0.609375
9,Faridabad,461,271,58.785249,0.587852


- The cities selected by our LASSO: [Bhavna, Kibara, ME, Mira, Nalyan, Nandini, Saanvi, Vaanya]
- The cities selected by LASSO all appear only once or twice in the dataset (e.g. Bhavna has just 2 entries), so these selections will not generalize
- The brightest areas on the heatmap are Kalyan and the combined Meerut–Ghaziabad area