In [1]:
#import dependencies
import pandas as pd
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import gmaps
import pycountry
from config import gkey
import requests
gmaps.configure(api_key = gkey)

In [2]:
countries_df = pd.read_csv("cleaned_data_js_sc_rg.csv", encoding = "ISO-8859-1")

countries_df=countries_df[countries_df["Residence_ISO3"].notna()]
countries_df=countries_df[countries_df["Residence"].notna()]
print(len(countries_df))

11598


In [3]:
#group all responses by country
country_grp = countries_df.groupby(["Residence_ISO3", "Residence"])

country_means = pd.DataFrame(country_grp.mean().reset_index())
country_counts = pd.DataFrame(country_grp.size().reset_index())
country_counts = country_counts.rename(columns = {0:"Participant Count"})

country_stats = pd.merge(country_counts, country_means, on = ["Residence", "Residence_ISO3"], how = 'outer')
country_stats = country_stats.drop(["S. No.", "Timestamp"], axis = 1)
country_stats

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Hours,streams,Narcissism,Age,GAD_T,SWL_T,SPIN_T
0,ALB,Albania,5,20.200000,8.400000,2.400000,21.000000,5.400000,16.000000,10.000000
1,ARE,UAE,10,19.300000,15.700000,1.700000,21.100000,6.600000,19.100000,21.900000
2,ARG,Argentina,92,24.413043,11.630435,1.967391,20.804348,4.782609,19.195652,18.282609
3,AUS,Australia,192,25.416667,9.828125,1.937500,20.588542,5.755208,18.619792,21.588542
4,AUT,Austria,129,22.372093,11.798450,2.085271,20.961240,4.333333,21.364341,18.705426
...,...,...,...,...,...,...,...,...,...,...
102,USA,USA,4018,20.341463,10.507218,1.995022,21.081135,5.547287,20.136386,20.137382
103,VCT,St Vincent,1,15.000000,4.000000,1.000000,19.000000,5.000000,26.000000,28.000000
104,VEN,Venezuela,28,25.214286,8.107143,2.000000,20.642857,5.857143,18.321429,18.857143
105,VNM,Vietnam,11,18.636364,4.181818,2.272727,22.090909,4.545455,14.090909,20.727273


# Mental health questionnaire heatmaps using google maps API

In [4]:
#Using country_stats dataframe, use pycountry to get alpha-2 country codes
country_stats['alpha2'] = ""

for index, row in country_stats.iterrows():
    try:
        country = row['Residence_ISO3']
        alpha2 = pycountry.countries.get(alpha_3 = country).alpha_2
        country_stats.loc[index, "alpha2"] = alpha2
        print(f"processing country: {row['Residence_ISO3']}...")
    except:
        print("country not found")
        pass
country_stats['alpha2'] = country_stats['alpha2'].str.lower()
country_stats.head()

processing country: ALB...
processing country: ARE...
processing country: ARG...
processing country: AUS...
processing country: AUT...
processing country: BEL...
processing country: BGD...
processing country: BGR...
processing country: BHR...
processing country: BIH...
processing country: BLR...
processing country: BLZ...
processing country: BOL...
processing country: BRA...
processing country: BRN...
processing country: CAN...
processing country: CHE...
processing country: CHL...
processing country: CHN...
processing country: COL...
processing country: CRI...
processing country: CYP...
processing country: CZE...
processing country: DEU...
processing country: DNK...
processing country: DOM...
processing country: DZA...
processing country: ECU...
processing country: EGY...
processing country: ESP...
processing country: EST...
processing country: FIN...
processing country: FJI...
processing country: FRA...
processing country: FRO...
processing country: GBR...
processing country: GEO...
p

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Hours,streams,Narcissism,Age,GAD_T,SWL_T,SPIN_T,alpha2
0,ALB,Albania,5,20.2,8.4,2.4,21.0,5.4,16.0,10.0,al
1,ARE,UAE,10,19.3,15.7,1.7,21.1,6.6,19.1,21.9,ae
2,ARG,Argentina,92,24.413043,11.630435,1.967391,20.804348,4.782609,19.195652,18.282609,ar
3,AUS,Australia,192,25.416667,9.828125,1.9375,20.588542,5.755208,18.619792,21.588542,au
4,AUT,Austria,129,22.372093,11.79845,2.085271,20.96124,4.333333,21.364341,18.705426,at


In [5]:
#Use google maps API to get coordinates of each country
country_stats['lat'] = ""
country_stats['lng'] = ""
base_url = "https://maps.googleapis.com/maps/api/geocode/json"
params = {'key': gkey}

for index, row in country_stats.iterrows():
    alpha2 = row['alpha2']
    country = row['Residence']
        
    #add address and region to params
    params['address'] = country
    params['region'] = alpha2
    
    try:
        response = requests.get(base_url, params = params).json()
        country_stats.loc[index, 'lat'] = response['results'][0]['geometry']['location']['lat']
        country_stats.loc[index, 'lng'] = response['results'][0]['geometry']['location']['lng']
        print(f"processing country: {country}")
        
    except:
        print("country not found")
        pass

country_stats.head()

processing country: Albania
processing country: UAE
processing country: Argentina
processing country: Australia
processing country: Austria
processing country: Belgium
processing country: Bangladesh
processing country: Bulgaria
processing country: Bahrain
processing country: Bosnia and Herzegovina
processing country: Belarus
processing country: Belize
processing country: Bolivia
processing country: Brazil
processing country: Brunei
processing country: Canada
processing country: Switzerland
processing country: Chile
processing country: China
processing country: Colombia
processing country: Costa Rica
processing country: Cyprus
processing country: Czech Republic
processing country: Germany
processing country: Denmark
processing country: Dominican Republic
processing country: Algeria
processing country: Ecuador
processing country: Egypt
processing country: Spain
processing country: Estonia
processing country: Finland
processing country: Fiji
processing country: France
processing country: 

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Hours,streams,Narcissism,Age,GAD_T,SWL_T,SPIN_T,alpha2,lat,lng
0,ALB,Albania,5,20.2,8.4,2.4,21.0,5.4,16.0,10.0,al,41.1533,20.1683
1,ARE,UAE,10,19.3,15.7,1.7,21.1,6.6,19.1,21.9,ae,23.4241,53.8478
2,ARG,Argentina,92,24.413043,11.630435,1.967391,20.804348,4.782609,19.195652,18.282609,ar,-38.4161,-63.6167
3,AUS,Australia,192,25.416667,9.828125,1.9375,20.588542,5.755208,18.619792,21.588542,au,-25.2744,133.775
4,AUT,Austria,129,22.372093,11.79845,2.085271,20.96124,4.333333,21.364341,18.705426,at,47.5162,14.5501


In [6]:
country_stats = country_stats.rename(columns = {'Hours':"Average Hours", 'streams':'Average Streaming Hours', 'Narcissism':'Average Narcissism Score', 'Age':'Average Participant Age', 'GAD_T': 'Average GAD Score', "SWL_T":"Average SWL Score","SPIN_T": "Average SPIN Score"})
country_stats.to_csv("country_stats.csv", index = False)

In [16]:
#create heatmap using country locations
locations = country_stats[['lat','lng']]

gad_avg = country_stats['Average GAD Score'].astype(float)
swl_avg = country_stats['Average SWL Score'].astype(float)
spin_avg = country_stats['Average SPIN Score'].astype(float)
narcissism_avg = country_stats['Average Narcissism Score'].astype(float)
participant_count = country_stats['Participant Count'].astype(float)
avg_hours = country_stats['Average Hours'].astype(float)

In [67]:
#plot heatmaps
fig1 = gmaps.figure(center = (21, 20), zoom_level =1.8)

#create heat layer for GAD_T
gad_avg_layer = gmaps.heatmap_layer(locations, weights = participant_count, 
                                   dissipating = False, max_intensity = 500, point_radius = 7)

fig1.add_layer(gad_avg_layer)

fig1

Figure(layout=FigureLayout(height='420px'))

# Chloropleth Maps using plotly

## GAD Scores

In [22]:
#Create bins to place values based on average GAD_T per country
bins = [0, 4, 9, 14, 21]

bin_labels = ["0-4: Minimal Anxiety", "5-9: Mild Anxiety", "10-14: Moderate Anxiety", "15-21: Severe Anxiety"]

country_stats['GAD Anxiety Score'] = pd.cut(country_stats['Average GAD Score'], 
                                            bins, labels = bin_labels, include_lowest = True)

country_stats

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Average Hours,Average Streaming Hours,Average Narcissism Score,Average Participant Age,Average GAD Score,Average SWL Score,Average SPIN Score,alpha2,lat,lng,Participant Count Range,GAD Anxiety Score
0,ALB,Albania,5,20.200000,8.400000,2.400000,21.000000,5.400000,16.000000,10.000000,al,41.1533,20.1683,0 - 99,5-9: Mild Anxiety
1,ARE,UAE,10,19.300000,15.700000,1.700000,21.100000,6.600000,19.100000,21.900000,ae,23.4241,53.8478,0 - 99,5-9: Mild Anxiety
2,ARG,Argentina,92,24.413043,11.630435,1.967391,20.804348,4.782609,19.195652,18.282609,ar,-38.4161,-63.6167,0 - 99,5-9: Mild Anxiety
3,AUS,Australia,192,25.416667,9.828125,1.937500,20.588542,5.755208,18.619792,21.588542,au,-25.2744,133.775,100 - 499,5-9: Mild Anxiety
4,AUT,Austria,129,22.372093,11.798450,2.085271,20.961240,4.333333,21.364341,18.705426,at,47.5162,14.5501,100 - 499,5-9: Mild Anxiety
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,USA,USA,4018,20.341463,10.507218,1.995022,21.081135,5.547287,20.136386,20.137382,us,37.0902,-95.7129,2000+,5-9: Mild Anxiety
103,VCT,St Vincent,1,15.000000,4.000000,1.000000,19.000000,5.000000,26.000000,28.000000,vc,12.9843,-61.2872,0 - 99,5-9: Mild Anxiety
104,VEN,Venezuela,28,25.214286,8.107143,2.000000,20.642857,5.857143,18.321429,18.857143,ve,6.42375,-66.5897,0 - 99,5-9: Mild Anxiety
105,VNM,Vietnam,11,18.636364,4.181818,2.272727,22.090909,4.545455,14.090909,20.727273,vn,14.0583,108.277,0 - 99,5-9: Mild Anxiety


In [60]:
country_stats.sort_values(by="GAD Anxiety Score", inplace = True)

fig = px.choropleth(country_stats, locations="Residence_ISO3",
                    color="GAD Anxiety Score",
                    hover_name = "Residence",
                    hover_data= {"Average GAD Score": True, 
                                 "Residence_ISO3": False, 
                                 "GAD Anxiety Score":False}, 
                    color_discrete_sequence =["#afd0aa", "#fe7171","#a10000", "#690000"])

fig.update_layout(
    title_text='Average GAD Score per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'))

fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=14,
        font_family="Ariel"))

fig.show()

## SWL Scores

In [51]:
#Create bins to place values based on average SWL_T per country
bins = [5, 9, 14, 19, 20, 25, 30, 35]

bin_labels = ["5-9: Extremely Dissatisfied", "10-14: Dissatisfied", "15-19: Slightly Dissatisfied", "20: Neutral", 
              "21-25: Slightly Satisfied", "26-30: Satisfied", "31-35: Extremely satisfied"]

country_stats['SWL Score'] = pd.cut(country_stats['Average SWL Score'], 
                                            bins, labels = bin_labels, include_lowest = True)

country_stats

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Average Hours,Average Streaming Hours,Average Narcissism Score,Average Participant Age,Average GAD Score,Average SWL Score,Average SPIN Score,alpha2,lat,lng,Participant Count Range,GAD Anxiety Score,SWL Score
38,GLP,Guadeloupe,1,35.0,20.0,4.0,21.0,9.0,8.0,9.0,gp,16.265,-61.551,0 - 99,5-9: Mild Anxiety,5-9: Extremely Dissatisfied
46,IDN,Indonesia,1,25.0,10.0,2.0,24.0,14.0,9.0,26.0,id,-0.789275,113.921,0 - 99,10-14: Moderate Anxiety,5-9: Extremely Dissatisfied
14,BRN,Brunei,1,10.0,2.0,5.0,18.0,0.0,14.0,10.0,bn,4.53528,114.728,0 - 99,0-4: Minimal Anxiety,10-14: Dissatisfied
99,TWN,Taiwan,2,31.0,6.5,2.0,21.5,4.0,11.0,37.5,tw,23.6978,120.961,0 - 99,0-4: Minimal Anxiety,10-14: Dissatisfied
83,PSE,Palestine,1,7.0,5.0,1.0,18.0,11.0,11.0,10.0,ps,31.9522,35.2332,0 - 99,10-14: Moderate Anxiety,10-14: Dissatisfied
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,SLV,El Salvador,1,25.0,5.0,2.0,18.0,4.0,27.0,12.0,sv,13.7942,-88.8965,0 - 99,0-4: Minimal Anxiety,26-30: Satisfied
103,VCT,St Vincent,1,15.0,4.0,1.0,19.0,5.0,26.0,28.0,vc,12.9843,-61.2872,0 - 99,5-9: Mild Anxiety,26-30: Satisfied
10,BLR,Belarus,1,20.0,12.0,5.0,19.0,1.0,29.0,7.0,by,53.7098,27.9534,0 - 99,0-4: Minimal Anxiety,26-30: Satisfied
71,NAM,Namibia,1,20.0,5.0,2.0,18.0,6.0,35.0,0.0,na,-22.9576,18.4904,0 - 99,5-9: Mild Anxiety,31-35: Extremely satisfied


In [59]:
country_stats.sort_values(by = "SWL Score", inplace = True)
fig = px.choropleth(country_stats, locations="Residence_ISO3",
                    color="SWL Score",
                    hover_name = "Residence",
                    hover_data= {"Average SWL Score": True, 
                                 "Residence_ISO3": False, 
                                 "SWL Score":False}, 
                    color_discrete_sequence = ["#690000","#ee0000","#fe7171","#d4a48f", "#62cb5c","#1d961f", "#005b04"])

fig.update_layout(
    title_text='Average Satisfaction with Life per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'))

fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=14,
        font_family="Ariel"))

fig.show()

## SPIN Scores

In [53]:
country_stats['Average SPIN Score'].max()

46.0

In [54]:
#Create bins to place values based on average Average SPIN Score per country
bins = [0, 20, 30, 40, 50, 68]

bin_labels = ["0-20: No Social Phobia", "21-30: Mild Social Phobia", "31-40: Moderate Social Phobia", 
              "41-50: Severe Social Phobia", "51-68: Very Severe Social Phobia"]

country_stats['SPIN Score'] = pd.cut(country_stats['Average SPIN Score'], 
                                            bins, labels = bin_labels, include_lowest = True)

country_stats

Unnamed: 0,Residence_ISO3,Residence,Participant Count,Average Hours,Average Streaming Hours,Average Narcissism Score,Average Participant Age,Average GAD Score,Average SWL Score,Average SPIN Score,alpha2,lat,lng,Participant Count Range,GAD Anxiety Score,SWL Score,SPIN Score
38,GLP,Guadeloupe,1,35.0,20.0,4.0,21.0,9.0,8.0,9.0,gp,16.265,-61.551,0 - 99,5-9: Mild Anxiety,5-9: Extremely Dissatisfied,0-20: No Social Phobia
46,IDN,Indonesia,1,25.0,10.0,2.0,24.0,14.0,9.0,26.0,id,-0.789275,113.921,0 - 99,10-14: Moderate Anxiety,5-9: Extremely Dissatisfied,21-30: Mild Social Phobia
14,BRN,Brunei,1,10.0,2.0,5.0,18.0,0.0,14.0,10.0,bn,4.53528,114.728,0 - 99,0-4: Minimal Anxiety,10-14: Dissatisfied,0-20: No Social Phobia
99,TWN,Taiwan,2,31.0,6.5,2.0,21.5,4.0,11.0,37.5,tw,23.6978,120.961,0 - 99,0-4: Minimal Anxiety,10-14: Dissatisfied,31-40: Moderate Social Phobia
83,PSE,Palestine,1,7.0,5.0,1.0,18.0,11.0,11.0,10.0,ps,31.9522,35.2332,0 - 99,10-14: Moderate Anxiety,10-14: Dissatisfied,0-20: No Social Phobia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,SLV,El Salvador,1,25.0,5.0,2.0,18.0,4.0,27.0,12.0,sv,13.7942,-88.8965,0 - 99,0-4: Minimal Anxiety,26-30: Satisfied,0-20: No Social Phobia
103,VCT,St Vincent,1,15.0,4.0,1.0,19.0,5.0,26.0,28.0,vc,12.9843,-61.2872,0 - 99,5-9: Mild Anxiety,26-30: Satisfied,21-30: Mild Social Phobia
10,BLR,Belarus,1,20.0,12.0,5.0,19.0,1.0,29.0,7.0,by,53.7098,27.9534,0 - 99,0-4: Minimal Anxiety,26-30: Satisfied,0-20: No Social Phobia
71,NAM,Namibia,1,20.0,5.0,2.0,18.0,6.0,35.0,0.0,na,-22.9576,18.4904,0 - 99,5-9: Mild Anxiety,31-35: Extremely satisfied,0-20: No Social Phobia


In [58]:
country_stats.sort_values(by = "Average SPIN Score", inplace = True)
fig = px.choropleth(country_stats, locations="Residence_ISO3",
                    color="SPIN Score",
                    hover_name = "Residence",
                    hover_data= {"Average SPIN Score": True, 
                                 "Residence_ISO3": False, 
                                 "SPIN Score":False}, 
                    color_discrete_sequence = ["#62cb5c","#d4a48f","#fe7171","#a10000", "#690000"])

fig.update_layout(
    title_text='Average Social Phobia Score per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'))

fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=14,
        font_family="Ariel"))

fig.show()