In [4]:

# Dependencies and Setup
import hvplot.pandas
import pandas as pd
import requests

# Import API key
from api_key import geoapify_key


In [5]:
# Load the CSV file created in Part 1 into a Pandas DataFrame
disorder_mean_data_df = pd.read_csv("disorder_mean_data.csv")

# Display sample data
disorder_mean_data_df

Unnamed: 0,Entity,Schizophrenia,Bipolar Disorder,Eating Disorders,Anxiety Disorders,Drug Use Disorders,Depression,Alcohol Use Disorders
0,Afghanistan,0.165805,0.707081,0.103619,4.874300,2.540350,4.134689,0.662099
1,Albania,0.200481,0.704041,0.169955,3.388445,0.511595,2.204725,1.847238
2,Algeria,0.197642,0.817575,0.210688,5.056578,1.704862,3.635739,0.665424
3,American Samoa,0.249050,0.468437,0.179264,3.316162,0.769095,2.937679,1.140915
4,Andorra,0.263749,0.963786,0.648336,5.310556,0.904643,3.727690,1.250189
...,...,...,...,...,...,...,...,...
191,Vietnam,0.242382,0.551291,0.122736,2.049616,0.680855,2.882866,1.729482
192,World,0.254472,0.595857,0.206738,3.776508,0.928238,3.440192,1.415836
193,Yemen,0.172509,0.735449,0.136990,4.851539,1.395040,4.017605,0.636085
194,Zambia,0.164778,0.605098,0.137875,3.582886,0.505557,3.570420,1.632610


In [6]:
# Loop through the rows of the original dataframe
for index, row in disorder_mean_data_df.iterrows():
    # Get the country name from the 'Entity' column
    country = row['Entity']
    
    # Construct the API URL
    url = f'https://restcountries.com/v2/name/{country}'

    # in python, if you are defining a string, you can use the format 
    # string = f'STRINGHERE'{variablehere}'morestringhere' 
    # where STRINGHERE is obviously plain text and then whatever is in {} is replaced by variables you have defined
    # this is used a lot later on to make code easier to read
    
    # Send a request to the API
    response = requests.get(url).json()
    
    # Initialize the capital variable to None
    capital = None
    
    # Loop through the results until a capital city is found
    for result in response:
        if 'capital' in result:
            capital = result['capital']
            break
    
    # Add the capital to the original dataframe
    disorder_mean_data_df.loc[index, 'Capital'] = capital
disorder_mean_data_df



Unnamed: 0,Entity,Schizophrenia,Bipolar Disorder,Eating Disorders,Anxiety Disorders,Drug Use Disorders,Depression,Alcohol Use Disorders,Capital
0,Afghanistan,0.165805,0.707081,0.103619,4.874300,2.540350,4.134689,0.662099,Kabul
1,Albania,0.200481,0.704041,0.169955,3.388445,0.511595,2.204725,1.847238,Tirana
2,Algeria,0.197642,0.817575,0.210688,5.056578,1.704862,3.635739,0.665424,Algiers
3,American Samoa,0.249050,0.468437,0.179264,3.316162,0.769095,2.937679,1.140915,Pago Pago
4,Andorra,0.263749,0.963786,0.648336,5.310556,0.904643,3.727690,1.250189,Andorra la Vella
...,...,...,...,...,...,...,...,...,...
191,Vietnam,0.242382,0.551291,0.122736,2.049616,0.680855,2.882866,1.729482,Hanoi
192,World,0.254472,0.595857,0.206738,3.776508,0.928238,3.440192,1.415836,
193,Yemen,0.172509,0.735449,0.136990,4.851539,1.395040,4.017605,0.636085,Sana'a
194,Zambia,0.164778,0.605098,0.137875,3.582886,0.505557,3.570420,1.632610,Lusaka


In [7]:
# Drop any rows where capital was not found
disorder_mean_data_df.dropna(subset=['Capital'], inplace=True)

# Let's Drop all columns besides Entity, Capital, And Anxiety
cols_to_drop = [col for col in disorder_mean_data_df.columns if col not in ['Entity', 'Capital', 'Anxiety Disorders']]
disorder_mean_data_df.drop(cols_to_drop, axis=1, inplace=True)
disorder_mean_data_df

Unnamed: 0,Entity,Anxiety Disorders,Capital
0,Afghanistan,4.874300,Kabul
1,Albania,3.388445,Tirana
2,Algeria,5.056578,Algiers
3,American Samoa,3.316162,Pago Pago
4,Andorra,5.310556,Andorra la Vella
...,...,...,...
190,Venezuela,2.864805,Caracas
191,Vietnam,2.049616,Hanoi
193,Yemen,4.851539,Sana'a
194,Zambia,3.582886,Lusaka


In [8]:
weather_api_key = "d5c7c5515e1490e64ea9671d04249086"
# Now we can use our other APIs to find the lat and lng from city name 
# we will iterate through the rows again and get the lat and lng from the city and then add columns based on the data we want


# URL GUIDE FROM DOCUMENTATION http://api.openweathermap.org/geo/1.0/direct?q={city name}&limit={limit}&appid={API key}
# base_url will copy and paste until first curly bracket, where we can insert city name later
base_url = "http://api.openweathermap.org/geo/1.0/direct?q="
limit = 1
# find city name from dataframe

# Loop through the rows of the original dataframe
for index, row in disorder_mean_data_df.iterrows():
    # Get the country name from the 'Entity' column
    cityName = row['Capital']
    
    # Construct the API URL
    constructed_url = f'{base_url}{cityName}&limit={limit}&appid={weather_api_key}'
    
    # Send a request to the API
    response = requests.get(constructed_url).json()
    # Initialize the lat + Lon variable to None
    currentLat = None
    currentLon = None
    
    # Loop through the results until Lat and Lng is found
    for result in response:
        if 'lat' in result:
            currentLat = result['lat']
        
        if 'lon' in result:
            currentLon = result['lon']
        
    
    # Add the capital to the original dataframe
    disorder_mean_data_df.loc[index, 'Lat'] = currentLat
    disorder_mean_data_df.loc[index, 'Lon'] = currentLon


disorder_mean_data_df



Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon
0,Afghanistan,4.874300,Kabul,34.526011,69.177684
1,Albania,3.388445,Tirana,41.330514,19.825563
2,Algeria,5.056578,Algiers,36.775361,3.060188
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247
...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601
191,Vietnam,2.049616,Hanoi,21.029450,105.854444
193,Yemen,4.851539,Sana'a,15.353857,44.205884
194,Zambia,3.582886,Lusaka,-15.416412,28.282479


In [9]:
#This does like the above but replaces lat and lng with the proper OpenWeatherMap API values as opposed to REST api
base_weather_url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Loop through the rows of the original dataframe
for index, row in disorder_mean_data_df.iterrows():
    # Get the country name from the 'Entity' column
    cityName = row['Capital']

    # Create endpoint URL with each city
    city_weather_url = f"{base_weather_url}appid={weather_api_key}&units={units}&q="

    #get the response from our api request and store it /transfer it to json
    weather_response = requests.get(city_weather_url + cityName).json()

    #get values from each response
    # some responses from the API don't include a Lat or Lon, and would throw errors if we tried to run the following code
    # so we just want to make sure we only run the code if the response contains a dictionary named [coord]
    if '[coord]' in result:
           city_lng = weather_response['coord']['lon']
           city_lat = weather_response['coord']['lat']
           disorder_mean_data_df.loc[index, 'Lat'] = city_lat
           disorder_mean_data_df.loc[index, 'Lon'] = city_lng
    
    

disorder_mean_data_df




Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon
0,Afghanistan,4.874300,Kabul,34.526011,69.177684
1,Albania,3.388445,Tirana,41.330514,19.825563
2,Algeria,5.056578,Algiers,36.775361,3.060188
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247
...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601
191,Vietnam,2.049616,Hanoi,21.029450,105.854444
193,Yemen,4.851539,Sana'a,15.353857,44.205884
194,Zambia,3.582886,Lusaka,-15.416412,28.282479


In [10]:
#SO MY WEATHERAPI METROSTAT allows for me to pull historical data, but I need to be able to pull it by a Weather Station
#Luckily, they have an API to translate Lat and Lon to a weather station
#So this API requests does that and adds the weather station to the dataframe
#Since I am not paying for the API, I have to add a delay between each request or it gets mad and rejects it
#That's the time.sleep function

#DONT RUN THIS A LOT BECAUSE EVERY API REQUEST COST ME MONEY !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
import time
# The following code gets the nearest weather station for the capital city lat and lon
# Set up the API endpoint and parameters
url = "https://meteostat.p.rapidapi.com/stations/nearby"
headers = {
	"X-RapidAPI-Key": "ddb174a9e1msh9f62499a2bb7e26p112383jsn5e15d8de033b",
	"X-RapidAPI-Host": "meteostat.p.rapidapi.com"
}

# Loop through the rows of the dataframe and make an API request for each capital city
for index, row in disorder_mean_data_df.iterrows():

    # only run the code if Station ID does not exist or is NaN. This saves API requests
    # if pd.isna(row['Station ID']):
        # Get the latitude and longitude from the dataframe
    querystring = {"lat":row["Lat"],"lon":row["Lon"]}
    
    # Make the API request and extract the nearest weather station's ID from the response
    response = requests.get(url, headers=headers, params=querystring).json()
    # the response comes back as a JSON dictionary with the value id as station ID
    # I found this in the documentation
    if response["data"]:
        station_id = response["data"][0]["id"]
    # Add the station ID to the original dataframe
        disorder_mean_data_df.loc[index, 'Station ID'] = station_id
    time.sleep(0.5)
    
# Preview the updated dataframe
disorder_mean_data_df

Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon,Station ID
0,Afghanistan,4.874300,Kabul,34.526011,69.177684,40948
1,Albania,3.388445,Tirana,41.330514,19.825563,13601
2,Algeria,5.056578,Algiers,36.775361,3.060188,60369
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830,91765
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247,LESU0
...,...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601,80416
191,Vietnam,2.049616,Hanoi,21.029450,105.854444,VVGL0
193,Yemen,4.851539,Sana'a,15.353857,44.205884,41404
194,Zambia,3.582886,Lusaka,-15.416412,28.282479,67667


In [11]:
#DONT RUN THIS A LOT BECAUSE EVERY API REQUEST COST ME MONEY !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

#now that I have the nearest station, I can get average temperature

url = "https://meteostat.p.rapidapi.com/stations/normals"


headers = {
	"X-RapidAPI-Key": "ddb174a9e1msh9f62499a2bb7e26p112383jsn5e15d8de033b",
	"X-RapidAPI-Host": "meteostat.p.rapidapi.com"
}

# Loop through the rows of the dataframe and make an API request for each capital city
# This time we will pull the Station ID and get the average temperature from 2012-2017 and add that to the dataframe

for index, row in disorder_mean_data_df.iterrows():

    #check to reduce API requests
    
    # Get the latitude and longitude from the dataframe
    querystring = {"station":row['Station ID']}
    
    # Make the API request
    response = requests.get(url, headers=headers, params=querystring).json()
        #the format of the response can be found in meteostat's documentation
        #based on the website, i know it returns a json dict 
        #it actually returns two, but the one we want is in the dict called data

        # If there is actually data / aka only run the code if there is data otherwise you get errors
    if response['data']:
            # print(response) #for debugging purposes, let's us see what the api returns
            #we know the response is a dictionary so we can turn it into a DataFrame so we can run pandas functions on it
            # turn the response into a dataframe and then we can get the averages 
        df = pd.DataFrame(response['data'])
            # we just chose variable df because we are only using it for the below calculations
            # the response is separated into months, so we will have to take the average of all the month's average temperatures and then combine to make an average annual temp
        avg_temp = df['tavg'].mean()
            # same thing goes for precipitation and sunshine
        avg_precip = df['prcp'].mean()
        total_sun = df['tsun'].mean()
            # Assign the mean to a new column in the original DataFrame
        disorder_mean_data_df.at[index, 'Annual Precipitation'] = avg_precip
        disorder_mean_data_df.at[index, 'Avg Annual Temp'] = avg_temp
        disorder_mean_data_df.at[index, 'Total Sun'] = total_sun
    #delay so that our API doesn't get angry
time.sleep(0.5)
#display our new table
disorder_mean_data_df



    


Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon,Station ID,Annual Precipitation,Avg Annual Temp,Total Sun
0,Afghanistan,4.874300,Kabul,34.526011,69.177684,40948,24.087500,,
1,Albania,3.388445,Tirana,41.330514,19.825563,13601,,,
2,Algeria,5.056578,Algiers,36.775361,3.060188,60369,58.225000,19.833333,
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830,91765,258.552083,27.416667,9518.885417
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247,LESU0,,,
...,...,...,...,...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601,80416,76.379167,23.427778,11975.539583
191,Vietnam,2.049616,Hanoi,21.029450,105.854444,VVGL0,,,
193,Yemen,4.851539,Sana'a,15.353857,44.205884,41404,,,
194,Zambia,3.582886,Lusaka,-15.416412,28.282479,67667,,,


In [12]:
# Review the DF and use this code block as an opportunity to clean up
disorder_mean_data_df

Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon,Station ID,Annual Precipitation,Avg Annual Temp,Total Sun
0,Afghanistan,4.874300,Kabul,34.526011,69.177684,40948,24.087500,,
1,Albania,3.388445,Tirana,41.330514,19.825563,13601,,,
2,Algeria,5.056578,Algiers,36.775361,3.060188,60369,58.225000,19.833333,
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830,91765,258.552083,27.416667,9518.885417
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247,LESU0,,,
...,...,...,...,...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601,80416,76.379167,23.427778,11975.539583
191,Vietnam,2.049616,Hanoi,21.029450,105.854444,VVGL0,,,
193,Yemen,4.851539,Sana'a,15.353857,44.205884,41404,,,
194,Zambia,3.582886,Lusaka,-15.416412,28.282479,67667,,,


In [13]:
### Step 1: Create a map that displays a point for every city in the `disorder_mean_data_df` DataFrame that has an Annual
##The size of the point should be the anxiety in each city.

#decrease range of plots
#If we want to decrease the data we display, we can create a new DF
# Define the range of latitudes near the equator
equator_range = [(-5, 5)]
# Filter the dataframe to only include rows with latitude in the range of latitudes near the equator
df_to_plot = disorder_mean_data_df.query('Lat >= @equator_range[0][0] and Lat <= @equator_range[0][1]')
# use df_to_plot instead of disorder_mean_data_df in map_plot_1 to change the data 

# Normalize the "Anxiety Disorders" column and add it to the dataframe as a new column called "Normalized Anxiety Disorders"
#normalizing takes our maximum value and makes it equal to 1.0 and the lowest to 0.0 and then sorts the rest of the values based on that. 
disorder_mean_data_df['Normalized Anxiety Disorders'] = (disorder_mean_data_df['Anxiety Disorders'] - disorder_mean_data_df['Anxiety Disorders'].min()) / (disorder_mean_data_df['Anxiety Disorders'].max() - disorder_mean_data_df['Anxiety Disorders'].min())

#drop all negatives to avoid sqrt error when plotting
disorder_mean_data_df.drop(disorder_mean_data_df[disorder_mean_data_df['Avg Annual Temp'] < 0].index, inplace=True)

# Configure the map plot
map_plot_1 = disorder_mean_data_df.hvplot.points(
    "Lon",
    "Lat",
    geo=True,
    size="Avg Annual Temp",
    color="Capital",
    alpha="Normalized Anxiety Disorders",
    tiles="OSM",
    scale=2  # adjust this value to make the points larger
)
# Display the map plot_1
map_plot_1

In [14]:
#ANALYSIS
#The more opaque points seem to be smaller so that means lower temperature cities are more anxious
disorder_mean_data_df

Unnamed: 0,Entity,Anxiety Disorders,Capital,Lat,Lon,Station ID,Annual Precipitation,Avg Annual Temp,Total Sun,Normalized Anxiety Disorders
0,Afghanistan,4.874300,Kabul,34.526011,69.177684,40948,24.087500,,,0.435272
1,Albania,3.388445,Tirana,41.330514,19.825563,13601,,,,0.206308
2,Algeria,5.056578,Algiers,36.775361,3.060188,60369,58.225000,19.833333,,0.463360
3,American Samoa,3.316162,Pago Pago,-14.275479,-170.704830,91765,258.552083,27.416667,9518.885417,0.195169
4,Andorra,5.310556,Andorra la Vella,42.506939,1.521247,LESU0,,,,0.502497
...,...,...,...,...,...,...,...,...,...,...
190,Venezuela,2.864805,Caracas,10.506093,-66.914601,80416,76.379167,23.427778,11975.539583,0.125617
191,Vietnam,2.049616,Hanoi,21.029450,105.854444,VVGL0,,,,0.000000
193,Yemen,4.851539,Sana'a,15.353857,44.205884,41404,,,,0.431764
194,Zambia,3.582886,Lusaka,-15.416412,28.282479,67667,,,,0.236270
