# Geoapify Geocode Demo

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import requests
import time
import json
from scipy.stats import linregress
from pprint import pprint

# API key
api_key = "60a6ffdd990b65ca16853197a6e0c84c"

In [3]:
path = "..\project_3\cities.json"

with open(path, 'r') as file:
    data = json.load(file)

# Initialize lists to store cities and coordinates
cities = []
coordinates = []

# Extract data from JSON
for entry in data:
    cities.append(entry['city'])
    coordinates.append((entry['latitude'], entry['longitude']))

# Print the lists
print("Cities:", cities)
print("Coordinates:", coordinates)

Cities: ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Philadelphia', 'Phoenix', 'San Antonio', 'San Diego', 'Dallas', 'San Jose', 'Austin', 'Indianapolis', 'Jacksonville', 'San Francisco', 'Columbus', 'Charlotte', 'Fort Worth', 'Detroit', 'El Paso', 'Memphis', 'Seattle', 'Denver', 'Washington', 'Boston', 'Nashville-Davidson', 'Baltimore', 'Oklahoma City', 'Louisville/Jefferson County', 'Portland', 'Las Vegas', 'Milwaukee', 'Albuquerque', 'Tucson', 'Fresno', 'Sacramento', 'Long Beach', 'Kansas City', 'Mesa', 'Virginia Beach', 'Atlanta', 'Colorado Springs', 'Omaha', 'Raleigh', 'Miami', 'Oakland', 'Minneapolis', 'Tulsa', 'Cleveland', 'Wichita', 'Arlington', 'New Orleans', 'Bakersfield', 'Tampa', 'Honolulu', 'Aurora', 'Anaheim', 'Santa Ana', 'St. Louis', 'Riverside', 'Corpus Christi', 'Lexington-Fayette', 'Pittsburgh', 'Anchorage', 'Stockton', 'Cincinnati', 'St. Paul', 'Toledo', 'Greensboro', 'Newark', 'Plano', 'Henderson', 'Lincoln', 'Buffalo', 'Jersey City', 'Chula Vista', 'Fort Way

In [46]:
# Test to see the json data
lat = 40.7128
lon = -74.0059
start = 1606460400
end = 1606546800
historical_url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={api_key}"
test_call = requests.get(historical_url).json()
print(test_call)

{'coord': {'lon': -74.0059, 'lat': 40.7128}, 'list': [{'main': {'aqi': 3}, 'components': {'co': 580.79, 'no': 9.5, 'no2': 79.51, 'o3': 0.49, 'so2': 4.47, 'pm2_5': 16.67, 'pm10': 23.03, 'nh3': 2.6}, 'dt': 1717471255}]}


In [13]:
historical_url = f"http://api.openweathermap.org/data/2.5/air_pollution/history?"

city_data = []

# Nov 28, 2020 - Nov 28, 2021
start = 1606460400
end = 1638082800
# Loop through all the cities in our list to fetch weather data
for i, (lat, lon) in enumerate(coordinates):

    # Create endpoint URL with each city
    city_url = f'{historical_url}lat={lat}&lon={lon}&start={start}&end={end}&appid={api_key}'

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_air = requests.get(city_url).json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_coords =  city_air['coord']
        city_stats = city_air['list'][0]['components']
        city_aqi = city_air['list'][0]['main']

        # Append the City information into city_data list
        city_data.append({"Coordinates": city_coords,
                          "Stats": city_stats,
                          "Air Quality Index": city_aqi
                        })
        print(f"Call {i} made successfully")

    # If an error is experienced, skip the city
    except Exception as e:
        print(f"An error occurred for city at {lat}, {lon}: {e}")
        raise
              
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

-----------------------------
Data Retrieval Complete      
-----------------------------


In [16]:
pollution_df = pd.DataFrame(city_data)
pollution_df

Unnamed: 0,Coordinates,Stats,Air Quality Index
0,"{'lon': -74.0059, 'lat': 40.7128}","{'co': 520.71, 'no': 32.19, 'no2': 28.45, 'o3'...",{'aqi': 2}
1,"{'lon': -118.2437, 'lat': 34.0522}","{'co': 907.9, 'no': 72.42, 'no2': 69.92, 'o3':...",{'aqi': 4}
2,"{'lon': -87.6298, 'lat': 41.8781}","{'co': 347.14, 'no': 0.34, 'no2': 31.19, 'o3':...",{'aqi': 1}
3,"{'lon': -95.3698, 'lat': 29.7604}","{'co': 247, 'no': 0.05, 'no2': 14.22, 'o3': 19...",{'aqi': 1}
4,"{'lon': -75.1652, 'lat': 39.9526}","{'co': 417.23, 'no': 6.82, 'no2': 25.71, 'o3':...",{'aqi': 1}
...,...,...,...
120,"{'lon': -118.2551, 'lat': 34.1425}","{'co': 240.33, 'no': 0.01, 'no2': 20.56, 'o3':...",{'aqi': 1}
121,"{'lon': -88.0399, 'lat': 30.6954}","{'co': 323.77, 'no': 0.08, 'no2': 15.77, 'o3':...",{'aqi': 1}
122,"{'lon': -85.6681, 'lat': 42.9634}","{'co': 280.38, 'no': 0, 'no2': 15.59, 'o3': 39...",{'aqi': 1}
123,"{'lon': -111.891, 'lat': 40.7608}","{'co': 297.07, 'no': 0.38, 'no2': 38.39, 'o3':...",{'aqi': 1}


## Run the next 3 cells just once.


In [29]:
# Reorder the Coordinates column

# Import ast to safely evaluate strings
import ast

# Function to convert string to dictionary
def str_to_dict(location):
    if isinstance(location, str):
        return ast.literal_eval(location)
    return location

# Apply the function to convert the 'Coordinates' column to a dictionary
pollution_df['Coordinates'] = pollution_df['Coordinates'].apply(str_to_dict)

# Create separate 'lat' and 'lon' columns
pollution_df['lat'] = pollution_df['Coordinates'].apply(lambda x: x['lat'])
pollution_df['lon'] = pollution_df['Coordinates'].apply(lambda x: x['lon'])

# Drop the original 'location' column if needed
pollution_df.drop(columns=['Coordinates'], inplace=True)

pollution_df

KeyError: 'Coordinates'

In [34]:
# Converting AQI to just a number

# Function to convert string to dictionary
def str_to_dict(aqi_str):
    if isinstance(aqi_str, str):
        return ast.literal_eval(aqi_str)
    return aqi_str

# Apply the function to convert the 'Air Quality Index' column to a dictionary
pollution_df['Air Quality Index'] = pollution_df['Air Quality Index'].apply(str_to_dict)

# Create a new column for 'aqi' values
pollution_df['AQI'] = pollution_df['Air Quality Index'].apply(lambda x: x['aqi'])

# Drop the original 'Air Quality Index' column if needed
pollution_df.drop(columns=['Air Quality Index'], inplace=True)

pollution_df

TypeError: 'int' object is not subscriptable

In [41]:
# Now for the Stats column

# Function to convert string to dictionary if needed and extract pollutants
def extract_pollutants(stats_value):
    try:
        if isinstance(stats_value, str):
            stats_value = ast.literal_eval(stats_value)
        if isinstance(stats_value, dict):
            return stats_value
    except (ValueError, SyntaxError, KeyError, TypeError) as e:
        # Handle unexpected formats
        return {}

# Apply the function to convert the 'stats' column to a dictionary
pollution_df['Stats'] = pollution_df['Stats'].apply(extract_pollutants)

# Normalize the 'stats' dictionary into separate columns
stats_df = pollution_df['Stats'].apply(pd.Series)

# Combine the original DataFrame with the new stats DataFrame
fixed_df = pd.concat([pollution_df, stats_df], axis=1)

# Drop the original 'stats' column if needed
fixed_df.drop(columns=['Stats'], inplace=True)

fixed_df

Unnamed: 0,Air Quality Index,lat,lon,co,no,no2,o3,so2,pm2_5,pm10,nh3
0,2,40.7128,-74.0059,520.71,32.19,28.45,0.00,3.22,14.19,20.84,1.39
1,4,34.0522,-118.2437,907.90,72.42,69.92,0.00,5.72,28.31,47.76,9.88
2,1,41.8781,-87.6298,347.14,0.34,31.19,16.81,4.59,7.75,11.57,0.71
3,1,29.7604,-95.3698,247.00,0.05,14.22,19.85,3.13,8.33,9.40,0.23
4,1,39.9526,-75.1652,417.23,6.82,25.71,0.37,2.95,9.42,12.84,0.33
...,...,...,...,...,...,...,...,...,...,...,...
120,1,34.1425,-118.2551,240.33,0.01,20.56,42.56,1.01,3.57,12.30,1.20
121,1,30.6954,-88.0399,323.77,0.08,15.77,7.96,0.40,7.39,8.60,0.26
122,1,42.9634,-85.6681,280.38,0.00,15.59,39.70,4.53,9.67,11.66,0.12
123,1,40.7608,-111.8910,297.07,0.38,38.39,13.59,1.28,7.77,11.82,2.25


In [47]:
# Rename the columns
fixed_and_renamed_df = fixed_df.rename({"lat": "Latitude", "lon": "Longitude",
                            "co": "Carbon monoxide (CO)", "no": "Nitric Oxide (NO)",
                            "no2": "Nitrogen dioxide (NO2)", "o3": "Ozone (O3)",
                            "so2": "Sulfur Dioxide (SO2)", "pm2_5":"Particulates 2.5",
                            "pm10": "Particulates 10", "nh3": "Ammonia (NH3)"})

# Set the cities from the cities list as the index (This does preserve the order)
final_df = fixed_and_renamed_df.set_index(pd.Index(cities))
final_df

Unnamed: 0,Air Quality Index,lat,lon,co,no,no2,o3,so2,pm2_5,pm10,nh3
New York,2,40.7128,-74.0059,520.71,32.19,28.45,0.00,3.22,14.19,20.84,1.39
Los Angeles,4,34.0522,-118.2437,907.90,72.42,69.92,0.00,5.72,28.31,47.76,9.88
Chicago,1,41.8781,-87.6298,347.14,0.34,31.19,16.81,4.59,7.75,11.57,0.71
Houston,1,29.7604,-95.3698,247.00,0.05,14.22,19.85,3.13,8.33,9.40,0.23
Philadelphia,1,39.9526,-75.1652,417.23,6.82,25.71,0.37,2.95,9.42,12.84,0.33
...,...,...,...,...,...,...,...,...,...,...,...
Glendale,1,34.1425,-118.2551,240.33,0.01,20.56,42.56,1.01,3.57,12.30,1.20
Mobile,1,30.6954,-88.0399,323.77,0.08,15.77,7.96,0.40,7.39,8.60,0.26
Grand Rapids,1,42.9634,-85.6681,280.38,0.00,15.59,39.70,4.53,9.67,11.66,0.12
Salt Lake City,1,40.7608,-111.8910,297.07,0.38,38.39,13.59,1.28,7.77,11.82,2.25
