# Geoapify Places 5 Cities

In [1]:
# Dependencies
from census import Census
import hvplot.pandas
import time
import requests
import json
import pandas as pd
import numpy as np
from scipy.stats import linregress
from matplotlib import pyplot as plt

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

# Import the API key
from config_key import geoapify_key
from config_key import census_key

# Create an instance of the Census library
c = Census(
    census_key, 
    year=2021
)

In [2]:
# Run Census Search to retrieve data on all zip codes (2021 ACS5 Census)
census_data = c.acs5.get(
    (
        "B01003_001E",
        "B17001_002E"
    ),
    {'for': 'zip code tabulation area:*'}
)

# Convert to DataFrame
census_df = pd.DataFrame(census_data)



# Column renaming
census_df = census_df.rename(
    columns = {
        "B01003_001E": "Population",
        "B17001_002E": "Poverty Count",
        "zip code tabulation area": "Zipcode"
    }
)

In [3]:
#Create DataFrame called US_locations_df to store the city, coordinates, supermarket property, distance
cities = ["Bronx, NY","Napa, CA", "Victorville, CA","San Francisco, CA","Monmouth, OR"]
latitude = [40.837048,38.297539,34.536217,37.828724,44.8485]
longitude = [-73.865433,-122.286865,-117.292763,-122.355537,-123.2340]

#Create DataFrame called US_locations_df to store the city, coordinates, supermarket property, distance
US_locations_df = pd.DataFrame({
                            "cities": cities, 
                            "latitude": latitude, 
                            "longitude": longitude}
                            )

#Add empty columns to the DataFrame to store, supermarket (property name), distance, etc. 
US_locations_df["supermarket"] = ""
US_locations_df["distance"] = ""
US_locations_df["zipcode"] = ""

US_locations_df

Unnamed: 0,cities,latitude,longitude,supermarket,distance,zipcode
0,"Bronx, NY",40.837048,-73.865433,,,
1,"Napa, CA",38.297539,-122.286865,,,
2,"Victorville, CA",34.536217,-117.292763,,,
3,"San Francisco, CA",37.828724,-122.355537,,,
4,"Monmouth, OR",44.8485,-123.234,,,


In [4]:
# Set the parameters to search for supermarkets 
categories = "commercial.supermarket"
radius = 16200

# Set up a params dictionary
params = {
    "categories":categories,
    "apiKey":geoapify_key,
    "limit": 100
}

# Print a message to follow up the supermarket search
print("Starting supermarket search...") 

# Iterate through the US_locations_df DataFrame
for index, row in US_locations_df.iterrows():
    # get latitude, longitude from the DataFrame
    longitude = row["longitude"]
    latitude = row["latitude"]
    
    # Add filter and bias parameters with the current city's latitude and longitude to the params dictionary
    params["filter"] = f"circle:{longitude},{latitude},{radius}"
    params["bias"] = f"proximity:{longitude},{latitude}"
    
    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"


    # Make and API request using the params dictionaty
    response = requests.get(base_url,params=params)
    
    # Convert the API response to JSON format
    response = response.json()

    # Grab the first hotel from the results and store the name in the hotel_df DataFrame
    try:
        US_locations_df.loc[index, "supermarket"] = response["features"][0]["properties"]["address_line1"]
        US_locations_df.loc[index, "distance"] = response["features"][0]["properties"]["distance"]
        US_locations_df.loc[index, "postcode"] = response["features"][0]["properties"]["postcode"]
        
    except (KeyError, IndexError):
        # If no hotel is found, set the hotel name as "No hotel found".
        US_locations_df.loc[index, "supermarket"] = "No supermarket found"
        US_locations_df.loc[index, "distance"] = "No distance found"
        US_locations_df.loc[index, "postcode"] = "No zipcode found"
      
    # Log the search results
    print(f"{US_locations_df.loc[index, 'cities']} - nearest supermarket: {US_locations_df.loc[index, 'supermarket']} - zipcode: {US_locations_df.loc[index, 'postcode']}")

# Display sample data
US_locations_df

Starting supermarket search...
Bronx, NY - nearest supermarket: Bravo - zipcode: 10460
Napa, CA - nearest supermarket: Walmart Supercenter - zipcode: 94558
Victorville, CA - nearest supermarket: Food 4 Less - zipcode: 92392
San Francisco, CA - nearest supermarket: Island Cove Market - zipcode: 94130
Monmouth, OR - nearest supermarket: Western Oregon University Book Store - zipcode: 97361


Unnamed: 0,cities,latitude,longitude,supermarket,distance,zipcode,postcode
0,"Bronx, NY",40.837048,-73.865433,Bravo,138,,10460
1,"Napa, CA",38.297539,-122.286865,Walmart Supercenter,1254,,94558
2,"Victorville, CA",34.536217,-117.292763,Food 4 Less,2734,,92392
3,"San Francisco, CA",37.828724,-122.355537,Island Cove Market,1338,,94130
4,"Monmouth, OR",44.8485,-123.234,Western Oregon University Book Store,519,,97361


In [9]:
census_df = census_df.rename(columns={"Zipcode":"postcode"})

In [10]:
census_df

Unnamed: 0,Population,Poverty Count,postcode
0,17126.0,11302.0,00601
1,37895.0,17121.0,00602
2,49136.0,23617.0,00603
3,5751.0,3139.0,00606
4,26153.0,11640.0,00610
...,...,...,...
33769,13.0,0.0,99923
33770,917.0,182.0,99925
33771,1445.0,252.0,99926
33772,11.0,0.0,99927


In [11]:
US_locations_df

Unnamed: 0,cities,latitude,longitude,supermarket,distance,zipcode,postcode
0,"Bronx, NY",40.837048,-73.865433,Bravo,138,,10460
1,"Napa, CA",38.297539,-122.286865,Walmart Supercenter,1254,,94558
2,"Victorville, CA",34.536217,-117.292763,Food 4 Less,2734,,92392
3,"San Francisco, CA",37.828724,-122.355537,Island Cove Market,1338,,94130
4,"Monmouth, OR",44.8485,-123.234,Western Oregon University Book Store,519,,97361


In [14]:
# Merge datasets on Zipcodes
supermarkets_density_final = pd.merge(
    census_df,
    US_locations_df,
    how = "left",
    on = ["postcode", "postcode"]
)

In [16]:
supermarkets_density_final

Unnamed: 0,Population,Poverty Count,postcode,cities,latitude,longitude,supermarket,distance,zipcode
0,17126.0,11302.0,00601,,,,,,
1,37895.0,17121.0,00602,,,,,,
2,49136.0,23617.0,00603,,,,,,
3,5751.0,3139.0,00606,,,,,,
4,26153.0,11640.0,00610,,,,,,
...,...,...,...,...,...,...,...,...,...
33769,13.0,0.0,99923,,,,,,
33770,917.0,182.0,99925,,,,,,
33771,1445.0,252.0,99926,,,,,,
33772,11.0,0.0,99927,,,,,,


In [15]:
# Add a Supermarket Count column (Supermarkets/Population)
supermarkets_density_final["Supermarket Count"] = 100 * supermarkets_density_final["supermarkets"].astype(int) / supermarkets_density_final["Population"].astype(int)

# Configure the final DataFrame
supermarkets_density_final = supermarkets_density_final[
    [
        "postcode",
        "population",
        "supermarkets"
    ]
]

# Display DataFrame length and sample data
# print(f"Number of rows in the DataFrame: {len(supermarkets_density_final)}")
supermarkets_density_final.head()

KeyError: 'supermarkets'