In [2]:
%matplotlib inline

# Import dependencies
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from scipy.stats import linregress


import hvplot.pandas
import requests
import json
from pprint import pprint

# Import API keys
from api_key import geoapify_key
from api_key import weather_api_key

# Import dependency and set theme for visualizations
import seaborn as sns

# Apply the base theme first: sns.set() re-applies seaborn's default palette,
# so calling set_palette() before it (the previous order) discarded "muted".
sns.set(rc={'axes.facecolor': 'gainsboro', 'figure.facecolor':'whitesmoke'})

# Now select the palette so it is the one actually in effect for the plots.
# NOTE: set_palette() has no return value, so color_theme is None; the name is
# kept bound only in case a later cell references it.
color_theme = sns.set_palette("muted")

# Turn off warning messages
# NOTE(review): this silences every warning for the whole session, including
# deprecation notices from pandas/seaborn -- consider narrowing the filter.
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Location of the border-crossing dataset, relative to the notebook
csv_path = "Output/Border_Crossing_Final_Data.csv"

# Load the CSV into a DataFrame
temperature_df = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first five rows
temperature_df.head(5)

Unnamed: 0,Index,Year,Month,Timestamp,Border,Port Name,State,Measure,Value,Longitude,Latitude
0,0,2018,12,1543622400,US-Canada Border,Van Buren,Maine,Trucks,1204,-67.94271,47.16207
1,1,2018,12,1543622400,US-Canada Border,Maida,North Dakota,Trucks,170,-98.36953,48.98568
2,2,2018,12,1543622400,US-Mexico Border,Douglas,Arizona,Buses,210,-109.54472,31.34444
3,3,2018,12,1543622400,US-Mexico Border,Presidio,Texas,Bus Passengers,238,-104.37167,29.56056
4,4,2018,12,1543622400,US-Canada Border,Anacortes,Washington,Personal Vehicle Passengers,5350,-122.61739,48.49988


## Question 6:  How does the border crossing volume correlate between Mexico and Canada over the year?

In [4]:
# Create a blank placeholder column; the city names looked up through the
# Geoapify API are recorded under this column name further down
nearest_city_column = "Nearest City"
temperature_df[nearest_city_column] = ""

# Preview the first five rows with the new column in place
temperature_df.head(5)

Unnamed: 0,Index,Year,Month,Timestamp,Border,Port Name,State,Measure,Value,Longitude,Latitude,Nearest City
0,0,2018,12,1543622400,US-Canada Border,Van Buren,Maine,Trucks,1204,-67.94271,47.16207,
1,1,2018,12,1543622400,US-Canada Border,Maida,North Dakota,Trucks,170,-98.36953,48.98568,
2,2,2018,12,1543622400,US-Mexico Border,Douglas,Arizona,Buses,210,-109.54472,31.34444,
3,3,2018,12,1543622400,US-Mexico Border,Presidio,Texas,Bus Passengers,238,-104.37167,29.56056,
4,4,2018,12,1543622400,US-Canada Border,Anacortes,Washington,Personal Vehicle Passengers,5350,-122.61739,48.49988,


In [5]:
# Build a one-row-per-port table that will hold the Nearest City of each port
port_data = (
    temperature_df
    .groupby(["Port Name", "Latitude", "Longitude", "Nearest City"])["Value"]
    .sum()
    .reset_index()
    # The same port can appear under more than one coordinate pair; keep only
    # its first row. NOTE: "Value" is therefore the sum for that first
    # coordinate group only, not the port's overall total.
    .drop_duplicates(subset="Port Name")
)

# Preview the first five ports
port_data.head(5)

Unnamed: 0,Port Name,Latitude,Longitude,Nearest City,Value
0,Alcan,62.61,-141.0,,1217588
2,Alexandria Bay,44.33583,-75.91806,,4218355
4,Algonac,42.33222,-83.04676,,59364
5,Ambrose,48.95389,-103.48222,,5691
7,Anacortes,48.49988,-122.61739,,185358


In [6]:
# Get the data for Nearest City for each Port from Geoapify

# Set parameters to search for nearest city
radius = 50000
categories = "populated_place.city"
params = {
    "categories":categories,
    "apiKey":geoapify_key,
}

# Set base URL (identical for every request, so defined once outside the loop)
base_url = "https://api.geoapify.com/v2/places"

# Print a message to follow up the Nearest City search
print("Starting city search")

# Iterate through the port_data DataFrame
for index, row in port_data.iterrows():
    # Get latitude, longitude of the current port
    latitude = row["Latitude"]
    longitude = row["Longitude"]

    # Restrict the search to a circle around the port and rank results by proximity to it
    params["filter"] = f"circle:{longitude},{latitude},{radius}"
    params["bias"] = f"proximity:{longitude},{latitude}"

    try:
        # Make an API request using the params dictionary; the timeout keeps a
        # stalled connection from hanging the notebook indefinitely
        name_address = requests.get(base_url, params=params, timeout=30)

        # Convert the API response to JSON format
        name_address_json = name_address.json()

        # Grab the first city from the results
        nearest_city = name_address_json["features"][0]["properties"]["city"]
    except (KeyError, IndexError):
        # If no city is found, set the name as "No city found".
        nearest_city = "No city found"
    except (requests.RequestException, ValueError) as error:
        # Network failure or a non-JSON body: report it and continue with the
        # remaining ports instead of aborting the search. Only the exception
        # type is printed because the full message can contain the request URL,
        # which includes the API key.
        print(f"{row['Port Name']} - request failed ({type(error).__name__})")
        nearest_city = "No city found"

    # Store the result in the port_data DataFrame
    port_data.loc[index, "Nearest City"] = nearest_city

    # Log the search results
    print(f"{row['Port Name']} - nearest city: {nearest_city}")

# Display sample data
port_data.head()

Starting city search
Alcan - nearest city: No city found
Alexandria Bay - nearest city: Kingston
Algonac - nearest city: Detroit
Ambrose - nearest city: Ambrose
Anacortes - nearest city: Anacortes
Andrade - nearest city: Yuma
Antler - nearest city: Antler
Bar Harbor - nearest city: Ellsworth
Baudette - nearest city: Baudette
Beecher Falls - nearest city: No city found
Blaine - nearest city: White Rock
Boquillas - nearest city: No city found
Boundary - nearest city: Colville
Bridgewater - nearest city: Presque Isle
Brownsville - nearest city: Brownsville
Buffalo-Niagara Falls - nearest city: Buffalo
Calais - nearest city: Calais
Calexico - nearest city: Calexico
Calexico East - nearest city: Calexico
Cape Vincent - nearest city: Kingston
Carbury - nearest city: Souris
Champlain-Rouses Point - nearest city: City of Plattsburgh
Columbus - nearest city: No city found
Cross Border Xpress - nearest city: San Diego
Dalton Cache - nearest city: No city found
Danville - nearest city: Republic
D

Unnamed: 0,Port Name,Latitude,Longitude,Nearest City,Value
0,Alcan,62.61,-141.0,No city found,1217588
2,Alexandria Bay,44.33583,-75.91806,Kingston,4218355
4,Algonac,42.33222,-83.04676,Detroit,59364
5,Ambrose,48.95389,-103.48222,Ambrose,5691
7,Anacortes,48.49988,-122.61739,Anacortes,185358


In [7]:
# Total the crossing volume per port and month. "Nearest City" is carried
# along as a grouping key so the column exists in the result.
temperature_data = (
    temperature_df
    .groupby(
        [
            "Year",
            "Month",
            "Port Name",
            "Timestamp",
            "Latitude",
            "Longitude",
            "Nearest City",
        ]
    )["Value"]
    .sum()
    .reset_index()
)

# Preview the first five rows
temperature_data.head(5)

Unnamed: 0,Year,Month,Port Name,Timestamp,Latitude,Longitude,Nearest City,Value
0,2009,1,Alcan,1230768000,62.61,-141.0,,3262
1,2009,1,Alexandria Bay,1230768000,44.35,-75.98,,132905
2,2009,1,Ambrose,1230768000,49.0,-103.49,,271
3,2009,1,Anacortes,1230768000,48.51,-122.61,,850
4,2009,1,Andrade,1230768000,32.72,-114.73,,316005


In [8]:
# Import data for Nearest City from port_data to temperature_data

# Build a Port Name -> Nearest City lookup once, instead of rescanning
# port_data for every row of temperature_data (the previous nested iterrows
# loops did rows x ports comparisons).
nearest_city_by_port = dict(zip(port_data["Port Name"], port_data["Nearest City"]))

# Look every port up in a single vectorized pass; a port that is missing from
# port_data keeps whatever value it already had.
temperature_data["Nearest City"] = (
    temperature_data["Port Name"]
    .map(nearest_city_by_port)
    .fillna(temperature_data["Nearest City"])
)

# Display the data
temperature_data.head()

Unnamed: 0,Year,Month,Port Name,Timestamp,Latitude,Longitude,Nearest City,Value
0,2009,1,Alcan,1230768000,62.61,-141.0,No city found,3262
1,2009,1,Alexandria Bay,1230768000,44.35,-75.98,Kingston,132905
2,2009,1,Ambrose,1230768000,49.0,-103.49,Ambrose,271
3,2009,1,Anacortes,1230768000,48.51,-122.61,Anacortes,850
4,2009,1,Andrade,1230768000,32.72,-114.73,Yuma,316005


In [9]:
# Show the dataset with Nearest City filled in (the rendering is truncated to the first and last rows)
temperature_data

Unnamed: 0,Year,Month,Port Name,Timestamp,Latitude,Longitude,Nearest City,Value
0,2009,1,Alcan,1230768000,62.61000,-141.00000,No city found,3262
1,2009,1,Alexandria Bay,1230768000,44.35000,-75.98000,Kingston,132905
2,2009,1,Ambrose,1230768000,49.00000,-103.49000,Ambrose,271
3,2009,1,Anacortes,1230768000,48.51000,-122.61000,Anacortes,850
4,2009,1,Andrade,1230768000,32.72000,-114.73000,Yuma,316005
...,...,...,...,...,...,...,...,...
13298,2018,12,Warroad,1543622400,48.89625,-95.32610,Warroad,20121
13299,2018,12,Westhope,1543622400,48.90940,-101.02001,Westhope,3087
13300,2018,12,Whitlash,1543622400,48.90372,-111.24900,No city found,163
13301,2018,12,Wildhorse,1543622400,48.54863,-109.67761,Havre,2411


In [None]:
# Confirm the OpenWeather key was imported without echoing the secret itself:
# anything printed here is stored in the notebook output and shared with it.
print(f"OpenWeather API key loaded: {bool(weather_api_key)}")

In [19]:
# Trial request: historical weather for one sample city, looked up by name
# through the OpenWeather History API.
# NOTE: in the recorded run this endpoint answered 401 "Invalid API key".
city = "Ambrose"
url = "https://history.openweathermap.org/data/2.5/history/city?"
units = "metric"
time = 1230768000

# Not sent by this city-name query; kept defined because they are
# notebook-level names that other cells may read.
lat = 49.00000
lon = -103.49000
end = 1230768000

# Hand the query to requests through `params` so every value is URL-encoded,
# and set a timeout so a stalled connection cannot hang the notebook.
city_response = requests.get(
    url,
    params={"appid": weather_api_key, "q": city, "units": units, "dt": time},
    timeout=30,
)

# Final request URL as sent (contains the API key -- do not print it)
city_url = city_response.url

city_weather = city_response.json()

# Note: I used pprint to review the response
pprint(city_weather)

{'cod': 401,
 'message': 'Invalid API key. Please see '
            'http://openweathermap.org/faq#error401 for more info.'}


In [18]:
# Trial request: historical weather for one sample location, looked up by
# coordinates and timestamp through the OpenWeather One Call "timemachine" endpoint.
# Endpoint template:
# https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={time}&appid={API key}
city = "Ambrose"
url = "https://api.openweathermap.org/data/3.0/onecall/timemachine?"
units = "metric"
lat = 49.00000
lon = -103.49000
time = 1230768000

# Not sent by this request; kept defined because it is a notebook-level name
# that other cells may read.
end = 1230768000

# Hand the query to requests through `params` so every value is URL-encoded,
# and set a timeout so a stalled connection cannot hang the notebook.
city_response = requests.get(
    url,
    params={
        "appid": weather_api_key,
        "lat": lat,
        "lon": lon,
        "units": units,
        "dt": time,
    },
    timeout=30,
)

# Final request URL as sent (contains the API key -- do not print it)
city_url = city_response.url

city_weather = city_response.json()

# Note: I used pprint to review the response
pprint(city_weather)

{'data': [{'clouds': 100,
           'dew_point': -13.5,
           'dt': 1230768000,
           'feels_like': -18.44,
           'humidity': 83,
           'pressure': 1006,
           'snow': {'1h': 0.34},
           'sunrise': 1230734881,
           'sunset': 1230764775,
           'temp': -11.44,
           'weather': [{'description': 'light snow',
                        'icon': '13n',
                        'id': 600,
                        'main': 'Snow'}],
           'wind_deg': 159,
           'wind_speed': 6.72}],
 'lat': 49,
 'lon': -103.49,
 'timezone': 'America/Regina',
 'timezone_offset': -21600}
