In [17]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import time
from citipy import citipy

# Range of latitudes and longitudes
lat_range = (64, 20)
lng_range = (-154, -65)

# Import API key
from api_keys import weather_key

In [18]:
weather_data_df = pd.read_csv("data/tornadoes.csv")
weather_data_df.head()

Unnamed: 0,EVENT_ID,CZ_NAME_STR,BEGIN_LOCATION,BEGIN_DATE,BEGIN_TIME,EVENT_TYPE,MAGNITUDE,TOR_F_SCALE,DEATHS_DIRECT,INJURIES_DIRECT,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EVENT_NARRATIVE,EPISODE_NARRATIVE,ABSOLUTE_ROWNUMBER
0,866322,CARTERET CO.,TRUTTNEYS LNDG,11/1/2019,15,Tornado,,EF0,0,0,...,1,NNW,TRUTTNEYS LNDG,34.7394,-77.1094,34.7411,-77.1053,A brief tornado occurred near the intersection...,A strong cold front moved across Eastern North...,1
1,859389,MARTIN CO.,WAVELAND,11/5/2019,1620,Tornado,,EF0,0,0,...,2,ENE,WAVELAND,27.214,-80.1729,27.2123,-80.1766,Martin County Beach Patrol reported a waterspo...,A strong thunderstorm produced a waterspout ju...,2
2,864077,MOBILE CO.,PRICHARD,11/7/2019,1335,Tornado,,EF0,0,0,...,0,SSE,PRICHARD,30.7248,-88.0548,30.726,-88.0486,The storm survey team surveyed the areas aroun...,Thunderstorms moved across the area during the...,3
3,857862,FRANKLIN PAR.,BASKIN,11/26/2019,1803,Tornado,,EF2,1,1,...,4,NE,BASKIN,32.2351,-91.7783,32.3184,-91.7095,This tornado began on Roger Miller Road as it ...,"Warm, moist air was in place across northeast ...",4
4,857944,CLAIBORNE PAR.,MAHAN,11/26/2019,1820,Tornado,,EF1,0,0,...,2,NNE,MAHAN,32.9017,-93.0423,32.9076,-93.0343,An EF-1 tornado with estimated maximum winds a...,A deep southwesterly flow aloft ahead of a lar...,5


In [19]:
gmaps.configure(api_key = weather_key)

In [20]:
tornado_df = weather_data_df.loc[:,["BEGIN_LOCATION","BEGIN_LAT", "BEGIN_DATE", "EVENT_TYPE", "TOR_F_SCALE", "DEATHS_DIRECT", "END_LOCATION"]]
tornado_df

Unnamed: 0,BEGIN_LOCATION,BEGIN_LAT,BEGIN_DATE,EVENT_TYPE,TOR_F_SCALE,DEATHS_DIRECT,END_LOCATION
0,TRUTTNEYS LNDG,34.7394,11/1/2019,Tornado,EF0,0,TRUTTNEYS LNDG
1,WAVELAND,27.2140,11/5/2019,Tornado,EF0,0,WAVELAND
2,PRICHARD,30.7248,11/7/2019,Tornado,EF0,0,PRICHARD
3,BASKIN,32.2351,11/26/2019,Tornado,EF2,1,BASKIN
4,MAHAN,32.9017,11/26/2019,Tornado,EF1,0,MAHAN
...,...,...,...,...,...,...,...
495,LAMONT,32.9896,4/13/2020,Tornado,EF0,0,LAMONT
496,RED OAK,33.6288,4/13/2020,Tornado,EF0,0,RED OAK
497,COLLIER,33.0298,4/13/2020,Tornado,EF3,0,BRIGHTON MILLS
498,ROBERTS XRDS,33.4560,4/13/2020,Tornado,EF0,0,ROBERTS XRDS


In [21]:
tornado_df = tornado_df.rename(columns={"BEGIN_LAT":"Lat"})
tornado_df.head()

Unnamed: 0,BEGIN_LOCATION,Lat,BEGIN_DATE,EVENT_TYPE,TOR_F_SCALE,DEATHS_DIRECT,END_LOCATION
0,TRUTTNEYS LNDG,34.7394,11/1/2019,Tornado,EF0,0,TRUTTNEYS LNDG
1,WAVELAND,27.214,11/5/2019,Tornado,EF0,0,WAVELAND
2,PRICHARD,30.7248,11/7/2019,Tornado,EF0,0,PRICHARD
3,BASKIN,32.2351,11/26/2019,Tornado,EF2,1,BASKIN
4,MAHAN,32.9017,11/26/2019,Tornado,EF1,0,MAHAN


In [36]:
tornado_df = tornado_df.rename(columns={"BEGIN_LOCATION":"City"})
tornado_df.head()

Unnamed: 0,City,Lat,BEGIN_DATE,EVENT_TYPE,TOR_F_SCALE,DEATHS_DIRECT,END_LOCATION
0,TRUTTNEYS LNDG,34.7394,11/1/2019,Tornado,EF0,0,TRUTTNEYS LNDG
1,WAVELAND,27.214,11/5/2019,Tornado,EF0,0,WAVELAND
2,PRICHARD,30.7248,11/7/2019,Tornado,EF0,0,PRICHARD
3,BASKIN,32.2351,11/26/2019,Tornado,EF2,1,BASKIN
4,MAHAN,32.9017,11/26/2019,Tornado,EF1,0,MAHAN


In [22]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

510

In [23]:
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=04c41275fea3e8291170fd88095dde0f"
# List of city data
city_data = []
# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")
# Create counters
record_count = 1
set_count = 1
# Loop through all the cities in our list
for x, city in enumerate(cities):
    # Group cities in sets of 50 for logging purposes
    if (x % 50 == 0 and x >= 50):
        set_count += 1
        record_count = 0
    # Create endpoint URL with each city
    city_url = url + "&q=" + city
    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))
    # Add 1 to the record count
    record_count += 1
    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(city_url).json()
        # Parse out the max temp, humidity, and cloudiness
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]
        # Append the City information into city_data list
        city_data.append({"City": city, 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})
    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | flin flon
Processing Record 2 of Set 1 | high level
Processing Record 3 of Set 1 | constitucion
Processing Record 4 of Set 1 | attawapiskat
City not found. Skipping...
Processing Record 5 of Set 1 | hilo
Processing Record 6 of Set 1 | sitka
Processing Record 7 of Set 1 | guymon
Processing Record 8 of Set 1 | haines junction
Processing Record 9 of Set 1 | hay river
Processing Record 10 of Set 1 | tullahoma
Processing Record 11 of Set 1 | lampazos de naranjo
Processing Record 12 of Set 1 | prince rupert
Processing Record 13 of Set 1 | galveston
Processing Record 14 of Set 1 | huron
Processing Record 15 of Set 1 | fairbanks
Processing Record 16 of Set 1 | monclova
Processing Record 17 of Set 1 | whitchurch-stouffville
City not found. Skipping...
Processing Record 18 of Set 1 | pitiquito
Processing Record 19 of Set 1 | kodiak
Processing Record 20 of Set 1 | cloquet
Processing Record 21 of Set 1 | ham

Processing Record 43 of Set 4 | gravelbourg
Processing Record 44 of Set 4 | danville
Processing Record 45 of Set 4 | bloomfield
Processing Record 46 of Set 4 | altamont
Processing Record 47 of Set 4 | gillette
Processing Record 48 of Set 4 | rutland
Processing Record 49 of Set 4 | two hills
Processing Record 0 of Set 5 | torrington
Processing Record 1 of Set 5 | statesboro
Processing Record 2 of Set 5 | mazatlan
Processing Record 3 of Set 5 | gimli
Processing Record 4 of Set 5 | puerto penasco
Processing Record 5 of Set 5 | whitehorse
Processing Record 6 of Set 5 | wasilla
Processing Record 7 of Set 5 | humboldt
Processing Record 8 of Set 5 | longlac
City not found. Skipping...
Processing Record 9 of Set 5 | forestville
Processing Record 10 of Set 5 | new ulm
Processing Record 11 of Set 5 | winnemucca
Processing Record 12 of Set 5 | mayo
Processing Record 13 of Set 5 | marsh harbour
Processing Record 14 of Set 5 | prince george
Processing Record 15 of Set 5 | ahome
Processing Record 16

Processing Record 36 of Set 8 | olean
Processing Record 37 of Set 8 | livingston
Processing Record 38 of Set 8 | bristol
Processing Record 39 of Set 8 | beecher
Processing Record 40 of Set 8 | bedford
Processing Record 41 of Set 8 | east wenatchee bench
Processing Record 42 of Set 8 | lumberton
Processing Record 43 of Set 8 | aberdeen
Processing Record 44 of Set 8 | cumberland
Processing Record 45 of Set 8 | hutchinson
Processing Record 46 of Set 8 | wawa
Processing Record 47 of Set 8 | redwater
Processing Record 48 of Set 8 | tlacote el bajo
Processing Record 49 of Set 8 | winfield
Processing Record 0 of Set 9 | ames
Processing Record 1 of Set 9 | rodeo
Processing Record 2 of Set 9 | espanola
Processing Record 3 of Set 9 | grimshaw
Processing Record 4 of Set 9 | brenham
Processing Record 5 of Set 9 | millet
Processing Record 6 of Set 9 | elko
Processing Record 7 of Set 9 | warsaw
Processing Record 8 of Set 9 | san fernando
Processing Record 9 of Set 9 | amos
Processing Record 10 of Se

In [24]:
city_data_pd = pd.DataFrame(city_data)
city_data_pd.count()


City          500
Lat           500
Lng           500
Max Temp      500
Humidity      500
Cloudiness    500
Wind Speed    500
Country       500
Date          500
dtype: int64

In [25]:
city_data_pd.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,flin flon,54.7682,-101.865,30.2,55,40,10.36,CA,1615577231
1,high level,58.5169,-117.136,30.2,51,75,12.66,CA,1615577240
2,constitucion,-35.3333,-72.4167,67.62,57,7,8.72,CL,1615577623
3,hilo,19.7297,-155.09,75.2,69,20,4.61,US,1615576979
4,sitka,57.0531,-135.33,32.0,74,40,6.91,US,1615577230


In [26]:
america_city = city_data_pd[(city_data_pd["Country"]=="US")]
america_city

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
3,hilo,19.7297,-155.0900,75.20,69,20,4.61,US,1615576979
4,sitka,57.0531,-135.3300,32.00,74,40,6.91,US,1615577230
5,guymon,36.6828,-101.4816,42.80,93,90,11.50,US,1615577169
8,tullahoma,35.3620,-86.2094,72.00,60,90,8.05,US,1615577497
11,galveston,29.3669,-94.9669,77.00,73,40,14.97,US,1615577625
...,...,...,...,...,...,...,...,...,...
494,fulton,33.7668,-84.4499,77.00,41,40,6.91,US,1615577150
495,madawaska,47.3561,-68.3288,42.80,36,61,16.11,US,1615577722
497,spencer,43.1414,-95.1444,48.20,49,1,8.05,US,1615577264
498,san benito,36.6166,-121.0844,57.99,87,5,1.99,US,1615577723


In [27]:
america_city.to_csv("scrapefrom.csv", index=False)

In [28]:
america_city.loc[america_city["City"].isin(tornado_df["BEGIN_LOCATION"].apply(str.lower))]

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
13,fairbanks,64.8378,-147.7164,-11.0,76,1,1.54,US,1615577468
29,merrill,45.1805,-89.6835,35.6,27,1,13.8,US,1615577272
79,anchorage,61.2181,-149.9003,10.4,51,1,4.61,US,1615577079
84,fairview,40.8126,-73.999,69.8,17,1,3.44,US,1615577046
106,barstow,34.8986,-117.0228,51.8,43,40,7.78,US,1615577333
218,liberty,31.8335,-81.5004,80.01,38,1,3.44,US,1615577662
296,greenville,35.6127,-77.3663,81.0,36,1,8.05,US,1615577675
357,pineville,31.3224,-92.4343,81.0,59,89,3.0,US,1615577689
376,hopewell,37.3043,-77.2872,69.8,52,90,10.36,US,1615577515
404,jackson,42.4165,-122.8345,51.01,61,1,1.9,US,1615577175


In [29]:
america_city["Lat"].nunique()

294

In [30]:
america_city["Lng"].nunique()

294

In [37]:
merge_df = pd.merge(america_city, tornado_df, on="City")
merge_df

Unnamed: 0,City,Lat_x,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date,Lat_y,BEGIN_DATE,EVENT_TYPE,TOR_F_SCALE,DEATHS_DIRECT,END_LOCATION
