In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced by any cell visible in this section — confirm it is still needed.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (low, high) bounds passed to np.random.uniform when drawing random coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

In [3]:
# Test openweatherAPI
# Smoke-test the current-weather endpoint with one known city before looping
# over the full city list.
city = 'san diego'

# HTTPS keeps the API key out of plaintext traffic.
target_url = 'https://api.openweathermap.org/data/2.5/weather'

# Passing the query as `params` lets requests URL-encode the city name
# (spaces, accents, '&', ...) instead of splicing it into the URL by hand.
query_params = {'q': city, 'appid': weather_api_key, 'units': 'imperial'}

# A timeout stops the cell from hanging forever if the service does not answer.
response = requests.get(target_url, params=query_params, timeout=10).json()
print(response)

{'coord': {'lon': -117.16, 'lat': 32.72}, 'weather': [{'id': 804, 'main': 'Clouds', 'description': 'overcast clouds', 'icon': '04n'}], 'base': 'stations', 'main': {'temp': 55.38, 'feels_like': 51.48, 'temp_min': 52, 'temp_max': 60.8, 'pressure': 1020, 'humidity': 67}, 'visibility': 10000, 'wind': {'speed': 4.7, 'deg': 160}, 'clouds': {'all': 90}, 'dt': 1605357593, 'sys': {'type': 1, 'id': 5771, 'country': 'US', 'sunrise': 1605363509, 'sunset': 1605401286}, 'timezone': -28800, 'id': 5391811, 'name': 'San Diego', 'cod': 200}


In [4]:
# Build the list of unique candidate cities from random coordinates.
# NOTE(review): no RNG seed is set in the visible cells, so every run samples
# a different set of cities — consider seeding if reproducibility matters.
cities = []
# Set mirror of `cities` for O(1) duplicate checks; the list keeps first-seen order.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

584

In [5]:
#API calls
# First pass over the candidate cities: keep only those for which the API
# returns a 'name', storing the name exactly as the API reports it.

# HTTPS keeps the API key out of plaintext traffic.
target_url = 'https://api.openweathermap.org/data/2.5/weather'

# Collect names in a plain list and build the frame once at the end, rather
# than enlarging a DataFrame one row at a time with .loc.
found_names = []

# city_number counts every attempt (hits and misses), matching the log lines.
for city_number, city in enumerate(cities, start=1):
    # `params` lets requests URL-encode the city name safely.
    query_params = {'q': city, 'appid': weather_api_key, 'units': 'imperial'}
    try:
        # Timeout so one stalled connection cannot hang the whole run.
        response = requests.get(target_url, params=query_params, timeout=10).json()
        found_names.append(response['name'])
        print(f'Processing Record {city_number} of Set 1 | {city}')
    except (KeyError, ValueError, requests.exceptions.RequestException):
        # KeyError: the response has no 'name' (presumably an unknown city).
        # ValueError / RequestException: unparsable body or network failure.
        # Anything else is a genuine bug and is allowed to propagate.
        print(f'No data for {city} found!')
    # Pause between requests (presumably to stay within the API rate limit).
    time.sleep(1)

# Number of cities successfully resolved.
counter = len(found_names)

# Explicit object dtype keeps the 'City' column textual even when no city was found.
cities_pd = pd.DataFrame({'City': pd.Series(found_names, dtype=object)})

Processing Record 1 of Set 1 | okmulgee
Processing Record 2 of Set 1 | punta arenas
Processing Record 3 of Set 1 | sault sainte marie
Processing Record 4 of Set 1 | puerto ayora
Processing Record 5 of Set 1 | rovaniemi
Processing Record 6 of Set 1 | margate
Processing Record 7 of Set 1 | bluff
No data for illoqqortoormiut found!
Processing Record 9 of Set 1 | hermanus
Processing Record 10 of Set 1 | yellowknife
Processing Record 11 of Set 1 | port alfred
Processing Record 12 of Set 1 | ushuaia
Processing Record 13 of Set 1 | hobyo
Processing Record 14 of Set 1 | lanzhou
Processing Record 15 of Set 1 | pasni
Processing Record 16 of Set 1 | kapaa
Processing Record 17 of Set 1 | upernavik
Processing Record 18 of Set 1 | hobart
Processing Record 19 of Set 1 | salamiyah
Processing Record 20 of Set 1 | geraldton
Processing Record 21 of Set 1 | mazagao
Processing Record 22 of Set 1 | medea
Processing Record 23 of Set 1 | tiksi
Processing Record 24 of Set 1 | busselton
No data for amderma foun

Processing Record 202 of Set 1 | elmira
Processing Record 203 of Set 1 | norman wells
Processing Record 204 of Set 1 | aktau
Processing Record 205 of Set 1 | thinadhoo
Processing Record 206 of Set 1 | belmonte
Processing Record 207 of Set 1 | thayetmyo
Processing Record 208 of Set 1 | kavieng
Processing Record 209 of Set 1 | deputatskiy
Processing Record 210 of Set 1 | port blair
No data for sindand found!
Processing Record 212 of Set 1 | jalu
No data for barentsburg found!
Processing Record 214 of Set 1 | leningradskiy
Processing Record 215 of Set 1 | qasigiannguit
Processing Record 216 of Set 1 | saint-augustin
Processing Record 217 of Set 1 | esperance
Processing Record 218 of Set 1 | athabasca
Processing Record 219 of Set 1 | palmer
Processing Record 220 of Set 1 | shimoda
Processing Record 221 of Set 1 | severo-kurilsk
Processing Record 222 of Set 1 | nanakuli
Processing Record 223 of Set 1 | berestechko
Processing Record 224 of Set 1 | yulara
Processing Record 225 of Set 1 | anad

Processing Record 400 of Set 1 | talcahuano
Processing Record 401 of Set 1 | adrar
Processing Record 402 of Set 1 | rawlins
Processing Record 403 of Set 1 | ulladulla
Processing Record 404 of Set 1 | nizwa
Processing Record 405 of Set 1 | jacqueville
Processing Record 406 of Set 1 | tautira
Processing Record 407 of Set 1 | ouadda
Processing Record 408 of Set 1 | zabol
Processing Record 409 of Set 1 | kristinehamn
Processing Record 410 of Set 1 | bemidji
Processing Record 411 of Set 1 | salalah
Processing Record 412 of Set 1 | aswan
Processing Record 413 of Set 1 | portland
No data for podbelsk found!
Processing Record 415 of Set 1 | acandi
Processing Record 416 of Set 1 | uporovo
Processing Record 417 of Set 1 | toora-khem
Processing Record 418 of Set 1 | taree
Processing Record 419 of Set 1 | kothapet
Processing Record 420 of Set 1 | copperas cove
Processing Record 421 of Set 1 | kaeo
Processing Record 422 of Set 1 | zhigansk
Processing Record 423 of Set 1 | sept-iles
Processing Recor

In [6]:
# Preview the first rows to confirm the API-reported city names were stored
cities_pd.head()


Unnamed: 0,City
0,Okmulgee
1,Punta Arenas
2,Sault Ste. Marie
3,Puerto Ayora
4,Rovaniemi


In [7]:
# Append the weather attribute columns as blank placeholders, in display order
placeholder_columns = [
    'Lat',
    'Lng',
    'Max Temp',
    'Humidity',
    'Cloudiness',
    'Wind Speed',
    'Country',
    'Date',
]
for column_name in placeholder_columns:
    # Every new column starts as an empty string for all rows.
    cities_pd[column_name] = ''
cities_pd.head()


Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Okmulgee,,,,,,,,
1,Punta Arenas,,,,,,,,
2,Sault Ste. Marie,,,,,,,,
3,Puerto Ayora,,,,,,,,
4,Rovaniemi,,,,,,,,


In [8]:
# populate the added columns
# Second pass: query each stored city again and copy the weather readings
# into the placeholder columns.

# HTTPS keeps the API key out of plaintext traffic.
target_url = 'https://api.openweathermap.org/data/2.5/weather'

# Snapshot the (index, city) pairs so the frame can be written to while looping.
for index, city in list(cities_pd['City'].items()):
    # `params` lets requests URL-encode names containing spaces or punctuation.
    query_params = {'q': city, 'appid': weather_api_key, 'units': 'imperial'}
    try:
        # Timeout so one stalled connection cannot hang the whole run.
        response = requests.get(target_url, params=query_params, timeout=10).json()
        # Read every field before writing anything, so a missing key leaves
        # the row untouched instead of half-filled.
        record = {
            'Lat': response['coord']['lat'],
            'Lng': response['coord']['lon'],
            'Max Temp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'Wind Speed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'Date': response['dt'],
        }
    except (KeyError, ValueError, requests.exceptions.RequestException):
        # KeyError: an expected field is absent from the response.
        # ValueError / RequestException: unparsable body or network failure.
        print('shucks... no luck')
    else:
        for column_name, value in record.items():
            cities_pd.loc[index, column_name] = value
    # Pause between requests (presumably to stay within the API rate limit).
    time.sleep(1)

In [10]:
# Convert data in columns to floats
numeric_columns = ['Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Date']
for column_name in numeric_columns:
    # errors='coerce' turns blank placeholders ('' left where a lookup failed)
    # into NaN; a plain astype(float) raises ValueError on an empty string.
    # The trailing astype(float) keeps the dtype float64 even when every value
    # in the column is an integer.
    cities_pd[column_name] = pd.to_numeric(cities_pd[column_name], errors='coerce').astype(float)


cities_pd.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Okmulgee,35.62,-95.96,57.2,100.0,90.0,8.05,US,1605358000.0
1,Punta Arenas,-53.15,-70.92,41.0,63.0,75.0,20.8,CL,1605359000.0
2,Sault Ste. Marie,46.52,-84.33,30.2,74.0,90.0,4.7,CA,1605359000.0
3,Puerto Ayora,-0.74,-90.35,70.0,85.0,98.0,5.01,EC,1605359000.0
4,Rovaniemi,66.5,25.72,33.8,100.0,90.0,9.17,FI,1605359000.0


In [11]:
# Use describe to check for irregularities (such as humidity exceeding 100%)
# The summary covers the numeric columns; compare each min/max against its plausible range.
cities_pd.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,543.0,543.0,543.0,543.0,543.0,543.0,543.0
mean,19.595543,19.344991,58.59639,72.127072,51.016575,8.082762,1605359000.0
std,32.736671,90.987349,25.692047,20.861087,41.283621,5.186658,193.707
min,-54.8,-175.2,-22.0,9.0,0.0,0.51,1605358000.0
25%,-6.27,-62.075,41.0,62.0,1.0,4.195,1605359000.0
50%,22.93,22.4,66.2,77.0,59.0,6.93,1605359000.0
75%,45.57,100.66,79.215,88.0,90.0,11.065,1605359000.0
max,78.22,179.32,97.36,100.0,100.0,28.86,1605359000.0


In [13]:

# Save dataframe to csv
# index=False leaves the row index out of the file; mode='w' overwrites any existing file at this path.
cities_pd.to_csv('../city_weather_data.csv', index=False, mode='w')

In [14]:
# Inspect the data and remove the cities where the humidity > 100%.
# Skipping this step as there are no cities with humidity > 100%.



In [16]:
# Load data from csv (no need to re-run API calls)
# Reads the same path the save cell writes to, so the data is available without repeating the API loops.
df = pd.read_csv('../city_weather_data.csv')
df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Okmulgee,35.62,-95.96,57.2,100.0,90.0,8.05,US,1605358000.0
1,Punta Arenas,-53.15,-70.92,41.0,63.0,75.0,20.8,CL,1605359000.0
2,Sault Ste. Marie,46.52,-84.33,30.2,74.0,90.0,4.7,CA,1605359000.0
3,Puerto Ayora,-0.74,-90.35,70.0,85.0,98.0,5.01,EC,1605359000.0
4,Rovaniemi,66.5,25.72,33.8,100.0,90.0,9.17,FI,1605359000.0
