In [1]:
#3 Observations
#1. There is a strong correlation between latitude and max temperature for cities in the northern hemisphere: the further north a city is, the lower its max temperature.
#2. There is a weak correlation between latitude and max temperature for cities in the southern hemisphere: the further south a city is, the lower its max temperature. In both hemispheres the warmest temperatures occur closer to the equator.
#3. In general there is evidence to support that weather is less extreme (or more comfortable for vacation) in areas closer to the equator (less wind, fewer clouds, warmer)

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy


# Output File (CSV)
# Relative path of the CSV output file
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (min, max) pairs in degrees: the full span of valid latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [3]:
## Generate Cities List
# Confirm the key was imported without echoing the secret itself: anything printed
# here is stored in the notebook's output and shared along with the file.
print("API key loaded" if weather_api_key else "API key missing - check api_keys.py")

[API key redacted]


In [4]:
# Ordered list of unique city names (later cells iterate over it as a list)
cities = []
# Companion set so the uniqueness test is a hash lookup instead of a scan of `cities`
seen_cities = set()

# Create a set of random lat and lng combinations spanning the configured ranges
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_value, lng_value in lat_lngs:
    city = citipy.nearest_city(lat_value, lng_value).city_name

    # Keep only the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

590

In [5]:
# Perform API Calls
# Perform a weather check on each city using a series of successive API calls.
# Include a print log of each city as it's being processed (with the city number and city name).

#need info for: city, Temp, cloudiness, humidity, windspeed, lat, long
# City        -- weather_json['name'] 
# Cloudiness  -- weather_json['clouds']['all']  
# Country     -- weather_json['sys']['country']   
# Date        -- weather_json['dt']
# Humidity    -- weather_json['main']['humidity']
# Lat         -- weather_json['coord']['lat']
# Lng         -- weather_json['coord']['lon']  
# Max Temp    -- weather_json['main']['temp_max']    
# Wind Speed  -- weather_json['wind']['speed']  

In [6]:
# Lists to hold weather data: one entry is appended per successfully retrieved city
city_name = []
cloudines = []  # (sic) spelling kept because later cells reference this exact name
country = []
date = []
humidity = []
lat = []
lon = []
max_temp = []
wind_speed = []
# Cities for which no usable record came back
not_found = []
# Record number shown in the retrieval log; starts at 1
loop = 1

# Unit system passed to the API (the plots label temperature in F and wind in MPH)
units = 'imperial'
# HTTPS so the API key carried in the query string is not sent in clear text
base_url = 'https://api.openweathermap.org/data/2.5/weather?'

In [None]:
# Iterate through list of cities previously generated
# Perform a weather check on each city using a series of successive API calls.
# Include a print log of each city as it's being processed (with the city number and city name)
print('Beginning Data Retrieval')
print('-----------------------------')

for x in cities:
    # Let requests assemble and percent-encode the query string, so characters such
    # as '&' or '#' inside a city name cannot alter the query.
    query_params = {'appid': weather_api_key, 'units': units, 'q': x}

    try:
        # The timeout keeps a single stalled connection from hanging the whole run
        weather_response = requests.get(base_url, params=query_params, timeout=10)
        weather_json = weather_response.json()

        # Read every field BEFORE touching the result lists: if any key is missing,
        # nothing has been appended yet and all lists stay the same length.
        record = (
            weather_json['name'],
            weather_json['clouds']['all'],
            weather_json['sys']['country'],
            weather_json['dt'],
            weather_json['main']['humidity'],
            weather_json['coord']['lat'],
            weather_json['coord']['lon'],
            weather_json['main']['temp_max'],
            weather_json['wind']['speed'],
        )
    except requests.exceptions.RequestException as request_error:
        # Only the exception type is logged: the exception text can embed the
        # request URL, which contains the API key.
        not_found.append(x)
        print(f'Request for city ({x}) failed ({type(request_error).__name__}). Skipping...')
        continue
    except (KeyError, ValueError):
        # KeyError: the response lacks the expected fields (city not found).
        # ValueError: the response body was not valid JSON.
        not_found.append(x)
        print(f'City ({x}) not found. Skipping...')
        continue

    # Commit the complete record, one value per result list
    result_lists = (city_name, cloudines, country, date, humidity,
                    lat, lon, max_temp, wind_speed)
    for result_list, value in zip(result_lists, record):
        result_list.append(value)

    print(f'Processing Record {loop} | {x}')
    # The record number advances only for cities that were actually stored
    loop += 1
    

Beginning Data Retrieval
-----------------------------
Processing Record 1 | luderitz
Processing Record 2 | yellowknife
Processing Record 3 | castro
Processing Record 4 | barrow
Processing Record 5 | zhigansk
Processing Record 6 | saskylakh
Processing Record 7 | ushuaia
City (saleaula) not found. Skipping...
Processing Record 8 | anadyr
Processing Record 9 | poya
Processing Record 10 | mataura
Processing Record 11 | buckeye
Processing Record 12 | kaa-khem
Processing Record 13 | puerto ayora
Processing Record 14 | nipawin
Processing Record 15 | flin flon
Processing Record 16 | buchanan
Processing Record 17 | rikitea
Processing Record 18 | hasaki
Processing Record 19 | albany
Processing Record 20 | jamestown
City (barentsburg) not found. Skipping...
Processing Record 21 | busselton
Processing Record 22 | bulawayo
City (taolanaro) not found. Skipping...
City (karamay) not found. Skipping...
Processing Record 23 | bandarbeyla
Processing Record 24 | pincher creek
Processing Record 25 | tukt

Processing Record 222 | karpogory
Processing Record 223 | los llanos de aridane
Processing Record 224 | nanortalik
Processing Record 225 | karacabey
Processing Record 226 | lavrentiya
Processing Record 227 | le port
City (juifang) not found. Skipping...
Processing Record 228 | haines junction
Processing Record 229 | llangefni
Processing Record 230 | saldanha
City (samalaeulu) not found. Skipping...
City (boatlaname) not found. Skipping...
Processing Record 231 | lleida
Processing Record 232 | strezhevoy
Processing Record 233 | avera
Processing Record 234 | la romana
Processing Record 235 | dunedin
Processing Record 236 | shaowu
Processing Record 237 | puerto el triunfo
Processing Record 238 | ketchikan
Processing Record 239 | rocha
Processing Record 240 | camacha
Processing Record 241 | urumqi
Processing Record 242 | christchurch
Processing Record 243 | vila velha
Processing Record 244 | sorland
Processing Record 245 | saint-pierre
Processing Record 246 | half moon bay
Processing Recor

In [None]:
# Some cities not found
print(f'{len(not_found)}/{len(cities)} cities were not found')

# Create new list excluding the missing information.
# A throwaway set is used for the membership test so that `cities` and `not_found`
# keep their list type; rebinding them to sets would make the retrieval cell fail
# on `not_found.append(...)` if it were run again.
missing_cities = set(not_found)
updated_cities = [candidate for candidate in cities if candidate not in missing_cities]

#should it be run again?

In [None]:
# Format every retrieval timestamp (seconds since the epoch, as taken by
# time.gmtime) as an MM/DD/YYYY string in UTC.
dates = [
    time.strftime('%m/%d/%Y', time.gmtime(timestamp))
    for timestamp in date
]

In [None]:
# Assemble the per-city result lists into one table, one column per list
weather_columns = {
    "City": city_name,
    'Cloudiness': cloudines,
    'Country': country,
    'Date': date,
    'Humidity (%)': humidity,
    'Latitude': lat,
    'Longitude': lon,
    'Max Temperature': max_temp,
    'Wind Speed (MPH)': wind_speed,
}
weather_df = pd.DataFrame(weather_columns)
weather_df

In [None]:
# Export the city data into a .csv.
# Use the path configured in `output_data_file` instead of repeating the literal,
# so the output location is defined in exactly one place.
weather_df.to_csv(output_data_file, encoding='utf-8', index=False)

In [None]:
# PLOTTING

In [None]:
# Scatter of max temperature against latitude for every retrieved city
x_latitude = weather_df['Latitude']
y_temp = weather_df['Max Temperature']

temp_axes = plt.gca()
temp_axes.scatter(x_latitude, y_temp, edgecolor='black', alpha=.8)
temp_axes.grid(alpha=.5)
temp_axes.set(
    title=f"City Latitude vs. Max Temperature {dates[0]}",
    xlabel="Latitude",
    ylabel="Max Temp (F)",
    # Pad the axis limits slightly beyond the data extremes
    xlim=(min(x_latitude) - 6, max(x_latitude) + 2),
    ylim=(min(y_temp) - 4, max(y_temp) + 4),
)

plt.savefig("output_data/lat_temp.png")

In [None]:
# The code is comparing the max temp of a city to the latitude the city is found at.
# South of the equator is to the left and north is to the right.
# The further the city is from the equator, the colder it is (equator=0)

In [None]:
# Scatter of humidity against latitude for every retrieved city
y_humidity = weather_df['Humidity (%)']

humidity_axes = plt.gca()
humidity_axes.scatter(x_latitude, y_humidity, edgecolor='black', alpha=.8)
humidity_axes.grid(alpha=.5)
humidity_axes.set(
    title=f"City Latitude vs. Humidity {dates[0]}",
    xlabel="Latitude",
    ylabel="Humidity (%)",
    # Pad the axis limits slightly beyond the data extremes
    xlim=(min(x_latitude) - 6, max(x_latitude) + 2),
    ylim=(min(y_humidity) - 4, max(y_humidity) + 4),
)

plt.savefig("output_data/lat_humidity.png")

In [None]:
# The code is comparing the % humidity of a city (on a given day) to the latitude the city is found at.
# Values in the top right would represent northern cities with high humidity (ex. Lensk, Russia)
# Values around lat=20 with low humidity may represent cities in the North American deserts
# This is a snapshot of a single day; because weather patterns change throughout the year, it may look completely different at another time (monsoon/dry seasons in different parts of the world)

In [None]:
# Scatter of cloudiness against latitude for every retrieved city
y_cloudiness = weather_df['Cloudiness']

cloud_axes = plt.gca()
cloud_axes.scatter(x_latitude, y_cloudiness, edgecolor='black', alpha=.8)
cloud_axes.grid(alpha=.5)
cloud_axes.set(
    title=f"City Latitude vs. Cloudiness {dates[0]}",
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
    # Pad the axis limits slightly beyond the data extremes
    xlim=(min(x_latitude) - 6, max(x_latitude) + 2),
    ylim=(min(y_cloudiness) - 4, max(y_cloudiness) + 4),
)

plt.savefig("output_data/lat_cloud.png")

In [None]:
# This is % of cloud coverage compared to the latitude.
# There is no distinct pattern, as each latitude seems to have an even spread of cloud coverage

In [None]:
# Scatter of wind speed against latitude for every retrieved city
y_wind = weather_df['Wind Speed (MPH)']

wind_axes = plt.gca()
wind_axes.scatter(x_latitude, y_wind, edgecolor='black', alpha=.8)
wind_axes.grid(alpha=.5)
wind_axes.set(
    title=f"City Latitude vs. Wind Speed {dates[0]}",
    xlabel="Latitude",
    ylabel="Wind Speed (MPH)",
    # Pad the axis limits slightly beyond the data extremes
    xlim=(min(x_latitude) - 2, max(x_latitude) + 2),
    ylim=(min(y_wind) - 2, max(y_wind) + 2),
)

plt.savefig("output_data/lat_wind.png")

In [None]:
# Wind Speed Observations
# wind speed (MPH) compared to latitude, shows even spread of speeds for cities north and south of equator. 
# There is a slight qualitative trend of cities further north of the equator having higher wind speeds than those south of the equator (these may be outliers)

In [None]:
# Split the weather table by hemisphere using the sign of the latitude:
#   northern -> latitude >= 0 (cities exactly on the equator land here)
#   southern -> latitude < 0
in_northern = weather_df['Latitude'].ge(0)
in_southern = weather_df['Latitude'].lt(0)
northern_df = weather_df.loc[in_northern]
southern_df = weather_df.loc[in_southern]

In [None]:
#Create function to plot each graph with line of best fit:
def plot_graph(x,y):
    """Scatter ``y`` against ``x``, overlay the least-squares line and report the fit.

    Draws on the current pyplot axes: the scatter points, a thin red regression
    line, and the line's equation as a red annotation. Also prints the Pearson
    r value with a strength label based on ``abs(r)``: strong above 0.7,
    moderate above 0.5, weak above 0.3, otherwise none.

    Parameters
    ----------
    x, y : array-like of numbers, equal length
        Paired observations. pandas Series, NumPy arrays and plain lists are
        all accepted; both are converted to float arrays internally.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If fewer than two points are given or the lengths differ.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.size < 2 or x_values.size != y_values.size:
        raise ValueError("plot_graph needs two equal-length inputs with at least two points each")

    plt.scatter(x_values, y_values)
    plt.xlim(x_values.min() - 3, x_values.max() + 2)
    plt.ylim(y_values.min() - 4, y_values.max() + 4)

    # Correlation: linregress already returns Pearson's r, so no second pass is needed
    slope, intercept, r_value, _p_value, _std_err = st.linregress(x_values, y_values)
    plt.plot(x_values, slope * x_values + intercept, "r-", linewidth=.5)

    # Choose the sign from the rounded intercept so the label reads "x - 3.2"
    # rather than "x + -3.2" (and never "- 0.0").
    rounded_intercept = round(intercept, 2)
    sign = "-" if rounded_intercept < 0 else "+"
    line_eq = f"y = {round(slope, 2)}x {sign} {abs(rounded_intercept)}"
    plt.annotate(line_eq, (x_values.min() + 10, y_values.min() + 20), fontsize=12, color="red")

    # Pearson correlation statement
    strength = abs(r_value)
    if strength > 0.7:
        print(f'The r value is {round(r_value,2)}, there is strong correlation between the city latitude and weather')
    elif strength > 0.5:
        print(f'The r value is {round(r_value,2)}, there is a moderate correlation between the city latitude and weather')
    elif strength > 0.3:
        print(f'The r value is {round(r_value,2)}, there is a weak correlation between the city latitude and weather')
    else:
        print(f'The r value is {round(r_value,2)}, there is no correlation between the city latitude and weather')
        

In [None]:
# Northern hemisphere: max temperature against latitude, with regression line
n_latitude = northern_df['Latitude']
n_temp = northern_df['Max Temperature']

plot_graph(n_latitude, n_temp)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Northern Latitude Cities vs. Max Temperature {dates[0]}",
    xlabel="Latitude",
    ylabel="Max Temp (F)",
)
plt.savefig("output_data/north_temp.png")

In [None]:
# Southern hemisphere: max temperature against latitude, with regression line
s_latitude = southern_df['Latitude']
s_temp = southern_df['Max Temperature']

plot_graph(s_latitude, s_temp)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Southern Latitude Cities vs. Max Temperature {dates[0]}",
    xlabel="Latitude",
    ylabel="Max Temp (F)",
)
plt.savefig("output_data/south_temp.png")

In [None]:
# Lat vs Temp Observations
# For all observations, there are more data points for cities located north of the equator (more major cities are north)
# Cities north of the equator have lower max temperatures than southern cities.
# There is a strong correlation: the further north a city is, the lower its max temperature
# There is a weak correlation: the further south a city is, the lower its max temperature (which confirms the observation of cities north of the equator having lower max temp)

# Higher max temperatures occur closer to the equator, and decrease the further you are from the equator (lat=0)

In [None]:
# Northern hemisphere: humidity against latitude, with regression line
n_latitude = northern_df['Latitude']
n_humidity = northern_df['Humidity (%)']

plot_graph(n_latitude, n_humidity)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Northern Latitude Cities vs. Humidity {dates[0]}",
    xlabel="Latitude",
    ylabel="Humidity (%)",
)
plt.savefig("output_data/north_humidity.png")

In [None]:
# Southern hemisphere: humidity against latitude, with regression line
s_latitude = southern_df['Latitude']
s_humidity = southern_df['Humidity (%)']

plot_graph(s_latitude, s_humidity)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Southern Latitude Cities vs. Humidity {dates[0]}",
    xlabel="Latitude",
    ylabel="Humidity (%)",
)
plt.savefig("output_data/south_humidity.png")

In [None]:
# Humidity observations
# This is only a single day, meaning there may be a dry/wet season that occurs for certain regions depending on the time of year and location (ex. monsoon season, hurricane season, drier summer/wet winter)
# There is a slightly denser cluster of high humidity in northern regions (lat=60-80) (observed in original graph)
# which may be a result of late winter/early spring weather patterns occurring there as of March 12, 2020
# The (weak) positive correlation (the further north a city, the more humid) confirms this qualitative observation

In [None]:
# Northern hemisphere: cloudiness against latitude, with regression line
n_latitude = northern_df['Latitude']
n_cloudiness = northern_df['Cloudiness']

plot_graph(n_latitude, n_cloudiness)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Northern City Latitude vs. Cloudiness {dates[0]}",
    xlabel='Latitude',
    ylabel="Cloudiness (%)",
)
plt.savefig("output_data/north_cloud.png")

In [None]:
# Southern hemisphere: cloudiness against latitude, with regression line
s_latitude = southern_df['Latitude']
s_cloudiness = southern_df['Cloudiness']

plot_graph(s_latitude, s_cloudiness)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Southern City Latitude vs. Cloudiness {dates[0]}",
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
)
plt.savefig("output_data/south_cloud.png")

In [None]:
# There is no correlation between latitude and cloudiness, which was observed in the original graph of both hemispheres.
# Once again this may change at different times of year; March 12, 2020 is not winter/summer in either hemisphere.
# Perhaps in July there would be a more distinct differentiation between the two hemispheres

In [None]:
# Northern hemisphere: wind speed against latitude, with regression line
n_latitude = northern_df['Latitude']
n_wind = northern_df['Wind Speed (MPH)']

plot_graph(n_latitude, n_wind)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Northern City Latitude vs. Wind Speed {dates[0]}",
    xlabel='Latitude',
    ylabel="Wind Speed (MPH)",
)
plt.savefig("output_data/north_wind.png")

In [None]:
# Southern hemisphere: wind speed against latitude, with regression line
s_latitude = southern_df['Latitude']
s_wind = southern_df['Wind Speed (MPH)']

plot_graph(s_latitude, s_wind)

# Label the axes plot_graph just drew on, then save the figure
plt.gca().set(
    title=f"Southern City Latitude vs. Wind Speed {dates[0]}",
    xlabel='Latitude',
    ylabel="Wind Speed (MPH)",
)
plt.savefig("output_data/south_wind.png")

In [None]:
# There is no correlation between wind speed and latitude
# As observed in the original (lat v wind speed) plot, there is a slight positive slope showing more northern cities are windier (as seen in the northern plot)
# And more southern cities are windier.
# Although the correlation coefficient is very low, it can be argued that it is less windy closer to the equator on March 13, 2020