In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from datetime import datetime
from scipy.stats import linregress
import scipy.stats as st

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

602

In [None]:
# URL for GET requests to retrieve city data
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# List for holding reponse information
lon = []
temp = []
temp_max = []
humidity = []
wind_speed = []
lat = []
date = []
country = []
cloudiness = []

# Loop through the list of cities and request for data on each
print("Beginning Data Retrieval")
print("-------------------------------------")
count = 0
set = 1
for index, city in enumerate(cities):
    count = count + 1
    # To avoid api call rate limits, get city weather data in sets of 50 cities,
    # with 5 seconds sleep time, and then continue
    if count == 51:
        count = 1
        set = set + 1
        time.sleep(5)
    print(f"Processing Record {count} of Set {set} | {city}")
    try:
        response = requests.get(query_url + city).json()
        lat.append(response['coord']['lat'])
        lon.append(response['coord']['lon'])
        temp.append(response['main']['temp'])
        temp_max.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        wind_speed.append(response['wind']['speed'])
        date.append(response['dt'])
        country.append(response['sys']['country'])
        cloudiness.append(response['clouds']['all'])
    except KeyError:
        print("City not found. Skipping...")
        lat.append(np.nan)
        lon.append(np.nan)
        temp.append(np.nan)
        temp_max.append(np.nan)
        humidity.append(np.nan)
        wind_speed.append(np.nan)
        date.append(np.nan)
        country.append(np.nan)
        cloudiness.append(np.nan)
print("-------------------------------------")
print("Data Retrieval Complete")
print("-------------------------------------")


Beginning Data Retrieval
-------------------------------------
Processing Record 1 of Set 1 | comodoro rivadavia
City not found. Skipping...
Processing Record 2 of Set 1 | qaqortoq
City not found. Skipping...
Processing Record 3 of Set 1 | ushuaia
City not found. Skipping...
Processing Record 4 of Set 1 | kapa'a
City not found. Skipping...
Processing Record 5 of Set 1 | yagry
City not found. Skipping...
Processing Record 6 of Set 1 | mbeni
City not found. Skipping...
Processing Record 7 of Set 1 | utrik
City not found. Skipping...
Processing Record 8 of Set 1 | ciudad acuna
City not found. Skipping...
Processing Record 9 of Set 1 | kavaratti
City not found. Skipping...
Processing Record 10 of Set 1 | troitsko-pechorsk
City not found. Skipping...
Processing Record 11 of Set 1 | puerto ayora
City not found. Skipping...
Processing Record 12 of Set 1 | ola
City not found. Skipping...
Processing Record 13 of Set 1 | hamilton
City not found. Skipping...
Processing Record 14 of Set 1 | igarka

City not found. Skipping...
Processing Record 19 of Set 3 | jayapura
City not found. Skipping...
Processing Record 20 of Set 3 | faya-largeau
City not found. Skipping...
Processing Record 21 of Set 3 | celestun
City not found. Skipping...
Processing Record 22 of Set 3 | pingliang
City not found. Skipping...
Processing Record 23 of Set 3 | gumdag
City not found. Skipping...
Processing Record 24 of Set 3 | labuan bajo
City not found. Skipping...
Processing Record 25 of Set 3 | vila franca do campo
City not found. Skipping...
Processing Record 26 of Set 3 | lupane
City not found. Skipping...
Processing Record 27 of Set 3 | wailua homesteads
City not found. Skipping...
Processing Record 28 of Set 3 | sultanah
City not found. Skipping...
Processing Record 29 of Set 3 | suva
City not found. Skipping...
Processing Record 30 of Set 3 | mao
City not found. Skipping...
Processing Record 31 of Set 3 | jamestown
City not found. Skipping...
Processing Record 32 of Set 3 | brookings
City not found. 

City not found. Skipping...
Processing Record 38 of Set 5 | kodiak
City not found. Skipping...
Processing Record 39 of Set 5 | kalawit
City not found. Skipping...
Processing Record 40 of Set 5 | trapani
City not found. Skipping...
Processing Record 41 of Set 5 | namtsy
City not found. Skipping...
Processing Record 42 of Set 5 | zeya
City not found. Skipping...
Processing Record 43 of Set 5 | sao joao da barra
City not found. Skipping...
Processing Record 44 of Set 5 | sheopur
City not found. Skipping...
Processing Record 45 of Set 5 | xinyuan
City not found. Skipping...
Processing Record 46 of Set 5 | merizo village
City not found. Skipping...
Processing Record 47 of Set 5 | nanakuli
City not found. Skipping...
Processing Record 48 of Set 5 | ewo
City not found. Skipping...
Processing Record 49 of Set 5 | st. john's
City not found. Skipping...
Processing Record 50 of Set 5 | cueibet
City not found. Skipping...
Processing Record 1 of Set 6 | chonchi
City not found. Skipping...
Processin

City not found. Skipping...
Processing Record 7 of Set 8 | kermit
City not found. Skipping...
Processing Record 8 of Set 8 | wotje
City not found. Skipping...
Processing Record 9 of Set 8 | moanda
City not found. Skipping...
Processing Record 10 of Set 8 | labytnangi
City not found. Skipping...
Processing Record 11 of Set 8 | shibirghan
City not found. Skipping...
Processing Record 12 of Set 8 | shalqar
City not found. Skipping...
Processing Record 13 of Set 8 | slawharad
City not found. Skipping...
Processing Record 14 of Set 8 | pala
City not found. Skipping...
Processing Record 15 of Set 8 | sidqabad
City not found. Skipping...
Processing Record 16 of Set 8 | aioun
City not found. Skipping...
Processing Record 17 of Set 8 | tevriz
City not found. Skipping...
Processing Record 18 of Set 8 | shenkursk
City not found. Skipping...
Processing Record 19 of Set 8 | abhayapuri
City not found. Skipping...
Processing Record 20 of Set 8 | saint-joseph
City not found. Skipping...
Processing Rec

# Convert raw data to dataframe

In [None]:
cities_df = pd.DataFrame({
    "City": cities,
    "Lat": lat,
    "Lng": lon,
    "Max Temp": temp_max,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Country": country,
    "Date": date,
          })

# Drop any cities that were skipped because they could not return any response from OpenWeatherMap API.
cities_df = cities_df.dropna(how="any")

# Export the city data into a .csv file.
cities_df.to_csv("./output_data/cities.csv", index=False)

# Display the DataFrame
cities_df

In [None]:
cities_df["Humidity"].describe()

In [None]:
humidity_101 = cities_df[(cities_df["Humidity"] > 100)].index
humidity_101

In [None]:
clean_city_data = cities_df.drop(humidity_101, inplace=False)
clean_city_data

# Create the Scatter Plots Requested
Latitude Vs. Temperature

In [None]:
date_now = datetime.date(datetime.now())

# Create a scatter plot for latitude vs max temperature.
x_values = clean_city_data["Lat"]
y_values = clean_city_data["Max Temp"]

fig1, ax1 = plt.subplots(figsize=(11,8))
plt.scatter(x_values, y_values, edgecolor="black", linewidth=1, marker="o", alpha=0.8)
plt.title(f"City Latitude vs Max Temperature {date_now}")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid()

# Save the figure
plt.savefig("./output_data/latitude_vs_max_temp.png", bbox_inches="tight")
plt.show()

# Latitude vs Humidity

In [None]:
x_values = clean_city_data["Lat"]
y_values = clean_city_data["Humidity"]

fig1, ax1 = plt.subplots(figsize=(11, 8))
plt.scatter(x_values, y_values, edgecolor="black", linewidth=1, marker="o", alpha=0.8)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs Humidity {date_now}")
plt.grid()

# Save the figure
plt.savefig("./output_data/latitude_vs_humidity.png", bbox_inches="tight")
plt.show()

# Latitude vs Cloudiness

In [None]:
# Create a scatter plot for latitude vs cloudiness.
x_values = clean_city_data["Lat"]
y_values = clean_city_data["Cloudiness"]

fig1, ax1 = plt.subplots(figsize=(10,8))
markersize=12
plt.scatter(x_values, y_values, edgecolor="black", linewidth=1, marker="o", alpha=0.8)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f"City Latitude vs Cloudiness {date_now}")
plt.grid()

# Save the figure
plt.savefig("./output_data/latitude_vs_cloudiness.png", bbox_inches="tight")
plt.show()

# Latitude vs. Wind Speed Plot

In [None]:
# Create a scatter plot for latitude vs wind speed.
x_values = clean_city_data["Lat"]
y_values = clean_city_data["Wind Speed"]

fig1, ax1 = plt.subplots(figsize=(10,8))
markersize=12
plt.scatter(x_values, y_values, edgecolor="black", linewidth=1, marker="o", alpha=0.8)

plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"City Latitude vs Wind Speed {date_now}")
plt.grid()

# Save the figure
plt.savefig("./output_data/latitude_vs_wind_speed.png", bbox_inches="tight")
plt.show()

# Linear Regression

In [None]:
# Create a function to create Linear Regression plots for remaining activities
def plot_linear_regression(x_values, y_values, x_label, y_label, hemisphere, text_coordinates, ylim=None):
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

    # Get regression values
    regress_values = x_values * slope + intercept
    
    # Create line equation string
    line_eq = "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))
    
    # Generate plots     
    fig1, ax1 = plt.subplots(figsize=(12,8))
    plt.scatter(x_values, y_values, edgecolor="black", linewidth=1, marker="o", alpha=0.8)
    plt.plot(x_values,regress_values,"r-")
    date_now = datetime.date(datetime.now())
    plt.title(f"{hemisphere} Hemisphere - {x_label} vs {y_label} {date_now}",fontsize = 15)
    plt.xlabel(x_label,fontsize=14)
    plt.ylabel(y_label,fontsize=14)
    if ylim is not None:
        plt.ylim(0, ylim)
    plt.annotate(line_eq, text_coordinates, fontsize=20, color="red")
    
    # Print r square value
    print(f"The r-squared is: {rvalue**2}")
    correlation = st.pearsonr(x_values,y_values)
    print(f"The correlation between both factors is {round(correlation[0],2)}")

In [None]:
# Create Northern and Southern Hemisphere DataFrames
northern_hemi_weather_df = clean_city_data.loc[clean_city_data["Lat"] >= 0]

northern_hemi_weather_df.head()
#southern_hemi_weather_df = clean_city_data.loc[clean_city_data["Lat"] < 0]

In [None]:
southern_hemi_weather_df = clean_city_data.loc[clean_city_data["Lat"] < 0]
southern_hemi_weather_df.head()

In [None]:
# Create a scatter plot for latitude vs max temp (northern hemisphere)
x_values = northern_hemi_weather_df["Lat"]
y_values = northern_hemi_weather_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Latitude", "Max Temp (F)", "Northern", (10, 10))

# Save the figure
plt.savefig("./output_data/northern_hem_linear_lat_vs_max_temp.png", bbox_inches="tight")
plt.show()

# Southerm hemisphere - Lat vs Max Temp

In [None]:
# Create a scatter plot for latitude vs cloudiness (southern hemisphere)
x_values = southern_hemi_weather_df["Lat"]
y_values = southern_hemi_weather_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Latitude", "Max Temp (F)", "Southern", (-52, 75))

# Save the figure
plt.savefig("./output_data/southern_hem_linear_lat_vs_max_temp.png", bbox_inches="tight")
plt.show()

*Discussion about the linear relationship: The correlation between latitude and maximum temperature is strong for both the hemispheres. It is higher for southern hemisphere (0.82), indicating that as we move away from the equator, the maximum temperature keeps dropping in a more linear manner.

# Northern hemisphere - Lat vs Humidity

In [None]:
# Create a scatter plot for latitude vs humditiy (northern hemisphere)
x_values = northern_hemi_weather_df['Lat']
y_values = northern_hemi_weather_df['Humidity']
plot_linear_regression(x_values, y_values, "Latitude", "Humidity (%)", "Northern",(50,50))
plt.savefig("./output_data/northern_hem_linear_lat_vs_humidity.png", bbox_inches="tight")
plt.show()

# Southern hemisphere - Lat vs Humidity


In [None]:
# Create a scatter plot for latitude vs humditiy (southern hemisphere)
x_values = southern_hemi_weather_df['Lat']
y_values = southern_hemi_weather_df['Humidity']
plot_linear_regression(x_values, y_values, "Latitude", "Humidity (%)", "Southern",(50, 50), 100)
plt.savefig("./output_data/southern_hem_linear_lat_vs_humudity.png", bbox_inches="tight")
plt.show()

There is no correlation between latitude and humidity for southern hemisphere (0.04). For northern hemisphere, it is the same case, expect for the higher latitudes.

# Northern hemisphere - Lat vs Cloudiness


In [None]:
# Create a scatter plot for latitude vs cloudiness (northern hemisphere)
x_values = northern_hemi_weather_df['Lat']
y_values = northern_hemi_weather_df['Cloudiness']
plot_linear_regression(x_values, y_values, "Latitude", "Cloudiness (%)", "Northern", (20, 60))

plt.savefig("./output_data/northern_hem_linear_lat_vs_cloudiness.png", bbox_inches="tight")
plt.show()

# Southern hemisphere - Lat vs Cloudiness

In [None]:
# Create a scatter plot for latitude vs cloudiness (southern hemisphere)
x_values = southern_hemi_weather_df['Lat']
y_values = southern_hemi_weather_df['Cloudiness']
plot_linear_regression(x_values, y_values, "Latitude", "Cloudiness(%)", "Southern",(-45, 60))
plt.savefig("./output_data/southern_hem_linear_lat_vs_cloudiness.png", bbox_inches="tight")
plt.show()

There is no correlation between latitude and cloudiness for both, southern and northern hemispheres. Both show scattered values all over the plots.

# Northern hemisphere - Lat vs Wind Speed

In [None]:
# Create a scatter plot for latitude vs wind speed(northern hemisphere)
x_values = northern_hemi_weather_df['Lat']
y_values = northern_hemi_weather_df['Wind Speed']
plot_linear_regression(x_values, y_values, "Latitude", "Wind Speed (mph)", "Northern",(20, 25))
plt.savefig("./output_data/northern_hem_linear_lat_vs_wind_speed.png", bbox_inches="tight")
plt.show()

# Southern hemisphere - Lat vs Wind Speed

In [None]:
# Create a scatter plot for latitude vs wind speed (southern hemisphere)
x_values = southern_hemi_weather_df['Lat']
y_values = southern_hemi_weather_df['Wind Speed']
plot_linear_regression(x_values, y_values, "Latitude", "Wind Speed (mph)", "Southern",(-40, 25), ylim=40)
plt.savefig("./output_data/southern_hem_linear_lat_vs_wind_speed.png", bbox_inches="tight")
plt.show()

The r-squared is:0.0016903124668836253

The correlation between both factors is -0.04

There is no correlation between latitude and wind speed either, for both hemispheres. Both show evenly scattered values over the latitudes.