In [8]:
# Dependencies and Setup
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy import stats
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from config import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

ModuleNotFoundError: No module named 'config'

In [None]:
# Bounds of the sampling box: the full range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Draw 1500 random coordinates (latitudes first, then longitudes) and pair them up
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys keeps only the
# first occurrence of each name, in encounter order, so the list has no duplicates
nearest_names = (citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs)
cities = list(dict.fromkeys(nearest_names))

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

In [None]:
# Set the API base URL (HTTPS so the API key is not sent in clear text)
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Seconds to wait for a response before giving up on a city, so that one
# stalled request cannot hang the whole retrieval loop
request_timeout = 10

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        # Restart at 1 (not 0) so every set is numbered like the first one
        record_count = 1

    # Query parameters for this city. Passing them through `params` lets
    # requests URL-encode the values, so city names containing spaces or
    # characters such as "&" or "#" cannot corrupt the query string
    query_params = {"appid": weather_api_key, "units": units, "q": city}

    # Log the record number, set number, and city (never the URL, which carries the key)
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(url, params=query_params, timeout=request_timeout).json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']
        city_date = city_weather['dt']

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # A payload missing any of the expected fields (for example an error
    # response for an unknown city) is skipped
    except KeyError:
        print("City not found. Skipping...")

    # Network failures, timeouts and non-JSON bodies are reported separately
    # instead of being mislabelled as a missing city. Only the exception type
    # is logged: the message of a requests error can include the request URL,
    # and that URL contains the API key
    except (requests.exceptions.RequestException, ValueError) as request_error:
        print(f"Request failed ({type(request_error).__name__}). Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# Build a DataFrame from city_data, the list of per-city dictionaries
# filled in by the retrieval loop
city_data_df = pd.DataFrame(city_data)
# Show the number of non-null values in each column
city_data_df.count()

In [None]:
# Preview the first rows of the retrieved weather data
city_data_df.head()

In [None]:
# Summarise column dtypes and non-null counts
city_data_df.info()

In [None]:
# Export the City_Data into a csv
# Create the output folder first so that a fresh checkout (where output_data/
# does not exist yet) does not fail with a missing-directory error
cities_csv_path = Path("output_data/cities.csv")
cities_csv_path.parent.mkdir(parents=True, exist_ok=True)
city_data_df.to_csv(cities_csv_path, index_label="City_ID")

In [None]:
# Read saved data
# Note: this rebinds city_data_df to the contents of the CSV file, so every
# later cell works from the file on disk rather than the in-memory API results
city_data_df = pd.read_csv("output_data/cities.csv", index_col="City_ID")

# Display sample data
city_data_df.head()

In [None]:
# lat_temp is a second name for the same DataFrame object (no copy is made);
# the latitude scatter-plot cells plot from it
lat_temp = city_data_df
# Display the frame
lat_temp

In [None]:
# Scatter of max temperature against latitude for every retrieved city

# Marker size list, forwarded through the `sizes` keyword
sizes = [50]

# DataFrame.plot returns the Axes it drew on; keep it so the title and the
# saved file are tied to this figure explicitly
temp_ax = lat_temp.plot(
    kind="scatter",
    x="Lat",
    y="Max Temp",
    grid=True,
    figsize=(10, 6),
    marker="o",
    sizes=sizes,
    c="blue",
    edgecolors="black",
)

# Title carries today's date
plot_date = datetime.now().strftime("%y-%m-%d")
temp_ax.set_title(f"City Max Latitude vs Max Temperature ({plot_date})")

# Save the figure
temp_ax.figure.savefig("output_data/Fig1.png")

# Show plot
plt.show()

In [None]:
# Scatter of humidity against latitude for every retrieved city

# Marker size list, forwarded through the `sizes` keyword
sizes = [50]

# DataFrame.plot returns the Axes it drew on; keep it so the title and the
# saved file are tied to this figure explicitly
humidity_ax = lat_temp.plot(
    kind="scatter",
    x="Lat",
    y="Humidity",
    grid=True,
    figsize=(10, 6),
    marker="o",
    sizes=sizes,
    c="blue",
    edgecolors='black',
)

# Title carries today's date
plot_date = datetime.now().strftime("%y-%m-%d")
humidity_ax.set_title(f"Latitude vs Max Humidity ({plot_date})")

# Save the figure
humidity_ax.figure.savefig("output_data/Fig2.png")

# Show plot
plt.show()

In [None]:
# Scatter of cloudiness against latitude for every retrieved city

# Marker size list, forwarded through the `sizes` keyword
sizes = [50]

# DataFrame.plot returns the Axes it drew on; keep it so the title and the
# saved file are tied to this figure explicitly
cloud_ax = lat_temp.plot(
    kind="scatter",
    x="Lat",
    y="Cloudiness",
    grid=True,
    figsize=(10, 6),
    marker="o",
    sizes=sizes,
    c="blue",
    edgecolors='black',
)

# Title carries today's date
plot_date = datetime.now().strftime("%y-%m-%d")
cloud_ax.set_title(f"Latitude vs Cloudiness ({plot_date})")

# Save the figure
cloud_ax.figure.savefig("output_data/Fig3.png")

# Show plot
plt.show()

In [None]:
# Scatter of wind speed against latitude for every retrieved city

# Marker size list, forwarded through the `sizes` keyword
sizes = [50]

# DataFrame.plot returns the Axes it drew on; keep it so the title and the
# saved file are tied to this figure explicitly
wind_ax = lat_temp.plot(
    kind="scatter",
    x="Lat",
    y="Wind Speed",
    grid=True,
    figsize=(10, 6),
    marker="o",
    sizes=sizes,
    c="blue",
    edgecolors='black',
)

# Title carries today's date
plot_date = datetime.now().strftime("%y-%m-%d")
wind_ax.set_title(f"Latitude vs Wind Speed ({plot_date})")

# Save the figure
wind_ax.figure.savefig("output_data/Fig4.png")

# Show plot
plt.show()

In [None]:
# Cities on or north of the equator (latitude 0 is counted as northern)
in_northern_hemisphere = city_data_df["Lat"] >= 0
north_hemi_df = city_data_df.loc[in_northern_hemisphere]
north_hemi_df.head()

In [None]:
# Predictor (latitude) and response (max temperature) for the northern cities
x_values_north_lat = north_hemi_df['Lat']
y_values_north_temp = north_hemi_df['Max Temp']

# Least-squares fit of max temperature against latitude. The result unpacks
# into five module-level names; later cells read slope, intercept and rvalue
north_temp_fit = stats.linregress(x_values_north_lat, y_values_north_temp)
slope, intercept, rvalue, pvalue, stderr = north_temp_fit

# Fitted value at every observed latitude
regress_values_north_temp = x_values_north_lat.mul(slope).add(intercept)
print(regress_values_north_temp)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_north_temp = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_north_temp)

# Northern Hemisphere: Temperature vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_north_lat,y_values_north_temp)
plt.plot(x_values_north_lat,regress_values_north_temp,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.annotate(line_eq_north_temp,(40,30),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Cities strictly south of the equator
in_southern_hemisphere = city_data_df["Lat"] < 0
south_hemi_df = city_data_df.loc[in_southern_hemisphere]
south_hemi_df.head()

In [None]:
# Predictor (latitude) and response (max temperature) for the southern cities
x_values_south_lat = south_hemi_df['Lat']
y_values_south_temp = south_hemi_df['Max Temp']

# Least-squares fit of max temperature against latitude. The result unpacks
# into five module-level names; later cells read slope, intercept and rvalue
south_temp_fit = stats.linregress(x_values_south_lat, y_values_south_temp)
slope, intercept, rvalue, pvalue, stderr = south_temp_fit

# Fitted value at every observed latitude
regress_values_south_temp = x_values_south_lat.mul(slope).add(intercept)
print(regress_values_south_temp)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_south_temp = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_south_temp)

# Southern Hemisphere: Temperature vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_south_lat,y_values_south_temp)
plt.plot(x_values_south_lat,regress_values_south_temp,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.annotate(line_eq_south_temp,(-50,30),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (humidity) for the northern cities
x_values_north_lat = north_hemi_df['Lat']
y_values_north_hum = north_hemi_df['Humidity']

# Least-squares fit of humidity against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
north_hum_fit = stats.linregress(x_values_north_lat, y_values_north_hum)
slope, intercept, rvalue, pvalue, stderr = north_hum_fit

# Fitted value at every observed latitude
regress_values_north_hum = x_values_north_lat.mul(slope).add(intercept)
print(regress_values_north_hum)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_north_hum = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_north_hum)

# Northern Hemisphere: Humidity vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_north_lat,y_values_north_hum)
plt.plot(x_values_north_lat,regress_values_north_hum,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.annotate(line_eq_north_hum,(40,10),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (humidity) for the southern cities
x_values_south_lat = south_hemi_df['Lat']
y_values_south_hum = south_hemi_df['Humidity']

# Least-squares fit of humidity against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
south_hum_fit = stats.linregress(x_values_south_lat, y_values_south_hum)
slope, intercept, rvalue, pvalue, stderr = south_hum_fit

# Fitted value at every observed latitude
regress_values_south_hum = x_values_south_lat.mul(slope).add(intercept)
print(regress_values_south_hum)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_south_hum = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_south_hum)

# Southern Hemisphere: Humidity vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_south_lat,y_values_south_hum)
plt.plot(x_values_south_lat,regress_values_south_hum,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.annotate(line_eq_south_hum,(-55,20),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (cloudiness) for the northern cities
x_values_north_lat = north_hemi_df['Lat']
y_values_north_cloud = north_hemi_df['Cloudiness']

# Least-squares fit of cloudiness against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
north_cloud_fit = stats.linregress(x_values_north_lat, y_values_north_cloud)
slope, intercept, rvalue, pvalue, stderr = north_cloud_fit

# Fitted value at every observed latitude
regress_values_north_cloud = x_values_north_lat.mul(slope).add(intercept)
print(regress_values_north_cloud)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_north_cloud = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_north_cloud)

# Northern Hemisphere: Cloudiness vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_north_lat,y_values_north_cloud)
plt.plot(x_values_north_lat,regress_values_north_cloud,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.annotate(line_eq_north_cloud,(45,45),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (cloudiness) for the southern cities
x_values_south_lat = south_hemi_df['Lat']
y_values_south_cloud = south_hemi_df['Cloudiness']

# Least-squares fit of cloudiness against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
south_cloud_fit = stats.linregress(x_values_south_lat, y_values_south_cloud)
slope, intercept, rvalue, pvalue, stderr = south_cloud_fit

# Fitted value at every observed latitude
regress_values_south_cloud = x_values_south_lat.mul(slope).add(intercept)
print(regress_values_south_cloud)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_south_cloud = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_south_cloud)

# Southern Hemisphere: Cloudiness vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_south_lat,y_values_south_cloud)
plt.plot(x_values_south_lat,regress_values_south_cloud,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.annotate(line_eq_south_cloud,(-55,25),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (wind speed) for the northern cities
x_values_north_lat = north_hemi_df['Lat']
y_values_north_wind = north_hemi_df['Wind Speed']

# Least-squares fit of wind speed against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
north_wind_fit = stats.linregress(x_values_north_lat, y_values_north_wind)
slope, intercept, rvalue, pvalue, stderr = north_wind_fit

# Fitted value at every observed latitude
regress_values_north_wind = x_values_north_lat.mul(slope).add(intercept)
print(regress_values_north_wind)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_north_wind = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_north_wind)

# Northern Hemisphere: Wind Speed vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_north_lat,y_values_north_wind)
plt.plot(x_values_north_lat,regress_values_north_wind,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
# The y data is the 'Wind Speed' column, so label the axis accordingly
plt.ylabel('Wind Speed')
plt.annotate(line_eq_north_wind,(10,12),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()

In [None]:
# Predictor (latitude) and response (wind speed) for the southern cities
x_values_south_lat = south_hemi_df['Lat']
y_values_south_wind = south_hemi_df['Wind Speed']

# Least-squares fit of wind speed against latitude. The result unpacks into
# five module-level names; later cells read slope, intercept and rvalue
south_wind_fit = stats.linregress(x_values_south_lat, y_values_south_wind)
slope, intercept, rvalue, pvalue, stderr = south_wind_fit

# Fitted value at every observed latitude
regress_values_south_wind = x_values_south_lat.mul(slope).add(intercept)
print(regress_values_south_wind)

In [None]:
# Create line equation string
# slope and intercept are whatever the most recently run regression produced.
# The sign is chosen explicitly so a negative intercept renders as "x -3.2"
# instead of "x +-3.2"; non-negative intercepts format exactly as before
intercept_sign = "+" if intercept >= 0 else "-"
line_eq_south_wind = "y = " + str(round(slope,2)) + "x " + intercept_sign + str(round(abs(intercept),2))
print(line_eq_south_wind)

# Southern Hemisphere: Wind Speed vs. Latitude

In [None]:
# Create Plot: observed points plus the fitted regression line in red
plt.scatter(x_values_south_lat,y_values_south_wind)
plt.plot(x_values_south_lat,regress_values_south_wind,"r-")

# Label plot and annotate the line equation
plt.xlabel('Latitude')
# The y data is the 'Wind Speed' column, so label the axis accordingly
plt.ylabel('Wind Speed')
plt.annotate(line_eq_south_wind,(-40,12),fontsize=15,color="green")

# rvalue is the correlation coefficient r from the most recently run
# regression; the number printed is its square, so it is labelled r^2
print(f"The r^2-value is: {rvalue**2}")

# Show plot
plt.show()