In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination of the CSV export (path is relative, not absolute)
output_data_file = "../output_data/cities.csv"

# (min, max) bounds passed to the random coordinate sampler
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
# Sample random coordinates and collect the distinct nearest cities.
# Produces `lat_lngs` (the sampled pairs) and `cities` (unique names, first-seen order).

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=15) #size=1500
lngs = np.random.uniform(lng_range[0], lng_range[1], size=15) #size=1500

# Materialise the pairs: a bare zip() iterator is exhausted after one pass,
# which would leave `lat_lngs` empty for any later inspection.
lat_lngs = list(zip(lats, lngs))

# Identify nearest city for each lat, lng combination.
# dict.fromkeys drops duplicates with constant-time lookups while keeping
# insertion order, replacing the quadratic `city not in cities` list scan.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(len(cities))

In [None]:
# Build the results frame: one row per city plus blank (empty-string)
# placeholder columns that the retrieval loop fills in afterwards.
placeholder_columns = ['Lat', 'Lng', 'MaxTemp', 'Humidity',
                       'Cloudiness', 'WindSpeed', 'Country', 'Date']

weather_df = pd.DataFrame({'City': cities})
for column_name in placeholder_columns:
    weather_df[column_name] = ""


In [None]:
# Set units to imperial
units = 'imperial'

# Current-weather endpoint; defined once because it is the same for every city.
base_url = "https://api.openweathermap.org/data/2.5/weather"

# Iterate over the rows in the df and fill the columns
print("Beginning data retrieval")
print("-------------------------")
for index, row in weather_df.iterrows():
    city = row['City']
    print(f"Processing record {index}|{city}")

    try:
        # Let requests build the query string so city names containing spaces,
        # '&', '#' or non-ASCII characters are percent-encoded correctly.
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(
            base_url,
            params={'q': city, 'appid': weather_api_key, 'units': units},
            timeout=10,
        ).json()

        # Extract every field before touching the DataFrame, so a response
        # missing some key never leaves a half-filled row behind.
        record = {
            'Lat': response['coord']['lat'],
            'Lng': response['coord']['lon'],
            'MaxTemp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'WindSpeed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'Date': response['dt'],
        }
    except (requests.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON.
        # Only the exception type is printed: the full message can contain
        # the request URL, which includes the API key.
        print(f"Request for {city} failed ({type(err).__name__}). Skipping...")
    except (KeyError, TypeError):
        # The payload lacks the expected fields (e.g. an error response).
        print(f"{city} not found. Skipping...")
    else:
        for column, value in record.items():
            weather_df.loc[index, column] = value
    finally:
        # One-second pause after every request, successful or not.
        time.sleep(1)
        


In [None]:
# Replace empty-string placeholders with NaN.
# The result is reassigned instead of using inplace=True, so the cell does not
# mutate an object in place and follows the reassignment style of the cells below.
weather_df = weather_df.replace('', np.nan)

In [None]:
# Keep only fully populated rows: discard any row holding at least one NaN
weather_df = weather_df.dropna(axis=0, how='any')

In [None]:
# Write the frame, in its current state, to the configured CSV path
weather_df.to_csv(path_or_buf=output_data_file)

# Preview the first five rows as the cell's output
weather_df.head(n=5)

In [None]:
# Convert the numeric fields to floats.
# `df.loc[:, col] = values` writes into the existing column in place on
# pandas 2.x, so an object-dtype column can stay object-dtype; it can also
# raise SettingWithCopyWarning on a frame produced by dropna(). astype() with
# reassignment changes the dtype reliably and returns a fresh frame.
numeric_columns = ["Lat", "Lng", "MaxTemp", "Humidity", "Cloudiness", "WindSpeed", "Date"]
weather_df = weather_df.astype({column: 'float' for column in numeric_columns})


In [None]:
# Keep only rows whose humidity reading does not exceed 100%
humidity_ok = weather_df['Humidity'] <= 100
weather_df = weather_df.loc[humidity_ok]

In [None]:
# Descriptive statistics for the coordinate, weather and timestamp columns
stat_columns = ['Lat', 'Lng', 'MaxTemp', 'Humidity', 'Cloudiness', 'WindSpeed', 'Date']
summary_stats = weather_df.loc[:, stat_columns]
summary_stats.describe(include='all')

In [None]:
# Scatter of maximum temperature against latitude; the figure is saved to fig1.png
fig, ax = plt.subplots()
ax.scatter(weather_df['Lat'], weather_df['MaxTemp'])
ax.set(
    xlabel='Latitude',
    ylabel='Maximum Temperature (F)',
    title='City Latitude vs. Maximum Temperature',
)
fig.savefig('../output_data/fig1.png')
plt.show()

In [None]:
# Scatter of humidity against latitude; the figure is saved to fig2.png
fig, ax = plt.subplots()
ax.scatter(weather_df['Lat'], weather_df['Humidity'])
ax.set(
    xlabel='Latitude',
    ylabel='Humidity (%)',
    title='City Latitude vs. Humidity',
)
fig.savefig('../output_data/fig2.png')
plt.show()

In [None]:
# Scatter of cloudiness against latitude; the figure is saved to fig3.png
fig, ax = plt.subplots()
ax.scatter(weather_df['Lat'], weather_df['Cloudiness'])
ax.set(
    xlabel='Latitude',
    ylabel='Cloudiness (%)',
    title='City Latitude vs. Cloudiness',
)
fig.savefig('../output_data/fig3.png')
plt.show()

In [None]:
# Scatter of wind speed against latitude; the figure is saved to fig4.png
fig, ax = plt.subplots()
ax.scatter(weather_df['Lat'], weather_df['WindSpeed'])
ax.set(
    xlabel='Latitude',
    ylabel='Wind Speed (mph)',
    title='City Latitude vs. Wind Speed',
)
fig.savefig('../output_data/fig4.png')
plt.show()

In [None]:
# Group DataFrame into northern and southern hemispheres.
# Latitude 0 is assigned to the northern group so that a city sitting exactly
# on the equator is not dropped from both subsets.
northern_hem = weather_df.loc[weather_df['Lat'] >= 0]
southern_hem = weather_df.loc[weather_df['Lat'] < 0]
southern_hem.head()

In [None]:
# Latitude vs Max Temp, northern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = northern_hem['Lat']
y_values = northern_hem['MaxTemp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Maximum Temperature (F)')
ax.set_title('Northern Hemisphere: Latitude vs. Maximum Temperature')
plt.show()

In [None]:
# Latitude vs Max Temp, southern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = southern_hem['Lat']
y_values = southern_hem['MaxTemp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Maximum Temperature (F)')
ax.set_title('Southern Hemisphere: Latitude vs. Maximum Temperature')
plt.show()

In [None]:
# Latitude vs. Humidity, northern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = northern_hem['Lat']
y_values = northern_hem['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Humidity (%)')
ax.set_title('Northern Hemisphere: Latitude vs. Humidity')
plt.show()

In [None]:
# Latitude vs. Humidity, southern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = southern_hem['Lat']
y_values = southern_hem['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Humidity (%)')
ax.set_title('Southern Hemisphere: Latitude vs. Humidity')
plt.show()

In [None]:
# Latitude vs. Cloudiness, northern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = northern_hem['Lat']
y_values = northern_hem['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness (%)')
ax.set_title('Northern Hemisphere: Latitude vs. Cloudiness')
plt.show()

In [None]:
# Latitude vs. Cloudiness, southern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = southern_hem['Lat']
y_values = southern_hem['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness (%)')
ax.set_title('Southern Hemisphere: Latitude vs. Cloudiness')
plt.show()

In [None]:
# Latitude vs. Wind Speed, northern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = northern_hem['Lat']
y_values = northern_hem['WindSpeed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Wind Speed (mph)')
ax.set_title('Northern Hemisphere: Latitude vs. Wind Speed')
plt.show()

In [None]:
# Latitude vs. Wind Speed, southern hemisphere

# Fit a least-squares line and report its correlation coefficient
x_values = southern_hem['Lat']
y_values = southern_hem['WindSpeed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
print(f"The r-value is {rvalue}.")

# Draw the points, the fitted line and the line's equation
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
# Position the equation in axes-fraction coordinates: a fixed data-space point
# can fall outside the plotted range for a given random sample, in which case
# matplotlib does not draw the annotation at all.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_xlabel('Latitude')
ax.set_ylabel('Wind Speed (mph)')
ax.set_title('Southern Hemisphere: Latitude vs. Wind Speed')
plt.show()