In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

#Import API Key
from api_keys import weather_api_key

#Incorporated citipy to determine the city based on latitude and longitude
from citipy import citipy

#Output data file (CSV)
output_data_file = "output_data/cities.csv"

#Coordinate bounds as (min, max) pairs: latitude first, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

ModuleNotFoundError: No module named 'api_keys'

## Generate Cities List

In [None]:
#Create a set of random lat and lng combinations.
#The bounds come from lat_range / lng_range (defined in the setup cell) so the
#sampling window is configured in exactly one place.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

#Identify the nearest city for each lat, lng combination.
#dict.fromkeys keeps only the first occurrence of each city name and preserves
#the order in which names were first seen, without rescanning a list for every
#new coordinate pair.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

#print the city count to confirm sufficient count
len(cities)

## Perform API Calls
    * Perform a weather check on each city using a series of successive API calls
    * Include a print log of each city as it's being processed (with the city number and city name)

In [None]:
#Current-weather endpoint with imperial units; the API key is passed as the APPID parameter
weather_url = f"https://api.openweathermap.org/data/2.5/weather?units=imperial&APPID={weather_api_key}"

#Single city used for the trial request in the next cell
city = "houston"

In [None]:
#Trial request: fetch the current weather for `city` and decode the JSON body
weather_data = requests.get(weather_url + "&q=" + city).json()

#Display the decoded response so its structure can be inspected
weather_data

In [None]:
#Create lists for each data point needed
temperature = []
humidity = []
cloudiness = []
wind_speed = []
latitude = []
longitude = []
country = []
date = []
cities_f = []

#Start record count at 1
record = 1

print("Beginning Data Retrieval")
print("------------------------")

#run loop through API for all cities in the list
for city in cities:
    try:
        #Pass the city through `params` so requests URL-encodes the name
        pull = requests.get(weather_url, params={"q": city}).json()

        #Read every field BEFORE appending anything: if a key is missing the
        #record is skipped as a whole and the lists stay the same length
        #(required for building the DataFrame afterwards).
        temp_max = pull["main"]["temp_max"]
        humid = pull["main"]["humidity"]
        clouds = pull["clouds"]["all"]
        wind = pull["wind"]["speed"]
        lat = pull["coord"]["lat"]
        lon = pull["coord"]["lon"]  #longitude lives under "lon" (previously "lat" was stored twice)
        country_code = pull["sys"]["country"]
        timestamp = pull["dt"]
    except (requests.RequestException, KeyError, ValueError):
        #KeyError: response lacks the expected fields (e.g. unknown city);
        #RequestException / ValueError: network failure or a non-JSON body.
        #Anything else (including KeyboardInterrupt) is allowed to propagate.
        print("City not found. Skipping...")
        continue

    temperature.append(temp_max)
    humidity.append(humid)
    cloudiness.append(clouds)
    wind_speed.append(wind)
    latitude.append(lat)
    longitude.append(lon)
    country.append(country_code)
    date.append(timestamp)
    cities_f.append(city)
    print(f"Processing Record {record} | {city}")
    record = record + 1

print("------------------------")
print("Data Retrival Complete")
print("------------------------")

## Convert Raw Data to DataFrame

In [None]:
#Map each output column name to the list collected during retrieval
weather_columns = {
    "City": cities_f,
    "Cloudiness": cloudiness,
    "Country": country,
    "Date": date,
    "Humidity": humidity,
    "Lat": latitude,
    "Lng": longitude,
    "Max Temp": temperature,
    "Wind Speed": wind_speed,
}
weather_df = pd.DataFrame(data=weather_columns)

#Persist the table to disk (the index is written as the first column)
weather_df.to_csv(path_or_buf="CSV/weather_data.csv")

#Display the resulting frame
weather_df

## Plotting the Data

#### Latitude vs. Temperature Plot

In [None]:
#Scatter of max temperature against latitude, drawn through an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(weather_df["Lat"], weather_df["Max Temp"], marker="o", edgecolors="black")

ax.set_title("City Latitude vs. Max Temperature (02/06/2020)")
ax.set_xlabel("Latitude")
ax.set_ylabel("Max Temperature(F)")
ax.grid()

#Save the figure to disk before showing it
fig.savefig("Figure/City_Latitude_vs_Max_Temp.png")

plt.show()

* On average, cities closer to 0° latitude (the equator) have significantly higher temperatures than cities farther away.

## Latitude vs. Humidity Plot

In [None]:
#Scatter of humidity against latitude, drawn through an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(weather_df["Lat"], weather_df["Humidity"], marker="o", edgecolors="black")

ax.set_title("City Latitude vs. Humidity (02/06/2020)")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity")
ax.grid()

#Save the figure to disk before showing it
fig.savefig("Figure/City_Latitude_vs_Humidity.png")

plt.show()

* Cities farther from the equator form a larger cluster at the high end of the humidity range.

## Latitude vs Cloudiness Plot

In [None]:
#Scatter of cloudiness against latitude, drawn through an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(weather_df["Lat"], weather_df["Cloudiness"], marker="o", edgecolors="black")

ax.set_title("City Latitude vs. Cloudiness (02/06/2020)")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness")
ax.grid()

#Save the figure to disk before showing it
fig.savefig("Figure/City_Latitude_vs_Cloudiness.png")

plt.show()

## Latitude vs Wind Speed Plot

In [None]:
#Scatter of wind speed against latitude, drawn through an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(weather_df["Lat"], weather_df["Wind Speed"], marker="o", edgecolors="black")

ax.set_title("City Latitude vs. Wind Speed (02/06/2020)")
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed")
ax.grid()

#Save the figure to disk before showing it
fig.savefig("Figure/City_Latitude_vs_Wind Speed.png")

plt.show()

## Linear Regression

In [None]:
#Create Northern and Southern Hemisphere DataFrames
#Hemispheres are separated by the equator, i.e. by the sign of the latitude
#(splitting on "Lng" would divide the cities into east/west instead).
#Cities exactly on the equator (Lat == 0) are placed in the northern frame.
n_hem = weather_df[weather_df["Lat"] >= 0]
s_hem = weather_df[weather_df["Lat"] < 0]



## Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Northern Hemisphere - Max Temp vs Latitude Linear Regression
#Latitude is the independent variable (x axis) and Max Temp the dependent
#variable (y axis), so the plotted data agrees with the axis labels below.
#r-squared is symmetric in x and y, so its value is unaffected by the ordering.
tempvlat_r2 = (linregress(n_hem["Lat"], n_hem["Max Temp"]).rvalue)**2

plt.scatter(n_hem["Lat"], n_hem["Max Temp"], marker ="o", edgecolors="black")

plt.xlabel("Latitude")
plt.ylabel("Max Temp")
plt.grid()

#Plot Trendline: degree-1 least-squares fit of Max Temp as a function of Latitude
z = np.polyfit(n_hem["Lat"], n_hem["Max Temp"], 1)
p = np.poly1d(z)
plt.plot(n_hem["Lat"],p(n_hem["Lat"]),"r-")

plt.savefig("Figure/N_Temp_vs_Lat.png")

plt.show()
print(f"The r-squared is: {tempvlat_r2}")

## Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
s_hem