# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV output file
# NOTE(review): not referenced by any other cell visible in this notebook — confirm whether it is still needed
output_data_file = "../output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city
# name once.  dict.fromkeys keeps the first occurrence of every key in
# encounter order, so duplicates are dropped in a single pass instead of
# rescanning the growing list for every candidate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information.  HTTPS is used so the API key in the query string
# is not sent over the network in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?"
city = "edina,minnesota"
query_url = base_url + "appid=" + weather_api_key + "&q=" + city

# Get weather data for one city, convert to JSON, and display to see keys.
# The timeout stops the cell from hanging indefinitely if the server never answers.
weather_response = requests.get(query_url, timeout=10)
weather_json = weather_response.json()
print(f"The weather API responded with:  {weather_json}.")

In [None]:
# Save config information.  HTTPS is used so the API key in the query string
# is not sent over the network in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL; the city name is appended after "q=" for each request
query_url = f"{base_url}appid={weather_api_key}&units={units}&q="

# Set up lists to hold response info (one entry per successfully retrieved city)
city_name = []
city_latitude = []
city_longitude = []
city_max_temp = []
city_humidity = []
city_cloudiness = []
city_wind_speed = []
city_country = []
city_date = []

In [None]:
# Print header
print("Beginning Data Retrieval")
print("-----------------------------")

# Progress counters for the log: position within the current set of 50,
# and the number of the current set
record = 0
record_set = 1

# Result lists, in the same order as the values read from each response below
result_lists = (
    city_name,
    city_latitude,
    city_longitude,
    city_max_temp,
    city_humidity,
    city_cloudiness,
    city_wind_speed,
    city_country,
    city_date,
)

# Make a request for each of the cities
for row in cities:
    try:
        response = requests.get(query_url + row, timeout=10).json()

        # Read every field before storing any of them, so a response that is
        # missing a key cannot leave the result lists with unequal lengths
        # (which would make the later DataFrame construction fail).
        values = (
            response["name"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["sys"]["country"],
            response["dt"],
        )
    except (KeyError, TypeError):
        # The response does not contain the expected weather fields
        print("City not found.  Skipping...")
    except (requests.exceptions.RequestException, ValueError) as error:
        # Network failure, timeout, or a body that is not valid JSON.  Only
        # the exception type is shown because the message can contain the
        # request URL, which embeds the API key.
        print(f"Request failed ({type(error).__name__}).  Skipping...")
    else:
        for result_list, value in zip(result_lists, values):
            result_list.append(value)

        record += 1

        # Conditional statement to group cities into sets of 50
        if record > 50:
            record_set += 1
            record = 1

        print(f"Processing Record {record} of Set {record_set} | {row}")

    # Delay after every request, successful or not, to stay under the
    # calls-per-minute limit
    time.sleep(1)

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Map each output column name to the list collected during data retrieval
weather_columns = {
    "City": city_name,
    "Lat": city_latitude,
    "Lng": city_longitude,
    "Max Temp": city_max_temp,
    "Humidity": city_humidity,
    "Cloudiness": city_cloudiness,
    "Wind Speed": city_wind_speed,
    "Country": city_country,
    "Date": city_date,
}

# Build the DataFrame from those columns and display it
weather_df = pd.DataFrame(weather_columns)
weather_df

In [None]:
# Write the DataFrame to disk as CSV: keep the column header row, leave out the pandas index
weather_df.to_csv(path_or_buf="../output_data/Weather.csv", header=True, index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Inspect the data for humidity values >100%: if the maximum is at most 100,
# no city needs to be removed
weather_df["Humidity"].max()

In [None]:
# Get the indices of cities that have humidity over 100%.
# N/A

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the weather_df DataFrame, which we call "clean_city_data".
# clean_city_data = 

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Retrieve latitude and max temperature data
latitude = weather_df["Lat"]
maxtemp = weather_df["Max Temp"]

# Local date on which the plot is generated, shown in the title as the date of analysis
analysis_date = time.strftime("%m/%d/%Y")

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(latitude, maxtemp)

# Define scatter plot title, x and y labels (and their font sizes)
plt.title(f"City Latitude vs. Maximum Temperature ({analysis_date})", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Max Temperature (F)", fontsize=14)

# Print scatter plot to image file
plt.savefig("../Images/Latitude_vs_Temperature_Plot.png")

plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Retrieve latitude and humidity data
latitude = weather_df["Lat"]
humidity = weather_df["Humidity"]

# Local date on which the plot is generated, shown in the title as the date of analysis
analysis_date = time.strftime("%m/%d/%Y")

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(latitude, humidity)

# Define scatter plot title, x and y labels (and their font sizes)
plt.title(f"City Latitude vs. Relative Humidity ({analysis_date})", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Relative Humidity (%)", fontsize=14)

# Print scatter plot to image file
plt.savefig("../Images/Latitude_vs_Humidity_Plot.png")

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Retrieve latitude and cloudiness data
latitude = weather_df["Lat"]
cloudiness = weather_df["Cloudiness"]

# Local date on which the plot is generated, shown in the title as the date of analysis
analysis_date = time.strftime("%m/%d/%Y")

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(latitude, cloudiness)

# Define scatter plot title, x and y labels (and their font sizes)
plt.title(f"City Latitude vs. Cloudiness ({analysis_date})", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Cloudiness (%)", fontsize=14)

# Print scatter plot to image file
plt.savefig("../Images/Latitude_vs_Cloudiness_Plot.png")

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Retrieve latitude and wind speed data
latitude = weather_df["Lat"]
wind_speed = weather_df["Wind Speed"]

# Local date on which the plot is generated, shown in the title as the date of analysis
analysis_date = time.strftime("%m/%d/%Y")

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(latitude, wind_speed)

# Define scatter plot title, x and y labels (and their font sizes)
plt.title(f"City Latitude vs. Wind Speed ({analysis_date})", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Wind Speed (mph)", fontsize=14)

# Print scatter plot to image file
plt.savefig("../Images/Latitude_vs_Windspeed_Plot.png")

plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Filter all by northern hemisphere latitudes (strictly greater than 0)
northern_latitudes = weather_df.loc[weather_df["Lat"] > 0]

# Retrieve latitude and max temperature data
north_latitude = northern_latitudes["Lat"]
north_temp = northern_latitudes["Max Temp"]

# Perform a linear regression on latitudes versus max temperatures
tl_slope, tl_int, tl_r, tl_p, tl_std_err = st.linregress(north_latitude, north_temp)

# Predicted max temperature for every observed latitude
tl_fit = tl_slope * north_latitude + tl_int

# Equation of the fitted line as text, to print on the scatter plot
nl_equation = f"y = {round(tl_slope, 2)}x + {round(tl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(north_latitude, north_temp)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([north_latitude.min(), north_latitude.max()])
plt.plot(line_x, tl_slope * line_x + tl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(nl_equation, (0.05, 0.1), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Northern Hemisphere:  Max Temp vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Max Temperature (F)", fontsize=14)
print(f"The r-value is: {tl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Northern_Hemisphere_Latitude_vs_MaxTemp_Plot.png")

plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Filter all by southern hemisphere latitudes (strictly less than 0)
southern_latitudes = weather_df.loc[weather_df["Lat"] < 0]

# Retrieve latitude and max temperature data
south_latitude = southern_latitudes["Lat"]
south_temp = southern_latitudes["Max Temp"]

# Perform a linear regression on latitudes versus max temperatures
sl_slope, sl_int, sl_r, sl_p, sl_std_err = st.linregress(south_latitude, south_temp)

# Predicted max temperature for every observed latitude
sl_fit = sl_slope * south_latitude + sl_int

# Equation of the fitted line as text, to print on the scatter plot
sl_equation = f"y = {round(sl_slope, 2)}x + {round(sl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(south_latitude, south_temp)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([south_latitude.min(), south_latitude.max()])
plt.plot(line_x, sl_slope * line_x + sl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(sl_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Southern Hemisphere:  Max Temp vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Max Temperature (F)", fontsize=14)
print(f"The r-value is: {sl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Southern_Hemisphere_Latitude_vs_MaxTemp_Plot.png")

plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and humidity data
north_latitude = northern_latitudes["Lat"]
north_humidity = northern_latitudes["Humidity"]

# Perform a linear regression on latitudes versus humidity
tl_slope, tl_int, tl_r, tl_p, tl_std_err = st.linregress(north_latitude, north_humidity)

# Predicted humidity for every observed latitude
tl_fit = tl_slope * north_latitude + tl_int

# Equation of the fitted line as text, to print on the scatter plot
nl_equation = f"y = {round(tl_slope, 2)}x + {round(tl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(north_latitude, north_humidity)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([north_latitude.min(), north_latitude.max()])
plt.plot(line_x, tl_slope * line_x + tl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(nl_equation, (0.05, 0.1), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Northern Hemisphere:  Relative Humidity vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Relative Humidity (%)", fontsize=14)
print(f"The r-value is: {tl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Northern_Hemisphere_Latitude_vs_Humidity_Plot.png")

plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and humidity data
south_latitude = southern_latitudes["Lat"]
south_humidity = southern_latitudes["Humidity"]

# Perform a linear regression on latitudes versus humidity
sl_slope, sl_int, sl_r, sl_p, sl_std_err = st.linregress(south_latitude, south_humidity)

# Predicted humidity for every observed latitude
sl_fit = sl_slope * south_latitude + sl_int

# Equation of the fitted line as text, to print on the scatter plot
sl_equation = f"y = {round(sl_slope, 2)}x + {round(sl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(south_latitude, south_humidity)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([south_latitude.min(), south_latitude.max()])
plt.plot(line_x, sl_slope * line_x + sl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(sl_equation, (0.05, 0.1), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Southern Hemisphere:  Relative Humidity vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Relative Humidity (%)", fontsize=14)
print(f"The r-value is: {sl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Southern_Hemisphere_Latitude_vs_Humidity_Plot.png")

plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and cloudiness data
north_latitude = northern_latitudes["Lat"]
north_cloudiness = northern_latitudes["Cloudiness"]

# Perform a linear regression on latitudes versus cloudiness
tl_slope, tl_int, tl_r, tl_p, tl_std_err = st.linregress(north_latitude, north_cloudiness)

# Predicted cloudiness for every observed latitude
tl_fit = tl_slope * north_latitude + tl_int

# Equation of the fitted line as text, to print on the scatter plot
nl_equation = f"y = {round(tl_slope, 2)}x + {round(tl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(north_latitude, north_cloudiness)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([north_latitude.min(), north_latitude.max()])
plt.plot(line_x, tl_slope * line_x + tl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(nl_equation, (0.05, 0.3), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Northern Hemisphere:  Cloudiness vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Cloudiness (%)", fontsize=14)
print(f"The r-value is: {tl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Northern_Hemisphere_Latitude_vs_Cloudiness_Plot.png")

plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and cloudiness data
south_latitude = southern_latitudes["Lat"]
south_cloudiness = southern_latitudes["Cloudiness"]

# Perform a linear regression on latitudes versus cloudiness
sl_slope, sl_int, sl_r, sl_p, sl_std_err = st.linregress(south_latitude, south_cloudiness)

# Predicted cloudiness for every observed latitude
sl_fit = sl_slope * south_latitude + sl_int

# Equation of the fitted line as text, to print on the scatter plot
sl_equation = f"y = {round(sl_slope, 2)}x + {round(sl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(south_latitude, south_cloudiness)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([south_latitude.min(), south_latitude.max()])
plt.plot(line_x, sl_slope * line_x + sl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(sl_equation, (0.05, 0.3), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Southern Hemisphere:  Cloudiness vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Cloudiness (%)", fontsize=14)
print(f"The r-value is: {sl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Southern_Hemisphere_Latitude_vs_Cloudiness_Plot.png")

plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and wind speed data
north_latitude = northern_latitudes["Lat"]
north_wind = northern_latitudes["Wind Speed"]

# Perform a linear regression on latitudes versus wind speed
tl_slope, tl_int, tl_r, tl_p, tl_std_err = st.linregress(north_latitude, north_wind)

# Predicted wind speed for every observed latitude
tl_fit = tl_slope * north_latitude + tl_int

# Equation of the fitted line as text, to print on the scatter plot
nl_equation = f"y = {round(tl_slope, 2)}x + {round(tl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(north_latitude, north_wind)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([north_latitude.min(), north_latitude.max()])
plt.plot(line_x, tl_slope * line_x + tl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
# (a fixed data position such as y = 30 lies above the axes whenever every
# sampled wind speed is lower than that)
plt.annotate(nl_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Northern Hemisphere:  Wind Speed vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Wind Speed (mph)", fontsize=14)
print(f"The r-value is: {tl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Northern_Hemisphere_Latitude_vs_Windspeed_Plot.png")

plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Retrieve latitude and wind speed data
south_latitude = southern_latitudes["Lat"]
south_wind = southern_latitudes["Wind Speed"]

# Perform a linear regression on latitudes versus wind speeds
sl_slope, sl_int, sl_r, sl_p, sl_std_err = st.linregress(south_latitude, south_wind)

# Predicted wind speed for every observed latitude
sl_fit = sl_slope * south_latitude + sl_int

# Equation of the fitted line as text, to print on the scatter plot
sl_equation = f"y = {round(sl_slope, 2)}x + {round(sl_int, 2)}"

# Define scatter plot size
plt.figure(figsize=(18, 10))

# Plot x and y values on scatter plot
plt.scatter(south_latitude, south_wind)

# Draw the regression line between its two end points only.  Tracing it
# through every (unsorted) data point retraces the same segment many times,
# which can fill in the gaps of the dash pattern.
line_x = np.array([south_latitude.min(), south_latitude.max()])
plt.plot(line_x, sl_slope * line_x + sl_int, "--", color="red")

# Print the equation at a position given as a fraction of the axes, so it
# stays inside the plot whatever range the sampled data happens to cover
plt.annotate(sl_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=14, color="red")

# Define scatter plot title, and x and y labels (and their font sizes)
plt.title("Southern Hemisphere:  Wind Speed vs. Latitude (with linear regression model)", fontsize=18)
plt.xlabel("Latitude", fontsize=14)
plt.ylabel("Wind Speed (mph)", fontsize=14)
print(f"The r-value is: {sl_r}")

# Print scatter plot to image file
plt.savefig("../Images/Southern_Hemisphere_Latitude_vs_Windspeed_Plot.png")

plt.show()