# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as sts
from pprint import pprint

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Sampling bounds in degrees, each given as (min, max): latitude, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

# Path of the CSV file the retrieved city weather table is exported to
output_data_file = "output_data/cities.csv"

In [None]:
# Draw 1500 random latitude/longitude pairs inside the configured bounds.
# Latitudes are drawn before longitudes so a seeded run stays reproducible.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each pair to its nearest city name. dict.fromkeys drops repeated
# names while keeping first-seen order, avoiding a linear list scan per city.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count (a bare `len(cities)`
# in the middle of a cell is evaluated and silently discarded).
print(len(cities))
print(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base request URL; the city name is appended to it for each request.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Running record number plus one accumulator list per output column.
citynumber = 1
weather_data = []
names = []
latitudes = []
longitudes = []
max_temps = []
humidities = []
clouds = []
winds = []
countries = []
dates = []

# Accumulators in the same order as the values extracted from each response.
record_lists = (
    names,
    latitudes,
    longitudes,
    max_temps,
    humidities,
    clouds,
    winds,
    countries,
    dates,
)

# Print header
print("Beginning Data Retrieval")
print("-----------------------------")

# Query the API once per city and keep only complete records.
for city in cities:
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field before touching the accumulators: if any key is
        # missing, nothing is appended and all lists keep the same length.
        record = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except KeyError:
        # The response lacks the expected weather fields (e.g. unknown city).
        print("That city does not exist.")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON.
        print(f"Request failed for {city}: {err}")
    else:
        for values, value in zip(record_lists, record):
            values.append(value)

        print(f"Processing Record {citynumber} | {city}")

    # The record number advances for skipped cities too.
    citynumber = citynumber + 1

# End of print-out so user knows data retrieval is complete
print("All records processed.")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Column label -> list of values collected during retrieval.
# The order of the pairs fixes the column order of the table.
weather_dictionary = dict(
    [
        ("City", names),
        ("Lat", latitudes),
        ("Lng", longitudes),
        ("Max Temp", max_temps),
        ("Humidity", humidities),
        ("Cloudiness", clouds),
        ("Wind Speed", winds),
        ("Country", countries),
        ("Date", dates),
    ]
)

# Build the table, export it to the configured CSV path, then display it
weather_df = pd.DataFrame(data=weather_dictionary)
weather_df.to_csv(path_or_buf=output_data_file)
weather_df

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Index labels of the rows whose reported humidity exceeds 100%.
over_100_mask = weather_df['Humidity'] > 100
humidity_data = weather_df.index[over_100_mask].tolist()

In [None]:
# Remove the humidity outliers from weather_df in place, by index label;
# when there are none, just report that nothing needed removing.
if humidity_data:
    weather_df.drop(index=humidity_data, inplace=True)
else:
    print("All humidities less than or equal to 100%.")

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude for every retrieved city.
plt.scatter(weather_df['Lat'], weather_df['Max Temp'], edgecolors="black")
plt.xlabel("Latitude")
# The weather data is requested with units="metric", which OpenWeatherMap
# reports in Celsius, so the axis is labelled C rather than F.
plt.ylabel("Max Temperature (C)")
plt.title("City Latitude vs. Max Temperature (02/02/2021)")
plt.grid()

# Save before show(): the figure must still be current when it is written.
plt.savefig('output_data/Latitude_vs_Temperature_Plot.png')

plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots()
city_latitudes = weather_df['Lat']
city_humidities = weather_df['Humidity']
ax.scatter(city_latitudes, city_humidities, edgecolors="black")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.set_title("City Latitude vs. Humidity (02/02/2021)")
ax.grid()

# Write the image to disk first, then render the figure
fig.savefig('output_data/Latitude_vs_Humidity_Plot.png')

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots()
city_latitudes = weather_df['Lat']
city_cloudiness = weather_df['Cloudiness']
ax.scatter(city_latitudes, city_cloudiness, edgecolors="black")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title("City Latitude vs. Cloudiness (02/02/2021)")
ax.grid()

# Write the image to disk first, then render the figure
fig.savefig('output_data/Latitude_vs_Cloudiness_Plot.png')

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every retrieved city.
plt.scatter(weather_df['Lat'], weather_df['Wind Speed'], edgecolors="black")
plt.xlabel("Latitude")
# The weather data is requested with units="metric", which OpenWeatherMap
# reports in meters per second, so the axis is labelled m/s rather than mph.
plt.ylabel("Wind Speed (m/s)")
plt.title("City Latitude vs. Wind Speed (02/02/2021)")
plt.grid()

# Save before show(): the figure must still be current when it is written.
plt.savefig('output_data/Latitude_vs_WindSpeed_Plot.png')

plt.show()

## Linear Regression

In [None]:
# Split the cities by hemisphere for the regressions below. Rows exactly on
# the equator (Lat == 0) go to the northern frame only, so no city is
# counted in both hemispheres.
northern_weather_df = weather_df[weather_df.Lat >= 0]
southern_weather_df = weather_df[weather_df.Lat < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress max temperature on latitude and plot the fit.

# Set axes
x_axis1 = northern_weather_df['Lat']
y_axis1 = northern_weather_df['Max Temp']

# Pearson correlation between latitude and max temperature
correlation1 = sts.pearsonr(x_axis1, y_axis1)
print(f"The r-value is: {round(correlation1[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis1, y_axis1)

# Fitted values at the observed latitudes
regression_line = x_axis1 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would misstate the fitted slope and intercept.
linear_equation = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis1, y_axis1)

# Plot the regression line on top of scatterplot
plt.plot(x_axis1, regression_line, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation, (38,8), fontsize=16, color="red")

# Set plot labels. The data is requested with units="metric", which
# OpenWeatherMap reports in Celsius, so the axis is labelled C rather than F.
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (C)")
plt.title("Northern Hemisphere City Latitude vs. Max Temperature (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Northern_Hemisphere_Latitude_vs_Temperature_Plot.png')

# Print plot
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress max temperature on latitude and plot the fit.

# Set axes
x_axis2 = southern_weather_df['Lat']
y_axis2 = southern_weather_df['Max Temp']

# Pearson correlation between latitude and max temperature
correlation2 = sts.pearsonr(x_axis2, y_axis2)
print(f"The r-value is: {round(correlation2[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis2, y_axis2)

# Fitted values at the observed latitudes
regression_line2 = x_axis2 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would misstate the fitted slope and intercept.
linear_equation2 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis2, y_axis2)

# Plot the regression line on top of scatterplot
plt.plot(x_axis2, regression_line2, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation2, (-30,26), fontsize=16, color="red")

# Set plot labels. The data is requested with units="metric", which
# OpenWeatherMap reports in Celsius, so the axis is labelled C rather than F.
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (C)")
plt.title("Southern Hemisphere City Latitude vs. Max Temperature (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Southern_Hemisphere_Latitude_vs_Temperature_Plot.png')

# Print plot
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress humidity on latitude and plot the fit.

# Set axes
x_axis3 = northern_weather_df['Lat']
y_axis3 = northern_weather_df['Humidity']

# Pearson correlation between latitude and humidity
correlation3 = sts.pearsonr(x_axis3, y_axis3)
print(f"The r-value is: {round(correlation3[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis3, y_axis3)

# Fitted values at the observed latitudes
regression_line3 = x_axis3 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation3 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis3, y_axis3)

# Plot the regression line on top of scatterplot
plt.plot(x_axis3, regression_line3, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation3, (50,70), fontsize=16, color="red")

# Set plot labels
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Northern Hemisphere City Latitude vs. Humidity (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Northern_Hemisphere_Latitude_vs_Humidity_Plot.png')

# Print plot
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress humidity on latitude and plot the fit.

# Set axes
x_axis4 = southern_weather_df['Lat']
y_axis4 = southern_weather_df['Humidity']

# Pearson correlation between latitude and humidity
correlation4 = sts.pearsonr(x_axis4, y_axis4)
print(f"The r-value is: {round(correlation4[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis4, y_axis4)

# Fitted values at the observed latitudes
regression_line4 = x_axis4 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation4 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis4, y_axis4)

# Plot the regression line on top of scatterplot
plt.plot(x_axis4, regression_line4, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation4, (-30,67.8), fontsize=16, color="red")

# Set plot labels
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Southern Hemisphere City Latitude vs. Humidity (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Southern_Hemisphere_Latitude_vs_Humidity_Plot.png')

# Print plot
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress cloudiness on latitude and plot the fit.

# Set axes
x_axis5 = northern_weather_df['Lat']
y_axis5 = northern_weather_df['Cloudiness']

# Pearson correlation between latitude and cloudiness
correlation5 = sts.pearsonr(x_axis5, y_axis5)
print(f"The r-value is: {round(correlation5[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis5, y_axis5)

# Fitted values at the observed latitudes
regression_line5 = x_axis5 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation5 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis5, y_axis5)

# Plot the regression line on top of scatterplot
plt.plot(x_axis5, regression_line5, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation5, (50,45), fontsize=16, color="red")

# Set plot labels
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Northern Hemisphere City Latitude vs. Cloudiness (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Northern_Hemisphere_Latitude_vs_Cloudiness_Plot.png')

# Print plot
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress cloudiness on latitude and plot the fit.

# Set axes
x_axis6 = southern_weather_df['Lat']
y_axis6 = southern_weather_df['Cloudiness']

# Pearson correlation between latitude and cloudiness
correlation6 = sts.pearsonr(x_axis6, y_axis6)
print(f"The r-value is: {round(correlation6[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis6, y_axis6)

# Fitted values at the observed latitudes
regression_line6 = x_axis6 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation6 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis6, y_axis6)

# Plot the regression line on top of scatterplot
plt.plot(x_axis6, regression_line6, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation6, (-30,62), fontsize=16, color="red")

# Set plot labels
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Southern Hemisphere City Latitude vs. Cloudiness (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Southern_Hemisphere_Latitude_vs_Cloudiness_Plot.png')

# Print plot
plt.show()

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress wind speed on latitude and plot the fit.

# Set axes
x_axis7 = northern_weather_df['Lat']
y_axis7 = northern_weather_df['Wind Speed']

# Pearson correlation between latitude and wind speed
correlation7 = sts.pearsonr(x_axis7, y_axis7)
print(f"The r-value is: {round(correlation7[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis7, y_axis7)

# Fitted values at the observed latitudes
regression_line7 = x_axis7 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation7 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis7, y_axis7)

# Plot the regression line on top of scatterplot
plt.plot(x_axis7, regression_line7, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation7, (55,2.5), fontsize=16, color="red")

# Set plot labels. The data is requested with units="metric", which
# OpenWeatherMap reports in meters per second, so the axis is labelled m/s.
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.title("Northern Hemisphere City Latitude vs. Wind Speed (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Northern_Hemisphere_Latitude_vs_WindSpeed_Plot.png')

# Print plot
plt.show()

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress wind speed on latitude and plot the fit.

# Set axes
x_axis8 = southern_weather_df['Lat']
y_axis8 = southern_weather_df['Wind Speed']

# Pearson correlation between latitude and wind speed
correlation8 = sts.pearsonr(x_axis8, y_axis8)
print(f"The r-value is: {round(correlation8[0],6)}")

# Least-squares fit of y on x
(slope, intercept, rvalue, pvalue, stderr) = sts.linregress(x_axis8, y_axis8)

# Fitted values at the observed latitudes
regression_line8 = x_axis8 * slope + intercept

# Equation text for the annotation. Two decimals are kept: rounding to a
# whole number would print a fractional slope as 0.
linear_equation8 = f"y = {slope:.2f}x + {intercept:.2f}"

# Plot the scatterplot data
plt.scatter(x_axis8, y_axis8)

# Plot the regression line on top of scatterplot
plt.plot(x_axis8, regression_line8, color="red")

# Print the linear equation on the scatterplot (position is in data coordinates)
plt.annotate(linear_equation8, (-30,6), fontsize=16, color="red")

# Set plot labels. The data is requested with units="metric", which
# OpenWeatherMap reports in meters per second, so the axis is labelled m/s.
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.title("Southern Hemisphere City Latitude vs. Wind Speed (02/02/2021)")

# Save figure as output .png file
plt.savefig('output_data/Southern_Hemisphere_Latitude_vs_WindSpeed_Plot.png')

# Print plot
plt.show()