# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

#### Analysis
Observed Trends
1.  In July, maximum temperatures range higher between the 30-50 latitude marks.
2.  In the Northern hemisphere, the maximum temperature gradually increases from the equator to about 30 degrees latitude and proceeds to decrease the further north the location.
3.  Wind speeds slightly decrease the further north a location.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key
# NOTE: do not hard-code the API key in this notebook; define weather_api_key in api_keys.py and keep that file out of version control.

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Bounds (min, max) used when sampling random coordinates
lat_range, lng_range = (-90, 90), (-180, 180)

# CSV path intended for the exported city data
output_data_file = "output_data/cities.csv"

## Generate Cities List

In [None]:
# Holder for city ids (not populated in this cell)
city_ids = []

# Draw 1500 random latitude/longitude pairs from the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys() drops
# repeated names while keeping first-seen order, so no linear list-membership
# scan is needed for each candidate city.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base URL for the OpenWeatherMap current-weather endpoint, requesting imperial
# units. HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [None]:
# City data
# CAUTION: This cell takes about 15 minutes to run.

# List of city data (one dict per successfully retrieved city)
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes, pausing 60 seconds
    # before starting each new set of requests
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Log the record and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    try:
        # Pass the city as a query parameter so requests URL-encodes it
        # (spaces and any other special characters); the timeout keeps one
        # stalled connection from hanging the whole loop.
        city_weather = requests.get(url, params={"q": city}, timeout=10).json()

        # Parse out the fields of interest; a missing key means the response
        # was not a normal weather payload, so the city is skipped below.
        city_record = {"City": city.title(),
                       "Lat": city_weather["coord"]["lat"],
                       "Lng": city_weather["coord"]["lon"],
                       "Max Temp": city_weather["main"]["temp_max"],
                       "Humidity": city_weather["main"]["humidity"],
                       "Cloudiness": city_weather["clouds"]["all"],
                       "Wind Speed": city_weather["wind"]["speed"],
                       "Country": city_weather["sys"]["country"],
                       "Date": city_weather["dt"]}
    except (requests.exceptions.RequestException, KeyError, ValueError):
        # Network failure, non-JSON body, or payload without the expected
        # fields: skip this city. Other exception types (including
        # KeyboardInterrupt) are no longer swallowed.
        print("City not found. Skipping...")
    else:
        # Append the City information into city_data list
        city_data.append(city_record)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Build a DataFrame from the collected city records
city_data_df = pd.DataFrame(city_data)

# Export to the CSV path configured in the setup cell (output_data_file)
# rather than a second, hard-coded file name in the working directory
city_data_df.to_csv(output_data_file)

# Display the DataFrame
city_data_df

In [None]:
# Show summary statistics for the DataFrame's numeric columns
city_data_df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# In this run, describe() above reports a maximum humidity of 100%,
# so no city exceeds 100% humidity and this step is skipped.

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs in the output_data folder
* Below are examples of what you should get but your results will be different.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of latitude vs. max temperature for every retrieved city
plt.figure(figsize=(6, 4))
x_values = city_data_df["Lat"]
y_values = city_data_df["Max Temp"]

# Take the title date from the newest observation in the data instead of
# hard-coding it, so every plot is labelled consistently.
# NOTE(review): assumes the "Date" column (the API's "dt" field) holds Unix
# timestamps in seconds - confirm against the OpenWeatherMap docs.
analysis_date = pd.to_datetime(city_data_df["Date"].max(), unit="s").strftime("%m/%d/%y")

# Create plot:
plt.scatter(x_values, y_values, marker="o", color="blue", s=70, edgecolors="black")

# Incorporate the other graph properties:
plt.title(f"City Latitude vs. Max Temperature ({analysis_date})", fontsize=10)
plt.ylabel("Max Temperature (F)", fontsize=10, color="black")
plt.xlabel("Latitude", fontsize=10, color="black")
plt.grid(True)

# Save the figure:
plt.savefig('output_data/Latitude vs. Max Temperature.png')

plt.show()

#### The above plot is displaying the max temperature of different latitudes. Max temperatures are displaying higher from 30-50 latitude. 

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of latitude vs. humidity for every retrieved city
plt.figure(figsize=(6, 4))
x_values = city_data_df["Lat"]
y_values = city_data_df["Humidity"]

# Take the title date from the newest observation in the data instead of
# hard-coding it, so every plot is labelled consistently.
# NOTE(review): assumes the "Date" column (the API's "dt" field) holds Unix
# timestamps in seconds - confirm against the OpenWeatherMap docs.
analysis_date = pd.to_datetime(city_data_df["Date"].max(), unit="s").strftime("%m/%d/%y")

# Create plot:
plt.scatter(x_values, y_values, marker="o", color="blue", s=70, edgecolors="black")

# Incorporate the other graph properties:
plt.title(f"City Latitude vs. Humidity ({analysis_date})", fontsize=10)
plt.ylabel("Humidity (%)", fontsize=10, color="black")
plt.xlabel("Latitude", fontsize=10, color="black")
plt.grid(True)

# Save the figure:
plt.savefig('output_data/Latitude vs. Humidity.png')

plt.show()

#### The above plot is displaying the humidity of different latitudes. Humidity levels vary across latitudes. There is no correlation between the two variables.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of latitude vs. cloudiness for every retrieved city
plt.figure(figsize=(6, 4))
x_values = city_data_df["Lat"]
y_values = city_data_df["Cloudiness"]

# Take the title date from the newest observation in the data instead of
# hard-coding it, so every plot is labelled consistently.
# NOTE(review): assumes the "Date" column (the API's "dt" field) holds Unix
# timestamps in seconds - confirm against the OpenWeatherMap docs.
analysis_date = pd.to_datetime(city_data_df["Date"].max(), unit="s").strftime("%m/%d/%y")

# Create plot:
plt.scatter(x_values, y_values, marker="o", color="blue", s=70, edgecolors="black")

# Incorporate the other graph properties (axis label formatted like the
# humidity plot's "Humidity (%)"):
plt.title(f"City Latitude vs. Cloudiness ({analysis_date})", fontsize=10)
plt.ylabel("Cloudiness (%)", fontsize=10, color="black")
plt.xlabel("Latitude", fontsize=10, color="black")
plt.grid(True)

# Save the figure:
plt.savefig('output_data/Latitude vs. Cloudiness.png')

plt.show()

#### The above plot is displaying cloudiness of different latitudes. Clouds vary across latitudes and are not dependent on latitude location.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of latitude vs. wind speed for every retrieved city
plt.figure(figsize=(6, 4))
x_values = city_data_df["Lat"]
y_values = city_data_df["Wind Speed"]

# Take the title date from the newest observation in the data instead of
# hard-coding it, so every plot is labelled consistently.
# NOTE(review): assumes the "Date" column (the API's "dt" field) holds Unix
# timestamps in seconds - confirm against the OpenWeatherMap docs.
analysis_date = pd.to_datetime(city_data_df["Date"].max(), unit="s").strftime("%m/%d/%y")

# Create plot:
plt.scatter(x_values, y_values, marker="o", color="blue", s=70, edgecolors="black")

# Incorporate the other graph properties:
plt.title(f"City Latitude vs. Wind Speed ({analysis_date})", fontsize=10)
plt.ylabel("Wind Speed (mph)", fontsize=10, color="black")
plt.xlabel("Latitude", fontsize=10, color="black")
plt.grid(True)

# Save the figure:
plt.savefig('output_data/Latitude vs. Wind Speed.png')

plt.show()

#### The above plot is displaying wind speed of different latitudes. Wind speeds vary across latitudes and are not dependent on latitude location.

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Find cities located in the Northern Hemisphere. Latitude 0 is included here
# so a city exactly on the equator is not dropped from both hemisphere subsets
# (the southern subset uses a strict "< 0").
northern_hemisphere_df = city_data_df.loc[city_data_df["Lat"] >= 0]
northern_hemisphere_df

In [None]:
# Northern Hemisphere - Max Temp vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = northern_hemisphere_df["Lat"]
y_values = northern_hemisphere_df["Max Temp"]

# Fit a least-squares line of Max Temp against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Max Temp at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Max Temp", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Northern Hemisphere - Latitude vs. Max Temp.png')

# Show plot:
plt.show()

#### Now, the above plot is only displaying maximum temperature of different latitudes in the northern hemisphere. The maximum temperature gradually increases from the equator to about 30 degrees latitude and proceeds to decrease the further north the location.

In [None]:
# Keep only the rows whose latitude is below zero (southern hemisphere)
southern_hemisphere_df = city_data_df[city_data_df["Lat"].lt(0)]
southern_hemisphere_df

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: Max Temp vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = southern_hemisphere_df["Lat"]
y_values = southern_hemisphere_df["Max Temp"]

# Fit a least-squares line of Max Temp against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Max Temp at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Max Temp", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Southern Hemisphere - Latitude vs. Max Temp.png')

# Show plot:
plt.show()

#### Now, this plot above is displaying the maximum temperature of different latitudes in the southern hemisphere. The maximum temperature consistently increases the further north the location.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Humidity(%) vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = northern_hemisphere_df["Lat"]
y_values = northern_hemisphere_df["Humidity"]

# Fit a least-squares line of Humidity against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Humidity at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Humidity", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Northern Hemisphere - Latitude vs. Humidity.png')

# Show plot:
plt.show()

#### The above plot is displaying humidity of different latitudes in the northern hemisphere. It shows us that there is no relationship between humidity and location in the northern hemisphere.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: Humidity(%) vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = southern_hemisphere_df["Lat"]
# Use the Humidity column: this cell previously read "Max Temp", so the plot
# labelled and saved as humidity actually showed temperature.
y_values = southern_hemisphere_df["Humidity"]

# Fit a least-squares line of Humidity against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Humidity at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Humidity", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Southern Hemisphere - Latitude vs. Humidity.png')

# Show plot:
plt.show()

#### The above plot is displaying humidity of different latitudes in the southern hemisphere. NOTE: the earlier conclusion that humidity increases the closer one gets to the equator was drawn from a plot that mistakenly used Max Temp on the y-axis; re-run the cell above and re-assess the relationship using the printed r-value.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Cloudiness(%) vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = northern_hemisphere_df["Lat"]
y_values = northern_hemisphere_df["Cloudiness"]

# Fit a least-squares line of Cloudiness against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Cloudiness at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Cloudiness", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Northern Hemisphere - Latitude vs. Cloudiness.png')

# Show plot:
plt.show()

#### The above plot is displaying the cloudiness of different latitudes in the northern hemisphere. It shows us that there is no relationship between cloudiness and location in the northern hemisphere.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: Cloudiness(%) vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = southern_hemisphere_df["Lat"]
y_values = southern_hemisphere_df["Cloudiness"]

# Fit a least-squares line of Cloudiness against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Cloudiness at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Cloudiness", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Southern Hemisphere - Latitude vs. Cloudiness.png')

# Show plot:
plt.show()

#### The above plot is displaying the cloudiness of different latitudes in the southern hemisphere. It shows us that there is no relationship between cloudiness and location.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = northern_hemisphere_df["Lat"]
y_values = northern_hemisphere_df["Wind Speed"]

# Fit a least-squares line of Wind Speed against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Wind Speed at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Wind Speed", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Northern Hemisphere - Latitude vs. Wind Speed.png')

# Show plot:
plt.show()

#### The above plot is displaying the wind speed of different latitudes in the northern hemisphere. Wind speeds vary greatly throughout locations with no significant relationship between wind speed and location.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: Wind Speed vs. Latitude Linear Regression
plt.figure(figsize=(6, 4))
x_values = southern_hemisphere_df["Lat"]
y_values = southern_hemisphere_df["Wind Speed"]

# Fit a least-squares line of Wind Speed against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Predicted Wind Speed at each latitude along the fitted line
regress_values = x_values * slope + intercept

# Fitted line as text, in the form "y = <slope>x + <intercept>"
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Create plot
plt.scatter(x_values, y_values, marker="o", color="blue", s=60, edgecolors="black")
plt.plot(x_values, regress_values, color="red", linewidth=2)

# Show the fitted equation on the figure. Axes-fraction coordinates keep the
# label inside the plot area regardless of the data range.
plt.annotate(line_equation, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=10, color="red")

# Additional graph properties
plt.xlabel("Latitude", fontsize=10, color="black")
plt.ylabel("Wind Speed", fontsize=10, color="black")

# Print the correlation coefficient (r-value, not r-squared) of the fit
print(f"The r-value is: {rvalue}")

# Save the figure:
plt.savefig('output_data/Southern Hemisphere - Latitude vs. Wind Speed.png')

# Show plot:
plt.show()

#### The above plot is displaying wind speed of different latitudes in the southern hemisphere. Wind speeds here also vary greatly throughout locations with no significant relationship between wind speed and location.