# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path for the exported city data (CSV)
output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) pairs: latitudes first, then longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# Materialise the pairs: a bare zip() is a one-shot iterator that would be
# empty once the loop below has consumed it.
lat_lngs = list(zip(lats, lngs))

# Identify the nearest city for each lat, lng combination.
# `cities` keeps first-seen order; `seen_cities` gives constant-time duplicate
# checks instead of rescanning the list for every coordinate pair.
cities = []
seen_cities = set()
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only the first occurrence of each city name is kept
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
city_count = len(cities)
city_count

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information
city = "Chicago"
units = 'Imperial'

# Base endpoint for the current-weather API. HTTPS is used so the API key in
# the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Sample query for a single city, handy for checking the URL format by hand
query_url = f"{url}appid={weather_api_key}&units={units}&q={city}"





In [None]:
# One independent, initially empty accumulator per weather field; the API
# loop appends to these in lockstep (one entry per successfully fetched city).
(
    city_results,
    lats,
    lons,
    max_temps,
    humiditys,
    cloud_pcts,
    wind_speeds,
    countries,
    dates,
) = ([] for _ in range(9))

In [None]:
# # TEST CELL
# query_url = f"{url}appid={weather_api_key}&units={units}&q="
# cities_test = ["Chicago","London","StrawnygiJoe","Rockford","Sukkur","Sydney","Jakarta","Johannesburg","Lima"]
# city_count_test=len(cities_test)

# for x in range(city_count_test):    #city in cities:
#     time.sleep(1)
#     weather_response = requests.get(query_url+str(cities_test[x])).json()
#     print(f"Processing record {x} of {city_count_test} | {cities_test[x]}")
#     # need to use exception handling if city not found
#     try:
#         city_results.append(weather_response['name'])
#         lats.append(weather_response['coord']['lat']) 
#         lons.append(weather_response['coord']['lon']) 
#         max_temps.append(weather_response['main']['temp_max']) 
#         humiditys.append(weather_response['main']['humidity'])
#         cloud_pcts.append(weather_response['clouds']['all'])
#         wind_speeds.append(weather_response['wind']['speed'])
#         countries.append(weather_response['sys']['country'])
#         dates.append(weather_response['dt'])
#     except KeyError:
#         print(f"    The city of {cities_test[x]} not found, dummy.  Skipping....")
#         pass
# print("\n-----------------------------\nData Retrieval Complete\n-----------------------------\n")

In [None]:
# REAL CELL
# Query the weather API once per candidate city and collect the fields used
# later in the notebook. Cities the API cannot resolve are logged and skipped.
units = 'Imperial'
# The city name is supplied through `params` below so that requests
# URL-encodes it (names may contain spaces or other reserved characters).
query_url = f"{url}appid={weather_api_key}&units={units}"

for x, city_name in enumerate(cities):
    # Pause between calls (presumably to stay under the API rate limit)
    time.sleep(1)
    print(f"Processing record {x} of {city_count} | {city_name}")

    try:
        # timeout keeps one stalled connection from hanging the whole run
        weather_response = requests.get(
            query_url, params={"q": city_name}, timeout=10
        ).json()
    except (requests.RequestException, ValueError) as err:
        # Network failure or a body that is not valid JSON
        print(f"    Request for {city_name} failed ({err}).  Skipping....")
        continue

    # Read every field BEFORE touching the result lists: if any key is
    # missing, nothing is appended, so all lists stay the same length and can
    # still be combined into one DataFrame.
    try:
        record = (
            weather_response['name'],
            weather_response['coord']['lat'],
            weather_response['coord']['lon'],
            weather_response['main']['temp_max'],
            weather_response['main']['humidity'],
            weather_response['clouds']['all'],
            weather_response['wind']['speed'],
            weather_response['sys']['country'],
            weather_response['dt'],
        )
    except KeyError:
        print(f"    The city of {city_name} not found, dummy.  Skipping....")
        continue

    # Targets are listed in the same order as the fields in `record`
    targets = (city_results, lats, lons, max_temps, humiditys,
               cloud_pcts, wind_speeds, countries, dates)
    for target, value in zip(targets, record):
        target.append(value)
print("\n-----------------------------\nData Retrieval Complete\n-----------------------------\n")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# city data is converted to a csv at the end of the notebook

In [None]:
# Assemble the collected per-city lists into a single table.
# Each (column name, values) pair becomes one column, in the order listed.
city_data_df = pd.DataFrame(dict([
    ('City', city_results),
    ('Lat', lats),
    ('Lng', lons),
    ('Max Temp', max_temps),
    ('Humidity', humiditys),
    ('Cloudiness', cloud_pcts),
    ('Wind Speed', wind_speeds),
    ('Country', countries),
    ('Date', dates),
]))
city_data_df

In [None]:
# Prep for summary stats: select every column except Country, then use the
# city name as the row index.
city_data_2_df = city_data_df.loc[
    :,
    ["City", "Lat", "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed", "Date"],
]
city_data_3_df = city_data_2_df.set_index(keys="City")
city_data_3_df

In [None]:
# Summary statistics (count, mean, std, min, max) for each column of the city-indexed table
city_data_3_df.agg(['count','mean','std','min','max'] )


In [None]:
# Quartiles (25th, 50th and 75th percentiles) for each column of the city-indexed table
city_data_3_df.quantile([.25,.5,.75])


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Keep only the rows whose humidity reading is at most 100
city_data_clean_df = city_data_df.loc[city_data_df["Humidity"].le(100), :]

city_data_clean_df

In [None]:
 #  prep for .agg method
city_data_clean_2_df = city_data_clean_df[["City","Lat","Lng","Max Temp","Humidity","Cloudiness","Wind Speed","Date"]]
city_data_clean_3_df = city_data_clean_2_df.set_index("City")
city_data_clean_3_df

In [None]:
# Summary statistics for the humidity-filtered data. This must read the
# *clean* table; the unfiltered table is already summarised earlier.
city_data_clean_3_df.agg(['count','mean','std','min','max'] )

In [None]:
# Quartiles (25th, 50th and 75th percentiles) for each column of the humidity-filtered table
city_data_clean_3_df.quantile([.25,.5,.75])


In [None]:
# Rows of the raw data whose humidity reading exceeds 100; their index
# identifies the cities excluded from the clean table.
city_humid_df = city_data_df.loc[city_data_df["Humidity"].gt(100), :]
city_humid_df

In [None]:
# Index labels of the rows with humidity over 100
city_humid_df.index

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Latitude vs Temp Scatter Plot
plt.scatter(city_data_clean_df['Lat'],city_data_clean_df['Max Temp'],marker="o")

# titles and axis labels
plt.title("City Latitude vs. Max Temperature")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)

# Keep the default 0-120 F window, but widen it when readings fall outside it
# (e.g. sub-zero temperatures) so no points are silently clipped.
# With no data, min()/max() are NaN, both comparisons are False and the
# default window is used.
temp_min = city_data_clean_df['Max Temp'].min()
temp_max = city_data_clean_df['Max Temp'].max()
y_low = temp_min - 5 if temp_min < 0 else 0
y_high = temp_max + 5 if temp_max > 120 else 120
plt.ylim(y_low, y_high)

# Save the figure
plt.savefig("LatitudeVsMaxTempPlot.png")

# Show plot
plt.show()




In [None]:
# Written observations for the latitude vs. temperature scatter plot,
# emitted one per line.
print("\n".join([
    "The latitude vs temperature plot indicates that the highest temperatures are near the equator (latitude = 0)",
    "It maximum temperatures drop off when you get further away from the equator, either north or south.",
    "It also indicates there are more cities in the north hemisphere.",
]))

## Latitude vs. Humidity Plot

In [None]:
# Latitude vs Humidity Plot
latitudes = city_data_clean_df['Lat']
humidities = city_data_clean_df['Humidity']
plt.scatter(latitudes, humidities, marker="o")

# Title, axis labels, grid and fixed y-range, set on the current axes
axes = plt.gca()
axes.set_title("City Latitude vs. Humidity")
axes.set_ylabel("Humidity (%)")
axes.set_xlabel("Latitude")
axes.grid(True)
axes.set_ylim(0, 120)

# Write the figure to disk, then display it
plt.savefig("LatitudeVsHumidityPlot.png")
plt.show()

In [None]:
print("The latitude vs humidity plot indicates that there may not be much correlation between these two factors")
print("That said, the chart seems to indicate there may be slightly higher humidities in the northern hemisphere than the south....")
print("That may be due to some cities in the south hemisphere still experiencing early spring /late winter conditions, or \n it may be somewhat of an illusion given there are more cities (and more data) in the north.")

## Latitude vs. Cloudiness Plot

In [None]:
# Latitude vs Cloud Plot
# The y-values are the Cloudiness column, matching the title and axis label.
plt.scatter(city_data_clean_df['Lat'],city_data_clean_df['Cloudiness'],marker="o")

# titles and axis labels
plt.title("City Latitude vs. Cloudiness")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)

plt.ylim(0,120)

# Save the figure
plt.savefig("LatitudeVsCloudPlot.png")

# Show plot
plt.show()

In [None]:
# Written observation for the latitude vs. cloudiness scatter plot
print("As with humidity, the latitude vs cloudiness plot indicates that there may not be much correlation between these two factors")


## Latitude vs. Wind Speed Plot

In [None]:
# Latitude vs Wind Speed Plot
latitudes = city_data_clean_df['Lat']
wind_speeds_mph = city_data_clean_df['Wind Speed']
plt.scatter(latitudes, wind_speeds_mph, marker="o")

# Title, axis labels, grid and fixed y-range, set on the current axes
axes = plt.gca()
axes.set_title("City Latitude vs. Wind Speed")
axes.set_ylabel("Wind Speed (mph)")
axes.set_xlabel("Latitude")
axes.grid(True)
axes.set_ylim(0, 60)

# Write the figure to disk, then display it
plt.savefig("LatitudeVsWindPlot.png")
plt.show()

In [None]:
# Written observations for the latitude vs. wind speed scatter plot.
# The first sentence names the wind speed plot (it previously said
# "cloudiness", carried over from the preceding section).
print("As with humidity and cloudiness, the latitude vs wind speed plot indicates that there may not be much correlation\n between wind and latitude.")
print("There may be slightly higher wind points in the north, but again it is difficult to tell \ngiven the uneven distribution of cities in the two hemispheres.")

## Linear Regression

In [None]:
# Northern hemisphere subset: rows with latitude of zero or above
city_data_clean_north_df = city_data_clean_df.loc[city_data_clean_df["Lat"].ge(0), :]

city_data_clean_north_df

In [None]:
# Southern hemisphere subset: rows with latitude strictly below zero
city_data_clean_south_df = city_data_clean_df.loc[city_data_clean_df["Lat"].lt(0), :]

city_data_clean_south_df

In [None]:
# Prep series for plotting and linear regression: for each measured column,
# pull the north and south hemisphere series in one step (north first).
lat_north_values, lat_south_values = (
    frame['Lat'] for frame in (city_data_clean_north_df, city_data_clean_south_df)
)
temp_north_values, temp_south_values = (
    frame['Max Temp'] for frame in (city_data_clean_north_df, city_data_clean_south_df)
)
cloud_north_values, cloud_south_values = (
    frame['Cloudiness'] for frame in (city_data_clean_north_df, city_data_clean_south_df)
)
humid_north_values, humid_south_values = (
    frame['Humidity'] for frame in (city_data_clean_north_df, city_data_clean_south_df)
)
wind_north_values, wind_south_values = (
    frame['Wind Speed'] for frame in (city_data_clean_north_df, city_data_clean_south_df)
)

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude vs Temp Scatter Plot with regression north hemisphere

# Fit max temperature as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_north_values, temp_north_values)

# Get regression values
regress_values_north = lat_north_values * slope + intercept
line_eq_north= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_north_values,temp_north_values,marker="o")
plt.plot(lat_north_values,regress_values_north,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Max Temperature with Linear Regression Line\nNorth Hemisphere")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes)
# rather than data coordinates, so it always lands inside the plot area
# regardless of the data range.
plt.annotate(line_eq_north,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_north}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsMaxTempPlotWithRegrssNorth.png")
# Show plot
plt.show()



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude vs Temp Scatter Plot with regression SOUTH hemisphere

# Fit max temperature as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_south_values, temp_south_values)

# Get regression values
regress_values_south = lat_south_values * slope + intercept
line_eq_south= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_south_values,temp_south_values,marker="o")
plt.plot(lat_south_values,regress_values_south,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Max Temperature with Linear Regression Line\nSouth Hemisphere")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)
# Annotate with the SOUTH equation computed above. The position is given in
# axes-fraction coordinates (0-1 across the axes) so the text always lands
# inside the plot area regardless of the data range.
plt.annotate(line_eq_south,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_south}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsMaxTempPlotWithRegrssSouth.png")
# Show plot
plt.show()

In [None]:
# Written observations for the two temperature regressions, one per print line
print("\n".join([
    "The north hemisphere has a negative correlation between temperature and latitude, while the south has a positive correlation",
    "The R squared value is roughly 0.5 for both regression equations, meaning the equations are only moderately good at\n predicting max temperature based on latitude.",
]))

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Humidity vs Lat Scatter Plot with regression north hemisphere

# Fit humidity as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_north_values, humid_north_values)

# Get regression values
regress_values_north = lat_north_values * slope + intercept
line_eq_north= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_north_values,humid_north_values,marker="o")
plt.plot(lat_north_values,regress_values_north,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Humidity with Linear Regression Line\nNorth Hemisphere")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes)
# rather than data coordinates, so it always lands inside the plot area
# regardless of the data range.
plt.annotate(line_eq_north,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_north}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsHumidPlotWithRegrssNorth.png")
# Show plot
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Humidity vs Lat Scatter Plot with regression SOUTH hemisphere

# Fit humidity as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_south_values, humid_south_values)

# Get regression values
regress_values_south = lat_south_values * slope + intercept
line_eq_south= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_south_values,humid_south_values,marker="o")
plt.plot(lat_south_values,regress_values_south,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Humidity with Linear Regression Line\nSouth Hemisphere")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes).
# A data-coordinate anchor at x=20 lies outside this plot, because southern
# latitudes are all negative.
plt.annotate(line_eq_south,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_south}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVHumidPlotWithRegrssSouth.png")
# Show plot
plt.show()

In [None]:
# Written observations for the two humidity regressions, one per print line
print("\n".join([
    "The regression lines for both the north and south hemisphere have a slightly positive slope, indicating a\n very weak correlation between humidity and latitude.",
    "The R squared value is roughly 0 for both regression equations, meaning the equations cannot be relied on to\n predict humidity based on latitude.",
]))

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Cloudiness vs Lat Scatter Plot with regression north hemisphere

# Fit cloudiness as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_north_values, cloud_north_values)

# Get regression values
regress_values_north = lat_north_values * slope + intercept
line_eq_north= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot: scatter the cloudiness values, i.e. the same data the
# regression line was fitted to.
plt.scatter(lat_north_values,cloud_north_values,marker="o")
plt.plot(lat_north_values,regress_values_north,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Cloudiness with Linear Regression Line\nNorth Hemisphere")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes)
# rather than data coordinates, so it always lands inside the plot area
# regardless of the data range.
plt.annotate(line_eq_north,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_north}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsCloudPlotWithRegrssNorth.png")
# Show plot
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Cloudiness vs Lat Scatter Plot with regression SOUTH hemisphere

# Fit cloudiness as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_south_values, cloud_south_values)

# Get regression values
regress_values_south = lat_south_values * slope + intercept
line_eq_south= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot: scatter the cloudiness values, i.e. the same data the
# regression line was fitted to.
plt.scatter(lat_south_values,cloud_south_values,marker="o")
plt.plot(lat_south_values,regress_values_south,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Cloudiness with Linear Regression Line\nSouth Hemisphere")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes).
# A data-coordinate anchor at x=20 lies outside this plot, because southern
# latitudes are all negative.
plt.annotate(line_eq_south,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_south}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsCloudPlotWithRegrssSouth.png")
# Show plot
plt.show()

In [None]:
# Written observations for the two cloudiness regressions.
# The second sentence refers to cloudiness (it previously said "humidity",
# carried over from the humidity section).
print('The results for cloudiness are very similar to the results for humidity.')
print('The regression lines for both the north and south hemisphere have a slightly positive slope, indicating a\n very weak correlation between cloudiness and latitude.')
print("The R squared value is roughly 0 for both regression equations, meaning the equations cannot be relied on to\n predict cloudiness based on latitude.")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Wind vs Lat Scatter Plot with regression North hemisphere

# Fit wind speed as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_north_values, wind_north_values)

# Get regression values
regress_values_north = lat_north_values * slope + intercept
line_eq_north= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_north_values,wind_north_values,marker="o")
plt.plot(lat_north_values,regress_values_north,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Wind Speed with Linear Regression Line\nNorth Hemisphere")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes)
# rather than data coordinates, so it always lands inside the plot area
# regardless of the data range.
plt.annotate(line_eq_north,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_north}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsWindSpeedWithRegrssNorth.png")
# Show plot
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Wind vs Lat Scatter Plot with regression South hemisphere

# Fit wind speed as a linear function of latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(lat_south_values, wind_south_values)

# Get regression values
regress_values_south = lat_south_values * slope + intercept
line_eq_south= "y = " + str(round(slope,2)) + "x +" + str(round(intercept,2))

# Create Plot
plt.scatter(lat_south_values,wind_south_values,marker="o")
plt.plot(lat_south_values,regress_values_south,"r-")


# Label plot and annotate the line equation
plt.title("City Latitude vs. Wind Speed with Linear Regression Line\nSouth Hemisphere")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)
# Position the equation in axes-fraction coordinates (0-1 across the axes)
# rather than data coordinates, so it always lands inside the plot area
# regardless of the data range.
plt.annotate(line_eq_south,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

# Print r-squared value and the fitted equation
print(f"The r-squared value is: {rvalue**2}")
print(f"The regression equation is: {line_eq_south}")

# Lay out and save BEFORE showing: after plt.show() the current figure may
# already be closed, in which case savefig would write a blank image.
plt.tight_layout()
plt.savefig("LatitudeVsWindSpeedWithRegrssSouth.png")
# Show plot
plt.show()

In [None]:
# Written observations for the two wind speed regressions.
# The second sentence refers to wind speed (it previously said "humidity",
# carried over from the humidity section).
print('The results for wind speed are very similar to the results for humidity and cloudiness.')
print('The regression lines for both the north and south hemisphere have a slightly positive slope, indicating a\n very weak correlation between wind speed and latitude.')
print("The R squared value is roughly 0 for both regression equations, meaning the equations cannot be relied on to\n predict wind speed based on latitude.")

In [None]:
# Export each stage of the city data (raw, humidity-filtered, and the two
# hemisphere subsets) to its own CSV file in the working directory.
for frame, csv_name in (
    (city_data_df, "city_data.csv"),
    (city_data_clean_df, "city_data_clean.csv"),
    (city_data_clean_north_df, "city_data_clean_north.csv"),
    (city_data_clean_south_df, "city_data_clean_south.csv"),
):
    frame.to_csv(csv_name)