# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import numpy as np
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import linregress
from pprint import pprint
from citipy import citipy

# Import API key
from api_keys import weather_api_key
from api_keys import g_key

# Sample ntot random coordinate pairs: latitudes within [-90, 90], longitudes within [-180, 180]
np.random.seed()  # called without a fixed seed, so every run draws a different set of points
ntot = 2000
Lat_list = np.random.uniform(-90.0, 90.0, size=ntot).tolist()
Lng_list = np.random.uniform(-180.0, 180.0, size=ntot).tolist()

## Generate Cities List

In [2]:
# Look up the nearest city for every random (latitude, longitude) pair
city_list = [citipy.nearest_city(lat, lng) for lat, lng in zip(Lat_list, Lng_list)]

# Remove repeated cities; dict keys keep the first-seen order
city_list = list(dict.fromkeys(city_list))

# Show how many distinct cities are left
len(city_list)

SyntaxError: Generator expression must be parenthesized (<ipython-input-2-1f0e2522df80>, line 2)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# OpenWeatherMap current-weather endpoint.
# HTTPS is used so the API key carried in the query string is not sent in cleartext.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"  # the plots below label temperature in F and wind speed in mph

# Partial query URL: append a city name after "q=" to form a complete request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [None]:
# Parallel result lists: position i in every list describes the same city
city_name_list = []
city_lat_list = []
city_lon_list = []
city_mtemp_list = []
city_hum_list = []
city_cl_list = []
city_wind_list = []
city_ctr_list = []
city_dt_list = []

print("Beginning Data Retrieval")
print("-----------------------------")

# Loop through the city_list and retrieve the weather data for each city
for city in city_list:
    try:
        # A timeout (seconds) keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city.city_name, timeout=10).json()

        # Read every field before storing any of them: if a later key is missing,
        # nothing has been appended yet, so the lists can never end up with
        # different lengths (which would break the DataFrame construction later).
        record = (
            response["name"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["sys"]["country"],
            response["dt"],
        )
    except requests.exceptions.RequestException as err:
        # Network-level failure. Only the exception type is printed because the
        # full message can contain the request URL, which includes the API key.
        print(f"Request for {city.city_name} failed ({type(err).__name__}). Skipping")
        continue
    except (KeyError, TypeError, ValueError):
        # The reply could not be decoded or lacks the expected fields
        # (e.g. an error payload returned for an unknown city).
        print("City not found. Skipping")
        continue

    # Store the complete record in one go, keeping all lists aligned
    targets = (
        city_name_list,
        city_lat_list,
        city_lon_list,
        city_mtemp_list,
        city_hum_list,
        city_cl_list,
        city_wind_list,
        city_ctr_list,
        city_dt_list,
    )
    for target, value in zip(targets, record):
        target.append(value)
    print(f"Retrieved data of {record[0]}")

print("-----------------------------")
print("Data Retrieval Complete   ")
print("-----------------------------")

In [None]:
# Number of cities for which a complete weather record was retrieved
len(city_name_list)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Combine the per-city result lists into one table, one column per measurement
ct_df = pd.DataFrame(
    dict(
        zip(
            [
                "City",
                "Lat",
                "Lng",
                "Max Temp",
                "Humidity",
                "Cloudiness",
                "Wind Speed",
                "Country",
                "Date",
            ],
            [
                city_name_list,
                city_lat_list,
                city_lon_list,
                city_mtemp_list,
                city_hum_list,
                city_cl_list,
                city_wind_list,
                city_ctr_list,
                city_dt_list,
            ],
        )
    )
)

# Show the assembled table
ct_df

In [None]:
# Keep only the first row for each city name
ct_df = ct_df.drop_duplicates(subset=["City"])
ct_df

In [None]:
# Summary statistics for the columns of the de-duplicated city table
ct_df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Row labels of the cities whose reported humidity exceeds 100%
ct_df.loc[ct_df["Humidity"].gt(100)].index

In [None]:
# Build "clean_city_data" as a copy of ct_df without the rows whose humidity is above 100%;
# ct_df itself is left unchanged.
clean_city_data = ct_df.drop(index=ct_df.loc[ct_df["Humidity"].gt(100)].index)

# Write the cleaned table to disk without the index column
clean_city_data.to_csv("../output_data/city_data.csv", index=False)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of city latitude against maximum temperature, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(
    clean_city_data["Lat"],
    clean_city_data["Max Temp"],
    s=60,
    alpha=0.75,
    edgecolors="black",
)
ax.grid(True)
ax.set_xlabel("Latitude")
ax.set_ylabel("Max Temperature (F)")
ax.set_title("City Latitude vs Max Temperature")
fig.savefig("../output_data/Fig1_LatvsTemp.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of city latitude against humidity, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(
    clean_city_data["Lat"],
    clean_city_data["Humidity"],
    s=60,
    alpha=0.75,
    edgecolors="black",
)
ax.grid(True)
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.set_title("City Latitude vs Humidity")
fig.savefig("../output_data/Fig2_LatvsHum.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of city latitude against cloudiness, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(
    clean_city_data["Lat"],
    clean_city_data["Cloudiness"],
    s=60,
    alpha=0.75,
    edgecolors="black",
)
ax.grid(True)
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title("City Latitude vs Cloudiness")
fig.savefig("../output_data/Fig3_LatvsCloud.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of city latitude against wind speed, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(
    clean_city_data["Lat"],
    clean_city_data["Wind Speed"],
    s=60,
    alpha=0.75,
    edgecolors="black",
)
ax.grid(True)
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")
ax.set_title("City Latitude vs Wind Speed")
fig.savefig("../output_data/Fig4_LatvsWSpeed.png")
plt.show()

## Linear Regression

In [None]:
# Split the cleaned table by hemisphere; latitude 0 is counted as northern
NCity = clean_city_data[clean_city_data["Lat"].ge(0)]
SCity = clean_city_data[clean_city_data["Lat"].lt(0)]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude and maximum temperature of the northern-hemisphere cities
x_lat = NCity['Lat']
y_temp = NCity['Max Temp']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_temp)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_temp, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (lower-left corner) rather than at a
# fixed data point: a data-coordinate anchor is clipped whenever it falls outside
# the autoscaled range, which depends on the temperatures retrieved that day.
plt.annotate(line_eq,xy=(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title("Northern Hemisphere Latitude vs Max Temperature")
plt.savefig("../output_data/Fig5_NLatvsTemp.png")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude and maximum temperature of the southern-hemisphere cities
x_lat = SCity['Lat']
y_temp = SCity['Max Temp']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_temp)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_temp, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (upper-left corner) rather than at a
# fixed data point: a data-coordinate anchor is clipped whenever it falls outside
# the autoscaled range, which depends on the temperatures retrieved that day.
plt.annotate(line_eq,xy=(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
# Title spelling corrected ("Sourthern" -> "Southern")
plt.title("Southern Hemisphere Latitude vs Max Temperature")
plt.savefig("../output_data/Fig6_SLatvsTemp.png")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude and humidity of the northern-hemisphere cities
x_lat = NCity['Lat']
y_hum = NCity['Humidity']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_hum)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_hum, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (bottom centre) rather than at a fixed
# data point: a data-coordinate anchor is clipped whenever it falls outside the
# autoscaled range, which depends on the humidity values retrieved that day.
plt.annotate(line_eq,xy=(0.5,0.1),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Northern Hemisphere Latitude vs Humidity")
plt.savefig("../output_data/Fig7_NLatvsHum.png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude and humidity of the southern-hemisphere cities
x_lat = SCity['Lat']
y_hum = SCity['Humidity']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_hum)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_hum, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (lower left) rather than at a fixed
# data point: a data-coordinate anchor is clipped whenever it falls outside the
# autoscaled range, which depends on the humidity values retrieved that day.
plt.annotate(line_eq,xy=(0.25,0.1),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Southern Hemisphere Latitude vs Humidity")
plt.savefig("../output_data/Fig8_SLatvsHum.png")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude and cloudiness of the northern-hemisphere cities
x_lat = NCity["Lat"]
y_cl = NCity["Cloudiness"]

# Least-squares fit; print the squared correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_lat, y_cl)
print(f"The r-squared is: {rvalue**2}")
regress_values = slope * x_lat + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter of the data, fitted line and its equation, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(x_lat, y_cl, s=60, alpha=0.75, edgecolors="black")
ax.plot(x_lat, regress_values, "r-")
ax.annotate(line_eq, (40, 30), fontsize=15, color="red")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title("Northern Hemisphere Latitude vs Cloudiness")
fig.savefig("../output_data/Fig9_NLatvsCloud.png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude and cloudiness of the southern-hemisphere cities
x_lat = SCity["Lat"]
y_cl = SCity["Cloudiness"]

# Least-squares fit; print the squared correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_lat, y_cl)
print(f"The r-squared is: {rvalue**2}")
regress_values = slope * x_lat + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter of the data, fitted line and its equation, saved as a PNG
fig, ax = plt.subplots()
ax.scatter(x_lat, y_cl, s=60, alpha=0.75, edgecolors="black")
ax.plot(x_lat, regress_values, "r-")
ax.annotate(line_eq, (-30, 30), fontsize=15, color="red")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title("Southern Hemisphere Latitude vs Cloudiness")
fig.savefig("../output_data/Fig10_sLatvsCloud.png")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude and wind speed of the northern-hemisphere cities
x_lat = NCity['Lat']
y_ws = NCity['Wind Speed']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_ws)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_ws, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (top centre) rather than at a fixed
# data point: a data-coordinate anchor is clipped whenever it falls outside the
# autoscaled range, e.g. when no retrieved wind speed reaches the anchor value.
plt.annotate(line_eq,xy=(0.5,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Northern Hemisphere Latitude vs Wind Speed")
plt.savefig("../output_data/Fig11_NLatvsWSpeed.png")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude and wind speed of the southern-hemisphere cities
x_lat = SCity['Lat']
y_ws = SCity['Wind Speed']

# Fit a least-squares line and report how much variance it explains
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_lat, y_ws)
print(f"The r-squared is: {rvalue**2}")
regress_values = x_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the data with the fitted line on top
plt.scatter(x_lat,y_ws, s=60,alpha=0.75,edgecolors="black")
plt.plot(x_lat,regress_values,"r-")
# Anchor the equation relative to the axes (upper left) rather than at a fixed
# data point: a data-coordinate anchor is clipped whenever it falls outside the
# autoscaled range, e.g. when no retrieved wind speed reaches the anchor value.
plt.annotate(line_eq,xy=(0.25,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Southern Hemisphere Latitude vs Wind Speed")
plt.savefig("../output_data/Fig12_SLatvsWSpeed.png")
plt.show()