# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
import scipy.stats as st

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# Path of the CSV file that holds the city weather table.
output_data_file = "../output_data/city_weather.csv"

# Range of latitudes and longitudes
# (min, max) bounds used when sampling random coordinates in get_cities.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
def get_cities(mincitycount):
    """Return a list of at least ``mincitycount`` unique city names.

    Random (latitude, longitude) pairs are drawn in batches of 1500 from
    ``lat_range`` / ``lng_range`` and mapped to their nearest city with
    citipy. Each batch is processed in full, so the returned list can be
    longer than ``mincitycount``. Cities are de-duplicated by name only,
    and the list keeps first-seen order.

    Args:
        mincitycount: Minimum number of unique city names to collect.
            Zero or a negative value yields an empty list.

    Returns:
        list: Unique city names in the order they were first encountered.
    """
    cities = []
    # Set mirror of ``cities`` so the uniqueness check is O(1) instead of
    # a linear scan of the list for every sampled coordinate.
    seen = set()

    while len(cities) < mincitycount:
        # Create a batch of random lat and lng combinations
        lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
        lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)

        # Identify nearest city for each lat, lng combination
        for lat, lng in zip(lats, lngs):
            city = citipy.nearest_city(lat, lng).city_name

            # If the city is unique, then add it to our cities list
            if city not in seen:
                seen.add(city)
                cities.append(city)

    return cities

In [None]:
# get minimum 500 cities
# get_cities keeps sampling until it holds at least this many unique names,
# so the resulting list may be longer than 500.
cities = get_cities(500)

# Print the city count to confirm sufficient count
len(cities) 

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Endpoint and unit system used for every weather lookup.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Seconds to wait for a single response before giving up on that city.
request_timeout = 10

# One parsed JSON payload per city that produced a readable response.
weather_data = []

for i, city in enumerate(cities):
    print(f"Index {str(i)} fetching weather for {city}")

    # Pass the query as a dict so requests URL-encodes each value; city
    # names containing spaces, "&" or non-ASCII characters would otherwise
    # corrupt a hand-built query string.
    query_params = {"appid": weather_api_key, "units": units, "q": city}

    try:
        # Get weather data
        weather_response = requests.get(url, params=query_params, timeout=request_timeout)
        weather_json = weather_response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # A network failure or a non-JSON body should not abort the whole
        # run. Only the exception type is printed because the full message
        # can contain the request URL, which includes the API key.
        print(f"Skipping {city}: {type(err).__name__}")
    else:
        weather_data.append(weather_json)

    # Pause between calls (presumably to stay under the API rate limit).
    time.sleep(1)


### Convert Raw Data to DataFrame
* Add weather data to dataframe
* Export the weather data into a .csv.
* Display the weather DataFrame.

In [None]:
# Inspect one raw API response (the second entry) to see which fields exist.
# NOTE(review): indexing "sys" and "dt" assumes this entry was a successful
# lookup; an error payload may not carry those keys -- confirm.
pprint(weather_data[1]["sys"]["country"])
pprint(weather_data[1]["dt"])
pprint(weather_data[1])

In [None]:
# Build one list per output column: city, country, date, latitude, longitude,
# temperature (F), max temperature (F), humidity, cloudiness, wind speed.
# Only responses whose "cod" field equals 200 contribute a row.
found = [entry for entry in weather_data if entry["cod"] == 200]

cities = [entry["name"] for entry in found]
countries = [entry["sys"]["country"] for entry in found]
date = [entry["dt"] for entry in found]
lat = [entry["coord"]["lat"] for entry in found]
lon = [entry["coord"]["lon"] for entry in found]
temp = [entry["main"]["temp"] for entry in found]
max_temp = [entry["main"]["temp_max"] for entry in found]
hum = [entry["main"]["humidity"] for entry in found]
cloud = [entry["clouds"]["all"] for entry in found]
wind = [entry["wind"]["speed"] for entry in found]


In [None]:
# Assemble the per-column lists into a DataFrame. Column order follows the
# order of the names below.
column_names = (
    "city",
    "country",
    "date",
    "latitude",
    "longitude",
    "temperature",
    "max temperature",
    "humidity",
    "cloudiness",
    "wind speed",
)
column_values = (cities, countries, date, lat, lon, temp, max_temp, hum, cloud, wind)

weather_dict = dict(zip(column_names, column_values))
weather_df = pd.DataFrame(weather_dict)
weather_df


In [None]:
# ****************
# pull previously generated and saved output data from csv
# This replaces the weather_df built above with the contents of
# output_data_file.
# NOTE(review): the file must already exist; the save below is currently
# commented out, so it presumably has to be re-enabled for a first run -- confirm.
weather_df = pd.read_csv(output_data_file)

# **************** 
# save weather to csv
# weather_df.to_csv(output_data_file, index=True,index_label="city_id")  

In [None]:
# Rows reporting more than 100% humidity, and their index labels.
# An empty selection simply yields an empty list.
humidity_outlier_df = weather_df[weather_df["humidity"] > 100]
list_humidity_outlier_index = humidity_outlier_df.index.tolist()


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Remove the cities whose humidity is over 100%, using the index labels
# collected in list_humidity_outlier_index.
outlier_count = len(list_humidity_outlier_index)
if outlier_count > 0:
    print(f"Dropping {outlier_count} humidity outliers (humidity over 100)")
else:
    print("Dropping zero humidity outliers (no weather in weather_df with humidity > 100)")

# list_humidity_outlier_index holds index *labels*, so hand them to drop()
# directly; routing them through weather_df.index[...] would treat them as
# positions and pick the wrong rows whenever the index is not 0..n-1.
# drop() returns a new frame, so weather_df is left untouched and
# clean_weather_df is an independent copy even when nothing is dropped.
clean_weather_df = weather_df.drop(index=list_humidity_outlier_index)

# verify dataframe counts, if no outliers counts should be the same
# if outliers dropped, clean_weather_df should be len(list_humidity_outlier_index) less than weather_df
print(f"Weather dataframe count: {len(weather_df)}")
print(f"Clean weather dataframe count: {len(clean_weather_df)}")

In [None]:
# view clean_weather_df (the weather table after the humidity-outlier step)
clean_weather_df

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Pull the columns used by the scatter plots out of the cleaned frame.
latitude, temps, humidity, cloud, wind = (
    clean_weather_df[column]
    for column in ("latitude", "temperature", "humidity", "cloudiness", "wind speed")
)

In [None]:
# Scatter of temperature against latitude for every row of the cleaned data.
plt.scatter(latitude, temps, marker="o", color="red", edgecolors="black")

plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("Latitude vs. Temperature Plot")
plt.show()


## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every row of the cleaned data.
plt.scatter(latitude, humidity, marker="o", color="blue", edgecolors="black")

plt.xlabel("Latitude")
plt.ylabel("Percent Humidity")
plt.title("Latitude vs. Humidity Plot")
plt.show()


## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every row of the cleaned data.
plt.scatter(latitude, cloud, marker="o", color="grey", edgecolors="black")

plt.xlabel("Latitude")
plt.ylabel("Percent Cloudiness")
plt.title("Latitude vs. Cloudiness Plot")
plt.show()


## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every row of the cleaned data.
plt.scatter(latitude, wind, marker="o", color="lightblue", edgecolors="black")

plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Latitude vs. Wind Speed Plot")
plt.show()


## Linear Regression

In [None]:
# Series shared by the linear regression plots, split by hemisphere.
# Rows with latitude exactly 0 belong to neither hemisphere.
is_northern = clean_weather_df["latitude"] > 0
is_southern = clean_weather_df["latitude"] < 0

# Latitudes, northern/southern hemisphere
lat_nh = clean_weather_df.loc[is_northern, "latitude"]
lat_sh = clean_weather_df.loc[is_southern, "latitude"]

# Max temperatures, northern/southern hemisphere
max_temps_nh = clean_weather_df.loc[is_northern, "max temperature"]
max_temps_sh = clean_weather_df.loc[is_southern, "max temperature"]

# Humidity (%), northern/southern hemisphere
hum_nh = clean_weather_df.loc[is_northern, "humidity"]
hum_sh = clean_weather_df.loc[is_southern, "humidity"]



####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Pearson correlation between latitude and max temperature, northern hemisphere.
correlation_latmaxtemp_nh = st.pearsonr(lat_nh, max_temps_nh)

print(f"The correlation between Max Temperature vs. Latitude (Northern Hemisphere) is {round(correlation_latmaxtemp_nh[0],2)}")

# Least-squares line of max temperature on latitude.
x_lat_nh, y_max_temps_nh = lat_nh, max_temps_nh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_lat_nh, y_max_temps_nh)
regress_values = slope * x_lat_nh + intercept
line_eq_latmaxtemps_nh = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Label the axes, then draw the points, the fitted line and its equation.
plt.title("Northern Hemisphere - Max Temp vs. Latitude Linear Regression")
plt.xlabel("Latitude")
plt.ylabel("Max Temperatures (F)")

plt.scatter(x_lat_nh, y_max_temps_nh)
plt.plot(x_lat_nh, regress_values, "r-")
plt.annotate(line_eq_latmaxtemps_nh, (6, 10), fontsize=15, color="red")
plt.show()


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Pearson correlation between latitude and max temperature, southern hemisphere.
correlation_latmaxtemp_sh = st.pearsonr(lat_sh, max_temps_sh)

print(f"The correlation between Max Temperature vs. Latitude (Southern Hemisphere) is {round(correlation_latmaxtemp_sh[0],2)}")

# Least-squares line of max temperature on latitude.
x_lat_sh, y_max_temps_sh = lat_sh, max_temps_sh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_lat_sh, y_max_temps_sh)
regress_values = slope * x_lat_sh + intercept
line_eq_sh = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Label the axes, then draw the points, the fitted line and its equation.
plt.title("Southern Hemisphere - Max Temp vs. Latitude Linear Regression")
plt.xlabel("Latitude")
plt.ylabel("Max Temperatures (F)")

plt.scatter(x_lat_sh, y_max_temps_sh)
plt.plot(x_lat_sh, regress_values, "r-")
plt.annotate(line_eq_sh, (-30, 55), fontsize=15, color="red")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Pearson correlation between latitude and humidity, northern hemisphere.
correlation_lathum_nh = st.pearsonr(lat_nh,hum_nh)

print(f"The correlation between Humidity (%) vs. Latitude (Northern Hemisphere) is {round(correlation_lathum_nh[0],2)}")

# for Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression
x_lat_nh = lat_nh
y_hum_nh = hum_nh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_lat_nh, y_hum_nh)
regress_values = x_lat_nh * slope + intercept
line_eq_lathum_nh = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_lat_nh,y_hum_nh)
plt.plot(x_lat_nh,regress_values,"r-")
plt.annotate(line_eq_lathum_nh,(40,6),fontsize=15,color="red")

# The title names the plotted quantity (humidity); it previously said
# "Max Temp", copied over from the temperature plot.
plt.title("Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression")
plt.ylabel("Percent Humidity")
plt.xlabel("Latitude")
plt.show()


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Calculate the correlation coefficient and linear regression model for southern hemisphere
# Pearson correlation between latitude and humidity, southern hemisphere.
correlation_lathum_sh = st.pearsonr(lat_sh,hum_sh)

print(f"The correlation between Percent Humidity vs. Latitude (Southern Hemisphere) is {round(correlation_lathum_sh[0],2)}")

# for Southern Hemisphere - Percent Humidity vs. Latitude Linear Regression
x_lat_sh = lat_sh
y_hum_sh = hum_sh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_lat_sh,y_hum_sh)
regress_values = x_lat_sh * slope + intercept
line_eq_lathum_sh = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_lat_sh,y_hum_sh)
plt.plot(x_lat_sh,regress_values,"r-")
# Show the fitted equation like the other regression plots do. Axes-fraction
# coordinates keep the label inside the plot whatever the data range is.
plt.annotate(line_eq_lathum_sh,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")

plt.title("Southern Hemisphere - Percent Humidity vs. Latitude Linear Regression")
plt.ylabel("Percent Humidity")
plt.xlabel("Latitude")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression
# (this cell previously held a copy of the max-temperature analysis)

# latitudes and cloudiness, northern hemisphere
northern_rows = clean_weather_df.loc[clean_weather_df["latitude"] > 0]
lat_nh = northern_rows["latitude"]
cloud_nh = northern_rows["cloudiness"]

# Pearson correlation between latitude and cloudiness, northern hemisphere.
correlation_nh = st.pearsonr(lat_nh, cloud_nh)

print(f"The correlation between Cloudiness (%) vs. Latitude (Northern Hemisphere) is {round(correlation_nh[0],2)}")

# Least-squares line of cloudiness on latitude.
x_values_nh = lat_nh
y_values_nh = cloud_nh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values_nh, y_values_nh)
regress_values = x_values_nh * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values_nh, y_values_nh)
plt.plot(x_values_nh, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title("Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression")
plt.ylabel("Percent Cloudiness")
plt.xlabel("Latitude")
plt.show()


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression
# (this cell previously held a copy of the max-temperature analysis)

# latitudes and cloudiness, southern hemisphere
southern_rows = clean_weather_df.loc[clean_weather_df["latitude"] < 0]
lat_sh = southern_rows["latitude"]
cloud_sh = southern_rows["cloudiness"]

# Pearson correlation between latitude and cloudiness, southern hemisphere.
correlation_sh = st.pearsonr(lat_sh, cloud_sh)

print(f"The correlation between Cloudiness (%) vs. Latitude (Southern Hemisphere) is {round(correlation_sh[0],2)}")

# Least-squares line of cloudiness on latitude.
x_values_sh = lat_sh
y_values_sh = cloud_sh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values_sh, y_values_sh)
regress_values = x_values_sh * slope + intercept
line_eq_sh = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values_sh, y_values_sh)
plt.plot(x_values_sh, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is.
plt.annotate(line_eq_sh, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title("Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression")
plt.ylabel("Percent Cloudiness")
plt.xlabel("Latitude")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
# (this cell previously held a copy of the max-temperature analysis)

# latitudes and wind speeds, northern hemisphere
northern_rows = clean_weather_df.loc[clean_weather_df["latitude"] > 0]
lat_nh = northern_rows["latitude"]
wind_nh = northern_rows["wind speed"]

# Pearson correlation between latitude and wind speed, northern hemisphere.
correlation_nh = st.pearsonr(lat_nh, wind_nh)

print(f"The correlation between Wind Speed (mph) vs. Latitude (Northern Hemisphere) is {round(correlation_nh[0],2)}")

# Least-squares line of wind speed on latitude.
x_values_nh = lat_nh
y_values_nh = wind_nh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values_nh, y_values_nh)
regress_values = x_values_nh * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values_nh, y_values_nh)
plt.plot(x_values_nh, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title("Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.show()


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
# (this cell previously held a copy of the max-temperature analysis)

# latitudes and wind speeds, southern hemisphere
southern_rows = clean_weather_df.loc[clean_weather_df["latitude"] < 0]
lat_sh = southern_rows["latitude"]
wind_sh = southern_rows["wind speed"]

# Pearson correlation between latitude and wind speed, southern hemisphere.
correlation_sh = st.pearsonr(lat_sh, wind_sh)

print(f"The correlation between Wind Speed (mph) vs. Latitude (Southern Hemisphere) is {round(correlation_sh[0],2)}")

# Least-squares line of wind speed on latitude.
x_values_sh = lat_sh
y_values_sh = wind_sh
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values_sh, y_values_sh)
regress_values = x_values_sh * slope + intercept
line_eq_sh = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values_sh, y_values_sh)
plt.plot(x_values_sh, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is.
plt.annotate(line_eq_sh, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title("Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.show()