# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
from api_keys import (weather_api_key, g_key)
from citipy import citipy
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from random import uniform
import requests
from scipy.stats import linregress

## Generate Cities List

In [None]:
# Randomly select at least 500 unique (non-repeating) cities based on random latitude and longitude.
# default 643

In [2]:
# Number of unique city names to collect. Kept small for quick runs; raise it
# (e.g. to 500 or more) for the full analysis.
number_of_cities = 5
cities = []

while len(cities) < number_of_cities:
    # Draw a random point on the globe: latitude in [-90, 90], longitude in
    # [-180, 180]. citipy.nearest_city takes (latitude, longitude) in that
    # order, so the two values must not be swapped.
    lat, lon = uniform(-90, 90), uniform(-180, 180)
    city = citipy.nearest_city(lat, lon)
    city_name = city.city_name
    # Keep only names not seen before so every city is queried once.
    if city_name not in cities:
        cities.append(city_name)

cities

['ushuaia', 'busselton', 'murud', 'cape town', 'norton shores']

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:
# Endpoint and query parameters for the OpenWeatherMap current-weather API.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

params = {
    "units": "metric",          # request metric units from the API
    "appid": weather_api_key,   # key imported from api_keys
    "q": ""                     # city name; filled in per request
}

In [4]:
# Empty accumulators for the response data, one independent list per field.
Name, Lat, Lng = [], [], []
Max_Temp, Humidity, Cloudiness = [], [], []
Wind_Speed, Country, Date = [], [], []

In [5]:
# Query the weather API once per city, log progress, and collect the fields
# of every successful response into the result lists.
print('Beginning Data Retrieval')
print('-----------------------------')

sets = 1
records = 1

for i, city in enumerate(cities):

    # Start a new set (and restart the record counter) after every second city.
    if (i % 2 == 0 and i >= 2):
        sets += 1
        records = 1

    params['q'] = city
    response = requests.get(url, params=params).json()

    print(f"Processing Record {records} of Set {sets} | {city}")

    try:
        # Read every field before touching the result lists: if any key is
        # missing, nothing is appended and the lists stay the same length.
        record = (
            city,
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except (KeyError, TypeError):
        # The payload does not have the expected structure (e.g. an error
        # reply without weather data); skip this city.
        print("City not found. Skipping...")
    else:
        targets = (Name, Lat, Lng, Max_Temp, Humidity, Cloudiness, Wind_Speed, Country, Date)
        for target, value in zip(targets, record):
            target.append(value)

    records += 1

print('-----------------------------')
print('Data Retrieval Complete ')
print('-----------------------------')

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | ushuaia
Processing Record 2 of Set 1 | busselton
Processing Record 1 of Set 2 | murud
Processing Record 2 of Set 2 | cape town
Processing Record 1 of Set 3 | norton shores
-----------------------------
Data Retrieval Complete 
-----------------------------


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Pair each output column name with its list of collected values, then build
# the DataFrame from that mapping.
weather_dict = dict(
    zip(
        ["City", "Lat", "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed", "Country", "Date"],
        [Name, Lat, Lng, Max_Temp, Humidity, Cloudiness, Wind_Speed, Country, Date],
    )
)

weather_df = pd.DataFrame(weather_dict)
weather_df

In [None]:
# Display pandas' descriptive statistics for the collected weather data.
weather_df.describe()

In [None]:
# Export the city data with a fixed column order; the row index is written
# out under the "City_ID" label.
weather_df.loc[
    :,
    ['City', 'Cloudiness', 'Country', 'Date', 'Humidity', 'Lat', 'Lng', 'Max Temp', 'Wind Speed'],
].to_csv(
    "../output_data/cities.csv",
    encoding="utf-8",
    index=True,
    header=True,
    index_label="City_ID",
)


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Index labels of the rows whose humidity reading is above 100%.
indexNames = weather_df.loc[weather_df['Humidity'] > 100].index

In [None]:
# Build clean_city_df as a copy of weather_df without the humidity-outlier
# rows; drop() returns a new frame, so weather_df itself is left untouched.
clean_city_df = weather_df.drop(index=indexNames)

In [None]:
# Display pandas' descriptive statistics for the data after outlier removal.
clean_city_df.describe()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of max temperature against latitude for the cleaned city data.
# The title carries the date the analysis was run.
lat_temp_pl = clean_city_df.plot(
    kind="scatter", x="Lat", y="Max Temp", grid=True, figsize=(6, 4),
    facecolors="blue", edgecolors="black", s=35,
    title=f"City Latitude vs. Max Temperature ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

lat_temp_pl.set_xlabel("Latitude")
lat_temp_pl.set_ylabel("Max Temperature (C)")

# Save before show() so the written file contains the rendered figure.
plt.savefig("../output_data/TemperatureVsLatitude.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for the cleaned city data.
# The title carries the date the analysis was run.
lat_hum_pl = clean_city_df.plot(
    kind="scatter", x="Lat", y="Humidity", grid=True, figsize=(6, 4),
    facecolors="blue", edgecolors="black", s=35,
    title=f"City Latitude vs. Humidity ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

lat_hum_pl.set_xlabel("Latitude")
lat_hum_pl.set_ylabel("Humidity (%)")

# Save before show() so the written file contains the rendered figure.
plt.savefig("../output_data/HumidityVsLatitude.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for the cleaned city data.
# The title carries the date the analysis was run.
lat_cloud_pl = clean_city_df.plot(
    kind="scatter", x="Lat", y="Cloudiness", grid=True, figsize=(6, 4),
    facecolors="blue", edgecolors="black", s=35,
    title=f"City Latitude vs. Cloudiness ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

lat_cloud_pl.set_xlabel("Latitude")
lat_cloud_pl.set_ylabel("Cloudiness (%)")

# Save before show() so the written file contains the rendered figure.
plt.savefig("../output_data/CloudinessVsLatitude.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for the cleaned city data.
# The title carries the date the analysis was run.
lat_wind_pl = clean_city_df.plot(
    kind="scatter", x="Lat", y="Wind Speed", grid=True, figsize=(6, 4),
    facecolors="blue", edgecolors="black", s=35,
    title=f"City Latitude vs. Wind Speed ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

lat_wind_pl.set_xlabel("Latitude")
# The data was requested with units=metric, for which the weather API reports
# wind speed in metres per second.
lat_wind_pl.set_ylabel("Wind Speed (m/s)")

# Save before show() so the written file contains the rendered figure.
plt.savefig("../output_data/WindSpeedVsLatitude.png")
plt.show()

## Linear Regression

In [None]:
# Split the cleaned data at the equator into independent copies; a latitude
# of exactly 0 is counted as northern.
northern_hem_df = clean_city_df[clean_city_df['Lat'].ge(0)].copy()
southern_hem_df = clean_city_df[clean_city_df['Lat'].lt(0)].copy()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: scatter of max temperature vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
nor_temp_pl = northern_hem_df.plot(
    kind="scatter", x="Lat", y="Max Temp", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Northern Hemisphere - City Latitude vs. Max Temperature ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of max temperature on latitude.
(slope_nt, intercept_nt, rvalue_nt, pvalue_nt, stderr_nt) = linregress(northern_hem_df['Lat'], northern_hem_df['Max Temp'])

print(f"The r-squared is: {rvalue_nt**2}")

regress_values_nt = northern_hem_df['Lat'] * slope_nt + intercept_nt
line_eq_nt = "y = " + str(round(slope_nt,2)) + "x + " + str(round(intercept_nt,2))

nor_temp_pl.plot(northern_hem_df['Lat'], regress_values_nt, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
nor_temp_pl.annotate(line_eq_nt, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

nor_temp_pl.set_xlabel("Latitude")
nor_temp_pl.set_ylabel("Max Temperature (C)")

plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: scatter of max temperature vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
sou_temp_pl = southern_hem_df.plot(
    kind="scatter", x="Lat", y="Max Temp", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Southern Hemisphere - City Latitude vs. Max Temperature ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of max temperature on latitude.
(slope_st, intercept_st, rvalue_st, pvalue_st, stderr_st) = linregress(southern_hem_df['Lat'], southern_hem_df['Max Temp'])

print(f"The r-squared is: {rvalue_st**2}")

regress_values_st = southern_hem_df['Lat'] * slope_st + intercept_st
line_eq_st = "y = " + str(round(slope_st,2)) + "x + " + str(round(intercept_st,2))

sou_temp_pl.plot(southern_hem_df['Lat'], regress_values_st, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
sou_temp_pl.annotate(line_eq_st, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

sou_temp_pl.set_xlabel("Latitude")
sou_temp_pl.set_ylabel("Max Temperature (C)")

plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: scatter of humidity vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
nor_hum_pl = northern_hem_df.plot(
    kind="scatter", x="Lat", y="Humidity", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Northern Hemisphere - City Latitude vs. Humidity ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of humidity on latitude.
(slope_nh, intercept_nh, rvalue_nh, pvalue_nh, stderr_nh) = linregress(northern_hem_df['Lat'], northern_hem_df['Humidity'])

print(f"The r-squared is: {rvalue_nh**2}")

regress_values_nh = northern_hem_df['Lat'] * slope_nh + intercept_nh
line_eq_nh = "y = " + str(round(slope_nh,2)) + "x + " + str(round(intercept_nh,2))

nor_hum_pl.plot(northern_hem_df['Lat'], regress_values_nh, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
nor_hum_pl.annotate(line_eq_nh, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

nor_hum_pl.set_xlabel("Latitude")
nor_hum_pl.set_ylabel("Humidity (%)")

plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: scatter of humidity vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
sou_hum_pl = southern_hem_df.plot(
    kind="scatter", x="Lat", y="Humidity", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Southern Hemisphere - City Latitude vs. Humidity ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of humidity on latitude.
(slope_sh, intercept_sh, rvalue_sh, pvalue_sh, stderr_sh) = linregress(southern_hem_df['Lat'], southern_hem_df['Humidity'])

print(f"The r-squared is: {rvalue_sh**2}")

regress_values_sh = southern_hem_df['Lat'] * slope_sh + intercept_sh
line_eq_sh = "y = " + str(round(slope_sh,2)) + "x + " + str(round(intercept_sh,2))

sou_hum_pl.plot(southern_hem_df['Lat'], regress_values_sh, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
sou_hum_pl.annotate(line_eq_sh, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

sou_hum_pl.set_xlabel("Latitude")
sou_hum_pl.set_ylabel("Humidity (%)")

plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: scatter of cloudiness vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
nor_cloud_pl = northern_hem_df.plot(
    kind="scatter", x="Lat", y="Cloudiness", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Northern Hemisphere - City Latitude vs. Cloudiness ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of cloudiness on latitude.
(slope_nc, intercept_nc, rvalue_nc, pvalue_nc, stderr_nc) = linregress(northern_hem_df['Lat'], northern_hem_df['Cloudiness'])

print(f"The r-squared is: {rvalue_nc**2}")

regress_values_nc = northern_hem_df['Lat'] * slope_nc + intercept_nc
line_eq_nc = "y = " + str(round(slope_nc,2)) + "x + " + str(round(intercept_nc,2))

nor_cloud_pl.plot(northern_hem_df['Lat'], regress_values_nc, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
nor_cloud_pl.annotate(line_eq_nc, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

nor_cloud_pl.set_xlabel("Latitude")
nor_cloud_pl.set_ylabel("Cloudiness (%)")

plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: scatter of cloudiness vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
sou_cloud_pl = southern_hem_df.plot(
    kind="scatter", x="Lat", y="Cloudiness", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Southern Hemisphere - City Latitude vs. Cloudiness ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of cloudiness on latitude.
(slope_sc, intercept_sc, rvalue_sc, pvalue_sc, stderr_sc) = linregress(southern_hem_df['Lat'], southern_hem_df['Cloudiness'])

print(f"The r-squared is: {rvalue_sc**2}")

regress_values_sc = southern_hem_df['Lat'] * slope_sc + intercept_sc
line_eq_sc = "y = " + str(round(slope_sc,2)) + "x + " + str(round(intercept_sc,2))

sou_cloud_pl.plot(southern_hem_df['Lat'], regress_values_sc, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
sou_cloud_pl.annotate(line_eq_sc, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

sou_cloud_pl.set_xlabel("Latitude")
sou_cloud_pl.set_ylabel("Cloudiness (%)")

plt.show()

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: scatter of wind speed vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
nor_wind_pl = northern_hem_df.plot(
    kind="scatter", x="Lat", y="Wind Speed", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Northern Hemisphere - City Latitude vs. Wind Speed ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of wind speed on latitude.
(slope_nw, intercept_nw, rvalue_nw, pvalue_nw, stderr_nw) = linregress(northern_hem_df['Lat'], northern_hem_df['Wind Speed'])

print(f"The r-squared is: {rvalue_nw**2}")

regress_values_nw = northern_hem_df['Lat'] * slope_nw + intercept_nw
line_eq_nw = "y = " + str(round(slope_nw,2)) + "x + " + str(round(intercept_nw,2))

nor_wind_pl.plot(northern_hem_df['Lat'], regress_values_nw, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
nor_wind_pl.annotate(line_eq_nw, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

nor_wind_pl.set_xlabel("Latitude")
# The data was requested with units=metric, for which the weather API reports
# wind speed in metres per second.
nor_wind_pl.set_ylabel("Wind Speed (m/s)")

plt.show()

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: scatter of wind speed vs. latitude with a fitted
# regression line. The title carries the date the analysis was run.
sou_wind_pl = southern_hem_df.plot(
    kind="scatter", x="Lat", y="Wind Speed", figsize=(6, 4), facecolors="blue", s=35,
    title=f"Southern Hemisphere - City Latitude vs. Wind Speed ({pd.Timestamp.today().strftime('%Y-%m-%d')})",
)

# Least-squares linear fit of wind speed on latitude.
(slope_sw, intercept_sw, rvalue_sw, pvalue_sw, stderr_sw) = linregress(southern_hem_df['Lat'], southern_hem_df['Wind Speed'])

print(f"The r-squared is: {rvalue_sw**2}")

regress_values_sw = southern_hem_df['Lat'] * slope_sw + intercept_sw
line_eq_sw = "y = " + str(round(slope_sw,2)) + "x + " + str(round(intercept_sw,2))

sou_wind_pl.plot(southern_hem_df['Lat'], regress_values_sw, "r-")
# Position the equation in axes-fraction coordinates so it stays inside the
# plot whatever the data range is.
sou_wind_pl.annotate(line_eq_sw, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

sou_wind_pl.set_xlabel("Latitude")
# The data was requested with units=metric, for which the weather API reports
# wind speed in metres per second.
sou_wind_pl.set_ylabel("Wind Speed (m/s)")

plt.show()