# WeatherPy
----

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import string
import os

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import custom plotting function
from plotter import scatter_linregress

# Outputs: the CSV export and the saved figures are all written under this folder
output_data_folder = 'output_data'
output_data_file = os.path.join(output_data_folder, 'cities.csv')
# Create the folder up front so that writing the CSV or saving a figure does
# not fail with FileNotFoundError when the folder does not exist yet
os.makedirs(output_data_folder, exist_ok=True)

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Figure size for scatter plots
figsize = (15, 5)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they are first encountered
cities = []
# Companion set so the uniqueness check is a hash lookup instead of a scan
# of the whole `cities` list on every iteration
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
num_of_cities = len(cities)
print(num_of_cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).

In [None]:
# Openweathermap api url
# NOTE(review): this is a plain-http endpoint, so the appid key is sent
# unencrypted; consider the https endpoint if the service supports it.
url = "http://api.openweathermap.org/data/2.5/weather"
units = 'imperial'
params = {'units': units,
          'appid': weather_api_key}
# Upper bound (seconds) on one request so a stalled connection cannot hang the run
request_timeout = 10

print("""Beginning Data Retrieval
-----------------------------""")
# One dict per successfully retrieved city; turned into a DataFrame later
d = []
for idx, city in enumerate(cities, 1):
    # 60 calls/min = 1 call per second
    time.sleep(1)

    # set city param
    params['q'] = city
    print(f"Processing Record {idx} of {num_of_cities} | {string.capwords(city)}")
    try:
        response = requests.get(url=url, params=params, timeout=request_timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Network failure, timeout or a non-JSON body: skip this city rather
        # than abort the whole run. Only the exception type is printed because
        # the exception text can contain the request URL, which includes the key.
        print(f"{string.capwords(city)} request failed ({type(err).__name__}). Skipping...")
        continue

    # check if city not found
    cod = str(data.get('cod'))
    if cod == '404':
        print(f"{string.capwords(city)} not found. Skipping...")
        continue
    # Any other non-success code carries no weather fields, so reading them
    # below would raise KeyError. (Assumes success is reported as cod 200 —
    # TODO confirm against the API documentation.)
    if cod != '200':
        print(f"{string.capwords(city)} returned code {cod}: {data.get('message')}. Skipping...")
        continue

    # create new dict entry
    try:
        record = {
            'City': data['name'],
            'Lat': data['coord']['lat'],
            'Lng': data['coord']['lon'],
            'Max_Temp': data['main']['temp_max'],
            'Humidity': data['main']['humidity'],
            'Cloudiness': data['clouds']['all'],
            'Wind_Speed': data['wind']['speed'],
            'Country': data['sys']['country'],
            'Date': pd.to_datetime(data['dt'], unit='s'),
        }
    except KeyError as missing:
        # An incomplete payload should cost one city, not the whole retrieval
        print(f"{string.capwords(city)} response is missing {missing}. Skipping...")
        continue
    d.append(record)

print("""-----------------------------
Data Retrieval Complete
-----------------------------""")

### If API Calls Were Run, Run This Cell

In [None]:
# Turn the per-city records gathered by the API loop into a table,
# export it to the output CSV, and preview the first rows
weather_df = pd.DataFrame(data=d)
weather_df.to_csv(path_or_buf=output_data_file, index=False)
weather_df.head()

### If Accessing Saved Data, Run This Cell

In [None]:
# Reload the previously exported city weather table and preview the first rows
weather_df = pd.read_csv(filepath_or_buffer=output_data_file)
weather_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Remember the row count before filtering so the number removed can be reported
data_len = len(weather_df)
# Index labels of the rows whose humidity reading exceeds 100%
over_limit_labels = weather_df.index[weather_df['Humidity'] > 100]
weather_df = weather_df.drop(index=over_limit_labels)
removed_count = data_len - len(weather_df)
print(f"Removed {removed_count} cities with Humidity values greater than 100%")

In [None]:
# Save the humidity-filtered table to the output CSV (row index omitted)
weather_df.to_csv(path_or_buf=output_data_file, index=False)

In [None]:
# Convert the Date column to pandas datetimes so the `.dt` accessor used by
# the plot titles works (the column is plain text when loaded from the CSV)
parsed_dates = pd.to_datetime(weather_df['Date'])
weather_df['Date'] = parsed_dates

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of latitude against maximum temperature, saved to the output folder
fig, axes = plt.subplots(1, 1, figsize=figsize)

# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Lat Vs Temp ({weather_df['Date'].dt.date.iloc[0]})"
scatter_linregress(axes, weather_df['Lat'], weather_df['Max_Temp'], title, regress=False)

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of latitude against humidity, saved to the output folder
fig, axes = plt.subplots(1, 1, figsize=figsize)

# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Lat Vs Humidity ({weather_df['Date'].dt.date.iloc[0]})"
scatter_linregress(axes, weather_df['Lat'], weather_df['Humidity'], title, regress=False)

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of latitude against cloudiness, saved to the output folder
fig, axes = plt.subplots(1, 1, figsize=figsize)

# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Lat Vs Cloudiness ({weather_df['Date'].dt.date.iloc[0]})"
scatter_linregress(axes, weather_df['Lat'], weather_df['Cloudiness'], title, regress=False)

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of latitude against wind speed, saved to the output folder
fig, axes = plt.subplots(1, 1, figsize=figsize)

# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Lat Vs Wind_Speed ({weather_df['Date'].dt.date.iloc[0]})"
scatter_linregress(axes, weather_df['Lat'], weather_df['Wind_Speed'], title, regress=False)

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

In [None]:
# Split the dataset into northern and southern hemispheres.
# Each frame is built by dropping the rows that belong to the other side,
# with latitude 0 counted as northern.
southern_labels = weather_df.index[weather_df['Lat'] < 0]
northern_labels = weather_df.index[weather_df['Lat'] >= 0]
north_hemis_df = weather_df.drop(index=southern_labels)
south_hemis_df = weather_df.drop(index=northern_labels)

#### Hemispheres - Max Temp vs. Latitude Linear Regression

In [None]:
# Side-by-side plots of max temperature against latitude, one axes per hemisphere.
# `regress` is left at scatter_linregress's default here (presumably enabling
# the regression line — confirm in plotter.py).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=figsize)

# In this cell the title is only used to name the saved file.
# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Hemispheres Lat Vs Max_Temp ({weather_df['Date'].dt.date.iloc[0]})"

# Southern Hemisphere
scatter_linregress(axes[0], south_hemis_df['Lat'], south_hemis_df['Max_Temp'], "Southern Hemisphere")

# Northern Hemisphere plot
scatter_linregress(axes[1], north_hemis_df['Lat'], north_hemis_df['Max_Temp'], "Northern Hemisphere")

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

### Observations
Clearly the temperature increases as you move toward the equator, and decreases as you move toward the poles.
If the data points were more evenly distributed, the r-squared value might be even higher.

####  Hemispheres - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Side-by-side plots of humidity against latitude, one axes per hemisphere.
# `regress` is left at scatter_linregress's default here (presumably enabling
# the regression line — confirm in plotter.py).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=figsize)

# In this cell the title is only used to name the saved file.
# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Hemispheres Lat Vs Humidity ({weather_df['Date'].dt.date.iloc[0]})"

# Southern Hemisphere
scatter_linregress(axes[0], south_hemis_df['Lat'], south_hemis_df['Humidity'], "Southern Hemisphere")

# Northern Hemisphere plot
scatter_linregress(axes[1], north_hemis_df['Lat'], north_hemis_df['Humidity'], "Northern Hemisphere")

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

### Observation
While there isn't much of a clear trend shown by the linear regression, the poles don't seem to have low humidity values. A better graph might look at only <-40 and >40 latitudes where the empty corners of the graph reside.

####  Hemispheres - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Side-by-side plots of cloudiness against latitude, one axes per hemisphere.
# `regress` is left at scatter_linregress's default here (presumably enabling
# the regression line — confirm in plotter.py).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=figsize)

# In this cell the title is only used to name the saved file.
# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Hemispheres Lat Vs Cloudiness ({weather_df['Date'].dt.date.iloc[0]})"

# Southern Hemisphere
scatter_linregress(axes[0], south_hemis_df['Lat'], south_hemis_df['Cloudiness'], "Southern Hemisphere")

# Northern Hemisphere plot
scatter_linregress(axes[1], north_hemis_df['Lat'], north_hemis_df['Cloudiness'], "Northern Hemisphere")

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

### Observation
There doesn't seem to be any correlation between latitude and cloudiness.

####  Hemispheres - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Side-by-side plots of wind speed against latitude, one axes per hemisphere.
# `regress` is left at scatter_linregress's default here (presumably enabling
# the regression line — confirm in plotter.py).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=figsize)

# In this cell the title is only used to name the saved file.
# .iloc[0] reads the first row by position; the label-based [0] raises
# KeyError when the row labelled 0 has been dropped (e.g. by the humidity filter)
title = f"Hemispheres Lat Vs Wind_Speed ({weather_df['Date'].dt.date.iloc[0]})"

# Southern Hemisphere
scatter_linregress(axes[0], south_hemis_df['Lat'], south_hemis_df['Wind_Speed'], "Southern Hemisphere")

# Northern Hemisphere plot
scatter_linregress(axes[1], north_hemis_df['Lat'], north_hemis_df['Wind_Speed'], "Northern Hemisphere")

# Explicit .png extension so the file type does not depend on matplotlib's default format
plt.savefig(os.path.join(output_data_folder, f"{title}.png"))

plt.show()

### Observation
There's a potential trend of higher wind speeds moving from the south pole to the north pole. This trend might be driven by a few high outliers; further analysis is needed to confirm.