# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json
import os

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV output file
output_data_file = "output_data/cities.csv"

# (min, max) bounds, in degrees, for the random latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Draw 1500 random (lat, lng) pairs, uniformly distributed over the
# configured latitude and longitude ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each
# city name once. dict.fromkeys de-duplicates in a single pass while
# preserving first-seen order; testing membership against a growing list
# would rescan that list for every coordinate pair.
search_cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
        for lat_lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(search_cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it is being processed (with the city number and city name).


In [None]:
# Save config information.
# "metric" asks the API for Celsius temperatures (wind speed in metres/second).
units = "metric"

# Base query URL; the city name is appended after the trailing "q=" when the
# request is made. HTTPS is used so the API key embedded in the query string
# is not transmitted in clear text.
url = (
    "https://api.openweathermap.org/data/2.5/weather"
    f"?appid={weather_api_key}&units={units}&q="
)

In [None]:
# Set up lists to hold response info (one entry per city that was found)
city = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []
# Counter for records found
record_count = 0
# Counter for processing records (stays 0 when there are no cities to search)
i = 0

print("Beginning Data Retrieval")
print("-----------------------------")

# Loop through the list of cities and perform a request for data on each.
# enumerate keeps the record number and the city name in step, so the log
# line always names the city that was just processed.
for i, city_name in enumerate(search_cities, start=1):
    response = requests.get(url + city_name).json()

    # Read every field before touching the result lists: if any key is
    # missing, nothing is appended and the lists keep equal lengths.
    try:
        record = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except (KeyError, TypeError):
        # The payload does not have the expected structure (for example an
        # error response for an unknown city), so skip this city.
        print("City not found. Skipping...")
        continue

    # Store the complete record, one value per result list
    result_lists = (city, lat, lng, max_temp, humidity,
                    cloudiness, wind_speed, country, date)
    for values, value in zip(result_lists, record):
        values.append(value)

    record_count += 1
    print(f"Processing record {i} | {city_name}")

print(f"Data for {record_count} cities has been retrieved.")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Create the DataFrame from the per-city result lists
city_data = {
    "City": city,
    "Country": country,
    "Lat": lat,
    "Long": lng,
    "Max Temp": max_temp,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Date": date,
}
city_df = pd.DataFrame(city_data)

# Save the DataFrame to .csv.
# Use the configured output path (forward slash, portable across operating
# systems) rather than a literal containing a backslash, which is an invalid
# escape sequence and only forms a directory separator on Windows.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    # Make sure the target directory exists before writing
    os.makedirs(output_dir, exist_ok=True)
city_df.to_csv(output_data_file)

# Show the first 5 lines of the DataFrame
city_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Keep only the rows whose humidity is at most 100 %
humidity_ok = city_df["Humidity"] <= 100
clean_df = city_df[humidity_ok]
clean_df
# Author's observation: no cities in the DataFrame have > 100 % humidity

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Latitude goes on the x axis, max temperature on the y axis
y_latvtemp = list(clean_df["Max Temp"])
x_latvtemp = list(clean_df["Lat"])

# Draw the scatter plot, then title, axis labels and grid
plt.scatter(x=x_latvtemp, y=y_latvtemp, color='lightblue', edgecolors='black')
plt.title("City Latitude vs. Max Temperature (07/24/20)")
plt.ylabel("Max Temperature (C)")
plt.xlabel("Latitude")
plt.grid()
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Latitude goes on the x axis, humidity on the y axis
y_latvhumidity = list(clean_df["Humidity"])
x_latvhumidity = list(clean_df["Lat"])

# Draw the scatter plot, then title, axis labels and grid
plt.scatter(x=x_latvhumidity, y=y_latvhumidity, color='lightgreen', edgecolors='black')
plt.title("City Latitude vs. Humidity (07/24/20)")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Latitude goes on the x axis, cloudiness on the y axis
y_latvclouds = list(clean_df["Cloudiness"])
x_latvclouds = list(clean_df["Lat"])

# Draw the scatter plot, then title, axis labels and grid
plt.scatter(x=x_latvclouds, y=y_latvclouds, color='aqua', edgecolors='black')
plt.title("City Latitude vs. Cloudiness (07/24/20)")
plt.ylabel("Cloudiness")
plt.xlabel("Latitude")
plt.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Define axes: latitude on x, wind speed on y
x_latvwind = list(clean_df["Lat"])
y_latvwind = list(clean_df["Wind Speed"])

# Plot setup
plt.scatter(x_latvwind, y_latvwind, color='pink', edgecolors='black')
plt.xlabel("Latitude")
# The y axis shows wind speed (the label previously read "Cloudiness").
# NOTE(review): "m/s" assumes the data was requested with metric units - confirm.
plt.ylabel("Wind Speed (m/s)")
plt.title("City Latitude vs. Wind Speed (07/24/20)")
plt.grid()
plt.show()

## Linear Regression

In [None]:
# Create bins for the data: with these edges, latitudes in [-90, 0] are
# labelled "Southern" and latitudes in (0, 90] are labelled "Northern"
bins = [-90, 0, 90]
bin_names = ["Southern", "Northern"]

# Work on an independent copy so adding the "Hemisphere" column neither
# modifies clean_df nor triggers pandas' SettingWithCopyWarning (clean_df is
# itself a filtered selection of another DataFrame).
bin_df = clean_df.copy()
bin_df["Hemisphere"] = pd.cut(bin_df["Lat"], bins, labels=bin_names, include_lowest=True)
bin_df.head()

In [None]:
# Map each plot label (used for the axis label, title and file name) to the
# DataFrame column that supplies its y values
y_columns = {
    "Max_Temp": "Max Temp",
    "Humidity": "Humidity",
    "Cloudiness": "Cloudiness",
    "Wind_Speed": "Wind Speed",
}

# Create a list of y axis names (plots are produced in this order)
y_list = list(y_columns)

# Data coordinates at which the regression equation is written, per hemisphere
annotation_xy = {"Southern": (-55, 8), "Northern": (6, 10)}

# Make sure the folder for the saved figures exists
os.makedirs("output_data", exist_ok=True)

# Loop to separate data into regions. The loop variable has its own name so
# it does not overwrite the `bins` list of bin edges.
for hemisphere in bin_names:

    # Rows belonging to this hemisphere
    hemisphere_df = bin_df.loc[bin_df["Hemisphere"] == hemisphere]

    # A regression line needs at least two points
    if len(hemisphere_df) < 2:
        print(f"Not enough data for the {hemisphere} Hemisphere. Skipping...")
        continue

    # Latitude is the x axis of every plot for the region
    x_axis = hemisphere_df["Lat"]

    # One scatter plot with a fitted line per criterion
    for y in y_list:

        # Define y axis
        y_axis = hemisphere_df[y_columns[y]]

        # Plot setup
        plt.scatter(x_axis, y_axis, color='lightgreen', edgecolors='black')
        plt.xlabel("Latitude")
        plt.ylabel(y)
        plt.title(f"City Latitude vs. {y} in the {hemisphere} Hemisphere (07/24/20)")
        plt.grid()

        # Do linear regression
        slope, intercept, rvalue, _pvalue, _stderr = linregress(x_axis, y_axis)
        regress_values = x_axis * slope + intercept
        plt.plot(x_axis, regress_values, "r-")
        line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
        plt.annotate(line_eq, annotation_xy[hemisphere], fontsize=15, color="red")

        # rvalue is the correlation coefficient of the fit, so its square is
        # the r-squared; no separate correlation computation is needed
        rsq = round(rvalue ** 2, 2)
        print(f"The r-squared is: {rsq}")

        # Build the path with os.path.join: a backslash inside the string
        # literal is an invalid escape sequence and is only a directory
        # separator on Windows
        plt.savefig(os.path.join("output_data", f"{hemisphere}_Hemisphere_{y}.png"))
        plt.show()