# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Bounds (min, max) used for the random latitude and longitude draws
lat_range = (-90, 90)
lng_range = (-180, 180)

# Draw 1500 random coordinates: latitudes are sampled first, then longitudes
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every (lat, lng) pair
nearest_names = (
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
)

# Keep each city name once; dict.fromkeys drops repeats but preserves the
# order in which names were first seen
cities = list(dict.fromkeys(nearest_names))

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL
url = 'https://api.openweathermap.org/data/2.5/weather?q='

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters (both are used for logging only)
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if i % 50 == 0 and i >= 50:
        set_count += 1
        # Restart at 1 so every set is numbered the same way as the first one
        record_count = 1

    # Create endpoint URL with each city
    city_url = f'{url}{city}&appid={weather_api_key}'

    # Log the record and set numbers (the URL is not logged: it holds the API key)
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # The timeout keeps a stalled connection from hanging the whole loop
        city_weather = requests.get(city_url, timeout=10).json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness,
        # wind speed, country, and date.
        # "lat" feeds city_lat and "lon" feeds city_lng; swapping them would
        # put longitudes in the "Lat" column used by every later plot.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]

    # Skip the city on a network failure (RequestException), a non-JSON body
    # (ValueError) or a response lacking the expected fields (KeyError).
    # Anything else, e.g. KeyboardInterrupt, is allowed to propagate.
    except (requests.RequestException, KeyError, ValueError):
        print("City not found. Skipping...")

    else:
        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# Load the collected per-city records into a Pandas DataFrame
city_data_df = pd.DataFrame(data=city_data)

# Show how many non-null values each column holds
city_data_df.count()

In [None]:
# Rebuild the DataFrame from the raw records and add a Celsius column.
# The request URL sets no units, so "Max Temp" is presumably in Kelvin
# (the OpenWeatherMap default); subtracting 273.15 converts it to Celsius.
city_data_df = pd.DataFrame(city_data)
city_data_df["Max Temp (C)"] = city_data_df["Max Temp"].sub(273.15)

# Preview the first rows
city_data_df.head()

In [None]:
# Write the city weather table to disk; the index is stored under "City_ID"
output_csv = "output_data/cities.csv"
city_data_df.to_csv(output_csv, index_label="City_ID")

In [None]:
# Reload the saved table, using the "City_ID" column as the index
saved_csv = "output_data/cities.csv"
city_data_df = pd.read_csv(saved_csv, index_col="City_ID")

# Preview the first rows
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Scatter each city's maximum temperature (C) against its latitude
latitudes = city_data_df["Lat"]
max_temps_c = city_data_df["Max Temp (C)"]
plt.scatter(latitudes, max_temps_c, edgecolors="black")

# Title, axis labels and grid
plt.title("City Max Temperature vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (C)")
plt.grid()

# Save the figure to disk, then display it
plt.savefig("output_data/Fig1.png")
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Scatter each city's humidity against its latitude
latitudes = city_data_df["Lat"]
humidity_pct = city_data_df["Humidity"]
plt.scatter(latitudes, humidity_pct, edgecolors="black")

# Title, axis labels and grid
plt.title("City Humidity vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid()

# Save the figure to disk, then display it
plt.savefig("output_data/Fig2.png")
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Scatter each city's cloudiness against its latitude
latitudes = city_data_df["Lat"]
cloudiness_pct = city_data_df["Cloudiness"]
plt.scatter(latitudes, cloudiness_pct, edgecolors="black")

# Title, axis labels and grid
plt.title("City Cloudiness vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid()

# Save the figure to disk, then display it
plt.savefig("output_data/Fig3.png")
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter each city's wind speed against its latitude
latitudes = city_data_df["Lat"]
wind_speeds = city_data_df["Wind Speed"]
plt.scatter(latitudes, wind_speeds, edgecolors="black")

# Title, axis labels and grid
plt.title("Wind Speed vs. City Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.grid()

# Save the figure to disk, then display it
plt.savefig("output_data/Fig4.png")
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Helper that fits, draws and reports a linear regression
def linear_regression(x, y):
    """Fit a straight line to (x, y), draw it, and report the r-value.

    The fit is computed with ``scipy.stats.linregress``. The fitted line is
    drawn in red with ``plt.plot`` and the r-value is printed.

    Args:
        x: Independent values. Must support ``slope * x + intercept``
            arithmetic, since the fitted line is evaluated that way.
        y: Dependent values, paired with ``x``.

    Returns:
        A ``(slope, intercept, r_value)`` tuple describing the fit.
    """
    fit = linregress(x, y)
    fitted_line = fit.slope * x + fit.intercept
    plt.plot(x, fitted_line, color="red")
    print(f'The r-value is: {fit.rvalue}')
    return fit.slope, fit.intercept, fit.rvalue

In [None]:
# Select the Northern Hemisphere rows (Latitude >= 0, so the equator is included)
is_northern = city_data_df["Lat"] >= 0
northern_hemi_df = city_data_df.loc[is_northern, :]

# Preview the first rows
northern_hemi_df.head()

In [None]:
# Select the Southern Hemisphere rows (Latitude < 0)
is_southern = city_data_df["Lat"] < 0
southern_hemi_df = city_data_df.loc[is_southern, :]

# Preview the first rows
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere: max temperature vs. latitude
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Max Temp (C)"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Max Temp (C)")
plt.title("Northern Hemisphere - Temperature vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# a fixed data coordinate is not drawn when it falls outside the plotted range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

In [None]:
# Linear regression on Southern Hemisphere: max temperature vs. latitude
x = southern_hemi_df["Lat"]
y = southern_hemi_df["Max Temp (C)"]
plt.scatter(x, y, alpha=.75)
plt.xlabel("Latitude")
plt.ylabel("Max Temp (C)")
plt.title("Southern Hemisphere - Temperature vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# the previous data coordinate (x = -175) lies outside any latitude range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

**Discussion about the linear relationship:** Based on the r-values, there doesn't appear to be a strong correlation between temperature and city latitude. The regression for the southern hemisphere shows a higher correlation than that for the northern hemisphere. Because the r-value for the northern hemisphere is negative, it implies that the further north you go, the cooler it gets. Because the r-value for the southern hemisphere is positive, it implies that temperatures tend to increase as one approaches the equator from the south.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere: humidity vs. latitude
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Humidity"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Northern Hemisphere - Humidity vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# the previous data coordinate (x = 100) lies outside any latitude range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

In [None]:
# Linear regression on Southern Hemisphere: humidity vs. latitude
x = southern_hemi_df["Lat"]
y = southern_hemi_df["Humidity"]
plt.scatter(x, y)
plt.xlabel("Latitude")
# Label spelled with a space, matching the other humidity plots
plt.ylabel("Humidity (%)")
plt.title("Southern Hemisphere - Humidity vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# the previous data coordinate (x = -175) lies outside any latitude range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

**Discussion about the linear relationship:** Because the r-values for humidity versus latitude in both the northern and southern hemispheres are below 0.5, there does not seem to be a strong correlation between humidity and city latitude. Comparing the two, the r-value of the southern hemisphere is slightly higher than the r-value of the northern hemisphere. The northern hemisphere's r-value is positive, which implies that the further north one travels from the equator, the more humid it becomes. The southern hemisphere's r-value, on the other hand, is negative, so it appears that humidity decreases as one travels from the south toward the equator.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere: cloudiness vs. latitude
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Cloudiness"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Northern Hemisphere - Cloudiness vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# a fixed data coordinate is not drawn when it falls outside the plotted range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

In [None]:
# Linear regression on Southern Hemisphere: cloudiness vs. latitude
x = southern_hemi_df["Lat"]
y = southern_hemi_df["Cloudiness"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Southern Hemisphere - Cloudiness vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# the previous data coordinate (x = -175) lies outside any latitude range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

**Discussion about the linear relationship:** Both of the r-values for the linear regression of cloudiness and city latitude for the northern and southern hemispheres are below 0.5, indicating that there is not a strong correlation between the two, although the r-value for the southern hemisphere's cloudiness against latitude hints that there is a stronger correlation than that seen in the northern hemisphere. Because both are negative, it can be implied that the further north one goes, whether beginning above or below the equator, cloudiness appears to increase.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere: wind speed vs. latitude
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Wind Speed"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.title("Northern Hemisphere - Wind Speed vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# a fixed data coordinate is not drawn when it falls outside the plotted range
plt.annotate(f'y = {round(slope,2)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

In [None]:
# Linear regression on Southern Hemisphere: wind speed vs. latitude
x = southern_hemi_df["Lat"]
y = southern_hemi_df["Wind Speed"]
plt.scatter(x, y)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.title("Southern Hemisphere - Wind Speed vs City Latitude")

# linear_regression() draws the fitted line, prints the r-value and returns
# the fit parameters, so reuse them instead of running linregress() again
slope, intercept, r_value = linear_regression(x, y)

# Anchor the equation in axes-fraction coordinates so it is always visible;
# a fixed data coordinate is not drawn when it falls outside the plotted range.
# The slope keeps three decimals here, as in the original annotation.
plt.annotate(f'y = {round(slope,3)}x + {round(intercept,2)}', (0.05, 0.9),
             xycoords="axes fraction", fontsize=15, color="red")

# Show plot
plt.show()

**Discussion about the linear relationship:** Because the r-values for the linear regression of wind speed and city latitude for the northern and southern hemispheres are below 0.5, there does not appear to be a strong correlation between the two. The r-value for the northern hemisphere's windiness against latitude hints that there is a stronger correlation than that seen in the southern hemisphere. Both r-values are positive, implying that the further north one goes, whether beginning above or below the equator, windiness appears to increase.