# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations.
# The active sample size is 50; the commented-out lines keep the 1500-point variant.
#lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
#lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lats = np.random.uniform(lat_range[0], lat_range[1], size=50)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=50)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city
# name once. dict.fromkeys keeps the first occurrence of every name in
# encounter order, so the result matches an "append if not already present"
# loop without re-scanning the list for every point.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL (HTTPS, so the API key is not sent in clear text)
url = "https://api.openweathermap.org/data/2.5/weather?"

# Define an empty list to fetch the weather data for each city
city_data = []

# Units requested from the API; the plots below label temperature in °F and
# wind speed in mph accordingly
units = 'imperial'

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes; numbering restarts at 1
    # in every set, the same as in the first one
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    # Log the record and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Passing the query as `params` lets requests URL-encode the city name;
        # the timeout keeps one stalled connection from hanging the whole loop
        response = requests.get(
            url,
            params={"q": city, "appid": weather_api_key, "units": units},
            timeout=10,
        )

        # Parse the JSON and retrieve data
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']
        city_date = city_weather['dt']

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # The payload lacks the expected fields: skip the city
    except KeyError:
        print("City not found. Skipping...")

    # Network failure or a body that is not JSON: skip the city. Only the
    # exception type is logged because the message can contain the request URL,
    # which includes the API key.
    except (requests.exceptions.RequestException, ValueError) as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# confirm the request results: show the key/value pairs of `city_weather`,
# the most recent payload parsed by the retrieval loop above
city_weather.items()

In [None]:
# Convert the cities weather data into a Pandas DataFrame
# (one row per dict appended to `city_data`, one column per dict key)
city_data_df = pd.DataFrame(city_data)
# Show Record Count (non-null values per column)
city_data_df.count()

In [None]:
# Display sample data (first rows of the DataFrame)
city_data_df.head()

In [None]:
# Export the City_Data into a csv; the index is written as a column labelled "City_ID"
# NOTE(review): assumes the output_data/ directory already exists — confirm
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data back from the CSV written above, using "City_ID" as the index.
# From here on `city_data_df` is the copy loaded from disk.
city_data_df = pd.read_csv("output_data/cities.csv", index_col="City_ID")

# Display sample data
#city_data_df.info()
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Latitude vs. maximum temperature, one marker per city.
# `fig1` is the object returned by the pandas plotting call.
fig1 = city_data_df.plot.scatter(
    x='Lat',
    y='Max Temp',
    s=150,
    c='Teal',
    marker='o',
    edgecolor='Black',
    legend=False,
    figsize=(8, 6),
)

# Faint grid, axis labels and title
fig1.grid(color='gray', linestyle='-', linewidth=0.2)
fig1.set_ylabel('Maximum Temperature (°F)', fontsize=14, weight='bold')
fig1.set_xlabel('Latitude', fontsize=14, weight='bold')
fig1.set_title('CityPY Cities Latitude vs. Maximum Temperature', fontsize=18, weight='bold')

# Write the current figure to disk, then display it
plt.savefig("output_data/Fig1.png")
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Latitude vs. humidity, one marker per city.
# `fig2` is the object returned by the pandas plotting call.
fig2 = city_data_df.plot.scatter(
    x='Lat',
    y='Humidity',
    s=150,
    c='Teal',
    marker='o',
    edgecolor='Black',
    legend=False,
    figsize=(8, 6),
)

# Faint grid, axis labels and title
fig2.grid(color='gray', linestyle='-', linewidth=0.2)
fig2.set_ylabel('Humidity (%)', fontsize=14, weight='bold')
fig2.set_xlabel('Latitude', fontsize=14, weight='bold')
fig2.set_title('CityPY Cities Latitude vs. Humidity', fontsize=18, weight='bold')

# Write the current figure to disk, then display it
plt.savefig("output_data/Fig2.png")
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Latitude vs. cloudiness, one marker per city.
# `fig3` is the object returned by the pandas plotting call.
fig3 = city_data_df.plot.scatter(
    x='Lat',
    y='Cloudiness',
    s=150,
    c='Teal',
    marker='o',
    edgecolor='Black',
    legend=False,
    figsize=(8, 6),
)

# Faint grid, axis labels and title
fig3.grid(color='gray', linestyle='-', linewidth=0.2)
fig3.set_ylabel('Cloudiness (%)', fontsize=14, weight='bold')
fig3.set_xlabel('Latitude', fontsize=14, weight='bold')
fig3.set_title('CityPY Cities Latitude vs. Cloudiness', fontsize=18, weight='bold')

# Write the current figure to disk, then display it
plt.savefig("output_data/Fig3.png")
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Latitude vs. wind speed, one marker per city.
# `fig4` is the object returned by the pandas plotting call.
fig4 = city_data_df.plot.scatter(
    x='Lat',
    y='Wind Speed',
    s=150,
    c='Teal',
    marker='o',
    edgecolor='Black',
    legend=False,
    figsize=(8, 6),
)

# Faint grid, axis labels and title
fig4.grid(color='gray', linestyle='-', linewidth=0.2)
fig4.set_ylabel('Wind Speed (mph)', fontsize=14, weight='bold')
fig4.set_xlabel('Latitude', fontsize=14, weight='bold')
fig4.set_title('CityPY Cities Latitude vs. Wind Speed', fontsize=18, weight='bold')

# Write the current figure to disk, then display it
plt.savefig("output_data/Fig4.png")
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Linear regression of Maximum Temperature on Latitude for all retrieved cities
x_values = city_data_df['Lat']
y_values = city_data_df['Max Temp']
print(f"The correlation coefficient between Latitude and Maximum Temperature is {round(st.pearsonr(x_values, y_values)[0],2)}")

# Least-squares fit of latitude (x_values) against Maximum Temperature (y_values)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

# Equation text "y = mx + b"; the sign is written separately so a negative
# intercept reads "x - 3.2" instead of "x +-3.2"
intercept_sign = '+' if intercept >= 0 else '-'
slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

# The printed quantity is rvalue squared, so it is labelled as r^2
print(f'The r^2-value is: {rvalue**2}')

# Scatter of the data plus the fitted line ('r-' is a solid red line)
fig5, ax5 = plt.subplots(figsize=(8,6))
ax5.scatter(x_values, y_values, s=150, marker='o', edgecolors='Black', c='Teal')
ax5.plot(x_values, regress_values, 'r-')

# Anchor the equation in axes-fraction coordinates so it is drawn inside the
# plot whatever range the data covers (a fixed data coordinate can fall outside
# the axes, in which case the annotation is not drawn)
ax5.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

# Title and axis labels
ax5.set_title('CityPY Cities Latitude vs. Maximum Temperature', fontsize=18, weight='bold')
ax5.set_xlabel('Latitude', fontsize=14, weight='bold')
ax5.set_ylabel('Maximum Temperature (°F)', fontsize=14, weight='bold')

# Add the grid before saving so the saved image matches the displayed one
ax5.grid(color='gray', linestyle='-', linewidth=0.2)

# Save the figure, then present the data
fig5.savefig("output_data/Fig5.png")
plt.show()

In [None]:
# Linear regression of Humidity on Latitude for all retrieved cities
x_values = city_data_df['Lat']
y_values = city_data_df['Humidity']
print(f"The correlation coefficient between latitude and Humidity is {round(st.pearsonr(x_values, y_values)[0],2)}")

# Least-squares fit of latitude (x_values) against Humidity (y_values)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

# Equation text "y = mx + b"; the sign is written separately so a negative
# intercept reads "x - 3.2" instead of "x +-3.2"
intercept_sign = '+' if intercept >= 0 else '-'
slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

# The printed quantity is rvalue squared, so it is labelled as r^2
print(f'The r^2-value is: {rvalue**2}')

# Scatter of the data plus the fitted line ('r-' is a solid red line)
fig6, ax6 = plt.subplots(figsize=(8,6))
ax6.scatter(x_values, y_values, s=150, marker='o', edgecolors='Black', c='Teal')
ax6.plot(x_values, regress_values, 'r-')

# Anchor the equation in axes-fraction coordinates so it is drawn inside the
# plot whatever range the data covers (a fixed data coordinate can fall outside
# the axes, in which case the annotation is not drawn)
ax6.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

# Title and axis labels
ax6.set_title('CityPY Cities Latitude vs. Humidity', fontsize=18, weight='bold')
ax6.set_xlabel('Latitude', fontsize=14, weight='bold')
ax6.set_ylabel('Humidity (%)', fontsize=14, weight='bold')

# Add the grid before saving so the saved image matches the displayed one
ax6.grid(color='gray', linestyle='-', linewidth=0.2)

# Save the figure, then present the data
fig6.savefig("output_data/Fig6.png")
plt.show()

In [None]:
# Linear regression of Cloudiness on Latitude for all retrieved cities
x_values = city_data_df['Lat']
y_values = city_data_df['Cloudiness']
print(f"The correlation coefficient between Latitude and Cloudiness is {round(st.pearsonr(x_values, y_values)[0],2)}")

# Least-squares fit of latitude (x_values) against Cloudiness (y_values)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

# Equation text "y = mx + b"; the sign is written separately so a negative
# intercept reads "x - 3.2" instead of "x +-3.2"
intercept_sign = '+' if intercept >= 0 else '-'
slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

# The printed quantity is rvalue squared, so it is labelled as r^2
print(f'The r^2-value is: {rvalue**2}')

# Scatter of the data plus the fitted line ('r-' is a solid red line)
fig7, ax7 = plt.subplots(figsize=(8,6))
ax7.scatter(x_values, y_values, s=150, marker='o', edgecolors='Black', c='Teal')
ax7.plot(x_values, regress_values, 'r-')

# Anchor the equation in axes-fraction coordinates so it is drawn inside the
# plot whatever range the data covers (a fixed data coordinate can fall outside
# the axes, in which case the annotation is not drawn)
ax7.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

# Title and axis labels
ax7.set_title('CityPY Cities Latitude vs. Cloudiness', fontsize=18, weight='bold')
ax7.set_xlabel('Latitude', fontsize=14, weight='bold')
ax7.set_ylabel('Cloudiness (%)', fontsize=14, weight='bold')

# Add the grid before saving so the saved image matches the displayed one
ax7.grid(color='gray', linestyle='-', linewidth=0.2)

# Save the figure, then present the data
fig7.savefig("output_data/Fig7.png")
plt.show()

In [None]:
# Linear regression of Wind Speed on Latitude for all retrieved cities
x_values = city_data_df['Lat']
y_values = city_data_df['Wind Speed']
print(f"The correlation coefficient between Latitude and Wind Speed is {round(st.pearsonr(x_values, y_values)[0],2)}")

# Least-squares fit of latitude (x_values) against Wind Speed (y_values)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

# Equation text "y = mx + b"; the sign is written separately so a negative
# intercept reads "x - 3.2" instead of "x +-3.2"
intercept_sign = '+' if intercept >= 0 else '-'
slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

# The printed quantity is rvalue squared, so it is labelled as r^2
print(f'The r^2-value is: {rvalue**2}')

# Scatter of the data plus the fitted line ('r-' is a solid red line)
fig8, ax8 = plt.subplots(figsize=(8,6))
ax8.scatter(x_values, y_values, s=150, marker='o', edgecolors='Black', c='Teal')
ax8.plot(x_values, regress_values, 'r-')

# Anchor the equation in axes-fraction coordinates so it is drawn inside the
# plot whatever range the data covers (a fixed data coordinate can fall outside
# the axes, in which case the annotation is not drawn)
ax8.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

# Title and axis labels
ax8.set_title('CityPY Cities Latitude vs. Wind Speed', fontsize=18, weight='bold')
ax8.set_xlabel('Latitude', fontsize=14, weight='bold')
ax8.set_ylabel('Wind Speed (mph)', fontsize=14, weight='bold')

# Add the grid before saving so the saved image matches the displayed one
ax8.grid(color='gray', linestyle='-', linewidth=0.2)

# Save the figure, then present the data
fig8.savefig("output_data/Fig8.png")
plt.show()

In [None]:
# Create a DataFrame with the Northern Hemisphere data (Latitude >= 0)
northern_hemi_df = city_data_df.loc[city_data_df['Lat'] >= 0]

# Display sample data
northern_hemi_df.head()

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly below zero
southern_hemi_df = city_data_df.loc[city_data_df['Lat'].lt(0)]

# Preview the first rows
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Scratch check: build the *name* of a hemisphere DataFrame as text.
# `hemi` is only a string here, not the DataFrame it names.
hemispheres = ['northern', 'southern']
hemi = f"{hemispheres[1]}_hemi_df"
hemi

In [None]:
# Linear regression plots of Latitude vs. each of Maximum Temperature, Humidity,
# Cloudiness, and Wind Speed, drawn separately for each hemisphere
hemispheres = ['northern', 'southern']
y_tests = ['Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed']
y_labels = ['Maximum Temperature (°F)', 'Humidity (%)', 'Cloudiness (%)', 'Wind Speed (mph)']

# Look-up from hemisphere name to its DataFrame, so each pass of the outer
# loop works on its own hemisphere
hemi_frames = {'northern': northern_hemi_df, 'southern': southern_hemi_df}

for sphere in hemispheres:
    hemi_df = hemi_frames[sphere]

    # A line cannot be fitted to fewer than two points
    if len(hemi_df) < 2:
        print(f'Not enough {sphere} hemisphere cities for a regression. Skipping...')
        continue

    x_values = hemi_df['Lat']

    # One figure per weather variable; zip pairs each column with its axis label
    for test, y_label in zip(y_tests, y_labels):
        y_values = hemi_df[test]

        # Least-squares fit of latitude (x_values) against the current variable
        (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
        regress_values = x_values * slope + intercept

        # Equation text "y = mx + b" with an explicit sign for the intercept
        intercept_sign = '+' if intercept >= 0 else '-'
        slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

        # Print the correlation coefficient and its square (r^2)
        print(f'{sphere.title()} Hemisphere: the correlation coefficient between Latitude and {test} is {round(st.pearsonr(x_values, y_values)[0],2)}. The r^2-value is: {round(rvalue**2,2)}.')

        # Scatter of the data plus the fitted line ('r-' is a solid red line)
        fig, ax = plt.subplots(figsize=(8,6))
        ax.scatter(x_values, y_values, s=150, marker='o', edgecolors='Black', c='Teal')
        ax.plot(x_values, regress_values, 'r-')

        # Axes-fraction coordinates keep the equation inside the plot for every
        # variable, whatever its value range
        ax.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

        # Title and axis labels
        ax.set_title(f'CityPY Cities ({sphere.title()} Hemisphere) Latitude vs. {test}', fontsize=18, weight='bold')
        ax.set_xlabel('Latitude', fontsize=14, weight='bold')
        ax.set_ylabel(y_label, fontsize=14, weight='bold')

        # Add the grid before saving so the saved image matches the displayed one
        ax.grid(color='gray', linestyle='-', linewidth=0.2)

        # One file per hemisphere/variable so no figure overwrites another
        fig.savefig(f"output_data/Fig_{sphere}_{test.replace(' ', '_')}.png")

        # present the data
        plt.show()

In [None]:
# 2x2 grid of scatter plots for the Southern Hemisphere:
# Latitude on the x axis against one weather variable per subplot
fig9, axes = plt.subplots(nrows=2, ncols=2, figsize=(18, 16))  # define the figure and subplots
axes = axes.ravel()  # array to 1D
cols = ['Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed']  # DataFrame columns to plot, one per subplot
colors = ['teal', 'tab:blue', 'tab:orange', 'tab:green']  # one color per subplot, otherwise all subplots will be one color

for col, color, ax in zip(cols, colors, axes):
    # A scatter plot needs both an x and a y column, so plot from the DataFrame
    southern_hemi_df.plot(kind='scatter', x='Lat', y=col, ax=ax, color=color, label=col, title=col)
    ax.legend()

# All four subplots are in use, so none is removed
fig9.tight_layout()
plt.show()

In [None]:
# Linear regression on Northern Hemisphere

def create_plot(x_value, y_value, fig_num, title=None, suptitle=None, xlabel=None, ylabel=None):
    """Draw a scatter plot of y_value against x_value with its regression line.

    Parameters
    ----------
    x_value, y_value : pandas.Series
        Paired observations to plot and fit.
    fig_num : int
        Matplotlib figure number, so each call draws into its own figure.
    title, suptitle, xlabel, ylabel : str, optional
        Axes title, figure title and axis labels. When omitted they fall back
        to the notebook-level variables plot_title, plot_title_sup,
        plot_xlabel and plot_ylabel, which each calling cell assigns right
        before the call.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on. Displaying the figure (plt.show()) is
        left to the caller.

    Raises
    ------
    ValueError
        If fewer than two observations are supplied.
    """
    if len(x_value) < 2:
        raise ValueError("At least two observations are needed to fit a regression line")

    # Fall back to the notebook-level text when no explicit value is passed
    title = plot_title if title is None else title
    suptitle = plot_title_sup if suptitle is None else suptitle
    xlabel = plot_xlabel if xlabel is None else xlabel
    ylabel = plot_ylabel if ylabel is None else ylabel

    # Least-squares fit
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_value, y_value)
    regress_values = x_value * slope + intercept

    # Equation text "y = mx + b" with an explicit sign for the intercept
    intercept_sign = '+' if intercept >= 0 else '-'
    slope_intercept = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"

    # clear=True empties a figure left over from an earlier run of the same cell
    fig, ax = plt.subplots(num=fig_num, clear=True, figsize=(8, 6))
    ax.scatter(x_value, y_value, s=150, marker='o', edgecolors='Black', c='Teal')
    ax.plot(x_value, regress_values, 'r-')  # 'r-' is a solid red line

    # Axes-fraction coordinates keep the equation inside the plot for any data range
    ax.annotate(slope_intercept, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=20, color="red")

    fig.suptitle(suptitle, fontsize=18, weight='bold')
    ax.set_title(title, fontsize=14, weight='bold')
    ax.set_xlabel(xlabel, fontsize=14, weight='bold')
    ax.set_ylabel(ylabel, fontsize=14, weight='bold')
    ax.grid(color='gray', linestyle='-', linewidth=0.2)

    print(f'The r-value is: {rvalue} (r^2 = {rvalue**2})')
    return ax


j = 10    # number figure to avoid plotting confusion
x_value = northern_hemi_df['Lat']
y_value = northern_hemi_df['Max Temp']
plot_title = 'Temperature vs. Latitude Linear Regression Plot'
plot_title_sup = 'Northern Hemisphere'
plot_xlabel = 'Latitude'
plot_ylabel = 'Maximum Temperature (°F)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

In [None]:
# Linear regression on Southern Hemisphere: latitude vs. maximum temperature
j += 1    # next figure number

# Plot text; presumably picked up by create_plot, which is only handed the
# data and the figure number — confirm against its definition
plot_title_sup = "Southern Hemisphere"
plot_title = "Temperature vs. Latitude Linear Regression Plot"
plot_xlabel, plot_ylabel = "Latitude", "Maximum Temperature (°F)"

# Data to regress
x_value, y_value = southern_hemi_df["Lat"], southern_hemi_df["Max Temp"]

create_plot(x_value, y_value, j)
plt.show()

**Discussion about the linear relationship:** YOUR RESPONSE HERE

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: latitude vs. humidity
j += 1    # number figure to avoid plotting confusion
x_value = northern_hemi_df['Lat']
y_value = northern_hemi_df['Humidity']
plot_title = 'Humidity vs. Latitude Linear Regression Plot'
plot_title_sup = 'Northern Hemisphere'
plot_xlabel = 'Latitude'
# Same unit suffix as the other humidity plots in this notebook
plot_ylabel = 'Humidity (%)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

In [None]:
# Southern Hemisphere: latitude vs. humidity
j += 1    # number figure to avoid plotting confusion
x_value = southern_hemi_df['Lat']
y_value = southern_hemi_df['Humidity']
plot_title = 'Humidity vs. Latitude Linear Regression Plot'
plot_title_sup = 'Southern Hemisphere'
plot_xlabel = 'Latitude'
# Same unit suffix as the other humidity plots in this notebook
plot_ylabel = 'Humidity (%)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

**Discussion about the linear relationship:** YOUR RESPONSE HERE

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: latitude vs. cloudiness
j += 1    # number figure to avoid plotting confusion
x_value = northern_hemi_df['Lat']
y_value = northern_hemi_df['Cloudiness']
plot_title = 'Cloudiness vs. Latitude Linear Regression Plot'
plot_title_sup = 'Northern Hemisphere'
plot_xlabel = 'Latitude'
# The 'Cloudiness' column is labelled as a percentage in the other plots of
# this notebook, not in oktas
plot_ylabel = 'Cloudiness (%)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

In [None]:
# Southern Hemisphere: latitude vs. cloudiness
j += 1    # number figure to avoid plotting confusion
x_value = southern_hemi_df['Lat']
y_value = southern_hemi_df['Cloudiness']
plot_title = 'Cloudiness vs. Latitude Linear Regression Plot'
plot_title_sup = 'Southern Hemisphere'
plot_xlabel = 'Latitude'
# The 'Cloudiness' column is labelled as a percentage in the other plots of
# this notebook, not in oktas
plot_ylabel = 'Cloudiness (%)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

**Discussion about the linear relationship:** YOUR RESPONSE HERE

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: latitude vs. wind speed
j += 1    # number figure to avoid plotting confusion
x_value = northern_hemi_df['Lat']
y_value = northern_hemi_df['Wind Speed']
plot_title = 'Wind Speed vs. Latitude Linear Regression Plot'
plot_title_sup = 'Northern Hemisphere'
plot_xlabel = 'Latitude'
# The data was requested with units='imperial', so wind speed is in mph
plot_ylabel = 'Wind Speed (mph)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

In [None]:
# Southern Hemisphere: latitude vs. wind speed
j += 1    # number figure to avoid plotting confusion
x_value = southern_hemi_df['Lat']
y_value = southern_hemi_df['Wind Speed']
# Title names the variable actually plotted (wind speed)
plot_title = 'Wind Speed vs. Latitude Linear Regression Plot'
plot_title_sup = 'Southern Hemisphere'
plot_xlabel = 'Latitude'
# The data was requested with units='imperial', so wind speed is in mph
plot_ylabel = 'Wind Speed (mph)'

# Draw the regression plot for the values assigned above
create_plot(x_value, y_value, j)

plt.show()

**Discussion about the linear relationship:** YOUR RESPONSE HERE