## WeatherPy

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination of the exported city weather table (CSV)
output_data_file = "Output/cities.csv"

# Bounds, in degrees, used when sampling random coordinates: (min, max)
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they were first encountered
cities = []
# Companion set so the uniqueness check is a constant-time lookup instead of
# a scan of the ever-growing list
seen_cities = set()

# Draw 1500 random latitude / longitude pairs inside the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for each random coordinate pair
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Only record a city the first time it is seen
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm there are enough cities to query
len(cities)

## Perform API Calls

In [None]:
# Query the Open Weather API once per city and collect the fields needed for
# the data frame.

# Parallel lists: position i in every list describes the same city, so a city
# is either appended to all of them or to none of them.
loc = []
lat = []
lng = []
max_temp = []
hum = []
cloud = []
wind = []
country = []
date = []

# Url of Open Weather API
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Counters used only for the progress messages
record = 0
sets = 1


print("Beginning Data Retrival")
print( "-" * 29)

for place in cities:

    # Pause before every request so the loop stays under the API rate limit.
    # The delay has to live inside the loop to have any throttling effect.
    time.sleep(1)

    try:
        # Let requests build the query string so city names containing spaces
        # or reserved characters are URL-encoded correctly. The timeout keeps
        # one stalled connection from hanging the whole run.
        weather_response = requests.get(
            url,
            params={"appid": weather_api_key, "q": place, "units": units},
            timeout=10,
        )
        weather_json = weather_response.json()

        # Read every field before appending anything: if any key is missing
        # the city is skipped as a whole and the lists stay the same length.
        fields = (
            weather_json["name"],
            weather_json["coord"]["lat"],
            weather_json["coord"]["lon"],
            weather_json["main"]["temp_max"],
            weather_json["main"]["humidity"],
            weather_json["clouds"]["all"],
            weather_json["wind"]["speed"],
            weather_json["sys"]["country"],
            weather_json["dt"],
        )
    except requests.RequestException as err:
        # Report only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed for {place} ({type(err).__name__}). Skipping ... ")
        continue
    except (KeyError, TypeError, ValueError):
        # Response did not have the expected weather structure
        print("City not found. Skipping ... ")
        continue

    for column, value in zip(
        (loc, lat, lng, max_temp, hum, cloud, wind, country, date), fields
    ):
        column.append(value)

    # Advance the record counter, rolling over to a new set after record 49
    if record < 49:
        record += 1
    else:
        record = 0
        sets += 1

    print(f"Processing Record of {record} of Set {sets} | {place}")

In [None]:
# Collect the per-city lists under their column names
weather_dict = dict(
    zip(
        ["City", "Lat", "Lng", "Max Temp", "Humidity",
         "Cloudiness", "Wind Speed", "Country", "Date"],
        [loc, lat, lng, max_temp, hum, cloud, wind, country, date],
    )
)

# orient="index" builds one row per dictionary key; transposing turns that
# into one row per city with one column per measurement
city_Weather = pd.DataFrame.from_dict(weather_dict, orient="index").T
city_Weather

In [None]:
# Drop any city whose reported humidity exceeds 100%.
# The maximum is evaluated here but not displayed, because it is not the
# last expression of the cell.
city_Weather["Humidity"].max()

city_Weather = city_Weather[city_Weather["Humidity"].le(100)]

## Convert Raw Data to DataFrame

In [None]:
# Write the city weather data to the configured CSV path
city_Weather.to_csv(path_or_buf=output_data_file)

# Preview the first five rows of the data frame
city_Weather.head(n=5)

## Plotting the Data

### Temperature vs. Latitude

In [None]:
# Scatter plot of max temperature vs latitude of the cities.
# Read the values from the cleaned data frame rather than the raw API lists,
# so the plot shows exactly the rows that survived the humidity filter and
# matches the data used by the regression cells. The columns are cast to
# float in the same way the regression cells cast them.

plt.scatter(
    city_Weather["Lat"].astype(float),
    city_Weather["Max Temp"].astype(float),
    marker = "o",
    edgecolor = "black",
)

plt.title("Maximum Temperature (2/12/2021) vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.grid(True)

plt.show()

This scatter plot displays the relationship between
a city's maximum temperature and a city's latitude in February. 
This displays how the city's latitude, distance from the equator, affects
a city's maximum temperature.

### Humidity vs. Latitude

In [None]:
# Scatter plot of humidity vs latitude of the cities.
# Read the values from the cleaned data frame rather than the raw API lists,
# so the plot shows exactly the rows that survived the humidity filter and
# matches the data used by the regression cells. The columns are cast to
# float in the same way the regression cells cast them.

plt.scatter(
    city_Weather["Lat"].astype(float),
    city_Weather["Humidity"].astype(float),
    marker = "o",
    edgecolor = "black",
)

plt.title("Humidity (2/12/2021) vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.grid(True)
plt.show()

This scatter plot displays the relationship between a city's 
humidity and a city's latitude in February. 
This displays how the city's latitude, distance from the equator, 
affects a city's humidity.

### Cloudiness vs. Latitude

In [None]:
# Scatter plot of cloudiness vs latitude of the cities.
# Read the values from the cleaned data frame rather than the raw API lists,
# so the plot shows exactly the rows that survived the humidity filter and
# matches the data used by the regression cells. The columns are cast to
# float in the same way the regression cells cast them.

plt.scatter(
    city_Weather["Lat"].astype(float),
    city_Weather["Cloudiness"].astype(float),
    marker = "o",
    edgecolor = "black",
)

plt.title("Cloudiness (2/12/2021) vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.grid(True)
plt.show()

This scatter plot displays the relationship between a city's cloudiness, sky coverage of clouds, and a city's latitude in February. This displays how the city's latitude, distance from the equator, affects a city's cloudiness.

### Wind Speed vs. Latitude

In [None]:
# Scatter plot of wind speed vs latitude of the cities.
# Read the values from the cleaned data frame rather than the raw API lists,
# so the plot shows exactly the rows that survived the humidity filter and
# matches the data used by the regression cells. The columns are cast to
# float in the same way the regression cells cast them.

plt.scatter(
    city_Weather["Lat"].astype(float),
    city_Weather["Wind Speed"].astype(float),
    marker = "o",
    edgecolor = "black",
)

plt.title("Wind Speed (2/12/2021) vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)
plt.show()

This scatter plot displays the relationship between a city's wind speed and a city's latitude in February. This displays how the city's latitude, distance from the equator, affects a city's wind speed.

## Linear Regression

In [None]:
# Split the data frame into northern and southern hemispheres.
# A city exactly on the equator (Lat == 0) is assigned to the northern group
# only; using >= 0 and <= 0 together would put it in both groups and feed the
# same point into both sets of regressions.
northern_hemisphere = city_Weather.loc[city_Weather["Lat"] >= 0, :]
southern_hemisphere = city_Weather.loc[city_Weather["Lat"] < 0, :]

# Per-hemisphere series for the regression plots, cast to float so they can
# be passed to linregress and used in arithmetic
n_lat = northern_hemisphere["Lat"].astype(float)
n_temp = northern_hemisphere["Max Temp"].astype(float)
n_hum = northern_hemisphere["Humidity"].astype(float)
n_cloud = northern_hemisphere["Cloudiness"].astype(float)
n_wind = northern_hemisphere["Wind Speed"].astype(float)

s_lat = southern_hemisphere["Lat"].astype(float)
s_temp = southern_hemisphere["Max Temp"].astype(float)
s_hum = southern_hemisphere["Humidity"].astype(float)
s_cloud = southern_hemisphere["Cloudiness"].astype(float)
s_wind = southern_hemisphere["Wind Speed"].astype(float)

#### Northern Hemisphere - Temperature vs. Latitude

In [None]:
# Northern Hemisphere: least-squares fit of max temperature against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_temp)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + n_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(n_lat, n_temp, marker="o")
plt.plot(n_lat, regress_line, color="red")
plt.annotate(line_eq, (5, -30), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("Maximum Temperature (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of -0.87858, there is a strong negative correlation between maximum temperature and a city's latitude in the Northern Hemisphere. It is therefore reasonable to say that, in the Northern Hemisphere during February, a city's maximum temperature decreases as its latitude increases.

#### Southern Hemisphere - Temperature vs. Latitude

In [None]:
# Southern Hemisphere: least-squares fit of max temperature against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_temp)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + s_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(s_lat, s_temp, marker="o")
plt.plot(s_lat, regress_line, color="red")
plt.annotate(line_eq, (-55, 90), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("Maximum Temperature (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.45053, there is a moderate positive correlation between maximum temperature and a city's latitude in the Southern Hemisphere. Therefore, in the Southern Hemisphere during February, a city's maximum temperature tends to increase as its latitude increases (that is, as the city gets closer to the equator).

#### Northern Hemisphere - Humidity vs. Latitude

In [None]:
# Northern Hemisphere: least-squares fit of humidity against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_hum)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + n_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(n_lat, n_hum, marker="o")
plt.plot(n_lat, regress_line, color="red")
plt.annotate(line_eq, (45, 10), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.title("Humidity (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.1994, there is only a weak positive correlation between humidity and a city's latitude in the Northern Hemisphere. It therefore cannot be said with confidence that, in the Northern Hemisphere during February, a city's humidity increases as its latitude increases.

#### Southern Hemisphere - Humidity vs. Latitude

In [None]:
# Southern Hemisphere: least-squares fit of humidity against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_hum)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + s_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(s_lat, s_hum, marker="o")
plt.plot(s_lat, regress_line, color="red")
plt.annotate(line_eq, (-25, 35), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.title("Humidity (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.4013, there is a moderate positive correlation between humidity and a city's latitude in the Southern Hemisphere. Therefore, in the Southern Hemisphere during February, a city's humidity tends to increase as its latitude increases.

#### Northern Hemisphere - Cloudiness vs. Latitude

In [None]:
# Northern Hemisphere: least-squares fit of cloudiness against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_cloud)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + n_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(n_lat, n_cloud, marker="o")
plt.plot(n_lat, regress_line, color="red")
plt.annotate(line_eq, (45, 10), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.title("Cloudiness (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.2285, there is only a weak positive correlation between cloudiness and a city's latitude in the Northern Hemisphere. It therefore cannot be said with confidence that, in the Northern Hemisphere during February, a city's cloudiness increases as its latitude increases.

#### Southern Hemisphere - Cloudiness vs. Latitude

In [None]:
# Southern Hemisphere: least-squares fit of cloudiness against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_cloud)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + s_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(s_lat, s_cloud, marker="o")
plt.plot(s_lat, regress_line, color="red")
plt.annotate(line_eq, (-55, 10), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.title("Cloudiness (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.2449, there is only a weak positive correlation between cloudiness and a city's latitude in the Southern Hemisphere. It therefore cannot be said with confidence that, in the Southern Hemisphere during February, a city's cloudiness increases as its latitude increases.

#### Northern Hemisphere - Wind Speed vs. Latitude

In [None]:
# Northern Hemisphere: least-squares fit of wind speed against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_wind)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + n_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(n_lat, n_wind, marker="o")
plt.plot(n_lat, regress_line, color="red")
plt.annotate(line_eq, (5, 35), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Wind Speed (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of 0.04457, there is essentially no correlation between wind speed and a city's latitude in the Northern Hemisphere. It therefore cannot be said that, in the Northern Hemisphere during February, a city's wind speed increases as its latitude increases.

#### Southern Hemisphere - Wind Speed vs. Latitude

In [None]:
# Southern Hemisphere: least-squares fit of wind speed against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_wind)

# Fitted values along the observed latitudes, and the equation label
regress_line = intercept + s_lat * slope
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'

# Observations, then the fitted line and its equation drawn on top in red
plt.scatter(s_lat, s_wind, marker="o")
plt.plot(s_lat, regress_line, color="red")
plt.annotate(line_eq, (-55, 20), color="red", fontsize=15)

plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Wind Speed (2/12/2021) vs Latitude")
plt.show()

# Report the correlation coefficient of the fit
print(f'The r value is {rvalue}')

With an r value of -0.1610, there is only a weak negative correlation between wind speed and a city's latitude in the Southern Hemisphere. It therefore cannot be said with confidence that, in the Southern Hemisphere during February, a city's wind speed decreases as its latitude increases.

## Observable Trends

1. There is a correlation between maximum temperature and a city's latitude. In the Northern Hemisphere the correlation is strong and negative, meaning that the further north of the equator a city is, the lower its maximum temperature is expected to be.

2. Cloudiness has only a weak correlation with latitude; therefore, a city's cloud cover percentage cannot be predicted from its latitude.

3. In the Northern Hemisphere, wind speed has essentially no correlation with a city's latitude; therefore, wind speed cannot be predicted from the city's latitude.