In [None]:

get_ipython().system(' pip install citipy')
import matplotlib.pyplot as plt
from api_keys import weather_api_key
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import seaborn as sns
import urllib
import json
from citipy import citipy

# Unit system name; the same "imperial" value is used when the API URL is built,
# and the plots label temperature in F and wind speed in mph.
temp_units = "imperial"
# CSV file name; matches the file the export cell writes and reads back.
output_data_file = "WeatherPy.csv"

# Latitude / longitude bounds (degrees) for sampling random coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)




In [None]:
### Generate the Cities List by Using the `citipy` Library

In [None]:
# Build a list of unique city names: draw random coordinates and snap each
# pair to its nearest city with citipy.
SAMPLE_SIZE = 1500  # number of random (lat, lng) pairs to draw

# `lat_range` / `lng_range` come from the configuration cell at the top.
lats = np.random.uniform(lat_range[0], lat_range[1], size=SAMPLE_SIZE)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=SAMPLE_SIZE)
lat_lngs = zip(lats, lngs)

cities = []
seen_cities = set()  # O(1) duplicate check; `cities` keeps first-seen order

for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Many random points resolve to the same city; keep each name once.
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

print(f"Number of cities in the list: {len(cities)}")

In [None]:
## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:

# Base endpoint of the OpenWeatherMap current-weather API. HTTPS keeps the
# API key out of clear-text traffic.
base_url = "https://api.openweathermap.org/data/2.5/weather?"
units = temp_units  # unit system defined once in the configuration cell

# Query URL shared by every request; the city is appended per request.
url = f"{base_url}appid={weather_api_key}&units={units}"

# Show the endpoint with the key masked so the secret never lands in the
# notebook's saved output.
print(f"{base_url}appid=<redacted>&units={units}")

In [None]:
# Query the weather API once per city and collect one record (dict) per
# successful response in `city_data`.
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Counters used only for the progress log (cities are grouped in sets of 50)
record_count = 1
set_count = 1

# Loop through all the cities in our list
for i, city in enumerate(cities):

    # Start a new logging set every 50 cities; numbering restarts at 1 so
    # every set is labelled the same way as the first one.
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1

    # Log the record and set numbers. The request URL is deliberately not
    # printed because it embeds the API key.
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))
    record_count += 1

    # Run an API request for the city
    try:
        # `params` lets requests URL-encode the city name and append it to the
        # query string already present in `url`. The timeout keeps one stalled
        # connection from hanging the whole run.
        response = requests.get(url, params={"q": city}, timeout=10)
        city_weather = response.json()

        # Parse out the fields of interest; a missing key (e.g. an error
        # payload for an unknown city) raises KeyError and skips the city.
        city_latitude = city_weather["coord"]["lat"]
        city_longitude = city_weather["coord"]["lon"]
        city_max_temperature = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]

        # Append the city information to the city_data list
        city_data.append({"City": city,
                          "Lat": city_latitude,
                          "Lng": city_longitude,
                          "Max Temp": city_max_temperature,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Transport-level failure: report only the exception type, since the
    # exception message can contain the full URL (and therefore the key).
    except requests.RequestException as exc:
        print("Request failed (%s). Skipping..." % type(exc).__name__)

    # Response was not usable weather data: skip the city.
    # KeyboardInterrupt and programming errors are no longer swallowed.
    except (KeyError, ValueError):
        print("City not found...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# Build a DataFrame from the list of per-city record dicts.
city_data_pd = pd.DataFrame(city_data)

# Cell output: number of non-null values in each column.
city_data_pd.count()

In [None]:

# Preview the first rows of the city weather table.
city_data_pd.head()


In [None]:
# Export the city data to CSV. No index column is written, so the file's
# first column is "City".
city_data_pd.to_csv(output_data_file, encoding="utf-8", index=False)

# Read the saved data back. No `index_col` is passed: the file has no index
# column, so `index_col=0` would turn the "City" column into the index.
# Only empty fields are treated as missing, so the country code "NA"
# (Namibia) is kept as text instead of being parsed as NaN.
city_data_pd = pd.read_csv(output_data_file, keep_default_na=False, na_values=[""])

# Display sample data
city_data_pd.head()

In [None]:
### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Latitude vs. max temperature for every city.
# Apply the seaborn theme BEFORE creating the axes: the theme works through
# rcParams, which are picked up when axes are created, so setting it after
# plotting leaves this figure unstyled.
sns.set()

fig, ax = plt.subplots()
ax.scatter(city_data_pd["Lat"],
           city_data_pd["Max Temp"],
           edgecolor="black", linewidths=1, marker="o",
           alpha=0.8, label="Cities")

# The title carries the date the plot was generated (locale date format).
ax.set_title("City Latitude vs. Max Temperature (%s)" % time.strftime("%x"))
ax.set_ylabel("Max Temperature (F)")
ax.set_xlabel("Latitude")
ax.grid(True)
plt.show()

In [None]:
#### Latitude Vs. Humidity

In [None]:
# Latitude vs. humidity for every city.
# Apply the seaborn theme BEFORE creating the axes so the styling does not
# depend on an earlier cell having set it.
sns.set()

fig, ax = plt.subplots()
ax.scatter(city_data_pd["Lat"],
           city_data_pd["Humidity"],
           edgecolor="black", linewidths=1, marker="o",
           alpha=0.8, label="Cities")

# The title carries the date the plot was generated (locale date format).
ax.set_title("City Latitude vs. Humidity (%s)" % time.strftime("%x"))
ax.set_ylabel("Humidity (%)")
ax.set_xlabel("Latitude")
ax.grid(True)

plt.show()

In [None]:
#### Latitude Vs. Cloudiness

In [None]:
# Latitude vs. cloudiness for every city.
# Apply the seaborn theme BEFORE creating the axes so the styling does not
# depend on an earlier cell having set it.
sns.set()

fig, ax = plt.subplots()
ax.scatter(city_data_pd["Lat"],
           city_data_pd["Cloudiness"],
           edgecolor="black", linewidths=1, marker="o",
           alpha=0.8, label="Cities")

# The title carries the date the plot was generated (locale date format).
ax.set_title("City Latitude vs. Cloudiness (%s)" % time.strftime("%x"))
ax.set_ylabel("Cloudiness (%)")
ax.set_xlabel("Latitude")
ax.grid(True)

plt.show()

In [None]:
#### Latitude vs. Wind Speed Plot

In [None]:
# Latitude vs. wind speed for every city.
# Apply the seaborn theme BEFORE creating the axes so the styling does not
# depend on an earlier cell having set it.
sns.set()

fig, ax = plt.subplots()
ax.scatter(city_data_pd["Lat"],
           city_data_pd["Wind Speed"],
           edgecolor="black", linewidths=1, marker="o",
           alpha=0.8, label="Cities")

# The title carries the date the plot was generated (locale date format).
ax.set_title("City Latitude vs. Wind Speed (%s)" % time.strftime("%x"))
ax.set_ylabel("Wind Speed (mph)")
ax.set_xlabel("Latitude")
ax.grid(True)

plt.show()

In [None]:
## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Northern Hemisphere subset: rows whose latitude is at or above the equator.
is_northern = city_data_pd["Lat"] >= 0
northern_hemi_df = city_data_pd.loc[is_northern]
northern_hemi_df.head()

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly below the equator.
is_southern = city_data_pd["Lat"] < 0
southern_hemi_df = city_data_pd.loc[is_southern]
southern_hemi_df.head()



In [None]:
###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere

In [None]:
# Northern Hemisphere: least-squares fit of max temperature against latitude,
# drawn over the scatter of the underlying cities.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df['Max Temp']

# The result unpacks as slope, intercept, r, p-value and standard error.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted temperature at each city's latitude
regress_values = x_values * slope + intercept

linear_eq = 'y = ' + str(round(slope,2)) + 'x + ' + str(round(intercept,2))

fig, ax = plt.subplots(figsize=(9, 7))
ax.scatter(x_values, y_values, marker='o', color='royalblue', s=80, edgecolors='black')
ax.plot(x_values, regress_values, 'darkred', linewidth=2)

ax.set_title('Northern Hemisphere - Max Temp vs. Latitude', fontsize=20)
ax.set_ylabel('Max Temperature', fontsize=16, color='black')
ax.set_xlabel('Latitude', fontsize=16, color='black')

# Place the equation in axes-fraction coordinates (lower-left corner). A fixed
# data-coordinate anchor is silently dropped whenever it falls outside the
# data-driven limits set below.
ax.annotate(linear_eq, (0.05, 0.05), xycoords='axes fraction', fontsize=18, color='darkred')

# Limits follow the full data set; Series.min()/max() skip missing values.
ax.set_ylim(city_data_pd['Max Temp'].min() - 10, city_data_pd['Max Temp'].max() + 10)
ax.set_xlim(-3, city_data_pd['Lat'].max() + 3)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.81.
# There is a strong negative correlation between max temperature and latitude in the Northern Hemisphere: the farther north of the equator a city lies, the lower its max temperature.

In [None]:
#Linear regression on Southern Hemisphere

In [None]:
# Southern Hemisphere: least-squares fit of max temperature against latitude,
# drawn over the scatter of the underlying cities.
plt.figure(figsize=(9,7))
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Max Temp']

# The result unpacks as slope, intercept, r, p-value and standard error.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted temperature at each city's latitude
regress_values = x_values * slope + intercept

line_eq2 = 'y = ' + str(round(slope,2)) + 'x + ' + str(round(intercept,2))

plt.scatter(x_values,y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
plt.plot(x_values,regress_values, 'darkred', linewidth=2)

plt.title('Southern Hemisphere - Max Temp vs. Latitude', fontsize=20)
plt.ylabel('Max Temperature', fontsize=16, color='black')
plt.xlabel('Latitude', fontsize=16, color='black')
plt.annotate(line_eq2,(-25,43),fontsize=18, color='darkred')

# Keep the familiar 30-100 window, but widen it when the sampled cities fall
# outside it so that no point is silently clipped.
plt.ylim(min(30, y_values.min() - 5), max(100, y_values.max() + 5))

plt.xlim(x_values.min() - 3, +1)
print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: There is a positive correlation between latitude and max temperature in the Southern Hemisphere.
# The closer a city is to the equator, the higher its max temperature. The variance of max temperature is higher in the Southern Hemisphere than in the Northern Hemisphere, and max temperature varies between cities located at the same latitude.

In [None]:
#Northern Hemisphere - Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress humidity on latitude and draw the fitted line
# over the scatter of cities.
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Humidity']

# Least-squares fit; unpacks as slope, intercept, r, p-value, standard error.
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

fig, ax = plt.subplots(figsize=(9, 7))
ax.scatter(x_values, y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
ax.plot(x_values, regress_values, 'darkred', linewidth=2)

ax.set_title('Northern Hemisphere - Humidity vs. Latitude', fontsize=20)
ax.set_ylabel('Humidity', fontsize=16, color='black')
ax.set_xlabel('Latitude', fontsize=16, color='black')
# Equation label anchored at data coordinates (51, 6)
ax.annotate(line_eq, (51, 6), fontsize=18, color='darkred')

ax.set_ylim(-6, 110)
ax.set_xlim(-4, 82)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.03. There is almost no correlation between humidity and latitude for cities north of the equator.

In [None]:
#Southern Hemisphere - Humidity vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: least-squares fit of humidity against latitude,
# drawn over the scatter of the underlying cities.
plt.figure(figsize=(9,7))
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Humidity']

# The result unpacks as slope, intercept, r, p-value and standard error.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted humidity at each city's latitude
regress_values = x_values * slope + intercept

line_eq = 'y = ' + str(round(slope,2)) + 'x + ' + str(round(intercept,2))

plt.scatter(x_values,y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
plt.plot(x_values,regress_values,'darkred', linewidth=2)

plt.title('Southern Hemisphere - Humidity vs. Latitude', fontsize=20)
plt.ylabel('Humidity', fontsize=16, color='black')
plt.xlabel('Latitude', fontsize=16, color='black')
plt.annotate(line_eq,(-55,22), fontsize=18, color='darkred')

# Humidity is a percentage, so show the whole 0-100 range. A lower limit of 10
# hid any city drier than 10%. Same window as the Northern Hemisphere plot.
plt.ylim(-6, 110)

plt.xlim(-57, 1)
print(f'The r-squared is: {round(rvalue**2,2)}')
plt.show()

In [None]:
# Discussion: The r-squared is: 0.09. There is almost no correlation between humidity and latitude in the Southern Hemisphere.

In [None]:
#Northern Hemisphere - Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress cloudiness on latitude and draw the fitted line
# over the scatter of cities.
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Cloudiness']

# Least-squares fit; unpacks as slope, intercept, r, p-value, standard error.
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

fig, ax = plt.subplots(figsize=(9, 7))
ax.scatter(x_values, y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
ax.plot(x_values, regress_values, 'darkred', linewidth=2)

ax.set_title('Northern Hemisphere - Cloudiness vs. Latitude', fontsize=20)
ax.set_ylabel('Cloudiness', fontsize=16, color='black')
ax.set_xlabel('Latitude', fontsize=16, color='black')
# Equation label anchored at data coordinates (35, 25)
ax.annotate(line_eq, (35, 25), fontsize=18, color='darkred')

ax.set_ylim(-8, 110)
ax.set_xlim(-3, 82)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.04.
# There is no correlation between cloudiness and a city's distance from the equator.
# Cloudiness values are widely spread across all latitudes of the Northern Hemisphere.

In [None]:
# Southern Hemisphere - Cloudiness vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress cloudiness on latitude and draw the fitted line
# over the scatter of cities.
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Cloudiness']

# Least-squares fit; unpacks as slope, intercept, r, p-value, standard error.
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

fig, ax = plt.subplots(figsize=(9, 7))
ax.scatter(x_values, y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
ax.plot(x_values, regress_values, 'darkred', linewidth=2)

ax.set_title('Southern Hemisphere - Cloudiness vs. Latitude', fontsize=20)
ax.set_ylabel('Cloudiness', fontsize=16, color='black')
ax.set_xlabel('Latitude', fontsize=16, color='black')
# Equation label anchored at data coordinates (-56, 8)
ax.annotate(line_eq, (-56, 8), fontsize=18, color='darkred')

ax.set_ylim(-7, 108)
ax.set_xlim(-58, 1)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.02. There is no correlation between cloudiness and latitude in the Southern Hemisphere.
# Cloudiness ranges from 0% to 100% independent of latitude.

In [None]:
# Northern Hemisphere - Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: least-squares fit of wind speed against latitude,
# drawn over the scatter of the underlying cities.
plt.figure(figsize=(9,7))
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Wind Speed']

# The result unpacks as slope, intercept, r, p-value and standard error.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted wind speed at each city's latitude
regress_values = x_values * slope + intercept

line_eq = 'y = ' + str(round(slope,2)) + 'x + ' + str(round(intercept,2))

plt.scatter(x_values,y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
plt.plot(x_values,regress_values,'darkred', linewidth=2)

plt.title('Northern Hemisphere - Wind Speed vs. Latitude', fontsize=20)
plt.ylabel('Wind Speed', fontsize=16, color='black')
plt.xlabel('Latitude', fontsize=16, color='black')
plt.annotate(line_eq,(2,42), fontsize=18, color='darkred')

# Wind speed has no fixed upper bound: keep the usual 48 ceiling, but raise it
# when the sample contains stronger winds so that no point is clipped.
plt.ylim(-3, max(48, y_values.max() + 3))
plt.xlim(-3, 82)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.05.
# There is a low correlation between wind speed and latitude in the Northern Hemisphere.
# Most cities show variable wind speeds across the whole of the Northern Hemisphere.

In [None]:
# Southern Hemisphere - Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Southern Hemisphere: least-squares fit of wind speed against latitude,
# drawn over the scatter of the underlying cities.
plt.figure(figsize=(9,6))
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Wind Speed']

# The result unpacks as slope, intercept, r, p-value and standard error.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted wind speed at each city's latitude
regress_values = x_values * slope + intercept
line_eq = 'y = ' + str(round(slope,2)) + 'x + ' + str(round(intercept,2))

plt.scatter(x_values,y_values, marker='o', color='royalblue', s=[70], edgecolors='black')
plt.plot(x_values,regress_values,'darkred', linewidth=2)

plt.title('Southern Hemisphere - Wind Speed vs. Latitude', fontsize=20)
plt.ylabel('Wind Speed', fontsize=16, color='black')
plt.xlabel('Latitude', fontsize=16, color='black')
plt.annotate(line_eq,(-19.5,21), fontsize=18, color='darkred')

# Wind speed has no fixed upper bound: keep the usual 25 ceiling, but raise it
# when the sample contains stronger winds so that no point is clipped.
plt.ylim(-1, max(25, y_values.max() + 1))
plt.xlim(-58, 1)

print(f'The r-squared is: {round(rvalue**2,2)}')

plt.show()

In [None]:
# Discussion: The r-squared is: 0.06.
# There is a low negative correlation between wind speed and latitude in the Southern Hemisphere.
