In [None]:

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
from scipy.stats import linregress

# Import API key
from config import weathapi_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV) path, relative to the notebook's working directory
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes, as (min, max) tuples in degrees
lat_range = (-90, 90)
lng_range = (-180, 180)

In [1]:
# Build the list of unique candidate cities from random coordinates.
# NOTE: no random seed is set, so every run samples a different set of cities.

# Create a set of random lat and lng combinations, bounded by the ranges
# declared in the setup cell rather than re-typing the limits here.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys() drops duplicates while keeping first-seen order and uses
# hash lookups, avoiding a linear `in` scan of a growing list per sample.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    for lat_lng in lat_lngs
))

NameError: name 'np' is not defined

In [None]:
# Display the number of unique cities collected, to confirm the sample is large enough
len(cities)
# (uncomment to inspect the full list) cities

In [None]:

# Save config information.
# HTTPS is used because the API key is part of the query string and would
# otherwise cross the network in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
# "imperial" asks the API for Fahrenheit temperatures and mph wind speeds
# (per the OpenWeatherMap units documentation).
units = "imperial"

# Build partial query URL; the city name is appended to it for each request.
query_url = f"{url}appid={weathapi_key}&units={units}&q="
# query_url is deliberately not echoed as cell output: displaying it would
# store the API key in the saved notebook.

In [None]:
# set up lists to hold response info (one entry per city that returned data)
cit = []
lat = []
lng = []
temp = []
humid = []
MaxT = []
cloud = []
wind = []
country = []
date = []
city_count = 0

# Get weather data: one request per candidate city.
# city_count numbers every attempted city, including those that return nothing.
for city1 in cities:
    city_count += 1
    try:
        # The timeout stops one stalled connection from hanging the whole run.
        response = requests.get(query_url + city1, timeout=10).json()
        # Read every field before appending anything, so a missing key can
        # never leave the lists with different lengths.
        temp_lat = response['coord']['lat']
        temp_lng = response['coord']['lon']
        temp_temp = response['main']['temp']
        temp_humid = response['main']['humidity']
        temp_MaxT = response['main']['temp_max']
        temp_cloud = response['clouds']['all']
        temp_wind = response['wind']['speed']
        temp_country = response['sys']['country']
        temp_date = response['dt']
        temp_cit = response['name']
    except (KeyError, TypeError, ValueError, requests.RequestException):
        # KeyError/TypeError: the payload lacks the expected fields (e.g. an
        # error reply for an unknown city); ValueError: the body is not JSON;
        # RequestException: network failure or timeout. Anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        print(f"No information available for {city1}")
    else:
        lat.append(temp_lat)
        lng.append(temp_lng)
        temp.append(temp_temp)
        humid.append(temp_humid)
        MaxT.append(temp_MaxT)
        cloud.append(temp_cloud)
        wind.append(temp_wind)
        country.append(temp_country)
        date.append(temp_date)
        cit.append(temp_cit)
        print(f"The temperature for: {city_count}. {city1} is {temp_temp}")

In [None]:

# Verify length of arrays: print one count per line, in the same order as the
# lists were declared; all ten numbers should match.
print(
    *(len(values) for values in (cit, lat, lng, temp, humid, MaxT, cloud, wind, country, date)),
    sep="\n",
)

In [None]:
# Pair each output column name with the list of values collected for it.
weatherdict = dict(
    zip(
        ["City", "Lat", "Lng", "Temp", "Humidity",
         "Max Temp", "Cloudiness", "Wind Speed", "Country", "Date"],
        [cit, lat, lng, temp, humid,
         MaxT, cloud, wind, country, date],
    )
)

In [None]:
# Assemble the collected lists into a DataFrame with one column per dictionary key
weather_data = pd.DataFrame(weatherdict)
# Preview the first rows as the cell output
weather_data.head()

In [None]:
# Output to CSV file (written to the working directory; the index is included by to_csv's default)
# NOTE(review): the setup cell defines output_data_file = "output_data/cities.csv", but this
# call writes to a different hard-coded path — confirm which destination is intended.
weather_data.to_csv("weather_data.csv")

In [None]:

# Show the rows (and therefore the indices) of any cities reporting humidity above 100%.
weather_data[weather_data["Humidity"] > 100]

In [None]:
# Make a new DataFrame equal to the city data, dropping all humidity outliers by index.
# Passing "inplace=False" makes a copy of the city_data DataFrame, which we call "clean_city_data".
# NO CITIES HAVE HUMIDITY > 100%, so no rows need to be dropped.

In [None]:

# Work on a copy so weather_data keeps its signed latitudes, then fold the
# southern hemisphere onto the northern one by taking |Lat|.
Adjlat_df = weather_data.copy()
Adjlat_df["Lat"] = abs(Adjlat_df["Lat"])
Adjlat_df.head(5)

In [None]:

# Generate scatterplot of current temperature against absolute latitude
Adjlat_df.plot(kind="scatter", x="Lat", y="Temp", grid=True, figsize=(4,4),
              title="Temperature Vs. Latitude as of 8/27/20")
plt.xlabel("Latitude (North and South)")
plt.ylabel("Current Temperature")
# Unlike the percentage and wind-speed plots, temperatures can be negative.
# Keep the floor at 0 when every reading is non-negative (same picture as
# before), otherwise extend it so sub-zero points are not cropped out.
temp_floor = min(Adjlat_df["Temp"])
plt.ylim(0 if temp_floor >= 0 else temp_floor - 10, max(Adjlat_df["Temp"])+10)
plt.savefig("Temp.png", format="png")
plt.show()

In [None]:

# Scatter of current humidity against absolute latitude, saved as Humidity.png
Adjlat_df.plot(
    x="Lat",
    y="Humidity",
    kind="scatter",
    figsize=(4, 4),
    grid=True,
    title="Humidity Vs. Latitude as of 8/27/20",
)
# Y axis runs from 0 to ten units above the highest reading
plt.ylim(0, max(Adjlat_df["Humidity"]) + 10)
plt.ylabel("Current Humidity (%)")
plt.xlabel("Latitude (North and South)")
plt.savefig("Humidity.png", format="png")
plt.show()

In [None]:

# Scatter of current cloudiness against absolute latitude, saved as Cloudy.png
Adjlat_df.plot(
    x="Lat",
    y="Cloudiness",
    kind="scatter",
    figsize=(4, 4),
    grid=True,
    title="Cloudiness Vs. Latitude as of 8/27/20",
)
# Y axis runs from 0 to ten units above the highest reading
plt.ylim(0, max(Adjlat_df["Cloudiness"]) + 10)
plt.ylabel("Current Cloudiness (%)")
plt.xlabel("Latitude (North and South)")
plt.savefig("Cloudy.png", format="png")
plt.show()

In [None]:

# Scatter of current wind speed against absolute latitude, saved as Wind.png
Adjlat_df.plot(
    x="Lat",
    y="Wind Speed",
    kind="scatter",
    figsize=(4, 4),
    grid=True,
    title="Wind Speed Vs. Latitude as of 8/27/20",
)
# Y axis runs from 0 to ten units above the highest reading
plt.ylim(0, max(Adjlat_df["Wind Speed"]) + 10)
plt.ylabel("Current Wind Speed (mph)")
plt.xlabel("Latitude (North and South)")
plt.savefig("Wind.png", format="png")
plt.show()

In [None]:
# Split the main dataframe by hemisphere; a latitude of exactly 0 counts as northern.
# reset_index() renumbers the rows and keeps the previous labels in an "index" column.
north_df = weather_data[weather_data["Lat"] >= 0].reset_index()
south_df = weather_data[weather_data["Lat"] < 0].reset_index()

In [None]:
#Calculate linear regression model for north latitude and maximum temperature
Latitude = north_df["Lat"]
TempMax = north_df["Max Temp"]
north_df.plot(kind="scatter", x="Lat", y="Max Temp", grid=True, figsize=(8,8), title="Temperature Vs. North Latitude 8/27/20")
plt.xlabel("Degrees North Latitude")
plt.ylabel("Maximum Temperature")
# Least-squares fit of maximum temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, TempMax)
regress_values = Latitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(Latitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). A fixed
# data-space point such as (6, 10) can lie outside the plotted range, and
# matplotlib does not draw an annotation whose data-space anchor is off-axes.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:

#Calculate linear regression model for south latitude and maximum temperature
SLatitude = south_df["Lat"]
STempMax = south_df["Max Temp"]
south_df.plot(kind="scatter", x="Lat", y="Max Temp", grid=True, figsize=(8,8), title="Temperature Vs. South Latitude 8/27/20")
plt.xlabel("Degrees South Latitude")
plt.ylabel("Maximum Temperature")
# Least-squares fit of maximum temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(SLatitude, STempMax)
regress_values = SLatitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(SLatitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). Southern
# latitudes are negative, so a data-space anchor at x = 6 is off-axes and
# matplotlib would not draw the annotation at all.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:
#Calculate linear regression model for north latitude and humidity
# Latitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
Latitude = north_df["Lat"]
Humidity = north_df["Humidity"]
north_df.plot(kind="scatter", x="Lat", y="Humidity", grid=True, figsize=(8,8), title="Humidity Vs. North Latitude 8/27/20")
plt.xlabel("Degrees North Latitude")
plt.ylabel("Humidity (%)")
# Least-squares fit of humidity on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Humidity)
regress_values = Latitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(Latitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). A fixed
# data-space point such as (6, 10) can lie outside the plotted range, and
# matplotlib does not draw an annotation whose data-space anchor is off-axes.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:

#Calculate linear regression model for south latitude and humidity
# SLatitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
SLatitude = south_df["Lat"]
SHumidity = south_df["Humidity"]
south_df.plot(kind="scatter", x="Lat", y="Humidity", grid=True, figsize=(8,8), title="Humidity Vs. South Latitude 8/27/20")
plt.xlabel("Degrees South Latitude")
plt.ylabel("Humidity (%)")
# Least-squares fit of humidity on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(SLatitude, SHumidity)
regress_values = SLatitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(SLatitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). Southern
# latitudes are negative, so a data-space anchor at x = 6 is off-axes and
# matplotlib would not draw the annotation at all.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:

#Calculate linear regression model for north latitude and cloudiness
# Latitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
Latitude = north_df["Lat"]
Cloudiness = north_df["Cloudiness"]
north_df.plot(kind="scatter", x="Lat", y="Cloudiness", grid=True, figsize=(8,8), title="Cloudiness Vs. North Latitude 8/27/20")
plt.xlabel("Degrees North Latitude")
plt.ylabel("Cloudiness (%)")
# Least-squares fit of cloudiness on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Cloudiness)
regress_values = Latitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(Latitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). A fixed
# data-space point such as (6, 10) can lie outside the plotted range, and
# matplotlib does not draw an annotation whose data-space anchor is off-axes.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:
#Calculate linear regression model for south latitude and cloudiness
# SLatitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
SLatitude = south_df["Lat"]
SCloudiness = south_df["Cloudiness"]
south_df.plot(kind="scatter", x="Lat", y="Cloudiness", grid=True, figsize=(8,8), title="Cloudiness Vs. South Latitude 8/27/20")
plt.xlabel("Degrees South Latitude")
plt.ylabel("Cloudiness (%)")
# Least-squares fit of cloudiness on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(SLatitude, SCloudiness)
regress_values = SLatitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(SLatitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). Southern
# latitudes are negative, so a data-space anchor at x = 6 is off-axes and
# matplotlib would not draw the annotation at all.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:
#Calculate linear regression model for north latitude and wind speed
# Latitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
Latitude = north_df["Lat"]
Windspeed = north_df["Wind Speed"]
north_df.plot(kind="scatter", x="Lat", y="Wind Speed", grid=True, figsize=(8,8), title="Wind Speed Vs. North Latitude 8/27/20")
plt.xlabel("Degrees North Latitude")
plt.ylabel("Wind Speed (mph)")
# Least-squares fit of wind speed on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Windspeed)
regress_values = Latitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(Latitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). A fixed
# data-space point such as (6, 10) can lie outside the plotted range, and
# matplotlib does not draw an annotation whose data-space anchor is off-axes.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:
#Calculate linear regression model for south latitude and wind speed
# SLatitude is re-derived here so this cell does not depend on a variable left
# behind by an earlier regression cell.
SLatitude = south_df["Lat"]
SWindspeed = south_df["Wind Speed"]
south_df.plot(kind="scatter", x="Lat", y="Wind Speed", grid=True, figsize=(8,8), title="Wind Speed Vs. South Latitude 8/27/20")
plt.xlabel("Degrees South Latitude")
plt.ylabel("Wind Speed (mph)")
# Least-squares fit of wind speed on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(SLatitude, SWindspeed)
regress_values = SLatitude * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(SLatitude, regress_values, "r-")
# Place the equation in axes-fraction coordinates (lower-left corner). Southern
# latitudes are negative, so a data-space anchor at x = 6 is off-axes and
# matplotlib would not draw the annotation at all.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)
plt.show()

In [None]:
# There appears to be a slight correlation between wind speed and latitude; measurements are sporadic at every latitude.

In [None]:
#FINAL OBSERVATIONS
    #In both hemispheres, temperatures trend slightly downwards the further north or south you go from the equator.
    #There are slight trends for wind speed and cloudiness as you move further north or south of the equator.
    #There is minimal correlation between humidity and distance from the equator.