In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import datetime
from scipy.stats import linregress


# Import API key
from api_keys import weather_api_key

from citipy import citipy

#CSV
output_data_file = "./cities.csv"

# Range of lat/lngs
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# List for holding lat/lngs and cities
lat_lngs = []
cities = []

# Random lat/lngs
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Nearest city for each combo
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

630

In [None]:
n=0
weather_data = []
for city in cities:
    n=n+1
    print(city)
    url = (f"https://api.openweathermap.org/data/2.5/weather?q={city}&units=imperial&appid={weather_api_key}")
    response = requests.get(url).json()
    try:
        weather_data.append(
            {
                'City': city,
                'Lat': response['coord']['lat'],
                'Lng': response['coord']['lon'],
                'Max Temp': response['main']['temp_max'],
                'Humidity': response['main']['humidity'],
                'Cloudiness': response['clouds']['all'],
                'Wind Speed': response['wind']['speed'],
                'Country': response['sys']['country'],
                'Date': datetime.datetime.fromtimestamp(int(response['dt'])).strftime('%Y-%m-%d %H:%M')
            }
        )
    except:
        next
    print(f"Processing Record {n} of {len(cities)} | {city}")

weather_df = pd.DataFrame(weather_data)

ushuaia
Processing Record 1 of 630 | ushuaia
lima
Processing Record 2 of 630 | lima
rikitea
Processing Record 3 of 630 | rikitea
college
Processing Record 4 of 630 | college
bluff
Processing Record 5 of 630 | bluff
richards bay
Processing Record 6 of 630 | richards bay
cabo san lucas
Processing Record 7 of 630 | cabo san lucas
labuhan
Processing Record 8 of 630 | labuhan
aflu
Processing Record 9 of 630 | aflu
nikolskoye
Processing Record 10 of 630 | nikolskoye
qaanaaq
Processing Record 11 of 630 | qaanaaq
rungata
Processing Record 12 of 630 | rungata
barrow
Processing Record 13 of 630 | barrow
stamsund
Processing Record 14 of 630 | stamsund
cascais
Processing Record 15 of 630 | cascais
kamenskoye
Processing Record 16 of 630 | kamenskoye
ilulissat
Processing Record 17 of 630 | ilulissat
noumea
Processing Record 18 of 630 | noumea
ribeira grande
Processing Record 19 of 630 | ribeira grande
kaukauna
Processing Record 20 of 630 | kaukauna
funtua
Processing Record 21 of 630 | funtua
hilo
Pr

Processing Record 167 of 630 | nan
chapais
Processing Record 168 of 630 | chapais
cordoba
Processing Record 169 of 630 | cordoba
shingu
Processing Record 170 of 630 | shingu
yulara
Processing Record 171 of 630 | yulara
atar
Processing Record 172 of 630 | atar
hasaki
Processing Record 173 of 630 | hasaki
busselton
Processing Record 174 of 630 | busselton
terrace bay
Processing Record 175 of 630 | terrace bay
nome
Processing Record 176 of 630 | nome
dikson
Processing Record 177 of 630 | dikson
esperance
Processing Record 178 of 630 | esperance
kushmurun
Processing Record 179 of 630 | kushmurun
samusu
Processing Record 180 of 630 | samusu
orlik
Processing Record 181 of 630 | orlik
bengkulu
Processing Record 182 of 630 | bengkulu
hermanus
Processing Record 183 of 630 | hermanus
zhezkazgan
Processing Record 184 of 630 | zhezkazgan
shenjiamen
Processing Record 185 of 630 | shenjiamen
tura
Processing Record 186 of 630 | tura
lagoa
Processing Record 187 of 630 | lagoa
mazabuka
Processing Recor

In [None]:
weather_df.to_csv(output_data_file, index=False)
weather_df.head()

In [None]:
#No cities with humidity >100
rm_hum_df = weather_df.loc[(weather_df['Humidity']>100)]
len(rm_hum_df)

In [None]:
#Clean city data
clean_city_data = weather_df
clean_city_data.head()

In [None]:
#Lat vs. Temp plot
latitude = clean_city_data['Lat']
max_temp = clean_city_data['Max Temp']

plt.scatter(latitude, max_temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temperature F')
plt.title(f"City Latitude vs. Max Temperature F {datetime.datetime.now().strftime('%m/%d/%y')}")
plt.show()
print("Analyzing the relationship between the latitude and maximum temperature of various cities")

In [None]:
#Lat vs. Humidity plot
latitude = clean_city_data['Lat']
humidity = clean_city_data['Humidity']

plt.scatter(latitude, humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity %')
plt.title(f"City Latitude vs. Humidity % {datetime.datetime.now().strftime('%m/%d/%y')}")
plt.show()

print("Analyzing the relationship between the latitude and humidity of various cities")

In [None]:
#Lat vs. Cloudiness plot
latitude = clean_city_data['Lat']
cloudiness = clean_city_data['Cloudiness']

plt.scatter(latitude, cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title(f"City Latitude vs. Cloudiness {datetime.datetime.now().strftime('%m/%d/%y')}")
plt.show()

print("Analyzing the relationship between the latitude and cloudiness of various cities")

In [None]:
#Lat vs. Wind Speed
latitude = clean_city_data['Lat']
wind_speed = clean_city_data['Wind Speed']

plt.scatter(latitude, wind_speed)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed mph')
plt.title(f"City Latitude vs. Wind Speed mph {datetime.datetime.now().strftime('%m/%d/%y')}")
plt.show()
print("Analyzing the relationship between the latitude and wind speed of various cities")

In [None]:
#Linear Regression
#Northern Hemisphere Lat vs. Max Temp
nh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']>0)]

nh_latitude = nh_clean_city_data['Lat']
max_temp = nh_clean_city_data['Max Temp']

plt.scatter(nh_latitude, max_temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temperature F')
plt.title(f"Northern Hemisphere Latitude vs. Max Temperature F {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_latitude, max_temp)
regress_values = nh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(nh_latitude, regress_values,"r-")
plt.annotate(line_eq,(5,40),fontsize=15, color='red')
plt.show()

print("Analyzing the relationship between the latitude and max temperature of various cities in the northern hemisphere.")
print("The results indicate that the correlation between proximity to the equator and max temperature is moderately strong in the northern hemisphere.")

In [None]:
#Linear Regression
#Southern Hemisphere Lat vs. Max Temp
sh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']<0)]

sh_latitude = sh_clean_city_data['Lat']
max_temp = sh_clean_city_data['Max Temp']

plt.scatter(sh_latitude, max_temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temperature F')
plt.title(f"Southern Hemisphere Latitude vs. Max Temperature F {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(sh_latitude, max_temp)
regress_values = sh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(sh_latitude, regress_values,"r-")
plt.annotate(line_eq,(-55,90),fontsize=15, color='red')
plt.show()

print("Analyzing the relationship between the latitude and max temperature of various cities in the southern hemisphere.")
print("The results indicate that the correlation between proximity to the equator and max temperature is moderately strong in the southern hemisphere.")

In [None]:
#Linear Regression
#Nothern Hemisphere Lat vs. Humidity
nh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']>0)]

nh_latitude = nh_clean_city_data['Lat']
humidity = nh_clean_city_data['Humidity']

plt.scatter(nh_latitude, humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title(f"Northern Hemisphere Latitude vs. Humidity {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_latitude, humidity)
regress_values = nh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(nh_latitude, regress_values,"r-")
plt.annotate(line_eq,(0,35),fontsize=12, color='red')
plt.show()
print("Analyzing the relationship between the latitude and humidity of various cities in the northern hemisphere.")
print("The results indicate no correlation between latitude and humidity in the northern hemisphere.")

In [None]:
#Linear Regression
#Southern Hemisphere Lat vs. Humidity
sh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']<0)]

sh_latitude = sh_clean_city_data['Lat']
humidity = sh_clean_city_data['Humidity']

plt.scatter(sh_latitude, humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title(f"Southern Hemisphere Latitude vs. Humidity {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(sh_latitude, humidity)
regress_values = sh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(sh_latitude, regress_values,"r-")
plt.annotate(line_eq,(-50,35),fontsize=12, color='red')
plt.show()

print("Analyzing the relationship between the latitude and humidity of various cities in the southern hemisphere.")
print("The results indicate a very weak correlation between latitude and humidity in the southern hemisphere.")

In [None]:
#Linear Regression
#Northern Hemisphere Lat vs. Cloudiness
nh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']>0)]

nh_latitude = nh_clean_city_data['Lat']
cloudiness = nh_clean_city_data['Cloudiness']

plt.scatter(nh_latitude, cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title(f"Northern Hemisphere Latitude vs. Cloudiness {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_latitude, cloudiness)
regress_values = nh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(nh_latitude, regress_values,"r-")
plt.annotate(line_eq,(0,50),fontsize=12, color='red')
plt.show()
print("Analyzing the relationship between the latitude and cloudiness of various cities in the northern hemisphere.")
print("No correlation between latitude and cloudiness in the northern hemisphere.")

In [None]:
#Linear Regression
#Southern Hemisphere Lat vs. Cloudiness
sh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']<0)]

sh_latitude = sh_clean_city_data['Lat']
cloudiness = sh_clean_city_data['Cloudiness']

plt.scatter(sh_latitude, cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title(f"Southern Hemisphere Latitude vs. Cloudiness {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(sh_latitude, cloudiness)
regress_values = sh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(sh_latitude, regress_values,"r-")
plt.annotate(line_eq,(-55,20),fontsize=12, color='red')
plt.show()

print("Analyzing the relationship between the latitude and cloudiness of various cities in the southern hemisphere.")
print("The results indicate a weak correlation between latitude and cloudiness in the southern hemisphere")

In [None]:
#Linear Regression
#Nothern Hemisphere Lat vs. Wind Speed
nh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']>0)]

nh_latitude = nh_clean_city_data['Lat']
wind_speed = nh_clean_city_data['Wind Speed']

plt.scatter(nh_latitude, wind_speed)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title(f"Northern Hemisphere Latitude vs. Wind Speed {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_latitude, wind_speed)
regress_values = nh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(nh_latitude, regress_values,"r-")
plt.annotate(line_eq,(0,25),fontsize=12, color='red')
plt.show()

print("Analyzing the relationship between the latitude and wind speed of various cities in the northern hemisphere.")
print("The results indicate no correlation between latitude and wind speed in the northern hemisphere.")

In [None]:
#Linear Regression
#Southern Hemisphere Lat vs. Wind Speed
sh_clean_city_data = clean_city_data.loc[(clean_city_data['Lat']<0)]

sh_latitude = sh_clean_city_data['Lat']
wind_speed = sh_clean_city_data['Wind Speed']

plt.scatter(sh_latitude, wind_speed)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title(f"Southern Hemisphere Latitude vs. Wind Speed {datetime.datetime.now().strftime('%m/%d/%y')}")

(slope, intercept, rvalue, pvalue, stderr) = linregress(sh_latitude, wind_speed)
regress_values = sh_latitude * slope + intercept
line_eq = f"y = {str(round(slope,2))}x + {str(round(intercept,2))}"
plt.plot(sh_latitude, regress_values,"r-")
plt.annotate(line_eq,(-55,5),fontsize=12, color='red')
plt.show()

print("Analyzing the relationship between the latitude and wind speed of various cities in the southern hemisphere.")
print("The results indicate a very weak correlation between latitude and wind speed in the southern hemisphere.")