In [1]:
# Import dependencies and Setup
import matplotlib.pyplot as plt
import csv
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
from pprint import pprint

# Import API key
from api_keys import w_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "../output/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)


# Generate cities list

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

608

In [3]:
#Get API url 
url = "http://api.openweathermap.org/data/2.5/weather?"
    
    
#See list of cities to get information and lists
#cities 

#Chose Cape Town to see other columns
#response = requests.get(url + "cape town" + "&appid=" + w_key)
#w_city_d = response.json()
#pprint(w_city_d)

# Perform API Calls

Perform a weather check on each city using a series of successive API calls. Include a print log of each city as it's being processed (with the city number and city name).

In [6]:
#Temperature is in Kelvins so change the unit of measurement to F°
units = "imperial"
#Create lists for variables needed
all_cities = []
lat=[]
lng=[]
temp_max=[]
humidity=[]
cloudiness=[]
wind_speed=[]
country=[]
date=[]

#Create a counter for sucessive API calls
city_count = 0
set_count = 1

#query_url =url + one_city + "&appid=" + w_key +"&units=imperial"
#query_url= f'{url}{one_city}&appid={w_key}&units={units}'
#query_url = f"{url}&appid={w_key}&units={units}&q="

In [14]:
# Print things at start of loop 
print("Beginning Data Retrieval")
print("------------------------------")
#Make loop for going trough cities 

for one_city in cities: 
 
    #Add exceptions in loop to avoid it stopping from missing city values 
    try:
        
        params = f'q={one_city}&appid={w_key}&units={units}'
        response = requests.get(url + params).json()
        
    #Counter wil change 
        city_count = city_count + 1
        
    #Append data series loop through cities will be used    
        all_cities.append(one_city)
        lat.append(response["coord"]["lat"])
        lng.append(response["coord"]["lon"])
        temp_max.append(response["main"]["temp_max"])
        humidity.append(response["main"]["humidity"])
        cloudiness.append(response["clouds"]["all"])
        wind_speed.append(response["wind"]["speed"])
        country.append(response["sys"]["country"])
        date.append(response["dt"])
        
        if city_count > 50: 
            set_count += 1 
            city_count = 0 
        
        print(f'Processing Record {city_count} of {set_count}|{one_city}')
    
    except (KeyError):
        print("City not found. Skipping...")

#Add intervals between queries to stay within API limits 
        time.sleep(3)



#Data retrieval is complete so announce it
print("----------------------------")
print("Data Retrieval Complete")
print("----------------------------")

Beginning Data Retrieval
------------------------------
City not found. Skipping...
Processing Record 48 of 12|jamestown
Processing Record 49 of 12|tupik
Processing Record 50 of 12|new norfolk
Processing Record 0 of 13|mnogovershinnyy
Processing Record 1 of 13|busselton
Processing Record 2 of 13|kapaa
Processing Record 3 of 13|ushuaia
Processing Record 4 of 13|atuona
Processing Record 5 of 13|college
Processing Record 6 of 13|klaksvik
Processing Record 7 of 13|birao
Processing Record 8 of 13|san luis
Processing Record 9 of 13|arraial do cabo
Processing Record 10 of 13|vaini
Processing Record 11 of 13|ovalle
Processing Record 12 of 13|rikitea
Processing Record 13 of 13|aden
Processing Record 14 of 13|boguchany
Processing Record 15 of 13|mahebourg
Processing Record 16 of 13|avarua
Processing Record 17 of 13|bubaque
Processing Record 18 of 13|castro
Processing Record 19 of 13|eydhafushi
Processing Record 20 of 13|olafsvik
City not found. Skipping...
Processing Record 22 of 13|punta arenas

Processing Record 19 of 17|alihe
City not found. Skipping...
Processing Record 21 of 17|kidal
Processing Record 22 of 17|aginskoye
Processing Record 23 of 17|tautira
Processing Record 24 of 17|lavrentiya
Processing Record 25 of 17|christchurch
Processing Record 26 of 17|kodiak
Processing Record 27 of 17|buraydah
City not found. Skipping...
Processing Record 29 of 17|petrovsk
Processing Record 30 of 17|brownwood
Processing Record 31 of 17|vostok
Processing Record 32 of 17|tondano
Processing Record 33 of 17|rocha
Processing Record 34 of 17|vieste
Processing Record 35 of 17|chuy
Processing Record 36 of 17|madison
City not found. Skipping...
Processing Record 38 of 17|nouadhibou
Processing Record 39 of 17|templemore
Processing Record 40 of 17|ambilobe
Processing Record 41 of 17|pevek
Processing Record 42 of 17|ostrovnoy
Processing Record 43 of 17|chapais
Processing Record 44 of 17|tambura
Processing Record 45 of 17|hovd
Processing Record 46 of 17|isangel
Processing Record 47 of 17|dolores


Processing Record 47 of 21|saint anthony
Processing Record 48 of 21|ulladulla
Processing Record 49 of 21|presidente medici
Processing Record 50 of 21|tete
Processing Record 0 of 22|marfino
Processing Record 1 of 22|port alberni
Processing Record 2 of 22|mareeba
Processing Record 3 of 22|dudinka
Processing Record 4 of 22|sibolga
Processing Record 5 of 22|berwick
Processing Record 6 of 22|ancud
Processing Record 7 of 22|half moon bay
Processing Record 8 of 22|hadejia
Processing Record 9 of 22|avera
Processing Record 10 of 22|makiyivka
Processing Record 11 of 22|paptalaya
Processing Record 12 of 22|grindavik
Processing Record 13 of 22|mokhsogollokh
Processing Record 14 of 22|sola
Processing Record 15 of 22|rosarito
City not found. Skipping...
Processing Record 17 of 22|abhar
Processing Record 18 of 22|chateaubelair
Processing Record 19 of 22|langres
Processing Record 20 of 22|sao joao do piaui
Processing Record 21 of 22|suining
Processing Record 22 of 22|suntar
Processing Record 23 of 22|

# Convert Raw Data to DataFrame

Export the city data into a .csv.Display the DataFrame

In [9]:
#Convert to pandas DataFrame (code from class)
weather_dict = ({"City": all_cities,
    "Lat": lat, 
    "Lng": lng,
    "Max Temp": temp_max,
    "Humidity" : humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Country": country, 
    "Date": date
    })

#weather_data = pd.DataFrame(weather_dict)
#weather_data.head()
weather_df = pd.DataFrame.from_dict(weather_dict, orient='index').transpose()
weather_df.head(15)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,amderma,42.097,-79.2353,46.99,93,90,11.5,US,1619765343
1,jamestown,54.4253,119.933,36.7,87,100,5.75,RU,1619765517
2,tupik,-42.7826,147.059,63.0,76,7,1.03,AU,1619765517
3,new norfolk,53.9353,139.924,30.42,83,93,4.47,RU,1619765518
4,mnogovershinnyy,-33.65,115.333,62.01,73,92,1.01,AU,1619765518
5,busselton,22.0752,-159.319,73.4,68,1,13.8,US,1619765519
6,kapaa,-54.8,-68.3,39.2,81,75,0.96,AR,1619765491
7,ushuaia,-9.8,-139.033,79.2,71,3,5.23,PF,1619765519
8,atuona,64.8569,-147.803,35.6,80,90,6.91,US,1619765520
9,college,62.2266,-6.589,37.4,70,75,4.61,FO,1619765521


In [10]:
print(weather_df.dtypes)

City          object
Lat           object
Lng           object
Max Temp      object
Humidity      object
Cloudiness    object
Wind Speed    object
Country       object
Date          object
dtype: object


In [11]:
#Clean dataframe (several NAs)
weather_df = weather_df.dropna(how= "any")

In [13]:
weather_df.head(20)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,amderma,42.097,-79.2353,46.99,93,90,11.5,US,1619765343
1,jamestown,54.4253,119.933,36.7,87,100,5.75,RU,1619765517
2,tupik,-42.7826,147.059,63.0,76,7,1.03,AU,1619765517
3,new norfolk,53.9353,139.924,30.42,83,93,4.47,RU,1619765518
4,mnogovershinnyy,-33.65,115.333,62.01,73,92,1.01,AU,1619765518
5,busselton,22.0752,-159.319,73.4,68,1,13.8,US,1619765519
6,kapaa,-54.8,-68.3,39.2,81,75,0.96,AR,1619765491
7,ushuaia,-9.8,-139.033,79.2,71,3,5.23,PF,1619765519
8,atuona,64.8569,-147.803,35.6,80,90,6.91,US,1619765520
9,college,62.2266,-6.589,37.4,70,75,4.61,FO,1619765521


In [None]:
#Export city data into a csv 
filename = "cities.csv"
weather_df.to_csv("../output/cities.csv", index=False, header=True)



Inspect the data and remove the cities where the humidity > 100%.

In [None]:
#Something is weird with describe
weather_df.describe()

In [None]:
#Get the indices of cities that have humidity over 100%.
weather_df["Humidity"].max()

Max humidity is not over 100% so we don't need to clean data. 

# Plotting the Data

Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.Save the plotted figures as .pngs.

### Latitude vs. Temperature Plot

In [None]:
#Create variables 
latitude_p = weather_df["Lat"]
temp_p= weather_df["Max Temp"]
humidity_p=weather_df["Humidity"]
cloud_p=weather_df["Cloudiness"]
wind_sp = weather_df["Wind Speed"]

#Create scatterplot 
plt.scatter(latitude_p, temp_p, marker="o", facecolors="gold", edgecolors="orangered",
             alpha=0.75)
#Add title and labels to scatter plot 
plt.title( "City latitude vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max temperature (F°)")

#Create png image 
plt.savefig("sp_latitude_vs_maxtemp.png")


### Latitude vs. Humidity Plot

In [None]:
#Create scatterplot 
plt.scatter(latitude_p, humidity_p, marker="o", facecolors="lime", edgecolors="green",
             alpha=0.75)
#Add title and labels to scatter plot 
plt.title( "City latitude vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

#Save figure
plt.savefig("sp_latitude_vs_humidity.png")

### Latitude vs. Cloudiness Plot

In [None]:
#Create scatterplot 
plt.scatter(latitude_p, cloud_p, marker="o", facecolors="pink", edgecolors="deeppink",
             alpha=0.75)
#Add title and labels to scatter plot 
plt.title( "City latitude vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

#Figure 
plt.savefig("sp_latitude_vs_Cloudiness.png")

### Latitude vs. Wind Speed Plot

In [None]:
#Create scatterplot 
plt.scatter(latitude_p, wind_sp, marker="o", facecolors="paleturquoise", edgecolors="cyan",
             alpha=0.75)
#Add title and labels to scatter plot 
plt.title( "City latitude vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

#Make figure 
plt.savefig("sp_latitude_vs_wind.png")

## Linear Regression

In [None]:
#Change dataframe types 
#Everything is an object so change values to what they have to be 
weather_df["Lat"] = weather_df["Lat"].astype(float)
weather_df["Lng"] = weather_df["Lng"].astype(float)
weather_df["Max Temp"] = weather_df["Max Temp"].astype(float)
weather_df["Humidity"] = weather_df["Humidity"].astype(float)
weather_df["Cloudiness"] = weather_df["Cloudiness"].astype(float)
weather_df["Wind Speed"] = weather_df["Wind Speed"].astype(float)

In [None]:
#Make a group for the north hemisphere and the south hemisphere 
north_hem = weather_df[weather_df['Lat'] > 0]
south_hem= weather_df[weather_df['Lat'] < 0]

In [None]:
#Check both dataframes to see if they work 
north_hem.head(10)


In [None]:
north_hem["Max Temp"]

In [None]:
#south_hem.head()
print(south_hem.dtypes)


#### Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Create values for regression and correlation graphs 
#north_lat = north_hem["Lat"]
#north_temp =north_hem["Max Temp"]

# Equations for correlation and regression

correlation = st.pearsonr(north_hem["Lat"], north_hem["Max Temp"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_hem["Lat"], north_hem["Max Temp"])
regress_values = north_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(north_hem["Lat"], north_hem["Max Temp"], marker="o", facecolors="orchid", edgecolors="darkviolet",
            s=north_hem["Lat"], alpha=0.75)
plt.plot(north_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Northern Hemisphere vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F°)")

plt.annotate(line_eq,(5,10),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("N_vs_maxtemp.png")

#### Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Equations for correlation and regression

correlation = st.pearsonr(south_hem["Lat"], south_hem["Max Temp"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_hem["Lat"], south_hem["Max Temp"])
regress_values = south_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(south_hem["Lat"], south_hem["Max Temp"], marker="h", facecolors="cyan", edgecolors="turquoise",
            s= None, alpha=0.75)
plt.plot(south_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Southern Hemisphere vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F°)")

plt.annotate(line_eq,(-40,40),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("S_vs_maxtemp.png")

#### Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Equations for correlation and regression

correlation = st.pearsonr(north_hem["Lat"], north_hem["Humidity"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_hem["Lat"], north_hem["Humidity"])
regress_values = north_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(north_hem["Lat"], north_hem["Humidity"], marker="o", facecolors="orchid", edgecolors="darkviolet",
            s=north_hem["Lat"], alpha=0.75)
plt.plot(north_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Northern Hemisphere vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

plt.annotate(line_eq,(45,5),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("N_vs_humidity.png")

#### Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Equations for correlation and regression

correlation = st.pearsonr(south_hem["Lat"], south_hem["Humidity"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_hem["Lat"], south_hem["Humidity"])
regress_values = south_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(south_hem["Lat"], south_hem["Humidity"], marker="h", facecolors="cyan", edgecolors="turquoise",
            s= None, alpha=0.75)
plt.plot(south_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Southern Hemisphere vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

plt.annotate(line_eq,(-50,30),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("S_vs_humidity.png")

#### Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:

correlation = st.pearsonr(north_hem["Lat"], north_hem["Cloudiness"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_hem["Lat"], north_hem["Cloudiness"])
regress_values = north_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
 
#Change colors 
plt.scatter(north_hem["Lat"], north_hem["Cloudiness"], marker="o", facecolors="orchid", edgecolors="darkviolet",
            s=north_hem["Lat"], alpha=0.75)
plt.plot(north_hem["Lat"],regress_values,"b-")

#Add title and labels to scatter plot 
plt.title( "Northern Hemisphere vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

plt.annotate(line_eq,(10,10),fontsize=15,color="black")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("N_vs_cloudiness.png")

#### Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
correlation = st.pearsonr(south_hem["Lat"], south_hem["Cloudiness"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_hem["Lat"], south_hem["Cloudiness"])
regress_values = south_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(south_hem["Lat"], south_hem["Cloudiness"], marker="h", facecolors="cyan", edgecolors="turquoise",
            s= None, alpha=0.75)
plt.plot(south_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Southern Hemisphere vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

plt.annotate(line_eq,(-55,15),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("S_vs_cloudiness.png")

#### Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:

correlation = st.pearsonr(north_hem["Lat"], north_hem["Wind Speed"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_hem["Lat"], north_hem["Wind Speed"])
regress_values = north_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
 
#Change colors 
plt.scatter(north_hem["Lat"], north_hem["Wind Speed"], marker="o", facecolors="orchid", edgecolors="darkviolet",
            s=north_hem["Lat"], alpha=0.75)
plt.plot(north_hem["Lat"],regress_values,"b-")

#Add title and labels to scatter plot 
plt.title( "Northern Hemisphere vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

plt.annotate(line_eq,(45,25),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Make figure 
plt.savefig("N_vs_windspeed.png")

#### Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Equations for correlation and regression

correlation = st.pearsonr(south_hem["Lat"], south_hem["Wind Speed"])
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_hem["Lat"], south_hem["Wind Speed"])
regress_values = south_hem["Lat"] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Use the same scatterplot made before 
#Change colors 
plt.scatter(south_hem["Lat"], south_hem["Wind Speed"], marker="h", facecolors="cyan", edgecolors="turquoise",
            s= None, alpha=0.75)
plt.plot(south_hem["Lat"],regress_values,"b-")


#Add title and labels to scatter plot 
plt.title( "Southern Hemisphere vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

plt.annotate(line_eq,(-25,27),fontsize=15,color="deeppink")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
print(line_eq)

#Save figure 
plt.savefig("S_vs_windspeed.png")