# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [108]:
# Import dependencies
%matplotlib notebook

import requests
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from citipy import citipy
from random import uniform
from api_keys import weather_api_key
from api_keys import gkey
from pprint import pprint
from urllib.parse import quote




## Generate Cities List

In [89]:
# Samples 1500 random (latitude, longitude) points and records the name of the
# city that citipy reports as nearest to each point

cities = [
    citipy.nearest_city(uniform(-90, 90), uniform(-180, 180)).city_name
    for _ in range(0, 1500)
]

print(len(cities))

# Drops repeated names; dict keys are unique and keep first-seen order

cities = list(dict.fromkeys(cities))

print(len(cities))



1500
639


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [90]:
# Builds url to grab weather data for cities.
# HTTPS is used because the API key travels in the query string.

url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Builds partial query URL; the percent-encoded city name is appended per request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Creates lists for storing weather data for cities.
# The lists are index-aligned: position i in every list describes the same city,
# so they must always grow together.

city_name = []
country = []
latitude = []
longitude = []
max_temp = []
humidity = []
wind_speed = []
cloudiness = []

# Sets variable for printing log of each city as it's processed

city_count = 1

for city in cities:
    # quote() percent-encodes spaces and reserved characters (&, #, +) so a
    # city name can never corrupt the rest of the query string.
    try:
        response = requests.get(query_url + quote(city), timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Only the exception type is logged: the full message may contain the
        # request URL, which includes the API key.
        print(f"Skipping {city} - request failed ({type(error).__name__})")
        continue

    # Read every field before appending anything. If any key is missing
    # (e.g. a "city not found" payload, or a record without a country code)
    # the city is skipped as a whole and the lists stay the same length.
    try:
        found_name = response['name']
        found_country = response['sys']['country']
        found_lat = response['coord']['lat']
        found_lon = response['coord']['lon']
        found_max_temp = response['main']['temp_max']
        found_humidity = response['main']['humidity']
        found_wind_speed = response['wind']['speed']
        found_cloudiness = response['clouds']['all']
    except KeyError:
        print(f"Skipping {city} - weather data missing or incomplete")
        continue

    city_name.append(found_name)
    country.append(found_country)
    latitude.append(found_lat)
    longitude.append(found_lon)
    max_temp.append(found_max_temp)
    humidity.append(found_humidity)
    wind_speed.append(found_wind_speed)
    cloudiness.append(found_cloudiness)

    print(f"City number {city_count} - {found_name}")
    city_count += 1




City number 1 - Port Alfred
City number 2 - Vaini
City number 3 - New Norfolk
City number 4 - Nikolskoye
City number 5 - Rikitea
City number 6 - Kahului
City number 7 - Arraial do Cabo
City number 8 - Tuktoyaktuk
City number 9 - Busselton
City number 10 - Sokoto
City number 11 - Havøysund
City number 12 - Ushuaia
City number 13 - Bilibino
City number 14 - Lebu
City number 15 - Ponta do Sol
City number 16 - Nishihara
City number 17 - Wau
City number 18 - Albany
City number 19 - Mar del Plata
City number 20 - Khatanga
City number 21 - Atuona
City number 22 - Huaibei
City number 23 - Ribeira Grande
City number 24 - Beyneu
City number 25 - Kapaa
City number 26 - Bengkulu
City number 27 - Esim
City number 28 - Lakselv
City number 29 - Punta Arenas
City number 30 - Brae
City number 31 - Talaya
City number 32 - Magadan
City number 33 - Zelenets
City number 34 - Port Moresby
City number 35 - Bredasdorp
City number 36 - Wagga Wagga
City number 37 - Mataura
City number 38 - Hilo
City number 39 -

City number 304 - Bend
City number 305 - Zambezi
City number 306 - Anuradhapura
City number 307 - Bhag
City number 308 - Rome
City number 309 - Monticello
City number 310 - Los Llanos de Aridane
City number 311 - Saint-Pierre
City number 312 - Bandundu Province
City number 313 - Meulaboh
City number 314 - Shimanovsk
City number 315 - Monrovia
City number 316 - Carnarvon
City number 317 - Bacsay
City number 318 - Saint-Joseph
City number 319 - Caher
City number 320 - Tomigusuku
City number 321 - Goderich
City number 322 - Norwich
City number 323 - Methóni
City number 324 - Novhorodka
City number 325 - Samarai
City number 326 - Hasaki
City number 327 - Vysokogornyy
City number 328 - Puerto Escondido
City number 329 - Pisco
City number 330 - Popondetta
City number 331 - Waiouru
City number 332 - Balasheyka
City number 333 - Bubaque
City number 334 - Shirokiy
City number 335 - Sobolevo
City number 336 - San Jose
City number 337 - Azul
City number 338 - Bendigo
City number 339 - Nicoya
City

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [91]:
# Creates DataFrame from weather variable lists and exports to csv

df_cities_weather = pd.DataFrame({"City": city_name,
                                  "Country": country,
                                  "Latitude": latitude,
                                  "Longitude": longitude,
                                  "Max Temp (F)": max_temp,
                                  "Humidity (%)": humidity,
                                  "Wind Speed (mph)": wind_speed,
                                  "Cloudiness": cloudiness})

# index=False keeps the positional row index out of the exported file
df_cities_weather.to_csv("cities_weather.csv", index=False)

# The preview must be the last expression in the cell: a notebook only renders
# the value of the final expression, so head() placed earlier shows nothing.
df_cities_weather.head()

In [92]:
# Displays summary statistics (count, mean, std, min, quartiles, max) for the numeric weather columns
df_cities_weather.describe()

Unnamed: 0,Latitude,Longitude,Max Temp (F),Humidity (%),Wind Speed (mph),Cloudiness
count,588.0,588.0,588.0,588.0,588.0,588.0
mean,20.05466,20.791616,60.421531,71.267007,8.193401,49.670068
std,33.296884,90.709132,20.42757,21.139316,5.446594,38.784777
min,-54.8,-179.17,8.22,10.0,0.11,0.0
25%,-7.5625,-57.855,44.01,60.0,4.1925,5.75
50%,22.625,26.77,64.99,76.0,6.93,49.0
75%,48.555,102.395,78.01,88.0,11.41,90.0
max,78.22,179.32,96.8,100.0,29.95,100.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,550.0,550.0,550.0,550.0,550.0,550.0,550.0
mean,19.973545,17.1244,58.3314,67.890909,52.141818,8.5448,1585764000.0
std,33.28484,91.595451,25.795297,20.864881,35.766469,6.078869,55.39674
min,-54.8,-179.17,-11.34,9.0,0.0,0.16,1585764000.0
25%,-8.0775,-64.6275,42.8,55.0,20.0,4.525,1585764000.0
50%,23.63,19.635,64.94,72.0,57.0,7.325,1585764000.0
75%,48.6725,97.35,78.8,83.0,86.75,11.41,1585764000.0
max,78.22,179.32,102.2,100.0,100.0,46.08,1585764000.0


In [17]:
#  Get the indices of cities that have humidity over 100%.


Int64Index([], dtype='int64')

In [19]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,butaritari,3.07,172.79,83.32,74,84,10.42,KI,1593605283
1,yerbogachen,61.28,108.01,79.52,58,20,7.7,RU,1593605121
2,cape town,-33.93,18.42,73.0,77,27,4.7,ZA,1593605039
3,touba,8.28,-7.68,79.97,75,82,3.58,CI,1593605284
4,jamestown,42.1,-79.24,64.0,88,1,6.93,US,1593605284


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [152]:
# Scatter plot of max temperature against latitude for every city
fig1, ax1 = plt.subplots(figsize=(5, 5))
ax1.scatter(x=df_cities_weather["Latitude"], y=df_cities_weather["Max Temp (F)"])
ax1.set_xlabel("Latitude")
ax1.set_ylabel("Max Temperature (F)")
ax1.set_title("Latitude vs. Max Temperature (F)")

fig1.tight_layout()

# Writes the figure to disk as a PNG
fig1.savefig("TemperatureVsLatitude.png")

# Prints the written interpretation of the plot
print(
    "As depicted in the graph above, the highest temperature cities are concentrated "
    "within roughly 20 degrees of latitude above or below the equator, and drop off "
    "sharply as latitude extends above 40 degrees north. This can be explained by the "
    "relative distance of the sun over these latitudes."
)

<IPython.core.display.Javascript object>

As depicted in the graph above, the highest temperature cities are concentrated within roughly 20 degrees of latitude above or below the equator, and drop off sharply as latitude extends above 40 degrees north. This can be explained by the relative distance of the sun over these latitudes.


## Latitude vs. Humidity Plot

In [96]:
# Scatter plot of humidity against latitude for every city
fig2, ax2 = plt.subplots(figsize=(5, 5))
ax2.scatter(x=df_cities_weather["Latitude"], y=df_cities_weather["Humidity (%)"])
ax2.set_xlabel("Latitude")
ax2.set_ylabel("Humidity (%)")
ax2.set_title("Latitude vs. Humidity (%)")

fig2.tight_layout()

# Writes the figure to disk as a PNG
fig2.savefig("HumidityVsLatitude.png")

# Prints the written interpretation of the plot
print(
    "As depicted in the graph above, humidity does not appear to have a direct "
    "relationship with latitude, as there is a wide degree of variation in humidity "
    "among cities located along the same latitudes."
)

<IPython.core.display.Javascript object>

As depicted in the graph above, humidity does not appear to have a direct relationship with latitude, as there is a wide degree of variation in humidity among cities located along the same latitudes.


## Latitude vs. Cloudiness Plot

In [100]:
# Scatter plot of cloudiness against latitude for every city
fig3, ax3 = plt.subplots(figsize=(5, 5))
ax3.scatter(x=df_cities_weather["Latitude"], y=df_cities_weather["Cloudiness"])
ax3.set_xlabel("Latitude")
ax3.set_ylabel("Cloudiness")
ax3.set_title("Latitude vs. Cloudiness")

fig3.tight_layout()

# Writes the figure to disk as a PNG
fig3.savefig("CLoudinessVsLatitude.png")

# Prints the written interpretation of the plot
print(
    "As depicted in the graph above, cloudiness does not appear to have a direct "
    "relationship with latitude, as there is a wide degree of variation in cloudiness "
    "among cities located along the same latitudes."
)

<IPython.core.display.Javascript object>

As depicted in the graph above, cloudiness does not appear to have a direct relationship with latitude, as there is a wide degree of variation in cloudiness among cities located along the same latitudes.


## Latitude vs. Wind Speed Plot

In [153]:
# Scatter plot of wind speed against latitude for every city
fig4, ax4 = plt.subplots(figsize=(5, 5))
ax4.scatter(x=df_cities_weather["Latitude"], y=df_cities_weather["Wind Speed (mph)"])
ax4.set_xlabel("Latitude")
ax4.set_ylabel("Wind Speed (mph)")
ax4.set_title("Latitude vs. Wind Speed (mph)")

fig4.tight_layout()

# Writes the figure to disk as a PNG
fig4.savefig("WindSpeedVsLatitude.png")

# Prints the written interpretation of the plot
print(
    "As depicted in the graph above, wind speed does not appear to have a direct "
    "relationship with latitude, as there is a wide degree of variation in wind speed "
    "among cities located along the same latitudes, although the highest wind speeds "
    "are often at the extreme south or north of the globe, which could be "
    "representative of the relative flatness of these regions."
)

<IPython.core.display.Javascript object>

As depicted in the graph above, wind speed does not appear to have a direct relationship with latitude, as there is a wide degree of variation in wind speed among cities located along the same latitudes, although the highest wind speeds are often at the extreme south or north of the globe, which could be representative of the relative flatness of these regions.


## Linear Regression

In [117]:
# Splits the cities by hemisphere for the regression plots below.
# A city sitting exactly on the equator (latitude 0) is counted as northern so
# that no row is silently dropped from both subsets.
df_northern_cities = df_cities_weather.loc[df_cities_weather["Latitude"] >= 0]

df_southern_cities = df_cities_weather.loc[df_cities_weather["Latitude"] < 0]

# Displays summary statistics for the northern subset
df_northern_cities.describe()

Unnamed: 0,Latitude,Longitude,Max Temp (F),Humidity (%),Wind Speed (mph),Cloudiness
count,405.0,405.0,405.0,405.0,405.0,405.0
mean,38.565012,11.821333,55.615333,71.716049,8.303136,49.31358
std,20.70359,87.635916,21.789299,22.35317,5.592126,40.095405
min,0.13,-179.17,8.22,10.0,0.11,0.0
25%,20.89,-69.36,36.77,61.0,4.38,2.0
50%,39.4,22.47,57.2,77.0,6.93,47.0
75%,56.25,79.8,77.0,89.0,11.21,90.0
max,78.22,179.32,92.28,100.0,29.95,100.0


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [129]:
# Creates figure for comparing latitude and temperature in the northern hemisphere
fig5, ax5 = plt.subplots(figsize=(5,5))
ax5.scatter(df_northern_cities["Latitude"], df_northern_cities["Max Temp (F)"])
ax5.set(xlabel="Latitude", ylabel="Max Temperature (F)")
# Drawing through ax5/fig5 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax5.set_title("Latitude vs. Max Temperature (F) in the Northern Hemisphere")

fig5.tight_layout()

# Performs linear regression on latitude vs. temperature
(slope1, intercept1, rvalue1, pvalue1, stderr1) = stats.linregress(df_northern_cities["Latitude"], df_northern_cities["Max Temp (F)"])

# Gets regression values and plots regression line with line equation
regress_values1 = df_northern_cities["Latitude"] * slope1 + intercept1

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq1 = f"y = {round(slope1, 2)}x {'+' if intercept1 >= 0 else '-'} {round(abs(intercept1), 2)}"

ax5.plot(df_northern_cities["Latitude"], regress_values1, "r-")
# The annotation position is given in data coordinates (latitude, temperature)
ax5.annotate(line_eq1, (5,5), fontsize=15, color="red")

fig5.savefig("NorthernHemishphere-TemperatureVsLatitude.png")

print(f"As depicted in the graph above, the highest temperature cities are concentrated within roughly 20 degrees of latitude above the equator, and drop off sharply as latitude extends above 40 degrees north. The r-value is {str(round(rvalue1,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, the highest temperature cities are concentrated within roughly 20 degrees of latitude above the equator, and drop off sharply as latitude extends above 40 degrees north. The r-value is -0.89.


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [140]:
# Creates figure for comparing latitude and temperature in the southern hemisphere
fig6, ax6 = plt.subplots(figsize=(5,5))
ax6.scatter(df_southern_cities["Latitude"], df_southern_cities["Max Temp (F)"])
ax6.set(xlabel="Latitude", ylabel="Max Temperature (F)")
# Drawing through ax6/fig6 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax6.set_title("Latitude vs. Max Temperature (F) in the Southern Hemisphere")

fig6.tight_layout()

# Performs linear regression on latitude vs. temperature
(slope2, intercept2, rvalue2, pvalue2, stderr2) = stats.linregress(df_southern_cities["Latitude"], df_southern_cities["Max Temp (F)"])

# Gets regression values and plots regression line with line equation
regress_values2 = df_southern_cities["Latitude"] * slope2 + intercept2

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq2 = f"y = {round(slope2, 2)}x {'+' if intercept2 >= 0 else '-'} {round(abs(intercept2), 2)}"

ax6.plot(df_southern_cities["Latitude"], regress_values2, "r-")
# The annotation position is given in data coordinates (latitude, temperature)
ax6.annotate(line_eq2, (-45,45), fontsize=15, color="red")

fig6.savefig("SouthernHemishphere-TemperatureVsLatitude.png")

print(f"As depicted in the graph above, the highest temperature cities are concentrated within roughly 20 degrees of latitude below the equator, and drop off sharply as latitude extends below 40 degrees south. The r-value is {str(round(rvalue2,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, the highest temperature cities are concentrated within roughly 20 degrees of latitude below the equator, and drop off sharply as latitude extends below 40 degrees south. The r-value is 0.63.


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [138]:
# Creates figure for comparing humidity and latitude in the northern hemisphere
fig7, ax7 = plt.subplots(figsize=(5,5))
ax7.scatter(df_northern_cities["Latitude"], df_northern_cities["Humidity (%)"])
ax7.set(xlabel="Latitude", ylabel="Humidity (%)")
# Drawing through ax7/fig7 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax7.set_title("Latitude vs. Humidity (%) in the Northern Hemisphere")

fig7.tight_layout()

# Performs linear regression on latitude vs. humidity
(slope3, intercept3, rvalue3, pvalue3, stderr3) = stats.linregress(df_northern_cities["Latitude"], df_northern_cities["Humidity (%)"])

# Gets regression values and plots regression line with line equation
regress_values3 = df_northern_cities["Latitude"] * slope3 + intercept3

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq3 = f"y = {round(slope3, 2)}x {'+' if intercept3 >= 0 else '-'} {round(abs(intercept3), 2)}"

ax7.plot(df_northern_cities["Latitude"], regress_values3, "r-")
# The annotation position is given in data coordinates (latitude, humidity)
ax7.annotate(line_eq3, (10,10), fontsize=15, color="red")

fig7.savefig("NorthernHemishphere-HumidityVsLatitude.png")

print(f"As depicted in the graph above, humidity does not appear to have a strong relationship with latitude, as there is a wide degree of variation in humidity among cities located along the same latitudes. The r-value is {str(round(rvalue3,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, humidity does not appear to have a strong relationship with latitude, as there is a wide degree of variation in humidity among cities located along the same latitudes. The r-value is 0.24.


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [137]:
# Creates figure for comparing humidity and latitude in the southern hemisphere
fig8, ax8 = plt.subplots(figsize=(5,5))
ax8.scatter(df_southern_cities["Latitude"], df_southern_cities["Humidity (%)"])
ax8.set(xlabel="Latitude", ylabel="Humidity (%)")
# Drawing through ax8/fig8 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax8.set_title("Latitude vs. Humidity (%) in the Southern Hemisphere")

fig8.tight_layout()

# Performs linear regression on latitude vs. humidity
(slope4, intercept4, rvalue4, pvalue4, stderr4) = stats.linregress(df_southern_cities["Latitude"], df_southern_cities["Humidity (%)"])

# Gets regression values and plots regression line with line equation
regress_values4 = df_southern_cities["Latitude"] * slope4 + intercept4

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq4 = f"y = {round(slope4, 2)}x {'+' if intercept4 >= 0 else '-'} {round(abs(intercept4), 2)}"

ax8.plot(df_southern_cities["Latitude"], regress_values4, "r-")
# The annotation position is given in data coordinates (latitude, humidity)
ax8.annotate(line_eq4, (-40,20), fontsize=15, color="red")

fig8.savefig("SouthernHemishphere-HumidityVsLatitude.png")

print(f"As depicted in the graph above, humidity does not appear to have a strong relationship with latitude, as there is a wide degree of variation in humidity among cities located along the same latitudes. The r-value is {str(round(rvalue4,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, humidity does not appear to have a strong relationship with latitude, as there is a wide degree of variation in humidity among cities located along the same latitudes. The r-value is 0.1.


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [134]:
# Creates figure for comparing cloudiness and latitude in the northern hemisphere
fig9, ax9 = plt.subplots(figsize=(5,5))
ax9.scatter(df_northern_cities["Latitude"], df_northern_cities["Cloudiness"])
ax9.set(xlabel="Latitude", ylabel="Cloudiness")
# Drawing through ax9/fig9 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax9.set_title("Latitude vs. Cloudiness in the Northern Hemisphere")

fig9.tight_layout()

# Performs linear regression on latitude vs. cloudiness
(slope5, intercept5, rvalue5, pvalue5, stderr5) = stats.linregress(df_northern_cities["Latitude"], df_northern_cities["Cloudiness"])

# Gets regression values and plots regression line with line equation
regress_values5 = df_northern_cities["Latitude"] * slope5 + intercept5

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq5 = f"y = {round(slope5, 2)}x {'+' if intercept5 >= 0 else '-'} {round(abs(intercept5), 2)}"

ax9.plot(df_northern_cities["Latitude"], regress_values5, "r-")
# The annotation position is given in data coordinates (latitude, cloudiness)
ax9.annotate(line_eq5, (5,5), fontsize=15, color="red")

fig9.savefig("NorthernHemishphere-CloudinessVsLatitude.png")

print(f"As depicted in the graph above, cloudiness does not appear to have a strong relationship with latitude, as there is a wide degree of variation in cloudiness among cities located along the same latitudes. The r-value is {str(round(rvalue5,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, cloudiness does not appear to have a strong relationship with latitude, as there is a wide degree of variation in cloudiness among cities located along the same latitudes. The r-value is 0.15.


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [142]:
# Creates figure for comparing cloudiness and latitude in the southern hemisphere
fig10, ax10 = plt.subplots(figsize=(5,5))
ax10.scatter(df_southern_cities["Latitude"], df_southern_cities["Cloudiness"])
ax10.set(xlabel="Latitude", ylabel="Cloudiness")
# Drawing through ax10/fig10 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax10.set_title("Latitude vs. Cloudiness in the Southern Hemisphere")

fig10.tight_layout()

# Performs linear regression on latitude vs. cloudiness
(slope6, intercept6, rvalue6, pvalue6, stderr6) = stats.linregress(df_southern_cities["Latitude"], df_southern_cities["Cloudiness"])

# Gets regression values and plots regression line with line equation
regress_values6 = df_southern_cities["Latitude"] * slope6 + intercept6

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq6 = f"y = {round(slope6, 2)}x {'+' if intercept6 >= 0 else '-'} {round(abs(intercept6), 2)}"

ax10.plot(df_southern_cities["Latitude"], regress_values6, "r-")
# The annotation position is given in data coordinates (latitude, cloudiness)
ax10.annotate(line_eq6, (-55,10), fontsize=15, color="red")

fig10.savefig("SouthernHemishphere-CloudinessVsLatitude.png")

print(f"As depicted in the graph above, cloudiness does not appear to have a strong relationship with latitude, as there is a wide degree of variation in cloudiness among cities located along the same latitudes. The r-value is {str(round(rvalue6,2))}.")



<IPython.core.display.Javascript object>

As depicted in the graph above, cloudiness does not appear to have a strong relationship with latitude, as there is a wide degree of variation in cloudiness among cities located along the same latitudes. The r-value is 0.02.


####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [150]:
# Creates figure for comparing wind speed and latitude in the northern hemisphere
fig11, ax11 = plt.subplots(figsize=(5,5))
ax11.scatter(df_northern_cities["Latitude"], df_northern_cities["Wind Speed (mph)"])
ax11.set(xlabel="Latitude", ylabel="Wind Speed (mph)")
# Drawing through ax11/fig11 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax11.set_title("Latitude vs. Wind Speed in the Northern Hemisphere")

fig11.tight_layout()

# Performs linear regression on latitude vs. wind speed
(slope7, intercept7, rvalue7, pvalue7, stderr7) = stats.linregress(df_northern_cities["Latitude"], df_northern_cities["Wind Speed (mph)"])

# Gets regression values and plots regression line with line equation
regress_values7 = df_northern_cities["Latitude"] * slope7 + intercept7

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq7 = f"y = {round(slope7, 2)}x {'+' if intercept7 >= 0 else '-'} {round(abs(intercept7), 2)}"

ax11.plot(df_northern_cities["Latitude"], regress_values7, "r-")
# The annotation position is given in data coordinates (latitude, wind speed)
ax11.annotate(line_eq7, (5,22), fontsize=15, color="red")

fig11.savefig("NorthernHemishphere-WindSpeedVsLatitude.png")

print(f"As depicted in the graph above, wind speed does not appear to have a strong relationship with latitude, as there is a wide degree of variation in wind speed among cities located along the same latitudes, although the highest wind speeds are often at the extreme north of the globe. The r-value is {str(round(rvalue7,2))}.")


<IPython.core.display.Javascript object>

As depicted in the graph above, wind speed does not appear to have a strong relationship with latitude, as there is a wide degree of variation in wind speed among cities located along the same latitudes, although the highest wind speeds are often at the extreme north of the globe. The r-value is 0.2.


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [149]:
# Creates figure for comparing wind speed and latitude in the southern hemisphere
fig12, ax12 = plt.subplots(figsize=(5,5))
ax12.scatter(df_southern_cities["Latitude"], df_southern_cities["Wind Speed (mph)"])
ax12.set(xlabel="Latitude", ylabel="Wind Speed (mph)")
# Drawing through ax12/fig12 (not pyplot's "current" figure) keeps the title,
# line and annotation on this figure even when other figures are open.
ax12.set_title("Latitude vs. Wind Speed in the Southern Hemisphere")

fig12.tight_layout()

# Performs linear regression on latitude vs. wind speed
(slope8, intercept8, rvalue8, pvalue8, stderr8) = stats.linregress(df_southern_cities["Latitude"], df_southern_cities["Wind Speed (mph)"])

# Gets regression values and plots regression line with line equation
regress_values8 = df_southern_cities["Latitude"] * slope8 + intercept8

# The sign is rendered separately so a negative intercept reads "x - 3.2"
# rather than "x +-3.2".
line_eq8 = f"y = {round(slope8, 2)}x {'+' if intercept8 >= 0 else '-'} {round(abs(intercept8), 2)}"

ax12.plot(df_southern_cities["Latitude"], regress_values8, "r-")
# The annotation position is given in data coordinates (latitude, wind speed)
ax12.annotate(line_eq8, (-50,20), fontsize=15, color="red")

fig12.savefig("SouthernHemishphere-WindSpeedVsLatitude.png")

print(f"As depicted in the graph above, wind speed does not appear to have a strong relationship with latitude, as there is a wide degree of variation in wind speed among cities located along the same latitudes. The r-value is {str(round(rvalue8,2))}.")


<IPython.core.display.Javascript object>

As depicted in the graph above, wind speed does not appear to have a strong relationship with latitude, as there is a wide degree of variation in wind speed among cities located along the same latitudes. The r-value is -0.21.
