# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [4]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): no visible cell reads this name; the CSV export cell writes to
# the literal "city_data.csv" instead -- confirm which path is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (low, high) bounds, passed to np.random.uniform when sampling coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [5]:
# Build the list of unique cities nearest to a set of random coordinates.

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city
# only once. dict.fromkeys de-duplicates in O(1) per lookup while preserving
# first-seen order, so the resulting list matches a manual "not in" check.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat_value, lng_value).city_name
        for lat_value, lng_value in lat_lngs
    )
)

# Print the city count to confirm sufficient count
print(len(cities))

592


In [4]:
#Print list of cities
#cities

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [6]:
# Query the OpenWeatherMap current-weather endpoint once per city and collect
# the fields needed for the analysis into parallel lists.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Seconds to wait for a response before giving up on a single city, so one
# stalled connection cannot hang the whole retrieval loop.
REQUEST_TIMEOUT_SECONDS = 10

# Lists to hold data (one entry per successfully retrieved city)
city_name = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

print('------------------------------')
print('Start Data Retrieval')
print('------------------------------')

# counter is the 1-based position of the city in the `cities` list
for counter, city in enumerate(cities, start=1):
    try:
        response = requests.get(query_url + city, timeout=REQUEST_TIMEOUT_SECONDS).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Only the exception type is logged: the exception text can include
        # the request URL, which carries the API key.
        print(f'Request failed ({type(error).__name__}) for city number {counter} of {len(cities)} | {city} information was NOT added.')
        continue

    try:
        # Read every field BEFORE appending anything. If any key is missing,
        # no list is touched, so all lists always stay the same length.
        record = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except KeyError:
        print(f'Missing data in city number {counter} of {len(cities)} | {city} information was NOT added.')
        continue

    columns = (city_name, lat, lng, max_temp, humidity, cloudiness, wind_speed, country, date)
    for column, value in zip(columns, record):
        column.append(value)
    print(f'City number {counter} of {len(cities)} | {city} information was added.')

------------------------------
Start Data Retrieval
------------------------------
City number 1 of 592 | kumluca information was added.
City number 2 of 592 | narsaq information was added.
City number 3 of 592 | kapaa information was added.
City number 4 of 592 | arraial do cabo information was added.
City number 5 of 592 | kajaani information was added.
City number 6 of 592 | urumqi information was added.
City number 7 of 592 | beringovskiy information was added.
City number 8 of 592 | souillac information was added.
City number 9 of 592 | bethel information was added.
City number 10 of 592 | cabo san lucas information was added.
City number 11 of 592 | thompson information was added.
City number 12 of 592 | rikitea information was added.
City number 13 of 592 | port alfred information was added.
City number 14 of 592 | georgetown information was added.
Missing data in city number 15 of 592 | taolanaro information was NOT added.
City number 16 of 592 | kjollefjord information was add

City number 141 of 592 | tadine information was added.
Missing data in city number 142 of 592 | saryshagan information was NOT added.
City number 143 of 592 | khor information was added.
City number 144 of 592 | porbandar information was added.
City number 145 of 592 | sibolga information was added.
City number 146 of 592 | broome information was added.
City number 147 of 592 | ulaangom information was added.
City number 148 of 592 | ponta do sol information was added.
City number 149 of 592 | kropotkin information was added.
City number 150 of 592 | hermanus information was added.
City number 151 of 592 | tuktoyaktuk information was added.
City number 152 of 592 | alugan information was added.
City number 153 of 592 | pevek information was added.
Missing data in city number 154 of 592 | grand river south east information was NOT added.
City number 155 of 592 | limbang information was added.
City number 156 of 592 | keetmanshoop information was added.
City number 157 of 592 | vuktyl in

City number 279 of 592 | ulaanbaatar information was added.
City number 280 of 592 | lebu information was added.
City number 281 of 592 | dryden information was added.
City number 282 of 592 | hovd information was added.
City number 283 of 592 | kununurra information was added.
City number 284 of 592 | plettenberg bay information was added.
City number 285 of 592 | jerusalem information was added.
City number 286 of 592 | salina information was added.
City number 287 of 592 | itacare information was added.
City number 288 of 592 | goderich information was added.
City number 289 of 592 | san mateo del mar information was added.
City number 290 of 592 | gazojak information was added.
City number 291 of 592 | nelson bay information was added.
City number 292 of 592 | saint-louis information was added.
City number 293 of 592 | kurumkan information was added.
City number 294 of 592 | nalut information was added.
City number 295 of 592 | lompoc information was added.
City number 296 of 592 |

City number 419 of 592 | hasaki information was added.
City number 420 of 592 | alice springs information was added.
City number 421 of 592 | sakaiminato information was added.
City number 422 of 592 | monrovia information was added.
City number 423 of 592 | sumkino information was added.
City number 424 of 592 | pacifica information was added.
City number 425 of 592 | havoysund information was added.
City number 426 of 592 | galesburg information was added.
City number 427 of 592 | fairbanks information was added.
City number 428 of 592 | gizo information was added.
City number 429 of 592 | guymon information was added.
Missing data in city number 430 of 592 | bolshoye soldatskoye information was NOT added.
City number 431 of 592 | newport information was added.
City number 432 of 592 | micheweni information was added.
City number 433 of 592 | pacific grove information was added.
City number 434 of 592 | remiremont information was added.
City number 435 of 592 | sines information was 

City number 559 of 592 | kruisfontein information was added.
City number 560 of 592 | salamiyah information was added.
City number 561 of 592 | mecca information was added.
City number 562 of 592 | faanui information was added.
City number 563 of 592 | zverinogolovskoye information was added.
City number 564 of 592 | sao felix do xingu information was added.
City number 565 of 592 | port blair information was added.
City number 566 of 592 | logon information was added.
City number 567 of 592 | tromso information was added.
City number 568 of 592 | huarmey information was added.
City number 569 of 592 | manokwari information was added.
City number 570 of 592 | matagami information was added.
City number 571 of 592 | kachug information was added.
Missing data in city number 572 of 592 | kuche information was NOT added.
City number 573 of 592 | ibotirama information was added.
City number 574 of 592 | wenling information was added.
City number 575 of 592 | sal rei information was added.
C

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [8]:
# Assemble the per-city lists into one table, one column per collected field.
weather_columns = {
    "City Name": city_name,
    "Latitude": lat,
    "Longitude": lng,
    "Max Temp": max_temp,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Country": country,
    "Date": date,
}
city_data_df = pd.DataFrame(weather_columns)

# Preview the first rows
city_data_df.head()

Unnamed: 0,City Name,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Kumluca,36.37,30.29,31.6,41,0,4.04,TR,1594396904
1,Narsaq,60.92,-46.05,9.0,93,75,1.5,GL,1594396907
2,Kapaa,22.08,-159.32,26.0,78,90,7.2,US,1594396909
3,Arraial do Cabo,-22.97,-42.03,23.0,56,75,3.1,BR,1594396912
4,Kajaani,64.23,27.73,13.0,76,100,4.1,FI,1594396913


In [9]:
# Export the city data without the row index (the header row is written by default).
city_data_df.to_csv("city_data.csv", index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [10]:
# Boolean mask of cities reporting humidity above 100%, then a count of each value.
too_humid = city_data_df["Humidity"].gt(100)
too_humid.value_counts()

False    549
Name: Humidity, dtype: int64

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [11]:
%matplotlib notebook

# Import Dependencies
import matplotlib.pyplot as plt
import numpy as np

city_data_df.plot(kind="scatter", x="Latitude", y="Max Temp", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("City Latitude vs Max Temperature (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)
plt.tight_layout()
plt.savefig("lat_max_temp.png")

<IPython.core.display.Javascript object>

## Latitude vs. Humidity Plot

In [12]:
# Scatter plot of humidity against latitude for every retrieved city.
humidity_ax = city_data_df.plot(
    kind="scatter",
    x="Latitude",
    y="Humidity",
    marker="o",
    facecolors="blue",
    edgecolors="black",
    alpha=0.75,
)
humidity_ax.set_title("City Latitude vs Humidity (%) (7/10/20)")
humidity_ax.set_xlabel("City Latitude")
humidity_ax.set_ylabel("Humidity (%)")
humidity_ax.grid(True)
plt.tight_layout()
plt.savefig("lat_humidity.png")

<IPython.core.display.Javascript object>

## Latitude vs. Cloudiness Plot

In [13]:
# Scatter plot of cloudiness against latitude for every retrieved city.
cloudiness_ax = city_data_df.plot(
    kind="scatter",
    x="Latitude",
    y="Cloudiness",
    marker="o",
    facecolors="blue",
    edgecolors="black",
    alpha=0.75,
)
cloudiness_ax.set_title("City Latitude vs Cloudiness (%) (7/10/20)")
cloudiness_ax.set_xlabel("City Latitude")
cloudiness_ax.set_ylabel("Cloudiness (%)")
cloudiness_ax.grid(True)
plt.tight_layout()
plt.savefig("lat_cloudiness.png")

<IPython.core.display.Javascript object>

## Latitude vs. Wind Speed Plot

In [14]:
# Scatter plot of wind speed against latitude for every retrieved city.
city_data_df.plot(kind="scatter", x="Latitude", y="Wind Speed", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
# The weather data is requested with units=metric, so wind speed is in
# metres per second rather than miles per hour.
plt.title("City Latitude vs Wind Speed (m/s) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.grid(True)
plt.tight_layout()
plt.savefig("lat_wind_speed.png")

<IPython.core.display.Javascript object>

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Create Northern and Southern Hemisphere DataFrames

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [15]:
# Cities on or north of the equator (latitude >= 0).
is_northern = city_data_df["Latitude"].ge(0)
northern_hemisphere = city_data_df[is_northern]
northern_hemisphere.head()

Unnamed: 0,City Name,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Kumluca,36.37,30.29,31.6,41,0,4.04,TR,1594396904
1,Narsaq,60.92,-46.05,9.0,93,75,1.5,GL,1594396907
2,Kapaa,22.08,-159.32,26.0,78,90,7.2,US,1594396909
4,Kajaani,64.23,27.73,13.0,76,100,4.1,FI,1594396913
5,Ürümqi,43.8,87.6,27.0,32,100,4.0,CN,1594396916


In [24]:
%matplotlib notebook

# Import Dependencies
import matplotlib.pyplot as plt
import numpy as np

# Create the plot
northern_hemisphere.plot(kind="scatter", x="Latitude", y="Max Temp", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Northern Hemisphere: City Latitude vs Max Temperature (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("north_lat_max_temp.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.5389498791354754


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [17]:
# Cities south of the equator (latitude < 0).
is_southern = city_data_df["Latitude"].lt(0)
southern_hemisphere = city_data_df[is_southern]
southern_hemisphere.head()

Unnamed: 0,City Name,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
3,Arraial do Cabo,-22.97,-42.03,23.0,56,75,3.1,BR,1594396912
7,Souillac,-20.52,57.52,22.0,64,40,9.8,MU,1594396924
11,Rikitea,-23.12,-134.97,19.49,65,41,8.55,PF,1594396933
12,Port Alfred,-33.59,26.89,16.11,90,74,7.03,ZA,1594396655
16,Cidreira,-30.18,-50.21,15.47,70,0,7.38,BR,1594396663


In [23]:
# Southern hemisphere: linear regression of max temperature on latitude.
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the data once (a second plt.scatter would paint over these styled
# markers), then overlay the fitted line.
ax = southern_hemisphere.plot(kind="scatter", x="Latitude", y="Max Temp", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
ax.plot(x_values, regress_values, "r-")
# Southern latitudes are all negative, so a data-coordinate anchor at x=6 is
# outside the plotted range; use axes-fraction coordinates instead.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
ax.set_title("Southern Hemisphere: City Latitude vs Max Temperature (7/10/20)")
ax.set_xlabel("City Latitude")
# The weather data is requested with units=metric, so temperatures are Celsius.
ax.set_ylabel("Max Temperature (°C)")
ax.grid(True)
plt.tight_layout()
print(f"The r-squared is: {rvalue**2}")

# Save before showing: on non-interactive backends the figure is gone after show().
plt.savefig("south_lat_max_temp.png")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: 0.7569161532719475


In [33]:
# Written conclusion for the temperature regressions, printed one line at a time.
temperature_observation = (
    "OBSERVATIONS FOR TEMPERATURE AND LATITUDE",
    "--------------------------------",
    "Based on these two plots, there is a correlation between cities closer to the equator having higher maximum temperatures compared to those farther away from the equator.",
)
print("\n".join(temperature_observation))

OBSERVATIONS FOR TEMPERATURE AND LATITUDE
--------------------------------
Based on these two plots, there is a correlation between cities closer to the equator having higher maximum temperatures compared to those farther away from the equator.


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [38]:
# Create the plot
northern_hemisphere.plot(kind="scatter", x="Latitude", y="Humidity", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Northern Hemisphere: City Latitude vs Humidity (%) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("north_lat_humidity.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.008040825966949827


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [39]:
# Create the plot
southern_hemisphere.plot(kind="scatter", x="Latitude", y="Humidity", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Southern Hemisphere: City Latitude vs Humidity (%) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("south_lat_humid.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.021568783881867927


In [44]:
# Written conclusion for the humidity regressions, printed one line at a time.
humidity_observation = (
    "OBSERVATIONS FOR HUMIDITY AND LATITUDE",
    "--------------------------------",
    "There are no correlations between humidity levels and the distance from the equator.",
)
print("\n".join(humidity_observation))

OBSERVATIONS FOR HUMIDITY AND LATITUDE
--------------------------------
There are no correlations between humidity levels and the distance from the equator.


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [42]:
# Create the plot
northern_hemisphere.plot(kind="scatter", x="Latitude", y="Cloudiness", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Northern Hemisphere: City Latitude vs Cloudiness (%) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("north_lat_cloudiness.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.007066859478870897


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [43]:
# Create the plot
southern_hemisphere.plot(kind="scatter", x="Latitude", y="Cloudiness", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Southern Hemisphere: City Latitude vs Cloudiness (%) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("south_lat_cloudiness.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.0209377108321318


In [45]:
# Written conclusion for the cloudiness regressions, printed one line at a time.
cloudiness_observation = (
    "OBSERVATIONS FOR CLOUDINESS AND LATITUDE",
    "--------------------------------",
    "There are no correlations between cloudiness levels and the distance from the equator.",
)
print("\n".join(cloudiness_observation))

OBSERVATIONS FOR CLOUDINESS AND LATITUDE
--------------------------------
There are no correlations between cloudiness levels and the distance from the equator.


####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [46]:
# Create the plot
northern_hemisphere.plot(kind="scatter", x="Latitude", y="Wind Speed", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Northern Hemisphere: City Latitude vs Wind Speed (mph) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("north_lat_wind_speed.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.0006535611322313781


####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [47]:
# Create the plot
southern_hemisphere.plot(kind="scatter", x="Latitude", y="Wind Speed", marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
plt.title("Southern Hemisphere: City Latitude vs Wind Speed (mph) (7/10/20)")
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)
plt.tight_layout()

# Linear regression
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
print(f"The r-squared is: {rvalue**2}")
plt.show()

plt.savefig("south_lat_wind_speed.png")

<IPython.core.display.Javascript object>

The r-squared is: 0.01223743229768018


In [48]:
# Written conclusion for the wind speed regressions, printed one line at a time.
wind_speed_observation = (
    "OBSERVATIONS FOR WIND SPEED AND LATITUDE",
    "--------------------------------",
    "There are no correlations between wind speed and the distance from the equator.",
)
print("\n".join(wind_speed_observation))

OBSERVATIONS FOR WIND SPEED AND LATITUDE
--------------------------------
There are no correlations between wind speed and the distance from the equator.
