# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
#Dependencies
import pandas as pd
import json
import requests
from api_keys import weather_api_key
from pprint import pprint
import random
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

## Generate Cities List

In [2]:
#Load the complete world-cities reference table from disk
world_cities_csv = "../Resources/output_data/worldcities.csv"
full_cities_file = pd.read_csv(world_cities_csv)

#Preview the first five rows (no sampling happens in this cell)
full_cities_file.head(5)

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.6897,139.6922,Japan,JP,JPN,Tōkyō,primary,37977000.0,1392685764
1,Jakarta,Jakarta,-6.2146,106.8451,Indonesia,ID,IDN,Jakarta,primary,34540000.0,1360771077
2,Delhi,Delhi,28.66,77.23,India,IN,IND,Delhi,admin,29617000.0,1356872604
3,Mumbai,Mumbai,18.9667,72.8333,India,IN,IND,Mahārāshtra,admin,23355000.0,1356226629
4,Manila,Manila,14.6,120.9833,Philippines,PH,PHL,Manila,primary,23088000.0,1608618140


In [3]:
#Summary statistics (count, mean, std, quartiles, min/max) for the numeric
#columns of the full world-cities table
full_cities_file.describe()

Unnamed: 0,lat,lng,population,id
count,41001.0,41001.0,40263.0,41001.0
mean,30.90985,-4.228119,111761.4,1487309000.0
std,23.504898,68.759032,724891.7,284720500.0
min,-54.9341,-179.59,0.0,1004003000.0
25%,19.1903,-71.85,8194.0,1250291000.0
50%,39.8854,3.3333,15831.0,1484693000.0
75%,47.3717,25.9833,39823.5,1807301000.0
max,81.7166,179.3667,37977000.0,1934000000.0


In [4]:
#The sampling line below is commented out so that the same saved sample of
#600 cities is reused, rather than a new sample being drawn every time this notebook runs

#cities_file = (full_cities_file.sample(600)).to_csv("../Resources/output_data/cities_names.csv", index= False, header = True)

In [5]:
#Read the saved 600-city sample and keep only the columns used downstream
columns_of_interest = ['city', 'lat', 'lng', 'country', 'iso2']
interest_cities = pd.read_csv("../Resources/output_data/cities_names.csv")[columns_of_interest]

#Report how many cities were loaded, then preview the first rows
print(interest_cities.shape[0])

interest_cities.head()

600


Unnamed: 0,city,lat,lng,country,iso2
0,Arima,10.6374,-61.2823,Trinidad And Tobago,TT
1,Shankou,24.5032,116.4046,China,CN
2,Uraí,-23.1978,-50.7958,Brazil,BR
3,Farmington,44.6573,-93.1688,United States,US
4,Tsetserleg,47.4769,101.4503,Mongolia,MN


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [6]:
#Start from an empty frame and add one empty-string column per weather field,
#in the order the fields are filled in by the retrieval loop
cities_pd = pd.DataFrame()
for column_name in (
    "City",
    "Lat",
    "Lng",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
    "Country",
    "Date",
):
    cities_pd[column_name] = ""

#Show the (still empty) frame so the column layout is visible
cities_pd.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date


In [None]:
#Query the weather API once per sampled city (by coordinates), log progress,
#and collect the results into cities_pd.

#Base url for collecting information
url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&"

#Set up the format of communicating to user the progress of obtaining data
#changed the units to Imperial - i.e. Fahrenheit (else it would default as Kelvin)
set_num = 1
units = "Imperial"

#Seconds to wait for a single API call, so that one stalled connection
#cannot hang the whole retrieval
request_timeout = 10

#One complete record per successfully processed city, plus the matching
#interest_cities index label for each record
weather_records = []
record_index = []

print("Beginning Data Retrieval")
print("--------------------------------")

#Start the loop and collect the weather data of each city.
for index, row in interest_cities.iterrows():
    lat = row['lat']
    lon = row['lng']

    try:
        response = requests.get(
            f"{url}lat={lat}&lon={lon}&units={units}", timeout=request_timeout
        ).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        #Only the exception type is reported: the message of a requests error
        #can contain the request URL, which includes the API key.
        print(f"Request failed ({type(error).__name__}). Skipping...")
        set_num += 1
        continue

    try:
        print(f"Processing Record {index +1} of Set {set_num} | {response['name']}")

        #Read every field before storing anything, so a response that lacks
        #one of the keys never leaves a half-filled row behind.
        record = {
            "City": response["name"],
            "Lat": response["coord"]["lat"],
            "Lng": response["coord"]["lon"],
            "Max Temp": response["main"]["temp_max"],
            "Humidity": response["main"]["humidity"],
            "Cloudiness": response["clouds"]["all"],
            "Wind Speed": response["wind"]["speed"],
            "Country": response["sys"]["country"],
            "Date": response["dt"],
        }
    except KeyError:
        print("City not found. Skipping...")
        set_num += 1
    else:
        weather_records.append(record)
        record_index.append(index)

#Build the frame in one step instead of growing it cell by cell: this keeps
#the row labels of interest_cities and lets pandas infer numeric dtypes for
#the measurement columns.
cities_pd = pd.DataFrame(
    weather_records,
    index=record_index,
    columns=[
        "City",
        "Lat",
        "Lng",
        "Max Temp",
        "Humidity",
        "Cloudiness",
        "Wind Speed",
        "Country",
        "Date",
    ],
)


Beginning Data Retrieval
--------------------------------
Processing Record 1 of Set 1 | Arima
Processing Record 2 of Set 1 | Huliao
Processing Record 3 of Set 1 | Leópolis
Processing Record 4 of Set 1 | Farmington
Processing Record 5 of Set 1 | Cecerleg
Processing Record 6 of Set 1 | Charta
Processing Record 7 of Set 1 | Melrose
Processing Record 8 of Set 1 | Visp
Processing Record 9 of Set 1 | Surat Thani
Processing Record 10 of Set 1 | Kupino
Processing Record 11 of Set 1 | Pueblo Nuevo
Processing Record 12 of Set 1 | Stoodley Corners
Processing Record 13 of Set 1 | Mina Clavero
Processing Record 14 of Set 1 | Avola
Processing Record 15 of Set 1 | Saint-Nazaire
Processing Record 16 of Set 1 | Kumalarang
Processing Record 17 of Set 1 | Seto
Processing Record 18 of Set 1 | Salvaterra


In [None]:
#Display the weather data collected by the retrieval loop
cities_pd

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
#Keep only the rows in which every field is present
complete_rows = cities_pd.dropna(axis=0, how="any")
cities_pd = complete_rows

#Write the cleaned data to csv (header row included, index column omitted)
cities_pd.to_csv("../Resources/output_data/cities_data.csv", header=True, index=False)

#Display the cleaned frame
cities_pd


In [None]:
#Convert the measurement columns to numeric dtypes.
#The columns of cities_pd were initialised from empty strings, so values
#written into them are held as generic objects; without this conversion a
#fresh Restart & Run All would make describe() report object-style summaries
#instead of numeric statistics. The conversion is safe to re-run.
numeric_dtypes = {
    'Lat': float,
    'Lng': float,
    'Max Temp': float,
    'Humidity': float,
    'Cloudiness': float,
    'Wind Speed': float,
    'Date': int,
}
cities_pd = cities_pd.astype(numeric_dtypes)

In [None]:
#Generate simple summary statistics for the dataframe cities_pd
cities_pd.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#Keep only the cities whose humidity reading is at most 100%.
#(The original author noted that this filter removed no rows for their data.)
humidity_in_range = cities_pd['Humidity'] <= 100
filtered_cities_pd = cities_pd.loc[humidity_in_range]
filtered_cities_pd

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

The graph below compares the latitude and maximum temperature of a group of cities. 
It looks at whether there is any relationship between a city's position in the northern or southern hemisphere and its maximum temperature.

In [None]:
#Scatter of maximum temperature against latitude, drawn through the axes
#object and saved as a PNG before being shown
fig1, ax1 = plt.subplots()
ax1.scatter(
    filtered_cities_pd['Lat'],
    filtered_cities_pd['Max Temp'],
    edgecolors='black',
)
ax1.set_title('City Latitude vs Max Temperature (11/09/2021)')
ax1.set_xlabel('Latitude')
ax1.set_ylabel('Max Temperature(F)')
ax1.grid(True)
fig1.savefig("../Resources/output_data/Max Temp v Latitude.png")
plt.show()

## Latitude vs. Humidity Plot

The graph below compares the latitude and humidity of a group of cities. 
It looks at whether there is any relationship between a city's position in the northern or southern hemisphere and its humidity.

In [None]:
#Scatter of humidity against latitude, drawn through the axes object and
#saved as a PNG before being shown
fig2, ax2 = plt.subplots()
ax2.scatter(
    filtered_cities_pd['Lat'],
    filtered_cities_pd['Humidity'],
    edgecolors='black',
)
ax2.set_title('City Latitude vs Humidity (11/09/2021)')
ax2.set_xlabel('Latitude')
ax2.set_ylabel('Humidity(%)')
ax2.grid(True)
fig2.savefig("../Resources/output_data/Humidity v Latitude.png")
plt.show()

## Latitude vs. Cloudiness Plot

The graph below compares the latitude and cloudiness of a group of cities. 
It looks at whether there is any relationship between a city's position in the northern or southern hemisphere and its cloudiness.

In [None]:
#Scatter of cloudiness against latitude, drawn through the axes object and
#saved as a PNG before being shown
fig3, ax3 = plt.subplots()
ax3.scatter(
    filtered_cities_pd['Lat'],
    filtered_cities_pd['Cloudiness'],
    edgecolors='black',
)
ax3.set_title('City Latitude vs Cloudiness (11/09/2021)')
ax3.set_xlabel('Latitude')
ax3.set_ylabel('Cloudiness(%)')
ax3.grid(True)
fig3.savefig("../Resources/output_data/Cloudiness v Latitude.png")
plt.show()

## Latitude vs. Wind Speed Plot

The graph below compares the latitude and wind speed of a group of cities. 
It looks at whether there is any relationship between a city's position in the northern or southern hemisphere and its wind speed.

In [None]:
#Scatter of wind speed against latitude, drawn through the axes object and
#saved as a PNG before being shown
fig4, ax4 = plt.subplots()
ax4.scatter(
    filtered_cities_pd['Lat'],
    filtered_cities_pd['Wind Speed'],
    edgecolors='black',
)
ax4.set_title('City Latitude vs Wind Speed (11/09/2021)')
ax4.set_xlabel('Latitude')
ax4.set_ylabel('Wind Speed (mph)')
ax4.grid(True)
fig4.savefig("../Resources/output_data/Wind Speed v Latitude.png")
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Split the cities by hemisphere: latitude above 0 is northern, latitude of 0
#or below is southern. The name sothern_cities_pd (spelling included) is kept
#because the southern-hemisphere cells further down read it.
northern_cities_pd = filtered_cities_pd.loc[filtered_cities_pd['Lat']>0]
sothern_cities_pd = filtered_cities_pd.loc[filtered_cities_pd['Lat']<=0]

#Linear regression of max temperature on latitude for the northern hemisphere
x_values = northern_cities_pd['Lat']
y_values = northern_cities_pd['Max Temp']

fig4, ax4 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax4.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax4.annotate(line_eq, (0.05, 0.1), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax4.scatter(x_values, y_values)
ax4.set_title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
ax4.set_xlabel('Latitude')
ax4.set_ylabel('Max Temperature(F)')
fig4.savefig("../Resources/output_data/NH - Max Temp v Latitude.png")
plt.show()


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Linear regression of max temperature on latitude for the southern hemisphere
x_values = sothern_cities_pd['Lat']
y_values = sothern_cities_pd['Max Temp']

fig4, ax4 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax4.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax4.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax4.scatter(x_values, y_values)
ax4.set_title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
ax4.set_xlabel('Latitude')
ax4.set_ylabel('Max Temperature(F)')
fig4.savefig("../Resources/output_data/SH - Max Temp v Latitude.png")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Linear regression of humidity on latitude for the northern hemisphere
x_values = northern_cities_pd['Lat']
y_values = northern_cities_pd['Humidity']

fig5, ax5 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax5.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax5.annotate(line_eq, (0.5, 0.1), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax5.scatter(x_values, y_values)
ax5.set_title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
ax5.set_xlabel('Latitude')
#Units added so the label matches the humidity scatter plot
ax5.set_ylabel('Humidity(%)')
fig5.savefig("../Resources/output_data/NH - Humidity v Latitude.png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Linear regression of humidity on latitude for the southern hemisphere
x_values = sothern_cities_pd['Lat']
y_values = sothern_cities_pd['Humidity']

fig4, ax4 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax4.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax4.annotate(line_eq, (0.05, 0.1), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax4.scatter(x_values, y_values)
ax4.set_title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
ax4.set_xlabel('Latitude')
#Units added so the label matches the humidity scatter plot
ax4.set_ylabel('Humidity(%)')
fig4.savefig("../Resources/output_data/SH - Humidity v Latitude.png")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Linear regression of cloudiness on latitude for the northern hemisphere
x_values = northern_cities_pd['Lat']
y_values = northern_cities_pd['Cloudiness']

fig6, ax6 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax6.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax6.annotate(line_eq, (0.05, 0.1), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax6.scatter(x_values, y_values)
ax6.set_title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
ax6.set_xlabel('Latitude')
#Units added so the label matches the cloudiness scatter plot
ax6.set_ylabel('Cloudiness(%)')
fig6.savefig("../Resources/output_data/NH - Cloudiness v Latitude.png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Linear regression of cloudiness on latitude for the southern hemisphere
x_values = sothern_cities_pd['Lat']
y_values = sothern_cities_pd['Cloudiness']

fig4, ax4 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax4.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax4.annotate(line_eq, (0.05, 0.1), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax4.scatter(x_values, y_values)
ax4.set_title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
ax4.set_xlabel('Latitude')
#Units added so the label matches the cloudiness scatter plot
ax4.set_ylabel('Cloudiness(%)')
fig4.savefig("../Resources/output_data/SH - Cloudiness v Latitude.png")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Linear regression of wind speed on latitude for the northern hemisphere
x_values = northern_cities_pd['Lat']
y_values = northern_cities_pd['Wind Speed']

fig6, ax6 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax6.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax6.annotate(line_eq, (0.5, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax6.scatter(x_values, y_values)
ax6.set_title('Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
ax6.set_xlabel('Latitude')
#Units added so the label matches the wind speed scatter plot
ax6.set_ylabel('Wind Speed (mph)')
fig6.savefig("../Resources/output_data/NH - Wind Speed v Latitude.png")
plt.show()


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Linear regression of wind speed on latitude for the southern hemisphere
x_values = sothern_cities_pd['Lat']
y_values = sothern_cities_pd['Wind Speed']

fig4, ax4 = plt.subplots()
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax4.plot(x_values, regress_values, "r-")
#Place the equation relative to the axes box rather than at fixed data
#coordinates: a data-coordinate annotation is not drawn when its point falls
#outside the axes limits, which depend on the weather data retrieved.
ax4.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")
ax4.scatter(x_values, y_values)
ax4.set_title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
ax4.set_xlabel('Latitude')
#Units added so the label matches the wind speed scatter plot
ax4.set_ylabel('Wind Speed (mph)')
fig4.savefig("../Resources/output_data/SH - Wind Speed v Latitude.png")
plt.show()