# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
# Import API key
from api_keys import weather_api_key
import seaborn as sns

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

619

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# Making sure the units are imperial and settings up the url for the api
units = 'imperial'
url = "http://api.openweathermap.org/data/2.5/weather?"    
# Creating lists for the data that will be stored from the .json request
lat = []
long = []
maxtemp = []
humidity = []
clouds = []
wind_speed = []
country = []
new_city = []
counter = 1

In [4]:
# Creating a for loop which will query the api and grab info for the set of lats/lngs from above.
# Also for any lat/lng that it doesn't find a city, it will skip over those and go to the next.
for city in cities:
    query_url = f'{url}appid={weather_api_key}&units={units}&q={city}'
    data = requests.get(query_url)
    data2 = data.json()
    if counter == 1:
        print('Beginning Data Retrieval:\n---------------------------')
    try: 
        new_city.append(data2['name'])
        lat.append(data2['coord']['lat'])
        long.append(data2['coord']['lon'])
        maxtemp.append(data2['main']['temp_max'])
        humidity.append(data2['main']['humidity'])
        clouds.append(data2['clouds']['all'])
        wind_speed.append(data2['wind']['speed'])
        country.append(data2['sys']['country'])
        current_city = data2['name']
        print(f'Processing Record  {counter} of Set 1 | {current_city}')
        counter += 1
    except:
        pass
 

Beginning Data Retrieval:
---------------------------
Processing Record  1 of Set 1 | Asău
Processing Record  2 of Set 1 | Yerbogachën
Processing Record  3 of Set 1 | Hongjiang
Processing Record  4 of Set 1 | Jiazi
Processing Record  5 of Set 1 | Rikitea
Processing Record  6 of Set 1 | Amga
Processing Record  7 of Set 1 | Hobart
Processing Record  8 of Set 1 | Torbay
Processing Record  9 of Set 1 | San Policarpo
Processing Record  10 of Set 1 | Bredasdorp
Processing Record  11 of Set 1 | Nanortalik
Processing Record  12 of Set 1 | Nagapattinam
Processing Record  13 of Set 1 | Maun
Processing Record  14 of Set 1 | Bubaque
Processing Record  15 of Set 1 | Manaure
Processing Record  16 of Set 1 | Puerto Ayora
Processing Record  17 of Set 1 | Busselton
Processing Record  18 of Set 1 | Ushuaia
Processing Record  19 of Set 1 | Airai
Processing Record  20 of Set 1 | Jamestown
Processing Record  21 of Set 1 | Mataura
Processing Record  22 of Set 1 | Sur
Processing Record  23 of Set 1 | Aklavik

Processing Record  193 of Set 1 | Fare
Processing Record  194 of Set 1 | Dosso
Processing Record  195 of Set 1 | Kuteynykove
Processing Record  196 of Set 1 | Pekan
Processing Record  197 of Set 1 | Husavik
Processing Record  198 of Set 1 | Villa Carlos Paz
Processing Record  199 of Set 1 | Dzerzhinskoye
Processing Record  200 of Set 1 | Ilulissat
Processing Record  201 of Set 1 | São Filipe
Processing Record  202 of Set 1 | Tromsø
Processing Record  203 of Set 1 | Port Alfred
Processing Record  204 of Set 1 | Tacuarembó
Processing Record  205 of Set 1 | Geraldton
Processing Record  206 of Set 1 | Mahébourg
Processing Record  207 of Set 1 | Havre-St-Pierre
Processing Record  208 of Set 1 | Kavaratti
Processing Record  209 of Set 1 | Mar del Plata
Processing Record  210 of Set 1 | Havelock
Processing Record  211 of Set 1 | Ola
Processing Record  212 of Set 1 | Kostanay
Processing Record  213 of Set 1 | The Valley
Processing Record  214 of Set 1 | Acapulco de Juárez
Processing Record  21

Processing Record  382 of Set 1 | Bella Union
Processing Record  383 of Set 1 | Krompachy
Processing Record  384 of Set 1 | Muros
Processing Record  385 of Set 1 | Oktyabr'skiy
Processing Record  386 of Set 1 | Novoagansk
Processing Record  387 of Set 1 | Palora
Processing Record  388 of Set 1 | Clifton Springs
Processing Record  389 of Set 1 | Tabou
Processing Record  390 of Set 1 | Shawnee
Processing Record  391 of Set 1 | Rincon
Processing Record  392 of Set 1 | Karratha
Processing Record  393 of Set 1 | Hamilton
Processing Record  394 of Set 1 | Sungai Penuh
Processing Record  395 of Set 1 | Santa Luzia
Processing Record  396 of Set 1 | Kushmurun
Processing Record  397 of Set 1 | Cururupu
Processing Record  398 of Set 1 | Berdigestyakh
Processing Record  399 of Set 1 | Barwāni
Processing Record  400 of Set 1 | Rāmpura
Processing Record  401 of Set 1 | Trelew
Processing Record  402 of Set 1 | Gijang
Processing Record  403 of Set 1 | Haines Junction
Processing Record  404 of Set 1 | 

In [5]:
# Creating the dataframe by putting it first into a dictionary, and then into the dataframe
weather_dict = {
    'City': new_city,
    'Latitude': lat,
    'Longitude': long,
    'Max Temp': maxtemp,
    'Humidity': humidity,
    'Cloudiness': clouds,
    'Wind Speed': wind_speed,
    'Country': country
}
weather_data = pd.DataFrame(weather_dict)
weather_data
# Taking the dataframe and storing as a .csv file for use in Vacation.Py
weather_data.to_csv(r'output.csv', index = False)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [6]:
# Displaying the new dataframe!
weather_data
# **** Note: I took the **Max Temp** specifically because that is what was displayed in the starter code walkthrough!

Unnamed: 0,City,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country
0,Asău,46.4333,26.4000,37.40,93,90,10.36,RO
1,Yerbogachën,61.2767,108.0108,8.92,93,100,9.35,RU
2,Hongjiang,27.1100,109.9956,66.51,93,90,0.11,CN
3,Jiazi,22.8779,116.0670,68.65,81,98,3.42,CN
4,Rikitea,-23.1203,-134.9692,81.27,71,2,6.69,PF
...,...,...,...,...,...,...,...,...
561,Mehamn,71.0357,27.8492,21.20,79,20,10.36,NO
562,Qian’an,44.9932,124.0581,23.31,37,16,18.50,CN
563,Port Blair,11.6667,92.7500,78.33,79,74,6.51,IN
564,Fort Nelson,58.8053,-122.7002,21.20,58,40,8.37,CA


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [7]:
#Finding out if there are any values in the column "Humidity" that are over 100%
new = weather_data.loc[weather_data['Humidity'] > 100]
new

Unnamed: 0,City,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country


In [8]:
# Splitting into Northern Hemisphere...

northern_hemi = weather_data.loc[weather_data['Latitude'] >= 0]
northern_hemi

Unnamed: 0,City,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country
0,Asău,46.4333,26.4000,37.40,93,90,10.36,RO
1,Yerbogachën,61.2767,108.0108,8.92,93,100,9.35,RU
2,Hongjiang,27.1100,109.9956,66.51,93,90,0.11,CN
3,Jiazi,22.8779,116.0670,68.65,81,98,3.42,CN
5,Amga,60.8953,131.9608,-8.99,94,0,5.10,RU
...,...,...,...,...,...,...,...,...
561,Mehamn,71.0357,27.8492,21.20,79,20,10.36,NO
562,Qian’an,44.9932,124.0581,23.31,37,16,18.50,CN
563,Port Blair,11.6667,92.7500,78.33,79,74,6.51,IN
564,Fort Nelson,58.8053,-122.7002,21.20,58,40,8.37,CA


In [9]:
#And southern hemisphere here ->
southern_hemi = weather_data.loc[weather_data['Latitude'] < 0]
southern_hemi

Unnamed: 0,City,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country
4,Rikitea,-23.1203,-134.9692,81.27,71,2,6.69,PF
6,Hobart,-42.8794,147.3294,57.00,62,75,8.05,AU
9,Bredasdorp,-34.5322,20.0403,60.80,94,8,3.44,ZA
12,Maun,-19.9833,23.4167,71.60,78,0,3.58,BW
15,Puerto Ayora,-0.7393,-90.3518,80.60,78,75,16.11,EC
...,...,...,...,...,...,...,...,...
539,Quatre Cocos,-20.2078,57.7625,78.80,83,40,9.22,MU
543,De Aar,-30.6497,24.0123,62.28,71,0,6.15,ZA
544,Olinda,-8.0089,-34.8553,78.80,69,75,10.36,BR
551,Beloha,-25.1667,45.0500,74.07,78,0,9.06,MG


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [10]:
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
a4_dims = (100,100)
fig, ax = pyplot.subplots(figsize=a4_dims)
sns.scatterplot(weather_data['Latitude'], weather_data['Max Temp'], marker="o", palette=cmap, hue=weather_data['Latitude'])
plt.title('Latitude vs Temperature Plot')
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.savefig('latitude_vs_temp')


NameError: name 'pyplot' is not defined

In [None]:
# With this temperature plot, it looks as though as the latitude increases, the temperature drops off.

## Latitude vs. Humidity Plot

In [None]:
sns.scatterplot(weather_data['Latitude'], weather_data['Humidity'], marker="o", palette=cmap, hue=weather_data['Latitude'])
plt.title('Latitude vs Humidity Plot')
plt.xlabel('Latitude')
plt.ylabel('Humidity %')
plt.savefig('latitude_vs_humidity')

In [None]:
# There isn't much to tell from this data other then there seem to be some clustering in the top left and top right,
# that may indicate there tends to be more humidty the more north and south you go.

## Latitude vs. Cloudiness Plot

In [None]:
sns.scatterplot(weather_data['Latitude'], weather_data['Cloudiness'], marker="o", palette=cmap,  hue=weather_data['Latitude'])
plt.title('Latitude vs Cloudiness Plot')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.savefig('latitude_vs_cloudiness')

In [None]:
# Again, this one is a bit harder to form conclusions about ,but it looks as though
# a good amount of either full cloudiness or none at all occur at all points along the latitude axis

## Latitude vs. Wind Speed Plot

In [None]:
sns.scatterplot(weather_data['Latitude'], weather_data['Wind Speed'], marker="o", palette=cmap, hue=weather_data['Latitude'])
plt.title('Latitude vs Wind Speed Plot')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.savefig('latitude_vs_windspeed')

In [None]:
# With this particular plot, most of the data lies below the 15 mph mark for all the latitudes observed.

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = northern_hemi['Max Temp'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Max Temp')
plt.ylabel('Latitude')
plt.title('Max Temp vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_temp_latitude')
# Show plot
plt.show()


In [None]:
# It looks as though there is a clear trend downward for latitude downward as the max temp increases.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = southern_hemi['Max Temp'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Max Temp')
plt.ylabel('Latitude')
plt.title('Max Temp vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_temp_latitude')
# Show plot
plt.show()


In [None]:
# Though not as concise a scatter plot as the last one, it looks as though there is a trend upward for latitude downward as the max temp increases.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = northern_hemi['Humidity'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
plt.title('Humidity vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_humidity_latitude')
# Show plot
plt.show()


In [None]:
# There appears to be a slight increase in humidity as the latitudes increase

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = southern_hemi['Humidity'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
plt.title('Humidity vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_humidity_latitude')
# Show plot
plt.show()


In [None]:
# Inversely, it looks as though as the latitudes decrease, the humidity increases

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = northern_hemi['Cloudiness'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Cloudiness %')
plt.ylabel('Latitude')
plt.title('Cloudiness vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_cloudiness_latitude')
# Show plot
plt.show()


In [None]:
# These next two plots are a bit harder to truly come to a conclusion. It looks as though most of the data
# is clustered on either end of the cloudiness scale (close to 0 and around 100), as shown by the previous
# scatter plot shown above (Latitude vs Cloudiness)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = southern_hemi['Cloudiness'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Cloudiness %')
plt.ylabel('Latitude')
plt.title('Cloudiness vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_humidity_latitude')
# Show plot
plt.show()


In [None]:
# The same thing, but a bit less so, as there aren't as many data points, but it also looks as though
# most of the data points are on either end of the cloudiness range

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = northern_hemi['Wind Speed'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Wind Speed (mph)')
plt.ylabel('Latitude')
plt.title('Wind Speed vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_windspeed_latitude')
# Show plot
plt.show()


In [None]:
# It appears as though most of the data points lie around where the wind speed is < 15 or so. Thus, latitude
# isn't as much a factor here as it is with other plots

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = southern_hemi['Wind Speed'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# Label plot
plt.xlabel('Wind Speed (mph)')
plt.ylabel('Latitude')
plt.title('Wind Speed vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_windspeed_latitude')
# Show plot
plt.show()


In [None]:
# This one is a bit more obscure though the linear regression shows a similar relationship; as the latitudes
# get closer to the south pole, the wind speed tends to drop off a bit.