# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import datetime
import json


# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV): path intended for the exported city weather data
output_data_file = 'output_data/cities.csv'

# Range of latitudes and longitudes as (min, max) pairs; these bound the
# random coordinate sampling used to pick cities
latitude_range = (-90, 90)
longitude_range = (-180, 180)

## Generate Cities List

In [None]:
# Build a list of unique city names from random coordinates.

# Create a set of random lat and lng combinations (1500 uniformly drawn pairs)
latitude = np.random.uniform(latitude_range[0], latitude_range[1], size=1500)
longitude = np.random.uniform(longitude_range[0], longitude_range[1], size=1500)
latitude_longitude = zip(latitude, longitude)

# `cities` keeps first-seen order for later use; `seen_cities` mirrors it as a
# set so the uniqueness check is a constant-time lookup instead of a list scan.
cities = []
seen_cities = set()

# Identify nearest city for each lat, lng combination
for lat, lng in latitude_longitude:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Endpoint that every weather query is built on
base_url = 'http://api.openweathermap.org/data/2.5/weather?'

# Progress counters used by the retrieval log (record within a set, set number)
record = 0
sets = 1

# Start the DataFrame with one row per city, then add the remaining columns
# as empty-string placeholders to be filled in from the API responses
WeatherPy = pd.DataFrame({'City': cities})
placeholder_columns = [
    'Country',
    'Date',
    'Latitude',
    'Longitude',
    'Cloudiness (%)',
    'Humidity (%)',
    'Max Temp (F)',
    'Wind Speed (mph)',
]
for column_name in placeholder_columns:
    WeatherPy[column_name] = ""

In [None]:
# Iterate through the cities and retrieve current weather data for each one.
# Rows whose lookup fails keep their empty-string placeholders.
print('Beginning Data Retrieval')
print('-----------------------------')


for index, row in WeatherPy.iterrows():

    city = row['City']

    # Let requests build and URL-encode the query string instead of
    # concatenating it by hand. units=imperial asks the API for Fahrenheit and
    # miles/hour, which is what the 'Max Temp (F)' and 'Wind Speed (mph)'
    # columns claim to hold (the API default is Kelvin and metres/second).
    query_params = {'appid': weather_api_key, 'q': city, 'units': 'imperial'}

    print(f'Processing Record {record}  of Set {sets} | {city}.')

    try:
        weather_response = requests.get(base_url, params=query_params, timeout=10).json()

        # Pull out every field before touching the DataFrame so a response
        # that is missing a key cannot leave a half-filled row behind.
        city_weather = {
            'Country': weather_response['sys']['country'],
            'Date': datetime.datetime.fromtimestamp(weather_response['dt']),
            'Latitude': weather_response['coord']['lat'],
            'Longitude': weather_response['coord']['lon'],
            'Cloudiness (%)': weather_response['clouds']['all'],
            'Humidity (%)': weather_response['main']['humidity'],
            'Max Temp (F)': weather_response['main']['temp_max'],
            'Wind Speed (mph)': weather_response['wind']['speed'],
        }

    except requests.RequestException:
        # Network-level problem (timeout, connection error, ...): skip this
        # city rather than aborting the whole run.
        print(f'Request failed...skipping {city}.')

    except (KeyError, TypeError, ValueError):
        # The response was not valid JSON or lacks the expected weather fields.
        print(f'City not found...skipping {city}.')

    else:
        for column, value in city_weather.items():
            WeatherPy.loc[index, column] = value

    # Advance the log counters: records are numbered 0-49 within each set
    record = record + 1
    if record == 50:
        record = 0
        sets = sets + 1

print('-----------------------------')
print('Data Retrieval Complete')
print('-----------------------------')

In [None]:
# Number of non-NA values in each column; empty-string placeholders are not
# NA, so they are still counted here
WeatherPy.count()

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Export city data into csv, reusing the output path configured in the setup
# cell so the destination is defined in a single place
WeatherPy.to_csv(output_data_file, encoding='utf-8', index=False)

In [None]:
# Display the first rows of the DataFrame as a quick visual check
WeatherPy.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of max temperature against latitude, titled with today's date
today = time.strftime('%m/%d/%Y')

latitude_series = pd.to_numeric(WeatherPy['Latitude']).astype(float)
temperature_series = pd.to_numeric(WeatherPy['Max Temp (F)']).astype(float)

plt.scatter(
    latitude_series,
    temperature_series,
    facecolors='green',
    edgecolors='black',
    alpha=0.5,
)
plt.title(f'City Latitude vs Max Temperature ({today})')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')

print('The plot proves that as we generally move away from the equator, temperature drops.')

# Save before show() so the written image contains the drawn figure
plt.savefig('Latitude vs Temperature (F).png')
plt.show()


## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude, titled with today's date
today = time.strftime('%m/%d/%Y')

latitude_series = pd.to_numeric(WeatherPy['Latitude']).astype(float)
humidity_series = pd.to_numeric(WeatherPy['Humidity (%)']).astype(float)

plt.scatter(
    latitude_series,
    humidity_series,
    facecolors='green',
    edgecolors='black',
    alpha=0.5,
)
plt.title(f'City Latitude vs Humidity (%) ({today})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')

print('The plot shows that there is no real pattern that can describe the relationship between latitude and humidity.')

# Save before show() so the written image contains the drawn figure
plt.savefig('Latitude vs Humidity (%).png')
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
today = time.strftime('%m/%d/%Y')
plt.scatter(pd.to_numeric(WeatherPy['Latitude']).astype(float), pd.to_numeric(WeatherPy['Cloudiness (%)']).astype(float), facecolors='green', edgecolors='black', alpha = 0.5)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title(f'City Latitude vs Cloudiness ({today})')
print('The plot shows that there is no real pattern that can describe the relationship between latitude and cloudiness.')
plt.savefig('Latitude vs Cloudiness (%).png')
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude, titled with today's date
today = time.strftime('%m/%d/%Y')
plt.scatter(pd.to_numeric(WeatherPy['Latitude']).astype(float), pd.to_numeric(WeatherPy['Wind Speed (mph)']).astype(float), facecolors='green', edgecolors='black', alpha = 0.5)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f'City Latitude vs Wind Speed ({today})')
print('The plot shows that there is no real pattern that can describe the relationship between latitude and windspeed.')
# File name now says "mph" (it previously read "mpg"), matching the axis label
plt.savefig('Latitude vs Wind Speed (mph).png')
# Render the figure explicitly, as the other scatter-plot cells do
plt.show()

## Linear Regression

In [None]:
# Build the Northern and Southern Hemisphere DataFrames from one numeric view
# of the latitude column. Both comparisons are strict, so a row whose latitude
# is exactly 0 lands in neither frame.
latitude_as_float = pd.to_numeric(WeatherPy['Latitude']).astype(float)
is_north = latitude_as_float > 0
is_south = latitude_as_float < 0
northern = WeatherPy.loc[is_north, :]
southern = WeatherPy.loc[is_south, :]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of max temperature on latitude, northern hemisphere rows
x_values = pd.to_numeric(northern['Latitude']).astype(float)
y_values = pd.to_numeric(northern['Max Temp (F)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(5,15),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression displays a negative correlation.')
print('In the nothern hemisphere, as you move away from the equator the temperature decreases.')
plt.savefig('Northern Hemisphere - Temperature (F) vs. Latitude.png')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of max temperature on latitude, southern hemisphere rows
x_values = pd.to_numeric(southern['Latitude']).astype(float)
y_values = pd.to_numeric(southern['Max Temp (F)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(-50,90),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression displays a positive correlation.')
print('In the southern hemisphere, as you get closer to the equator the temperature increases.')
plt.savefig('Southern Hemisphere - Temperature (F) vs. Latitude.png')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of humidity on latitude, northern hemisphere rows
x_values = pd.to_numeric(northern['Latitude']).astype(float)
y_values = pd.to_numeric(northern['Humidity (%)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(45,10),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between percent humidity and latitude.')
plt.savefig('Northern Hemisphere - Humidity (%) vs. Latitude.png')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of humidity on latitude, southern hemisphere rows
x_values = pd.to_numeric(southern['Latitude']).astype(float)
y_values = pd.to_numeric(southern['Humidity (%)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(-50,0),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between percent humidity and latitude.')
plt.savefig('Southern Hemisphere - Humidity (%) vs. Latitude.png')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of cloudiness on latitude, northern hemisphere rows
x_values = pd.to_numeric(northern['Latitude']).astype(float)
y_values = pd.to_numeric(northern['Cloudiness (%)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(6,10),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between cloudiness and latitude.')
plt.savefig('Northern Hemisphere - Cloudiness (%) vs. Latitude.png')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of cloudiness on latitude, southern hemisphere rows
x_values = pd.to_numeric(southern['Latitude']).astype(float)
y_values = pd.to_numeric(southern['Cloudiness (%)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f"Regression line equation is: {line_eq}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
# Southern latitudes are all negative, so anchor the equation at a negative x;
# the previous anchor (x=6) was outside the plotted data range
plt.annotate(line_eq,(-55,10),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between cloudiness and latitude.')
plt.savefig('Southern Hemisphere - Cloudiness (%) vs. Latitude.png')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of wind speed on latitude, northern hemisphere rows
x_values = pd.to_numeric(northern['Latitude']).astype(float)
y_values = pd.to_numeric(northern['Wind Speed (mph)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(6,10),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between wind speed and latitude.')
plt.savefig('Northern Hemisphere - Wind Speed (mph) vs. Latitude.png')
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of wind speed on latitude, southern hemisphere rows
x_values = pd.to_numeric(southern['Latitude']).astype(float)
y_values = pd.to_numeric(southern['Wind Speed (mph)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted y for every observed latitude, used to draw the regression line
regress_values = x_values * slope + intercept
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
print(f'Regression line equation is: {line_eq}')

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,'r-')
plt.annotate(line_eq,(-55,10.5),fontsize=15,color='red')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')

# linregress returns the correlation coefficient r; square it so the printed
# number really is r-squared
print(f'The r-squared is: {rvalue**2}')
print('The regression does not display much of a correlation between wind speed and latitude.')
plt.savefig('Southern Hemisphere - Wind Speed (mph) vs. Latitude.png')
plt.show()