# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [4]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
# Import API key
from api_keys import weather_api_key
import seaborn as sns

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file intended to hold the retrieved city data
output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) pairs, in degrees, used when sampling random points
lat_range, lng_range = (-90, 90), (-180, 180)

ModuleNotFoundError: No module named 'citipy'

## Generate Cities List

In [None]:
# Draw 1500 random (lat, lng) points spread uniformly over the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Map every point to its nearest city and keep each city name once.
# dict.fromkeys de-duplicates in a single pass while preserving first-seen order,
# replacing the repeated list-membership scan.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(point_lat, point_lng).city_name
        for point_lat, point_lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Request imperial units and set up the base URL for the API.
# HTTPS is used so the API key in the query string is not sent in cleartext.
units = 'imperial'
url = "https://api.openweathermap.org/data/2.5/weather?"
# Accumulator lists for the fields extracted from each JSON response;
# they are filled in lockstep so index i in every list refers to the same city.
lat = []
long = []
maxtemp = []
humidity = []
clouds = []
wind_speed = []
country = []
new_city = []
# 1-based number of the next successfully processed city (used in the print log)
counter = 1

In [None]:
# Query the API once per candidate city and record the fields needed for the analysis.
# Cities for which no usable record comes back are skipped and the loop moves on.
if counter == 1:
    print('Beginning Data Retrieval:\n---------------------------')

for city in cities:
    try:
        # Passing the query as `params` lets requests URL-encode city names that
        # contain spaces or reserved characters; the timeout keeps one stalled
        # request from hanging the whole run.
        data = requests.get(
            url,
            params={'appid': weather_api_key, 'units': units, 'q': city},
            timeout=10,
        )
        data2 = data.json()
        # Read every field BEFORE appending anything, so a missing key can never
        # leave the accumulator lists with different lengths.
        record = (
            data2['name'],
            data2['coord']['lat'],
            data2['coord']['lon'],
            data2['main']['temp_max'],
            data2['main']['humidity'],
            data2['clouds']['all'],
            data2['wind']['speed'],
            data2['sys']['country'],
        )
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields (e.g. the API's "city not found" response).
        print(f'Skipping {city}: no usable weather data returned')
        continue

    buckets = (new_city, lat, long, maxtemp, humidity, clouds, wind_speed, country)
    for bucket, value in zip(buckets, record):
        bucket.append(value)

    current_city = record[0]
    print(f'Processing Record  {counter} of Set 1 | {current_city}')
    counter += 1
 

In [None]:
# Pair each output column name with the list collected for it, then build the table.
column_names = [
    'City', 'Latitude', 'Longitude', 'Max Temp',
    'Humidity', 'Cloudiness', 'Wind Speed', 'Country',
]
column_values = [
    new_city, lat, long, maxtemp,
    humidity, clouds, wind_speed, country,
]
weather_dict = dict(zip(column_names, column_values))
weather_data = pd.DataFrame(weather_dict)

# Write the table to a .csv file (without the index) for use in Vacation.Py
weather_data.to_csv(r'output.csv', index=False)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Display the assembled weather table built from the API responses.
weather_data
# Note: the 'Max Temp' column was used specifically because that is what the
# starter code walkthrough displayed.

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Select any rows whose 'Humidity' value exceeds 100%; an empty result means nothing needs removing
new = weather_data[weather_data['Humidity'].gt(100)]
new

In [None]:
# Northern Hemisphere subset: rows at or above the equator (latitude >= 0)
northern_hemi = weather_data[weather_data['Latitude'].ge(0)]
northern_hemi

In [None]:
# Southern Hemisphere subset: rows strictly below the equator (latitude < 0)
southern_hemi = weather_data[weather_data['Latitude'].lt(0)]
southern_hemi

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Colour map shared by the latitude scatter plots (later cells reuse `cmap`)
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
# A4 landscape in inches; the previous (100, 100) produced an enormous canvas
a4_dims = (11.7, 8.27)
# `plt` is the pyplot alias imported at the top of the notebook
fig, ax = plt.subplots(figsize=a4_dims)
# x / y / hue are passed by keyword: recent seaborn releases accept only `data` positionally
sns.scatterplot(
    data=weather_data,
    x='Latitude',
    y='Max Temp',
    hue='Latitude',
    palette=cmap,
    marker="o",
    ax=ax,
)
ax.set_title('Latitude vs Temperature Plot')
ax.set_xlabel('Latitude')
ax.set_ylabel('Temperature')
fig.savefig('latitude_vs_temp')


In [None]:
# In this temperature plot, the temperature appears to drop off as the latitude increases.

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, coloured by latitude.
# x / y / hue are passed by keyword: recent seaborn releases accept only `data` positionally.
sns.scatterplot(
    data=weather_data,
    x='Latitude',
    y='Humidity',
    hue='Latitude',
    palette=cmap,
    marker="o",
)
plt.title('Latitude vs Humidity Plot')
plt.xlabel('Latitude')
plt.ylabel('Humidity %')
plt.savefig('latitude_vs_humidity')

In [None]:
# There isn't much to tell from this data other than that there seems to be some clustering in the top left and top right,
# which may indicate that humidity tends to be higher the farther north or south you go.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, coloured by latitude.
# x / y / hue are passed by keyword: recent seaborn releases accept only `data` positionally.
sns.scatterplot(
    data=weather_data,
    x='Latitude',
    y='Cloudiness',
    hue='Latitude',
    palette=cmap,
    marker="o",
)
plt.title('Latitude vs Cloudiness Plot')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.savefig('latitude_vs_cloudiness')

In [None]:
# Again, this one is a bit harder to form conclusions about, but it looks as though
# a good amount of either full cloudiness or none at all occurs at all points along the latitude axis

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude, coloured by latitude.
# x / y / hue are passed by keyword: recent seaborn releases accept only `data` positionally.
sns.scatterplot(
    data=weather_data,
    x='Latitude',
    y='Wind Speed',
    hue='Latitude',
    palette=cmap,
    marker="o",
)
plt.title('Latitude vs Wind Speed Plot')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.savefig('latitude_vs_windspeed')

In [None]:
# With this particular plot, most of the data lies below the 15 mph mark for all the latitudes observed.

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: fit a straight line with Max Temp on x and Latitude on y.
x_values = northern_hemi['Max Temp'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: a fixed data-coordinate point
# is clipped (not drawn) whenever it falls outside the plotted data range.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Max Temp')
plt.ylabel('Latitude')
plt.title('Max Temp vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_temp_latitude')
# Show plot
plt.show()


In [None]:
# It looks as though there is a clear downward trend in latitude as the max temp increases.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: fit a straight line with Max Temp on x and Latitude on y.
x_values = southern_hemi['Max Temp'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: southern latitudes are all
# negative, so the old data-coordinate anchor (6, 10) lay outside the axes and
# the equation was never drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Max Temp')
plt.ylabel('Latitude')
plt.title('Max Temp vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_temp_latitude')
# Show plot
plt.show()


In [None]:
# Though the scatter is not as tight as in the last plot, it looks as though there is an upward trend in latitude as the max temp increases.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: fit a straight line with Humidity on x and Latitude on y.
x_values = northern_hemi['Humidity'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: a fixed data-coordinate point
# is clipped (not drawn) whenever it falls outside the plotted data range.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
plt.title('Humidity vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_humidity_latitude')
# Show plot
plt.show()


In [None]:
# There appears to be a slight increase in humidity as the latitudes increase

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: fit a straight line with Humidity on x and Latitude on y.
x_values = southern_hemi['Humidity'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: southern latitudes are all
# negative, so the old data-coordinate anchor (6, 10) lay outside the axes and
# the equation was never drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
plt.title('Humidity vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_humidity_latitude')
# Show plot
plt.show()


In [None]:
# Conversely, it looks as though the humidity increases as the latitudes decrease

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: fit a straight line with Cloudiness on x and Latitude on y.
x_values = northern_hemi['Cloudiness'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: a fixed data-coordinate point
# is clipped (not drawn) whenever it falls outside the plotted data range.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Cloudiness %')
plt.ylabel('Latitude')
plt.title('Cloudiness vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_cloudiness_latitude')
# Show plot
plt.show()


In [None]:
# These next two plots are a bit harder to truly come to a conclusion. It looks as though most of the data
# is clustered on either end of the cloudiness scale (close to 0 and around 100), as shown by the previous
# scatter plot shown above (Latitude vs Cloudiness)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: fit a straight line with Cloudiness on x and Latitude on y.
x_values = southern_hemi['Cloudiness'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: southern latitudes are all
# negative, so the old data-coordinate anchor (6, 10) lay outside the axes and
# the equation was never drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Cloudiness %')
plt.ylabel('Latitude')
plt.title('Cloudiness vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
# Save under a cloudiness-specific name; the previous name
# 'southern_humidity_latitude' overwrote the southern humidity figure.
plt.savefig('southern_cloudiness_latitude')
# Show plot
plt.show()


In [None]:
# The same thing, but a bit less so, as there aren't as many data points, but it also looks as though
# most of the data points are on either end of the cloudiness range

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: fit a straight line with Wind Speed on x and Latitude on y.
x_values = northern_hemi['Wind Speed'].astype('float')
y_values = northern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: a fixed data-coordinate point
# is clipped (not drawn) whenever it falls outside the plotted data range.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Wind Speed (mph)')
plt.ylabel('Latitude')
plt.title('Wind Speed vs Latitude Linear Regression (Northern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('northern_windspeed_latitude')
# Show plot
plt.show()


In [None]:
# It appears as though most of the data points lie around where the wind speed is < 15 or so. Thus, latitude
# isn't as much a factor here as it is with other plots

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: fit a straight line with Wind Speed on x and Latitude on y.
x_values = southern_hemi['Wind Speed'].astype('float')
y_values = southern_hemi['Latitude'].astype('float')

# Run linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot scatter plot
plt.scatter(x_values,y_values)

# Plot regression line
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: southern latitudes are all
# negative, so the old data-coordinate anchor (6, 10) lay outside the axes and
# the equation was never drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords='axes fraction',fontsize=15,color="red")

# Label plot
plt.xlabel('Wind Speed (mph)')
plt.ylabel('Latitude')
plt.title('Wind Speed vs Latitude Linear Regression (Southern)')
# Print r square value
print(f"R squared: {rvalue**2}")
plt.savefig('southern_windspeed_latitude')
# Show plot
plt.show()


In [None]:
# This one is a bit more obscure though the linear regression shows a similar relationship; as the latitudes
# get closer to the south pole, the wind speed tends to drop off a bit.