# WeatherPy
----

Note: Plot descriptions are located in markdown cells under corresponding plots.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy import stats

# ignore some warnings to make things look nicer
import warnings; warnings.simplefilter('ignore')

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# Relative path intended for the exported city weather table.
# NOTE(review): the export cell further down writes to its own hard-coded path
# rather than reading this variable -- confirm which location is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (min, max) bounds, in degrees, passed to np.random.uniform when sampling
# random coordinates for the city list.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Build the list of unique city names used for the weather lookups.

# Sample 1500 random latitude/longitude pairs inside the configured ranges.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each coordinate pair to its nearest city and keep every name once,
# in first-seen order. dict.fromkeys de-duplicates with hashed lookups, which
# avoids rescanning a growing list for every sampled coordinate.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
print(f'There are {len(cities)} cities in the list.')

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Lists that hold the response info; each gets exactly one entry per city so
# they stay index-aligned with `cities` when the DataFrame is built later.
temp_max = []
humidity = []
country = []
wind = []
clouds = []
date = []
lat = []
long = []

# Build query URL
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Partial query URL; the city name is appended for each request
query_url = url + "appid=" + weather_api_key + "&units=" + units + "&q="

print(f"Beginning Data Retrieval")
print(f"-------------------------------")

# Target lists in the same order as the values gathered for each city below
result_lists = (lat, long, temp_max, humidity, country, wind, clouds, date)

# Loop through the list of cities and perform a request for data on each
for i, city in enumerate(cities):
    print(f"Processing Record: {i} of Set 1 | {city}")

    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE touching the lists: if any key is missing,
        # nothing has been appended yet, so the lists cannot drift out of sync.
        record = (
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['sys']['country'],
            response['wind']['speed'],
            response['clouds']['all'],
            response['dt'],
        )
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields: log it and record the "NA" placeholder for every column.
        print("City not found. Skipping...")
        record = ("NA",) * len(result_lists)

    for values, item in zip(result_lists, record):
        values.append(item)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists into one table, drop the cities that could not
# be retrieved, export the result to CSV, and preview it.
weather_dict = {
    "city": cities,
    "lat": lat,
    "lng": long,
    "max temp": temp_max,
    "humidity": humidity,
    "cloudiness": clouds,
    "wind speed": wind,
    "country": country,
    "date": date
}

# create a dataframe from the dictionary
weather_data = pd.DataFrame(weather_dict)

# rows for cities that were not retrieved carry the "NA" placeholder
indexNames = weather_data[weather_data["max temp"] == "NA"].index

# delete these row indexes from dataFrame
weather_data.drop(indexNames, inplace=True)

# export data to .csv
# Forward slashes are used because they resolve on every platform; the
# previous backslash form only resolved on Windows and relied on invalid
# escape sequences ("\o", "\c") inside the string literal.
weather_data.to_csv("../output_data/cities.csv")

# display the dataframe
weather_data.head(5)



## Inspect the data and remove the cities where the humidity > 100%.

In [None]:
# Flag every row whose humidity is at or above 100% and collect its index labels
too_humid = weather_data['humidity'] >= 100
humidity_filter = weather_data[too_humid].index

# Report how many rows are affected, then list them before they are removed
print(f'{len(humidity_filter)} cities have humidity values >/= 100%. They will be removed from the data.')
print(f'The removed cities are listed below.')
print(weather_data.loc[humidity_filter])

# Remove the flagged rows from the table in place
weather_data.drop(humidity_filter, inplace=True)


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of maximum temperature against city latitude
x_values = weather_data['lat']
y_values = weather_data['max temp']
plt.scatter(x_values, y_values)
plt.xlabel('City Latitude')
plt.ylabel('Maximum Temperature (F)')
plt.title('City Latitude vs. Max Temperature (F)')

# Save the figure as .png before plt.show().
# Forward slashes keep the path portable; the previous backslash form only
# resolved on Windows and relied on invalid escape sequences ("\o", "\l").
plt.savefig("../output_data/lat_vs_temp.png")

# show plot in notebook
plt.show()



### Scatter Plot Discussion: City Latitude vs. Max Temperature

* The plot of city latitude versus maximum temperature indicates higher temperatures at latitudes less than approximately 20 degrees. 
* As latitude increases from 20 degrees, the maximum temperature in the cities decreases with a less-tight distribution of values. In other words, cities at higher latitudes show a greater distribution of maximum temperatures as compared to cities at smaller latitudes (~20 degree spread versus ~40 degree spread at lat. > 20 deg.). 

* The relationship between city latitude and maximum temperature is not linear. It appears downward quadratic and is somewhat centered at zero latitude.

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against city latitude
x_values = weather_data['lat']
y_values = weather_data['humidity']
plt.scatter(x_values, y_values)
plt.xlabel('City Latitude')
plt.ylabel('Humidity (%)')
plt.title('City Latitude vs. Humidity (%)')

# Save the figure as .png before plt.show().
# Forward slashes keep the path portable; the previous backslash form only
# resolved on Windows and relied on invalid escape sequences ("\o", "\l").
plt.savefig("../output_data/lat_vs_humidity.png")

# show plot in notebook
plt.show()


### Scatter Plot Discussion: City Latitude vs. Humidity
* From the city latitude versus humidity plot it appears that higher latitude cities (lat. > 40) have higher humidities, in general.
* Cities located in latitudes ranging from ~5 to 40 deg. appear to have lower humidity values; cities in this latitude region have humidity values distributed primarily (but not exclusively) below ~70 percent.
* There is no clear trend in this data except for what appears to be clustering of humidity ranges at different latitudes. 



## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against city latitude
x_values = weather_data['lat']
y_values = weather_data['cloudiness']
plt.scatter(x_values, y_values)
plt.xlabel('City Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('City Latitude vs. Cloudiness (%)')

# Save the figure as .png before plt.show().
# Forward slashes keep the path portable; the previous backslash form only
# resolved on Windows and relied on invalid escape sequences ("\o", "\l").
plt.savefig("../output_data/lat_vs_cloudiness.png")

# show plot in notebook
plt.show()


### Scatter Plot Discussion: City Latitude vs. Cloudiness

* Percent cloudiness versus city latitude does not show any strong relationships. Similar values are found for cloudiness at varying latitudes.
* The data for cloudiness appears discrete or banded, i.e., cloudiness values cluster at 0, 20, 40, 80, and 100%, with dense amounts of data for 0 and 100% cloudiness.
* There are a few gaps for cloudiness values wherein most of the values are high (near 100%) or low (near 0%). This could suggest that some cities are always cloudy while others are almost never cloudy. But there is no correlation with the latitude.


## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against city latitude
x_values = weather_data['lat']
y_values = weather_data['wind speed']
plt.scatter(x_values, y_values)
plt.xlabel('City Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('City Latitude vs. Wind Speed (mph)')

# Save the figure as .png before plt.show().
# Forward slashes keep the path portable; the previous backslash form only
# resolved on Windows and relied on invalid escape sequences ("\o", "\l").
plt.savefig("../output_data/lat_vs_wind.png")

# show plot in notebook
plt.show()

### Scatter Plot Discussion: City Latitude vs. Wind Speed

* The plot of city latitude versus wind speed does not suggest any trends between these two variables.
* The plot indicates that higher values of wind speed appear in cities at latitudes greater than 20 deg. However, these are more like outliers than the suggestion of a latitude-based relationship with wind speed.

## Linear Regression

In [None]:
# Separate the data into Northern Hemisphere (latitude >= 0 degrees)
# and Southern Hemisphere (latitude < 0 degrees).
#
# .copy() gives each hemisphere its own independent frame. The regression
# cells assign converted columns back into these frames; doing that on a
# filtered slice of weather_data is chained assignment, which pandas flags
# with SettingWithCopyWarning.
northern_data = weather_data[weather_data['lat'] >= 0].copy()
southern_data = weather_data[weather_data['lat'] < 0].copy()


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: linear regression of max temperature on latitude.

# Cast the regression inputs to float before fitting
northern_data['lat'] = northern_data['lat'].astype(float)
northern_data['max temp'] = northern_data['max temp'].astype(float)
x_north = northern_data['lat']
y_north = northern_data['max temp']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_north, y_north)

# Fitted y value for every observed latitude
regress_values = x_north * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_north, y_north)
plt.plot(x_north, regress_values, "r-")

# Label plot
plt.title('Northern Hemisphere City Latitude vs. Max Temp (F)')
plt.xlabel('Latitude ')
plt.ylabel('Max Temperature (F)')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Northern City Latitude vs. Max Temperature

* The correlation coefficient suggests a moderate negative linear relationship between latitude and maximum temperature.
* As mentioned in the description of the plot of all cities (north and south hemisphere) and max temperature, the max temperature distribution is smaller in cities that have a latitude less than 20 deg. As latitude increases beyond this point, the maximum temperature fluctuates and the linear fit looks worse.
* Since the slope would be closer to zero for cities at latitudes ranging from zero to ten (less temperature fluctuations with latitude) and the temperature fluctuates significantly after 40 deg. latitude, I would not try to predict max temperature by this regression equation unless I was looking at cities between 10 and 40 deg. latitude.


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: linear regression of max temperature on latitude.

# Cast the regression inputs to float before fitting
southern_data['lat'] = southern_data['lat'].astype(float)
southern_data['max temp'] = southern_data['max temp'].astype(float)
x_south = southern_data['lat']
y_south = southern_data['max temp']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_south, y_south)

# Fitted y value for every observed latitude
regress_values = x_south * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_south, y_south)
plt.plot(x_south, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Southern Hemisphere City Latitude vs. Max Temp (F)')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")

### Linear Regression Discussion: Southern City Latitude vs. Max Temperature

* The correlation coefficient shows a weak positive linear relationship between city latitude and maximum temperature.
* A distinct cut off appears at about -40 deg. city latitude; there are fewer high max temp outliers.
* Compared to the Northern Hemisphere cities, there seem to be fewer cities in the southern hemisphere in general. This could change when different cities are selected, but it definitely appears unlikely that there would be more data/more cities below a latitude of -40.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: linear regression of humidity on latitude.
# x_north (northern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
northern_data['humidity'] = northern_data['humidity'].astype(float)
y_north_hum = northern_data['humidity']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_north, y_north_hum)

# Fitted y value for every observed latitude
regress_values = x_north * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_north, y_north_hum)
plt.plot(x_north, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Northern Hemisphere City Latitude vs. Humidity (%)')
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Northern City Latitude vs. Humidity

* The r-value is very small and, therefore, there is only a very weak positive correlation between northern city latitude and humidity.
* There is a greater humidity value distribution in cities at latitudes less than 40 deg.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: linear regression of humidity on latitude.
# x_south (southern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
southern_data['humidity'] = southern_data['humidity'].astype(float)
y_south_hum = southern_data['humidity']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_south, y_south_hum)

# Fitted y value for every observed latitude
regress_values = x_south * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_south, y_south_hum)
plt.plot(x_south, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Southern Hemisphere City Latitude vs. Humidity (%)')
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Southern City Latitude vs. Humidity

* The correlation coefficient/r-value for southern hemisphere city latitude versus humidity is even weaker than that for the northern hemisphere data.
* Similar to the max temp discussion, there appears to be a smaller amount of data for the southern hemisphere, especially at latitudes less than -40 deg.
* Compared to the discussion of northern city latitude versus humidity, southern latitude city humidity appears to have a wider distribution of values.


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: linear regression of cloudiness on latitude.
# x_north (northern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
northern_data['cloudiness'] = northern_data['cloudiness'].astype(float)
y_north_cloud = northern_data['cloudiness']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_north, y_north_cloud)

# Fitted y value for every observed latitude
regress_values = x_north * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_north, y_north_cloud)
plt.plot(x_north, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Northern Hemisphere City Latitude vs. Cloudiness')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Northern City Latitude vs. Cloudiness

* The r-value suggests no strong linear correlation between cloudiness and northern hemisphere city latitude.
* There do appear to be more high cloudiness cities at higher latitudes, however the data is not really showing a linear correlation.


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: linear regression of cloudiness on latitude.
# x_south (southern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
southern_data['cloudiness'] = southern_data['cloudiness'].astype(float)
y_south_cloud = southern_data['cloudiness']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_south, y_south_cloud)

# Fitted y value for every observed latitude
regress_values = x_south * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_south, y_south_cloud)
plt.plot(x_south, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Southern Hemisphere City Latitude vs. Cloudiness (%)')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")

### Linear Regression Discussion: Southern City Latitude vs. Cloudiness

* Similar to the northern city data, there is no linear relationship between latitude and cloudiness.
* There does appear to be a higher percentage of cloudy cities at higher latitude values in this plot, however, beyond zero degrees latitude this trend is almost opposing. The plot of northern latitude cities shows greater percentage of clouds beyond 50 deg. latitude. This further supports that there is no trend.


####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: linear regression of wind speed on latitude.
# x_north (northern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
northern_data['wind speed'] = northern_data['wind speed'].astype(float)
y_north_wind = northern_data['wind speed']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_north, y_north_wind)

# Fitted y value for every observed latitude
regress_values = x_north * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_north, y_north_wind)
plt.plot(x_north, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Northern Hemisphere City Latitude vs. Wind Speed (mph)')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Northern City Latitude vs. Wind Speed

* The linear regression data shows that there is not a linear relationship between northern city latitude and wind speed (unless you're considering a horizontal line).
* This suggests that wind is not a function of latitude in the northern hemisphere cities.


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: linear regression of wind speed on latitude.
# x_south (southern latitudes as float) comes from the max-temperature
# regression cell, which must have been run first.

# Cast the dependent variable to float before fitting
southern_data['wind speed'] = southern_data['wind speed'].astype(float)
y_south_wind = southern_data['wind speed']

# Perform a linear regression
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_south, y_south_wind)

# Fitted y value for every observed latitude
regress_values = x_south * slope + intercept

# Line equation with an explicit sign, so a negative intercept reads
# "x - 3.20" rather than "x +-3.2"
sign = '+' if intercept >= 0 else '-'
line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

# Scatter of the data with the fitted line drawn in red
plt.scatter(x_south, y_south_wind)
plt.plot(x_south, regress_values, "r-")

# Label plot (the line equation is printed below, not drawn on the figure)
plt.title('Southern Hemisphere City Latitude vs. Wind Speed (mph)')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# Show plot
plt.show()

# Report r and r-squared under their own labels; the squared value was
# previously printed under the "r-value" label.
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")
print(f"The equation of the line fit to the data is {line_eq}")


### Linear Regression Discussion: Southern City Latitude vs. Wind Speed

* The linear regression data for southern cities also shows that there is not a linear relationship between city latitude and wind speed (similar to northern cities). However, the behavior is slightly more linear than the northern city data.
* This very weak linear behavior is negative, i.e. wind speed decreases as latitude increases in the southern hemisphere. This is likely driven by the small amount of data at latitudes less than -50 deg., which has very high wind speed.
