# Weather Maps

In [None]:
# Dependencies
import json
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# API keys
from config import api_key

In [None]:
# Bounds (in degrees) for the random coordinates sampled in the next cell.
# This cell and the next one come from the starter code and generate the
# list of cities to use.
lat_range = (-90, 90)
lng_range = (-180, 180)

In [None]:
# Number of random coordinates to sample. Several coordinates can share the
# same nearest city, so the final city count is at most this value.
# NOTE(review): no random seed is set, so the city list changes on every run.
sample_size = 1500

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=sample_size)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=sample_size)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination.
# dict.fromkeys keeps only the first occurrence of every city name, in the
# order the names were produced, without rescanning a list for each lookup.
towns = list(dict.fromkeys(
    citipy.nearest_city(point_lat, point_lng).city_name
    for point_lat, point_lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(towns)

In [None]:
# setting my search parameters
# towns = ['st. paul', 'albany', 'st. louis', 'miami', 'portland']  # short list for quick test runs
units = 'imperial'
# https so the api key in the query string is not sent in clear text
base_url = 'https://api.openweathermap.org/data/2.5/weather?'

In [None]:
# running thru my list of cities and grabbing the data i want

# setting lists: one per output column. they are only ever appended to
# together, so they always stay the same length
cities = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# same order as the fields read from each response below
columns = (cities, lat, lng, max_temp, humidity,
           cloudiness, wind_speed, country, date)

# setting incrementing numbers as zero
rec_num = 0
set_num = 0

print('Beginning Data Calls.')
print('-----------------------')

for city in towns:

    # requesting; passing `params` lets requests url-encode the city name,
    # and the timeout stops one stalled connection from hanging the whole run
    try:
        response = requests.get(
            base_url,
            params={'q': city, 'units': units, 'appid': api_key},
            timeout=10,
        )
        city_json = response.json()
    except (requests.RequestException, ValueError) as err:
        # network failure or a body that is not json. only the exception type
        # is printed because the full message can contain the request url,
        # which includes the api key
        print(f'Request failed ({type(err).__name__}) Skipping {city}.')
        continue
    finally:
        # sleep for one second to avoid requesting more than 60 per min
        time.sleep(1)

    # i want city, lat, lng, max temp, humidity, cloudiness, windspeed, country, date
    try:
        # read every field before appending anything: if one key is missing
        # no list is touched, so the columns cannot end up with different
        # lengths (which would make pd.DataFrame fail below)
        record = (
            city_json['name'],
            city_json['coord']['lat'],
            city_json['coord']['lon'],
            city_json['main']['temp_max'],
            city_json['main']['humidity'],
            city_json['clouds']['all'],
            city_json['wind']['speed'],
            city_json['sys']['country'],
            city_json['dt'],
        )
    except (KeyError, TypeError):
        # prints when missing a value and city is skipped
        print('City not found Skipping.')
        continue

    for column, value in zip(columns, record):
        column.append(value)

    # increment number for printing
    if rec_num < 49:
        rec_num += 1
    else:
        rec_num = 0
        set_num += 1

    # print record when requesting
    print(f'Processing Record {rec_num} of Set {set_num} | {city}')


# making dictionary of all
weather_dict = {
    'City': cities,
    'Lat': lat,
    'Lng': lng,
    'Max Temp': max_temp,
    'Humidity': humidity,
    'Cloudiness': cloudiness,
    'Wind Speed': wind_speed,
    'Country': country,
    'Date': date
}

# putting all in data frame
weather_df = pd.DataFrame(weather_dict)

In [None]:
# preview the first rows of the frame built from the api responses
weather_df.head()

In [None]:
# check the dtype pandas picked for each column
weather_df.dtypes

In [None]:
# non-null count per column, before the humidity filter below
weather_df.count()

In [None]:
# keep only the rows whose humidity is at most 100, dropping any city
# that reports a humidity above 100
weather_df = weather_df[weather_df['Humidity'].le(100)]

In [None]:
# non-null count per column, after the humidity filter above
weather_df.count()

In [None]:
# where i want to save file
output_file = 'output/weather_data.csv'

# make sure the destination folder exists first: to_csv does not create
# missing directories and would fail on a fresh checkout
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# write to output_file
weather_df.to_csv(output_file, index=False)

### Reload the saved data, so the data-collection cells above can be skipped

In [None]:
# same path the data-collection cells above write to
output_file = 'output/weather_data.csv'

# load the saved csv so the api calls do not have to be repeated
weather_df = pd.read_csv(output_file)

In [None]:
# preview the data loaded from the csv
weather_df.head()

# ok, I have data.

## Scatterplots

In [None]:
# temperature vs latitude

# columns to plot (`lat` is reused by the scatter cells that follow)
lat = weather_df['Lat']
temp = weather_df['Max Temp']

# one 8x6 figure holding a single set of axes
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(lat, temp, color='lightgreen', edgecolor='black', alpha=.8)

# title, axis labels and grid
ax.set(
    title='Max Temperature VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Max Temperature (F)',
)
ax.grid(True)

plt.show()

The graph 'Max Temperature VS City Latitude' looks at how temperature is affected by distance from the equator. The code plots the Max Temp column on the y axis and the Lat column on the x axis.

In [None]:
# humidity vs latitude
# (x values come from `lat`, set in the temperature vs latitude cell)
humid = weather_df['Humidity']

# one 8x6 figure holding a single set of axes
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(lat, humid, color='lightgreen', edgecolor='black', alpha=.8)

# title, axis labels and grid
ax.set(
    title='Humidity VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Humidity (%)',
)
ax.grid(True)

plt.show()

This graph, 'Humidity VS City Latitude', compares humidity with nearness to the equator.
The code plots the Humidity column on the y axis and the Lat column on the x axis.

In [None]:
# cloudiness vs latitude
# (x values come from `lat`, set in the temperature vs latitude cell)
clouds = weather_df['Cloudiness']

# one 8x6 figure holding a single set of axes
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(lat, clouds, color='lightgreen', edgecolor='black', alpha=.8)

# title, axis labels and grid
ax.set(
    title='Cloudiness VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Cloudiness (%)',
)
ax.grid(True)

plt.show()

This graph, 'Cloudiness VS City Latitude', looks at the relationship between a city's cloud coverage and its latitude.

In [None]:
# wind speed VS lat
# (x values come from `lat`, set in the temperature vs latitude cell)
windy = weather_df['Wind Speed']

# one 8x6 figure holding a single set of axes
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(lat, windy, color='lightgreen', edgecolor='black', alpha=.8)

# title, axis labels and grid
ax.set(
    title='Wind Speed VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Wind Speed (mph)',
)
ax.grid(True)

plt.show()

This graph, 'Wind Speed VS City Latitude', looks at whether latitude correlates with wind speed. The code plots the Wind Speed column on the y axis and the Lat column on the x axis.

# Regressions

In [None]:
# reminder of the columns available for the regressions
weather_df.head()

In [None]:
# need to split data into two df's one of northern hemisphere and one of southern hemisphere
north_df = weather_df.loc[weather_df['Lat'] > 0, :]
south_df = weather_df.loc[weather_df['Lat'] < 0, :]
north_df.count()

In [None]:
# north temp vs lat

# the two columns being compared (`n_lat` is reused by the later northern cells)
n_temp = north_df['Max Temp']
n_lat = north_df['Lat']

# least-squares fit of max temp against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_temp)

# fitted value at every latitude in the data
regress_line = slope * n_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(n_lat, n_temp, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(n_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Northern Hemisphere Max Temperature VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Max Temperature (F)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (20,20), color='r')

ax.grid(True)

plt.show()

In the above plot, 'Northern Hemisphere Max Temperature VS City Latitude', I am looking at the relationship between temperature and latitude. I am only looking at the northern hemisphere so that I can do a linear regression on it, as the southern hemisphere looks different. The correlation coefficient here is very strong, -0.87, meaning the two are probably correlated. The regression is also pretty good, with an r squared value of 0.76. I would say that there is a correlation between temperature and latitude.

In [None]:
# south temp vs lat

# the two columns being compared (`s_lat` is reused by the later southern cells)
s_temp = south_df['Max Temp']
s_lat = south_df['Lat']

# least-squares fit of max temp against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_temp)

# fitted value at every latitude in the data
regress_line = slope * s_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(s_lat, s_temp, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(s_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Southern Hemisphere Max Temperature VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Max Temperature (F)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (-30,60), color='r')

ax.grid(True)

plt.show()

This graph, 'Southern Hemisphere Max Temperature VS City Latitude', looks at the same relationship as the plot and regression above, but this regression is not as strong. For starters, there are fewer data points to work with here, which never helps, and just from looking at it, they appear far more spread out than in the northern hemisphere. I don't believe latitude magically has a different relationship with temperature down south than it does up north, but there are a lot of other factors in weather and climate besides latitude at work there. Maybe the Southern Hemisphere is just a more interesting hemisphere than the north.

In [None]:
# north humid vs lat

# y values for this comparison (x is `n_lat` from the north temp cell)
n_humid = north_df['Humidity']

# least-squares fit of humidity against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_humid)

# fitted value at every latitude in the data
regress_line = slope * n_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(n_lat, n_humid, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(n_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Northern Hemisphere Humidity VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Humidity (%)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (40,30), color='r')

ax.grid(True)

plt.show()

The above graph, 'Northern Hemisphere Humidity VS City Latitude', tries to show a relationship between humidity and latitude in the northern hemisphere. It fails. There is no relationship here between humidity and latitude. With a correlation coefficient of 0.38 I would say there is just nothing here. On top of that, the linear regression is useless with an r^2 of 0.14. I don't believe latitude is an important factor in humidity.

In [None]:
# south humid vs lat

# y values for this comparison (x is `s_lat` from the south temp cell)
s_humid = south_df['Humidity']

# least-squares fit of humidity against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_humid)

# fitted value at every latitude in the data
regress_line = slope * s_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(s_lat, s_humid, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(s_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Southern Hemisphere Humidity VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Humidity (%)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (-20,30), color='r')

ax.grid(True)

plt.show()

The above graph 'Southern Hemisphere Humidity VS City Latitude' shows the same relationship as the last graph but for the southern hemisphere. It has even worse r and r^2 values, and I think latitude is not a driving factor in humidity.

In [None]:
# north clouds vs lat

# y values for this comparison (x is `n_lat` from the north temp cell)
n_clouds = north_df['Cloudiness']

# least-squares fit of cloudiness against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_clouds)

# fitted value at every latitude in the data
regress_line = slope * n_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(n_lat, n_clouds, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(n_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Northern Hemisphere Cloudiness VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Cloudiness (%)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (30,60), color='r')

ax.grid(True)

plt.show()

The graph above is 'Northern Hemisphere Cloudiness VS City Latitude' and it is comparing Cloudiness to Latitude. There is no correlation between the two. The r value is so low it is almost zero. The linear regression is also basically useless with r^2 at almost 0. This makes sense. There are clouds all over the world, not just by the equator or at certain latitudes.

In [None]:
# south clouds vs lat

# y values for this comparison (x is `s_lat` from the south temp cell)
s_clouds = south_df['Cloudiness']

# least-squares fit of cloudiness against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_clouds)

# fitted value at every latitude in the data
regress_line = slope * s_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(s_lat, s_clouds, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(s_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Southern Hemisphere Cloudiness VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Cloudiness (%)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (-20,30), color='r')

ax.grid(True)

plt.show()

The graph above is 'Southern Hemisphere Cloudiness VS City Latitude' and I don't have anything to say that I didn't already say above for northern hemisphere. There just is not a correlation.

In [None]:
# north windy vs lat

# y values for this comparison (x is `n_lat` from the north temp cell)
n_windy = north_df['Wind Speed']

# least-squares fit of wind speed against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(n_lat, n_windy)

# fitted value at every latitude in the data
regress_line = slope * n_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(n_lat, n_windy, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(n_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Northern Hemisphere Wind Speed VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Wind Speed (mph)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (60,15), color='r')

ax.grid(True)

plt.show()

The above graph, 'Northern Hemisphere Wind Speed VS City Latitude', compares wind speed and latitude. There is just not enough of a relationship. The r value is 0.03, which is very low. This makes sense, as wind speed is influenced by the earth's rotation, terrain, air pressure, etc. There are so many influences that I don't think latitude would matter. The regression is basically 0.

In [None]:
# south windy vs lat

# y values for this comparison (x is `s_lat` from the south temp cell)
s_windy = south_df['Wind Speed']

# least-squares fit of wind speed against latitude
slope, intercept, rvalue, pvalue, stderr = linregress(s_lat, s_windy)

# fitted value at every latitude in the data
regress_line = slope * s_lat + intercept

# report how strong the linear relationship is
print(f'The r value is {rvalue}')
print(f'The r squared value is {rvalue**2}')

# scatter of the data with the fitted line drawn over it
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(s_lat, s_windy, color='lightgreen', edgecolor='black', alpha=.8)
ax.plot(s_lat, regress_line, color='r', alpha=.5)

ax.set(
    title='Southern Hemisphere Wind Speed VS City Latitude (2/9/21)',
    xlabel='Latitude',
    ylabel='Wind Speed (mph)',
)

# write the equation of the fitted line on the plot
line_str = f'y = {slope:.2f}x + {intercept:.2f}'
ax.annotate(line_str, (-50,12.5), color='r')

ax.grid(True)

plt.show()

The above graph is 'Southern Hemisphere Wind Speed VS City Latitude', and it actually has higher r and r^2 values than the northern hemisphere, but they are still not high enough to be useful. I still stand by my conclusion of no relationship.

# Final Take Aways

My final takeaway is that while Cloudiness, Humidity, and Wind Speed are independent of Latitude, Temperature is in fact correlated with Latitude. This correlation is stronger in the Northern Hemisphere than in the Southern Hemisphere, but it is there in both.

# My 3 Observations

## Number One
Looking at Northern Hemisphere Max Temperature VS City Latitude it looks like it gets warmer as you approach the equator. With the r value at -.87 I would say this is a fair assumption.

## Number Two
Wind Speed has nothing to do with Latitude. Looking at Northern Hemisphere Wind Speed VS City Latitude, it is just a scatter of data with no real trend, and the r value backs this up, being really low at .00314.

## Number 3
Clouds have very little to do with latitude. They are everywhere and nowhere, and the data backs this up: the r value is low at .05.