In [None]:
# Dependencies and Setup

import matplotlib.pyplot as plt
import requests
import pandas as pd
import numpy as np
import random
import time
from scipy import stats
from pprint import pprint
from time import sleep

#import weather API Key
from config import api_key

#Use citipy to find city based on Latitude and Longitude
from citipy import citipy 


# Generate the List of Cities

In [None]:
# Build the random coordinate sample that will be mapped to cities

# target number of cities for the analysis
num_cities = 10

# Oversampling factor: random points can repeat, and distinct points can resolve
# to the same city, so extra coordinates are drawn to leave room for dropped duplicates.
coordinate_buffer = 5

# total number of coordinate pairs to draw
sample_size = num_cities * coordinate_buffer

# latitudes are drawn uniformly from [-90, 90), longitudes from [-180, 180)
lat = np.random.uniform(low=-90.000, high=90.000, size=sample_size)
lng = np.random.uniform(low=-180.000, high=180.000, size=sample_size)

# pair each latitude with its longitude as (lat, lng) tuples for the citipy lookup
coordinates = [(latitude, longitude) for latitude, longitude in zip(lat, lng)]

In [None]:
# Resolve the random coordinates to a list of unique city names

# Multiplier intended to over-collect cities in case some are missing from the
# weather database. It is defined here but not applied by the lookup below.
buffer = 1.4

# nearest citipy city name for every (lat, lng) pair, in coordinate order
nearest_names = (
    citipy.nearest_city(point_lat, point_lng).city_name
    for point_lat, point_lng in coordinates
)

# dict.fromkeys keeps the first occurrence of each name and preserves order,
# so duplicates are dropped without changing the order of the remaining cities
cities = list(dict.fromkeys(nearest_names))

# Search for the Cities

In [None]:
# Endpoint and units for the weather search query.
# HTTPS is used because the API key travels in the query string and would
# otherwise be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Partial query URL; the city name is appended after "q=" for each request
query_url = f"{url}appid={api_key}&units={units}&q="



In [None]:

# Collect the following values for each city the weather API recognises:
# latitude, longitude, temperature, % humidity, % cloudiness and wind speed
cityList = []
temp = []
humid = []
cloud = []
wind = []
newlat = []
newlong = []
count = 0

# loop through each city
for city in cities:

    # build the search query for this city
    full_query = query_url + city
    # the full request URL is deliberately not printed: it contains the API key
    print(f'Requesting weather data for {city}')

    try:
        # the timeout keeps a single stalled request from hanging the whole run
        response = requests.get(full_query, timeout=10)
        weather_data = response.json()

        # read every field before touching the lists, so a response that lacks
        # any of them cannot leave the lists with different lengths
        city_lat = weather_data['coord']['lat']
        city_lon = weather_data['coord']['lon']
        city_temp = weather_data['main']['temp']
        city_humidity = weather_data['main']['humidity']
        city_wind = weather_data['wind']['speed']
        city_clouds = weather_data['clouds']['all']
        city_name = weather_data['name']
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # the request failed, the body was not JSON, or the expected fields are
        # absent (e.g. the city is not in the database): move on to the next city
        print(f'{city} could not be found. Trying next city')
    else:
        # there is a match: add the needed information to each list
        newlat.append(city_lat)
        newlong.append(city_lon)
        temp.append(city_temp)
        humid.append(city_humidity)
        wind.append(city_wind)
        cloud.append(city_clouds)
        cityList.append(city_name)
        count = count + 1
        print(f'{count}. {city} data has been saved.')

    # pause between requests (presumably to stay within the API rate limit)
    sleep(1)


In [None]:
# Assemble the per-city result lists into a DataFrame, one column per measurement.
# NOTE(review): "Longintude" looks like a misspelling of "Longitude"; it is kept
# as-is because the column name is written to the exported CSV.
column_names = [
    "City",
    "Latitude",
    "Longintude",
    "Temperature (F)",
    "% Humidity",
    "% Cloud Cover",
    "Wind Speed (MPH)",
]
column_values = [cityList, newlat, newlong, temp, humid, cloud, wind]

# column label -> list of values, in the same order as the lists above
weatherInfoDict = dict(zip(column_names, column_values))

weatherDF = pd.DataFrame(weatherInfoDict)
weatherDF.head()

# Scatter Plot Function

In [None]:
# create a function for scatter plots
def plotFunc(x_values, y_values):
    """Open a new 6x4 inch, 80 dpi figure and scatter y_values against x_values.

    Axis labels, title, saving and showing are left to the caller.
    """
    figure_options = {"figsize": (6, 4), "dpi": 80}
    plt.figure(**figure_options)
    plt.scatter(x_values, y_values)


# Linear Regression Plot Function

In [None]:
#optional: create a function that writes linear regression plots
def linRegressFunc(x_values, y_values):
    """Scatter y_values against x_values and overlay the fitted regression line.

    Draws on a new 6x4 inch, 80 dpi figure, annotates the fitted line's
    equation in red at the (min x, min y) data point and prints the
    r-squared value. Axis labels, title, saving and showing are left to
    the caller.

    Parameters
    ----------
    x_values, y_values : array-like of numbers
        Equal-length 1-D data (pandas Series, NumPy arrays or plain lists).

    Returns
    -------
    The result object of ``scipy.stats.linregress`` (slope, intercept,
    rvalue, ...), so callers can reuse the fit. Callers that ignore the
    return value are unaffected.
    """
    # Work on float arrays so plain lists are accepted as well as Series/arrays
    # (the annotation below needs .min(), which lists do not provide).
    x = np.asarray(x_values, dtype=float)
    y = np.asarray(y_values, dtype=float)

    regression = stats.linregress(x, y)
    slope = float(regression.slope)
    intercept = float(regression.intercept)
    rvalue = float(regression.rvalue)
    regress_values = x * slope + intercept

    #create plot
    plt.figure(figsize=(6, 4), dpi=80)
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")

    #create line equation and annotate
    # a negative intercept is rendered as "- b" instead of "+ -b"
    rounded_intercept = round(intercept, 2)
    sign = "-" if rounded_intercept < 0 else "+"
    line_eq = f"y = {round(slope, 2)}x {sign} {abs(rounded_intercept)}"
    plt.annotate(line_eq, (x.min(), y.min()), fontsize=15, color="red")

    #calculate and print r squared to determine strength of correlation
    print(f'The r-squared is: {rvalue**2}')

    return regression

# Temperature vs. Latitude
Worldwide scatter plot, followed by linear regressions for the Northern and Southern Hemispheres

In [None]:
# Plot 1: temperature against latitude for every city in weatherDF
plotFunc(
    x_values=weatherDF["Latitude"],
    y_values=weatherDF['Temperature (F)'],
)

# title first, then the axis labels
plt.title("Temperature vs. Latitude - Worldwide")
plt.ylabel("Temperature (F)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Temp_vs_Lat_All.png")
plt.show()

In [None]:
# Northern Hemisphere: temperature vs. latitude with a fitted regression line

# keep only the rows whose latitude is strictly greater than zero
in_north = weatherDF["Latitude"] > 0
northDF = weatherDF.loc[in_north]

linRegressFunc(
    x_values=northDF["Latitude"],
    y_values=northDF['Temperature (F)'],
)

# title first, then the axis labels
plt.title("Temperature vs. Latitude - Northern Hemisphere")
plt.ylabel("Temperature (F)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Temp_vs_Lat_North.png")
plt.show()

In [None]:
# Southern Hemisphere: temperature vs. latitude with a fitted regression line

# keep only the rows whose latitude is strictly less than zero
in_south = weatherDF["Latitude"] < 0
southDF = weatherDF.loc[in_south]

linRegressFunc(
    x_values=southDF["Latitude"],
    y_values=southDF['Temperature (F)'],
)

# title first, then the axis labels
plt.title("Temperature vs. Latitude - Southern Hemisphere")
plt.ylabel("Temperature (F)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Temp_vs_Lat_South.png")
plt.show()

# % Humidity vs. Latitude
Worldwide scatter plot, followed by linear regressions for the Northern and Southern Hemispheres

In [None]:
# Plot 2: % humidity against latitude for every city in weatherDF
plotFunc(
    x_values=weatherDF["Latitude"],
    y_values=weatherDF['% Humidity'],
)

# title first, then the axis labels
plt.title("% Humidity vs. Latitude - Worldwide")
plt.ylabel("% Humidity")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Humid_vs_Lat_All.png")
plt.show()

In [None]:
# Northern Hemisphere: % humidity vs. latitude with a fitted regression line
linRegressFunc(
    x_values=northDF["Latitude"],
    y_values=northDF['% Humidity'],
)

# title first, then the axis labels
plt.title("% Humidity vs. Latitude - Northern Hemisphere")
plt.ylabel("% Humidity")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Humid_vs_Lat_North.png")
plt.show()

In [None]:
# Southern Hemisphere: % humidity vs. latitude with a fitted regression line
linRegressFunc(
    x_values=southDF["Latitude"],
    y_values=southDF['% Humidity'],
)

# title first, then the axis labels
plt.title("% Humidity vs. Latitude - Southern Hemisphere")
plt.ylabel("% Humidity")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Humid_vs_Lat_South.png")
plt.show()

# Cloudiness (%) vs. Latitude
Worldwide scatter plot, followed by linear regressions for the Northern and Southern Hemispheres

In [None]:
# Plot 3: % cloud cover against latitude for every city in weatherDF
plotFunc(
    x_values=weatherDF["Latitude"],
    y_values=weatherDF['% Cloud Cover'],
)

# title first, then the axis labels
plt.title("% Cloud Cover vs. Latitude - Worldwide")
plt.ylabel("% Cloud Cover")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Cloud_vs_Lat_All.png")
plt.show()

In [None]:
# Northern Hemisphere: % cloud cover vs. latitude with a fitted regression line
linRegressFunc(
    x_values=northDF["Latitude"],
    y_values=northDF['% Cloud Cover'],
)

# title first, then the axis labels
plt.title("% Cloud Cover vs. Latitude - Northern Hemisphere")
plt.ylabel("% Cloud Cover")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Cloud_vs_Lat_North.png")
plt.show()

In [None]:
# Southern Hemisphere: % cloud cover vs. latitude with a fitted regression line
linRegressFunc(
    x_values=southDF["Latitude"],
    y_values=southDF['% Cloud Cover'],
)

# title first, then the axis labels
plt.title("% Cloud Cover vs. Latitude - Southern Hemisphere")
plt.ylabel("% Cloud Cover")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Cloud_vs_Lat_South.png")
plt.show()

# Wind Speed vs. Latitude
Worldwide scatter plot, followed by linear regressions for the Northern and Southern Hemispheres

In [None]:
# Plot 4: wind speed (mph) against latitude for every city in weatherDF
plotFunc(
    x_values=weatherDF["Latitude"],
    y_values=weatherDF['Wind Speed (MPH)'],
)

# title first, then the axis labels
plt.title("Wind Speed (MPH) vs. Latitude - Worldwide")
plt.ylabel("Wind Speed (MPH)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Wind_vs_Lat_All.png")
plt.show()


In [None]:
# Northern Hemisphere: wind speed (mph) vs. latitude with a fitted regression line
linRegressFunc(
    x_values=northDF["Latitude"],
    y_values=northDF['Wind Speed (MPH)'],
)

# title first, then the axis labels
plt.title("Wind Speed (MPH) vs. Latitude - Northern Hemisphere")
plt.ylabel("Wind Speed (MPH)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Wind_vs_Lat_North.png")
plt.show()

In [None]:
# Southern Hemisphere: wind speed (mph) vs. latitude with a fitted regression line
linRegressFunc(
    x_values=southDF["Latitude"],
    y_values=southDF['Wind Speed (MPH)'],
)

# title first, then the axis labels
plt.title("Wind Speed (MPH) vs. Latitude - Southern Hemisphere")
plt.ylabel("Wind Speed (MPH)")
plt.xlabel("Latitude")

# write the figure to a PNG file, then display it
plt.savefig("images/Wind_vs_Lat_South.png")
plt.show()

In [None]:
# Export the collected weather table to CSV (UTF-8 encoded, index column omitted)
csv_options = {"encoding": "utf-8", "index": False}
weatherDF.to_csv('../data/weather_data.csv', **csv_options)