In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint

In [None]:
# Import API key
#from api_keys import weather_api_key

In [2]:
import os
from dotenv import load_dotenv
# load the .env environment variables into the notebook
load_dotenv()
#Get the API key from the environment variable as Python variable
weather_api_key = os.getenv("weather_api_key")

In [3]:
# Incorporated citipy to determine city based on latitude and longitude
#!pip install citipy
from citipy import citipy

In [None]:
#!pip install opencage
#from opencage.geocoder import OpenCageGeocode

In [None]:
#!pip install -U ipykernel

In [4]:
# Output File (CSV)
output_data_file = "output_data/cities.csv"

In [5]:
# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [6]:
# Generate Cities List
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

In [7]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

In [15]:
# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

624

In [12]:
# Save config information
url = "http://api.openweathermap.org/data/2.5/weather?"

# Build and test the query
query_url = f"{url}appid={weather_api_key}&q=&units=imperial"
print(query_url)

http://api.openweathermap.org/data/2.5/weather?appid=6a3af63ab782549e7a87a984287f1313&q=&units=imperial


In [16]:
# Make the dataframe
cities_weather_df = pd.DataFrame({'City': cities, 'Latitude': "", 'Longitude': "", 'Max Temp': "", 'Humidity':"", 'Cloudiness': "", 'Wind Speed': "", 'Country': "", 'Date': "" })
cities_weather_df

Unnamed: 0,City,Latitude,Longitude,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,tasiilaq,,,,,,,,
1,leningradskiy,,,,,,,,
2,rikitea,,,,,,,,
3,ushuaia,,,,,,,,
4,bluff,,,,,,,,
...,...,...,...,...,...,...,...,...,...
619,baglung,,,,,,,,
620,broken hill,,,,,,,,
621,talnakh,,,,,,,,
622,timmins,,,,,,,,


In [10]:
# Specify the URL
url =  "http://api.openweathermap.org/data/2.5/weather?units=imperial"


In [18]:
#JSONify the response
#response_json = response
#pprint(response_json)

In [17]:
# Perform API Calls
#Variables to keep track of response number
record_num = 1
record_set = 1

# Run the API call for each city
print('''
-----------------------------
Beginning Data Retrieval     
-----------------------------''')
for index, row in cities_weather_df.iterrows():

    # create the query url
    query_url = f"{url}appid={weather_api_key}&units=imperial&q="
    query_url = url + row['City'] + '&APPID=' + weather_api_key

    # Make the API request and store the json response
    response = requests.get(query_url).json()

    # Exception for city not found
    try:
        # Message to display progress during retrieval
        print(f'Processing Record {record_num} for Set {record_set} | {response["name"]}')

        #input city data into dataframe
        cities_weather_df.loc[index, 'Latitude'] = response['coord']['lat']
        cities_weather_df.loc[index, 'Longitude'] = response['coord']['lon']
        cities_weather_df.loc[index, 'Max Temp'] = response['main']['max_temp']
        cities_weather_df.loc[index, 'Humidity'] = response['main']['humidity']
        cities_weather_df.loc[index, 'Cloudiness'] = response['clouds']['all']
        cities_weather_df.loc[index, 'Wind Speed'] = response['wind']['speed']
        cities_weather_df.loc[index, 'Country'] = response['sys']['country']
        cities_weather_df.loc[index, 'Date'] = response['dt']
    
        record_num += 1
    except(KeyError, IndexError):
        print("City not found. Skipping...")

    if record_num ==51:
        record_set += 1
        recprd_num = 1
        time.sleep(60)

print('''
-----------------------------
Data Retrieval Complete      
-----------------------------''')


-----------------------------
Beginning Data Retrieval     
-----------------------------
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
City not foun

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [None]:
# Set lists for Dataframe
city = []
lat = []
lon = []
temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# clean the df to remove null cities
cities_weather_df = cities_weather_df.dropna()


In [None]:
# save the df to csv
cities_weather_df.to_csv()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression