In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import os
from dotenv import load_dotenv

# load the .env environment variables into the notebook
load_dotenv()

#Get the API key from the environment variable as Python variable
weather_api_key = os.getenv("weather_api_key")

temp_units = "imperial"

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# define url
query_url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units={temp_units}&q="

# define count of pass throughs
count_one = 0
set_one = 1

In [3]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
# If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

652

In [4]:
# set lists for the dataframe
city_two = []
cloudinesses = []
dates = []
humidities = []
lats = []
lngs = []
max_temps = []
wind_speeds = []
countries = []

# set initial count quantities for organization
count_one = 0
set_one = 1

# loops for creating dataframe columns
for city in cities:
    try:
        response = requests.get(query_url + city.replace(" ","&")).json()
        cloudinesses.append(response['clouds']['all'])
        countries.append(response['sys']['country'])
        dates.append(response['dt'])
        humidities.append(response['main']['humidity'])
        lats.append(response['coord']['lat'])
        lngs.append(response['coord']['lon'])
        max_temps.append(response['main']['temp_max'])
        wind_speeds.append(response['wind']['speed'])
        if count_one > 48:
            count_one = 1
            set_one += 1
            city_two.append(city)
        else:
            count_one += 1
            city_two.append(city)
        print(f"Processing Record {count_one} of Set {set_one} | {city}")
    except Exception:
        print("City not found. Skipping...")


Processing Record 1 of Set 1 | vetluga
Processing Record 2 of Set 1 | hobart
City not found. Skipping...
Processing Record 3 of Set 1 | pacific grove
City not found. Skipping...
Processing Record 4 of Set 1 | hermanus
Processing Record 5 of Set 1 | punta arenas
City not found. Skipping...
Processing Record 6 of Set 1 | ushuaia
Processing Record 7 of Set 1 | gawler
City not found. Skipping...
Processing Record 8 of Set 1 | khatanga
Processing Record 9 of Set 1 | mataura
Processing Record 10 of Set 1 | kavieng
Processing Record 11 of Set 1 | college
Processing Record 12 of Set 1 | dzhebariki-khaya
Processing Record 13 of Set 1 | busselton
Processing Record 14 of Set 1 | provideniya
Processing Record 15 of Set 1 | oktyabrskoye
City not found. Skipping...
Processing Record 16 of Set 1 | upernavik
Processing Record 17 of Set 1 | kaduna
Processing Record 18 of Set 1 | oriximina
Processing Record 19 of Set 1 | ranfurly
Processing Record 20 of Set 1 | vanimo
Processing Record 21 of Set 1 | rik

Processing Record 40 of Set 4 | ribeira grande
Processing Record 41 of Set 4 | cherskiy
Processing Record 42 of Set 4 | baykit
Processing Record 43 of Set 4 | villa union
Processing Record 44 of Set 4 | yamethin
Processing Record 45 of Set 4 | honningsvag
Processing Record 46 of Set 4 | imbituba
Processing Record 47 of Set 4 | nova odesa
Processing Record 48 of Set 4 | marsh harbour
City not found. Skipping...
Processing Record 49 of Set 4 | champerico
Processing Record 1 of Set 5 | lagoa
Processing Record 2 of Set 5 | dalhousie
Processing Record 3 of Set 5 | nicoya
Processing Record 4 of Set 5 | kaeo
Processing Record 5 of Set 5 | solton
Processing Record 6 of Set 5 | boyuibe
City not found. Skipping...
Processing Record 7 of Set 5 | dunedin
Processing Record 8 of Set 5 | leningradskiy
Processing Record 9 of Set 5 | carnarvon
Processing Record 10 of Set 5 | salalah
Processing Record 11 of Set 5 | lasa
Processing Record 12 of Set 5 | markova
Processing Record 13 of Set 5 | nhulunbuy
Pr

Processing Record 30 of Set 8 | kincardine
City not found. Skipping...
Processing Record 31 of Set 8 | adrar
Processing Record 32 of Set 8 | poninka
Processing Record 33 of Set 8 | kruisfontein
Processing Record 34 of Set 8 | clyde river
Processing Record 35 of Set 8 | cedar falls
Processing Record 36 of Set 8 | alofi
Processing Record 37 of Set 8 | ayan
Processing Record 38 of Set 8 | oranjestad
Processing Record 39 of Set 8 | astoria
Processing Record 40 of Set 8 | kysyl-syr
Processing Record 41 of Set 8 | penzance
Processing Record 42 of Set 8 | williams lake
Processing Record 43 of Set 8 | gushikawa
City not found. Skipping...
Processing Record 44 of Set 8 | cukai
City not found. Skipping...
Processing Record 45 of Set 8 | kamenka
Processing Record 46 of Set 8 | graham
Processing Record 47 of Set 8 | goderich
Processing Record 48 of Set 8 | almaznyy
Processing Record 49 of Set 8 | nogent-le-rotrou
Processing Record 1 of Set 9 | esperance
Processing Record 2 of Set 9 | santa
Process

Processing Record 13 of Set 12 | tulun
Processing Record 14 of Set 12 | sabang
Processing Record 15 of Set 12 | jasper
Processing Record 16 of Set 12 | zhukovka
Processing Record 17 of Set 12 | grindavik
Processing Record 18 of Set 12 | naliya
Processing Record 19 of Set 12 | port hawkesbury
City not found. Skipping...
Processing Record 20 of Set 12 | fomboni
City not found. Skipping...
Processing Record 21 of Set 12 | kiama
Processing Record 22 of Set 12 | waitati
Processing Record 23 of Set 12 | karratha
Processing Record 24 of Set 12 | kelme
Processing Record 25 of Set 12 | jizan
Processing Record 26 of Set 12 | chantada
Processing Record 27 of Set 12 | shangrao
Processing Record 28 of Set 12 | mana
Processing Record 29 of Set 12 | lima
Processing Record 30 of Set 12 | arroyo
Processing Record 31 of Set 12 | kirksville
Processing Record 32 of Set 12 | chabahar
Processing Record 33 of Set 12 | cayenne
Processing Record 34 of Set 12 | takeo
Processing Record 35 of Set 12 | lincoln
Pro

In [None]:
# Import API key
#from api_keys import weather_api_key

In [7]:
# create a dictionary for establishing dataframe
weather_dict = {
    "City":city_two,
    "Cloudiness":cloudinesses,
    "Country":countries,
    "Date":dates,
    "Humidity":humidities,
    "Lat":lats,
    "Lng":lngs,
    "Max Temp":max_temps,
    "Wind Speed":wind_speeds
}

# establish dataframe
weather_dataframe = pd.DataFrame(weather_dict)
weather_dataframe

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,vetluga,45,RU,1634459855,68,57.8557,45.7810,48.67,13.02
1,hobart,40,AU,1634459855,71,-42.8794,147.3294,56.12,4.61
2,pacific grove,0,US,1634459856,88,38.4820,-90.7415,47.77,4.18
3,hermanus,67,ZA,1634459856,69,-34.4187,19.2345,57.90,11.23
4,punta arenas,82,PH,1634459857,82,8.9897,125.3400,82.36,0.60
...,...,...,...,...,...,...,...,...,...
578,pirmasens,81,DE,1634460088,75,49.2015,7.6053,49.75,2.42
579,kurchum,70,KZ,1634460088,25,48.5722,83.6542,58.44,3.20
580,ulladulla,68,AU,1634460088,72,-35.3500,150.4667,61.70,6.91
581,carutapera,12,BR,1634460089,85,-1.1950,-46.0200,78.75,6.44


In [11]:
# Convert array of JSONs into Pandas DataFrame
weather_dataframe = pd.DataFrame(weather_dict)

# Export the City_Data into a csv
weather_dataframe.to_csv("cities.csv",encoding="utf-8",index=False)

In [None]:
# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Generate Cities List
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

In [None]:
# set lists for the dataframe
city_two = []
cloudinesses = []
dates = []
humidities = []
lats = []
lngs = []
max_temps = []
wind_speeds = []
countries = []

# set initial count quantities for organization
count_one = 0
set_one = 1

# loops for creating dataframe columns
for city in cities:
    try:
        response = requests.get(query_url + city.replace(" ","&")).json()
        cloudinesses.append(response['clouds']['all'])
        countries.append(response['sys']['country'])
        dates.append(response['dt'])
        humidities.append(response['main']['humidity'])
        lats.append(response['coord']['lat'])
        lngs.append(response['coord']['lon'])
        max_temps.append(response['main']['temp_max'])
        wind_speeds.append(response['wind']['speed'])
        if count_one > 48:
            count_one = 1
            set_one += 1
            city_two.append(city)
        else:
            count_one += 1
            city_two.append(city)
        print(f"Processing Record {count_one} of Set {set_one} | {city}")
    except Exception:
        print("City not found. Skipping...")
print("------------------------------\nData Retrieval Complete\n------------------------------")

In [None]:
# Save config information
url = "http://api.openweathermap.org/data/2.5/weather?"

# Build and test the query
query_url = f"{url}appid={weather_api_key}&q=&units=imperial"


# Set lists for the dictionary
name = []
lat = []
lon = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Perform API Calls
#Variables to keep track of response number
record_num = 1
set_count = 1

# Run the API call for each city
print('''
-----------------------------
Beginning Data Retrieval     
-----------------------------''')

for city in cities:
    try:
        response = requests.get(query_url + cities).json()
        name.append(response['name'])
        lat.append(response['coord']['lat'])
        lon.append(response['coord']['lon'])
        max_temp.append(response['main']['max_temp'])
        humidity.append(response['main']['humidity'])
        cloudiness.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        country.append(response['sys']['country'])
        date.append(response['dt'])
        if record_count > 49:
            record_count = 1
            set_count += 1
            name.append(city)
        else:
            record_count += 1
            name.append(city)
        print(f"Processing Record {record_count} of Set {set_count} | {city}")
    except:
        print("City not found. Skipping...")
        
        
print('''
-----------------------------
Data Retrieval Complete      
-----------------------------''')

In [None]:
# Perform API Calls
#Variables to keep track of response number
record_num = 0
record_set = 1

# Run the API call for each city
print('''
-----------------------------
Beginning Data Retrieval     
-----------------------------''')

for city in cities:
#for index, row in cities_weather_df.iterrows():

    # create the query url
    query_url = f"{url}appid={weather_api_key}&units=imperial&q="
    query_url = url + row['City'] + '&APPID=' + weather_api_key

    # Make the API request and store the json response
    response = requests.get(query_url).json()

    # Exception for city not found
    try:
        # Message to display progress during retrieval
        print(f'Processing Record {record_num} for Set {record_set} | {response["name"]}')

        #input city data into dataframe
        cities_weather_df.loc[index, 'Latitude'] = response['coord']['lat']
        cities_weather_df.loc[index, 'Longitude'] = response['coord']['lon']
        cities_weather_df.loc[index, 'Max Temp'] = response['main']['max_temp']
        cities_weather_df.loc[index, 'Humidity'] = response['main']['humidity']
        cities_weather_df.loc[index, 'Cloudiness'] = response['clouds']['all']
        cities_weather_df.loc[index, 'Wind Speed'] = response['wind']['speed']
        cities_weather_df.loc[index, 'Country'] = response['sys']['country']
        cities_weather_df.loc[index, 'Date'] = response['dt']
    
        record_num += 1
    except Exception:
        print("City not found. Skipping...")

#    if record_num ==51:
        record_set += 1
        record_num = 1
        time.sleep(60)

print('''
-----------------------------
Data Retrieval Complete      
-----------------------------''')

In [None]:
# Make the dataframe
cities_weather_df = pd.DataFrame({'City': cities, 'Latitude': "", 'Longitude': "", 'Max Temp': "", 'Humidity':"", 'Cloudiness': "", 'Wind Speed': "", 'Country': "", 'Date': "" })
cities_weather_df

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [None]:
# Set lists for Dataframe
city = []
lat = []
lon = []
temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# clean the df to remove null cities
cities_weather_df = cities_weather_df.dropna()


In [None]:
# save the df to csv
cities_weather_df.to_csv()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression