In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import os
from dotenv import load_dotenv

# load the .env environment variables into the notebook
load_dotenv()

#Get the API key from the environment variable as Python variable
weather_api_key = os.getenv("weather_api_key")

temp_units = "imperial"

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# define url
query_url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units={temp_units}&q="

# define count of pass throughs
count_one = 0
set_one = 1

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
# If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

625

In [3]:
# set lists for the dataframe
city_name = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# set initial count quantities for organization
record_count = 0
set_count = 1

print('''
-----------------------------
Beginning Data Retrieval     
-----------------------------''')

# loops for creating dataframe columns
for city in cities:
    try:
        response = requests.get(query_url + city.replace(" ","&")).json()
        lat.append(response['coord']['lat'])
        lng.append(response['coord']['lon'])
        max_temp.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        cloudiness.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        country.append(response['sys']['country'])
        date.append(response['dt'])
        if record_count > 48:
            record_count = 1
            set_count += 1
            city_name.append(city)
        else:
            record_count += 1
            city_name.append(city)
        print(f"Processing Record {record_count} of Set {set_count} | {city}")
    except Exception:
        print("City not found. Skipping...")

print('''
-----------------------------
Data Retrieval Complete      
-----------------------------''')


-----------------------------
Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | tiznit
Processing Record 2 of Set 1 | sokolo
Processing Record 3 of Set 1 | maldonado
Processing Record 4 of Set 1 | kalabo
Processing Record 5 of Set 1 | ushuaia
Processing Record 6 of Set 1 | touros
Processing Record 7 of Set 1 | rikitea
Processing Record 8 of Set 1 | san policarpo
City not found. Skipping...
City not found. Skipping...
City not found. Skipping...
Processing Record 9 of Set 1 | ancud
Processing Record 10 of Set 1 | lompoc
Processing Record 11 of Set 1 | atuona
Processing Record 12 of Set 1 | pandan niog
Processing Record 13 of Set 1 | raga
Processing Record 14 of Set 1 | torbay
Processing Record 15 of Set 1 | mataura
Processing Record 16 of Set 1 | sorland
Processing Record 17 of Set 1 | chapais
Processing Record 18 of Set 1 | ribeira grande
Processing Record 19 of Set 1 | hermanus
Processing Record 20 of Set 1 | rio grande
Processing Record 21 of

Processing Record 37 of Set 4 | kaitangata
Processing Record 38 of Set 4 | bereda
City not found. Skipping...
Processing Record 39 of Set 4 | hofn
Processing Record 40 of Set 4 | puerto colombia
City not found. Skipping...
Processing Record 41 of Set 4 | bathsheba
Processing Record 42 of Set 4 | hambantota
Processing Record 43 of Set 4 | mahebourg
Processing Record 44 of Set 4 | hasaki
Processing Record 45 of Set 4 | wilmington
Processing Record 46 of Set 4 | grindavik
Processing Record 47 of Set 4 | hay river
Processing Record 48 of Set 4 | kalmunai
Processing Record 49 of Set 4 | yakeshi
Processing Record 1 of Set 5 | rawson
Processing Record 2 of Set 5 | beloha
City not found. Skipping...
Processing Record 3 of Set 5 | yeppoon
Processing Record 4 of Set 5 | kangaatsiaq
Processing Record 5 of Set 5 | tomohon
City not found. Skipping...
Processing Record 6 of Set 5 | antalaha
Processing Record 7 of Set 5 | monte patria
Processing Record 8 of Set 5 | chabua
Processing Record 9 of Set 5

Processing Record 23 of Set 8 | romny
Processing Record 24 of Set 8 | uyskoye
Processing Record 25 of Set 8 | ramgarh
Processing Record 26 of Set 8 | hokitika
Processing Record 27 of Set 8 | eirunepe
City not found. Skipping...
Processing Record 28 of Set 8 | arkhara
Processing Record 29 of Set 8 | nanticoke
Processing Record 30 of Set 8 | gat
Processing Record 31 of Set 8 | flinders
Processing Record 32 of Set 8 | young
Processing Record 33 of Set 8 | smithers
Processing Record 34 of Set 8 | takahagi
Processing Record 35 of Set 8 | danshui
Processing Record 36 of Set 8 | vyritsa
City not found. Skipping...
City not found. Skipping...
Processing Record 37 of Set 8 | tucurui
Processing Record 38 of Set 8 | phichit
Processing Record 39 of Set 8 | gizo
Processing Record 40 of Set 8 | vostok
Processing Record 41 of Set 8 | illapel
Processing Record 42 of Set 8 | athabasca
Processing Record 43 of Set 8 | beyneu
City not found. Skipping...
Processing Record 44 of Set 8 | termiz
Processing Re

Processing Record 13 of Set 12 | labuhan
Processing Record 14 of Set 12 | mareeba
City not found. Skipping...
Processing Record 15 of Set 12 | sale
Processing Record 16 of Set 12 | kenai
Processing Record 17 of Set 12 | tomatlan
City not found. Skipping...

-----------------------------
Data Retrieval Complete      
-----------------------------


In [None]:
# Import API key
#from api_keys import weather_api_key

In [4]:
# create a dictionary for establishing dataframe
weather_dict = {
    "City":city_name,
    "Lat":lat,
    "Lng":lng,
    "Max Temp":max_temp,
    "Humidity":humidity,
    "Cloudiness":cloudiness,
    "Wind Speed":wind_speed,
    "Country":country,
    "Date":date,
}

# establish dataframe
weather_df = pd.DataFrame(weather_dict)
weather_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,tiznit,29.5833,-9.5000,79.95,19,1,0.51,MA,1634462485
1,sokolo,14.7328,-6.1219,90.27,28,93,2.98,ML,1634462486
2,maldonado,-34.6667,-54.9167,47.23,76,0,4.72,UY,1634462486
3,kalabo,-14.9700,22.6814,94.37,25,5,7.40,ZM,1634462487
4,ushuaia,-54.8000,-68.3000,33.46,86,75,12.66,AR,1634462250
...,...,...,...,...,...,...,...,...,...
551,labuhan,-6.8844,112.2051,87.37,64,75,13.91,ID,1634462734
552,mareeba,-17.0000,145.4333,76.48,83,62,6.53,AU,1634462734
553,sale,34.0389,-6.8166,71.62,94,100,0.00,MA,1634462533
554,kenai,60.5544,-151.2583,34.14,92,90,8.05,US,1634462735


In [5]:
# Export the City_Data into a csv
weather_df.to_csv("raw_cities.csv",encoding="utf-8",index=False)

In [6]:
# clean the df to remove null cities
weather_df = weather_df.dropna()

In [7]:
# Inspect the data and remove the cities where the humidity > 100%.
weather_df['Humidity'].max()

100

City       Lat       Lng        Max Temp  Cloudiness  Wind Speed  Country  Date      
zhigalovo   54.8097   105.1578  36.82     42          5.93        RU       1634462700    1
hasaki      35.7333   140.8333  57.60     100         26.66       JP       1634462565    1
hithadhoo  -0.6000    73.0833   83.16     53          17.85       MV       1634462522    1
hirara      24.8000   125.2833  75.47     75          20.71       JP       1634462717    1
hilo        19.7297  -155.0900  70.79     40          5.75        US       1634462407    1
                                                                                        ..
pevek       69.7008   170.3133  12.56     94          2.26        RU       1634462393    1
phan rang   19.5539   99.7405   81.81     97          2.77        TH       1634462662    1
phichit     16.2500   100.4167  77.14     100         5.46        TH       1634462657    1
piacabucu  -10.4056  -36.4344   77.22     98          9.80        BR       1634462621    1
aasi

In [None]:
# Perform API Calls
#Variables to keep track of response number
record_num = 0
record_set = 1

# Run the API call for each city
print('''
-----------------------------
Beginning Data Retrieval     
-----------------------------''')

for city in cities:
#for index, row in cities_weather_df.iterrows():

    # create the query url
    query_url = f"{url}appid={weather_api_key}&units=imperial&q="
    query_url = url + row['City'] + '&APPID=' + weather_api_key

    # Make the API request and store the json response
    response = requests.get(query_url).json()

    # Exception for city not found
    try:
        # Message to display progress during retrieval
        print(f'Processing Record {record_num} for Set {record_set} | {response["name"]}')

        #input city data into dataframe
        cities_weather_df.loc[index, 'Latitude'] = response['coord']['lat']
        cities_weather_df.loc[index, 'Longitude'] = response['coord']['lon']
        cities_weather_df.loc[index, 'Max Temp'] = response['main']['max_temp']
        cities_weather_df.loc[index, 'Humidity'] = response['main']['humidity']
        cities_weather_df.loc[index, 'Cloudiness'] = response['clouds']['all']
        cities_weather_df.loc[index, 'Wind Speed'] = response['wind']['speed']
        cities_weather_df.loc[index, 'Country'] = response['sys']['country']
        cities_weather_df.loc[index, 'Date'] = response['dt']
    
        record_num += 1
    except Exception:
        print("City not found. Skipping...")

#    if record_num ==51:
        record_set += 1
        record_num = 1
        time.sleep(60)

print('''
-----------------------------
Data Retrieval Complete      
-----------------------------''')

In [None]:
# Make the dataframe
cities_weather_df = pd.DataFrame({'City': cities, 'Latitude': "", 'Longitude': "", 'Max Temp': "", 'Humidity':"", 'Cloudiness': "", 'Wind Speed': "", 'Country': "", 'Date': "" })
cities_weather_df

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [None]:
# Set lists for Dataframe
city = []
lat = []
lon = []
temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# clean the df to remove null cities
cities_weather_df = cities_weather_df.dropna()


In [None]:
# save the df to csv
cities_weather_df.to_csv()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression