In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from citipy import citipy

In [2]:
# Draw 1500 random latitudes and 1500 random longitudes, each sampled
# uniformly over its full range, and keep both arrays in variables.
# zip then pairs the values index-by-index into (latitude, longitude) tuples.

lats = np.random.uniform(low=-90.000, high=90.000, size=1500)

lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)

lats_lngs = zip(lats, lngs)

lats_lngs

# The output of this cell will be a zip object in memory

<zip at 0x7f86a8b8d0a0>

# Notes on above code block

- The zip function packs each pair of lats and lngs with the same index into a tuple
    - Here, that produces 1500 tuples of paired latitudes and longitudes
        - each latitude and longitude in a tuple can be accessed by the index of 0 and 1 respectively
    - cf the jupyter notebook API_Practice.ipynb for practice packing and unpacking zipped tuples

In [3]:
# Materialise the zipped pairs into a list of (latitude, longitude) tuples,
# so the random coordinate sets only have to be generated once.
# NOTE: a zip object is a one-shot iterator -- it can only be unpacked once;
# after a single pass it is exhausted and yields nothing more.

coordinates = [*lats_lngs]

In [4]:
# Build the list of unique cities nearest to the random coordinates.
# `cities` keeps the names in first-seen order; `seen_cities` mirrors it as a
# set so the duplicate check is a constant-time lookup instead of a scan of
# the whole list on every iteration.

cities = []

seen_cities = set()

# identifying nearest city for each latitude and longitude combination

for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name

    # Several coordinates can resolve to the same nearest city,
    # and we only want each city once.

    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# print the city count to confirm sufficient count:
# Note that citipy finds the nearest city to the latitude and longitude pair with a population of 500+

print(len(cities))

# Spot-check one entry (assumes at least four unique cities were found).

print(cities[3])

628
san lawrenz


# Getting Started with OpenWeather API

## JavaScript Object Notation Format for API Data

- Retrieving data from a website requires a request, which returns data in a text format (not tsv or csv)
- JavaScript Object Notation (JSON)
    - data within this sort of object opens and closes with curly braces {}, like a Python Dictionary
    - inside the JSON object, is a collection of dictionaries and arrays.

- To request JSON data over the internet, need the Requests Library in Python 
    - Anaconda comes with version 2.22 or later installed.
    
[Doc for Requests Library](https://requests.kennethreitz.org/en/master/)

[Quickstart Doc](https://requests.kennethreitz.org/en/master/user/quickstart/#make-a-request)

# Get the City Weather Data

## Pseudocoding 

1. Import the dependencies and initialize counters and an empty list that will hold weather data
2. Loop through the cities list
3. Group the cities in sets of 50 to log the process as we find weather data for each city
    - two counters will be needed: one to log the city count from 1-50 and the other for the sets
4. Build the city_url or endpoint for each city
5. log the URL and the record and set numbers
6. Make an API request for each city
7. Parse the JSON weather data for the following: 
    - City, country, and date
    - Lat and long
    - Max temp
    - Humidity
    - Cloudiness 
    - Wind speed
8. Add the data to a list in a dictionary format and then convert to a DataFrame

In [5]:
# importing the dependencies needed for the API requests
# and building the base URL for the OpenWeatherMap API with the API key
# (the empty city_data list, the logging print statements and the
# record/set counters are set up in the next cell)

import requests

from config import weather_api_key

import time
from datetime import datetime

# Base endpoint for the OpenWeatherMap current-weather request, with the API
# key attached as the `appid` query parameter; the city query is appended later.
url = f"https://api.openweathermap.org/data/2.5/weather?&appid={weather_api_key}"

#city_weather = url + "&q=" + "ushuaia"

#print(city_weather)

In [6]:
# Retrieve the current weather for every city in `cities` and collect one
# dictionary per successfully parsed city in `city_data`.

# Seconds to wait on a single API request before giving up, so one stalled
# connection cannot hang the whole retrieval run.
REQUEST_TIMEOUT = 10

# creating the empty list to hold weather data

city_data = []

# Print the beginning of the logging

print("Beginning Data Retrieval    ")
print("----------------------------")

# Create the counters. Both start at 1 so the first logged record reads
# "Record 1 of Set 1".

record_count = 1

set_count = 1

# Iterate through the list of cities and build the URL for each city,
# grouping the log output in sets of 50.
# enumerate is used because it yields both the index (needed to detect every
# 50th city) and the city name (needed to build the URL).
# enumerate doc: https://docs.python.org/3/library/functions.html#enumerate

for i, city in enumerate(cities):

    # Every 50 cities (but not before the very first one): start a new set,
    # restart the record counter and pause for 60 seconds. Per the original
    # note, the free OpenWeatherMap tier allows 60 calls/min, and the pause
    # prevents time-out errors.
    if (i % 50 == 0 and i >= 50):

        set_count += 1

        record_count = 1

        time.sleep(60)

    # Create endpoint URL with each city (spaces in the name become "+")

    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the record and set numbers and the city

    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add one to the record_count

    record_count += 1

    # Run the API request for this city; any failure skips the city.

    try:
        # Request the data and decode the JSON body. The timeout keeps a
        # stalled connection from blocking the loop forever.
        city_weather = requests.get(city_url, timeout=REQUEST_TIMEOUT).json()

        # Parse the needed data. A response without these keys raises
        # KeyError, which is handled below as "city not found".

        city_lat = city_weather['coord']['lat']

        city_lng = city_weather['coord']['lon']

        city_max_temp = city_weather['main']['temp_max']

        city_humidity = city_weather['main']['humidity']

        city_clouds = city_weather['clouds']['all']

        city_wind = city_weather['wind']['speed']

        city_country = city_weather['sys']['country']

        # Convert the Unix timestamp to a UTC date string. time.gmtime is used
        # instead of the deprecated datetime.utcfromtimestamp; the formatted
        # result is the same.

        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather['dt']))

        # Append city information to the city_data list

        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city.
    # `except Exception` (rather than a bare `except:`) keeps the best-effort
    # behaviour for request/parsing failures but still lets KeyboardInterrupt
    # and SystemExit stop the run.

    except Exception:

        print("City not found. Skipping...")

# Indicate that Data Loading is complete:

print('-------------------------------')

print('Data Retrieval Complete')

print('-------------------------------')
    
    

Beginning Data Retrieval    
----------------------------
Processing Record 1 of Set 1 | fort nelson
Processing Record 2 of Set 1 | hithadhoo
Processing Record 3 of Set 1 | samusu
City not found. Skipping...
Processing Record 4 of Set 1 | san lawrenz
Processing Record 5 of Set 1 | teya
Processing Record 6 of Set 1 | hovd
Processing Record 7 of Set 1 | mataura
Processing Record 8 of Set 1 | charters towers
Processing Record 9 of Set 1 | wajima
Processing Record 10 of Set 1 | ardistan
City not found. Skipping...
Processing Record 11 of Set 1 | ozernovskiy
Processing Record 12 of Set 1 | hilo
Processing Record 13 of Set 1 | busselton
Processing Record 14 of Set 1 | jijiga
Processing Record 15 of Set 1 | port augusta
Processing Record 16 of Set 1 | kapaa
Processing Record 17 of Set 1 | holme
Processing Record 18 of Set 1 | ilulissat
Processing Record 19 of Set 1 | faanui
Processing Record 20 of Set 1 | illoqqortoormiut
City not found. Skipping...
Processing Record 21 of Set 1 | esperance
P

Processing Record 34 of Set 4 | galveston
Processing Record 35 of Set 4 | naberera
Processing Record 36 of Set 4 | mrirt
City not found. Skipping...
Processing Record 37 of Set 4 | klaksvik
Processing Record 38 of Set 4 | comodoro rivadavia
Processing Record 39 of Set 4 | ahuimanu
Processing Record 40 of Set 4 | padang
Processing Record 41 of Set 4 | itarema
Processing Record 42 of Set 4 | mawlaik
Processing Record 43 of Set 4 | kavieng
Processing Record 44 of Set 4 | angra
City not found. Skipping...
Processing Record 45 of Set 4 | rawson
Processing Record 46 of Set 4 | sao jose da coroa grande
Processing Record 47 of Set 4 | buchanan
Processing Record 48 of Set 4 | verkhniy lomov
Processing Record 49 of Set 4 | bellevue
Processing Record 50 of Set 4 | leningradskiy
Processing Record 1 of Set 5 | koumac
Processing Record 2 of Set 5 | sioux lookout
Processing Record 3 of Set 5 | taloqan
Processing Record 4 of Set 5 | vaitupu
City not found. Skipping...
Processing Record 5 of Set 5 | mo

Processing Record 22 of Set 8 | juneau
Processing Record 23 of Set 8 | santiago
Processing Record 24 of Set 8 | talnakh
Processing Record 25 of Set 8 | podgornoye
Processing Record 26 of Set 8 | san juan
Processing Record 27 of Set 8 | ostrovnoy
Processing Record 28 of Set 8 | homer
Processing Record 29 of Set 8 | sarankhola
Processing Record 30 of Set 8 | shelabolikha
Processing Record 31 of Set 8 | venice
Processing Record 32 of Set 8 | senador guiomard
Processing Record 33 of Set 8 | nunoa
Processing Record 34 of Set 8 | barawe
City not found. Skipping...
Processing Record 35 of Set 8 | attawapiskat
City not found. Skipping...
Processing Record 36 of Set 8 | thaton
Processing Record 37 of Set 8 | zaozerne
Processing Record 38 of Set 8 | tuensang
Processing Record 39 of Set 8 | flin flon
Processing Record 40 of Set 8 | penzance
Processing Record 41 of Set 8 | rivadavia
Processing Record 42 of Set 8 | mattru
Processing Record 43 of Set 8 | cidreira
Processing Record 44 of Set 8 | hvol

Processing Record 6 of Set 12 | alofi
Processing Record 7 of Set 12 | sitangkai
Processing Record 8 of Set 12 | tazovskiy
Processing Record 9 of Set 12 | nago
Processing Record 10 of Set 12 | mao
Processing Record 11 of Set 12 | salym
Processing Record 12 of Set 12 | taburi
City not found. Skipping...
Processing Record 13 of Set 12 | murgab
Processing Record 14 of Set 12 | lompoc
Processing Record 15 of Set 12 | mamburao
Processing Record 16 of Set 12 | jardim
Processing Record 17 of Set 12 | muisne
Processing Record 18 of Set 12 | seydi
Processing Record 19 of Set 12 | harper
Processing Record 20 of Set 12 | howard springs
Processing Record 21 of Set 12 | fonte boa
Processing Record 22 of Set 12 | dubenskiy
City not found. Skipping...
Processing Record 23 of Set 12 | viedma
Processing Record 24 of Set 12 | huarmey
Processing Record 25 of Set 12 | hella
Processing Record 26 of Set 12 | kinshasa
Processing Record 27 of Set 12 | bagdarin
Processing Record 28 of Set 12 | krasnoarmeysk
Pro

In [7]:
#print(city_url)

In [8]:
# city_weather = requests.get(city_url).json()
# print(city_weather)

# Try and Except block

- help you try a block of code, if it throws an error, it goes to the except block
- The try block is the code you would want to run. 
    - the setup is like a conditional statement: the body of each block is indented
    - you can tell the except block the exact error 
    - good idea to write a print statement to communicate you've received an error
    - then add a continue so that it goes back to the loop.
    
[Doc for try-except](https://docs.python.org/3.7/tutorial/errors.html)

In [9]:
# Turn the list of per-city dictionaries collected by the API requests above
# into a pandas DataFrame, then preview its first ten rows.

city_data_df = pd.DataFrame(data=city_data)

city_data_df.iloc[:10]

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Fort Nelson,58.8053,-122.7002,295.92,38,75,2.57,CA,2022-07-14 21:53:37
1,Hithadhoo,-0.6,73.0833,301.53,75,100,6.25,MV,2022-07-14 21:53:38
2,San Lawrenz,36.0553,14.204,298.63,78,0,2.2,MT,2022-07-14 21:53:38
3,Teya,60.3778,92.6267,282.28,86,100,1.33,RU,2022-07-14 21:53:39
4,Hovd,48.0056,91.6419,284.45,59,44,1.46,MN,2022-07-14 21:53:39
5,Mataura,-46.1927,168.8643,276.75,99,100,1.22,NZ,2022-07-14 21:53:40
6,Charters Towers,-20.1,146.2667,281.28,56,19,4.57,AU,2022-07-14 21:53:40
7,Wajima,37.4,136.9,297.84,88,85,7.49,JP,2022-07-14 21:53:41
8,Ozernovskiy,51.5,156.5167,284.46,99,100,1.77,RU,2022-07-14 21:53:41
9,Hilo,19.7297,-155.09,303.31,79,100,2.57,US,2022-07-14 21:52:35


In [13]:
# Reorder the columns so the identifying fields come first:
# City, Country, Date, Lat, Lng, Max Temp, Humidity, Cloudiness, Wind Speed

new_col_order = [
    "City",
    "Country",
    "Date",
    "Lat",
    "Lng",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
]

city_data_ordered = city_data_df.loc[:, new_col_order]

city_data_ordered.iloc[:10]

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Fort Nelson,CA,2022-07-14 21:53:37,58.8053,-122.7002,295.92,38,75,2.57
1,Hithadhoo,MV,2022-07-14 21:53:38,-0.6,73.0833,301.53,75,100,6.25
2,San Lawrenz,MT,2022-07-14 21:53:38,36.0553,14.204,298.63,78,0,2.2
3,Teya,RU,2022-07-14 21:53:39,60.3778,92.6267,282.28,86,100,1.33
4,Hovd,MN,2022-07-14 21:53:39,48.0056,91.6419,284.45,59,44,1.46
5,Mataura,NZ,2022-07-14 21:53:40,-46.1927,168.8643,276.75,99,100,1.22
6,Charters Towers,AU,2022-07-14 21:53:40,-20.1,146.2667,281.28,56,19,4.57
7,Wajima,JP,2022-07-14 21:53:41,37.4,136.9,297.84,88,85,7.49
8,Ozernovskiy,RU,2022-07-14 21:53:41,51.5,156.5167,284.46,99,100,1.77
9,Hilo,US,2022-07-14 21:52:35,19.7297,-155.09,303.31,79,100,2.57


In [14]:
# Save the reordered DataFrame to a CSV file inside the weather_data folder.

output_data_file = "weather_data/cities.csv"

# to_csv does not create missing folders, so make sure the target folder
# exists before writing (this is a no-op when it is already there).

output_dir = os.path.dirname(output_data_file)

if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Export the city_data_ordered into a csv, labelling the index column City_ID

city_data_ordered.to_csv(output_data_file, index_label="City_ID")