In [28]:
# Import the dependencies.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [29]:
# Draw 1,500 random latitudes in [-90, 90) and 1,500 random longitudes in
# [-180, 180), each sampled from a uniform distribution.
lats = np.random.uniform(-90.000, 90.000, 1500)
lngs = np.random.uniform(-180.000, 180.000, 1500)
# Pair the two arrays element-wise; zip() returns a lazy, single-use iterator.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x7fc599a6fd20>

The zip object packs each pair of lats and lngs having the same index in their respective array into a tuple. If there are 1,500 latitudes and longitudes, there will be 1,500 tuples of paired latitudes and longitudes, where each latitude and longitude in a tuple can be accessed by the index of 0 and 1, respectively.

In [30]:
# Add the latitudes and longitudes to a list of (lat, lng) tuples.
# The list is built from a fresh zip() of the source arrays rather than from
# the single-use `lat_lngs` iterator, so re-running this cell always yields
# the full set of pairs instead of an empty list.
coordinates = list(zip(lats, lngs))

**NOTE**: A zip object is an iterator, so it can be consumed only once: after its pairs have been read (for example, by `list()`), it is exhausted and yields nothing on a second pass. Make sure the latitude and longitude pairs are stored in the `coordinates` list before moving on.

In [31]:
# Use the citipy module to determine city based on latitude and longitude.
from citipy import citipy

- Iterate through the `coordinates` list of latitude and longitude tuples.
- Call `citipy.nearest_city()` and, inside the parentheses, pass the latitude and longitude in this format: `coordinate[0]`, `coordinate[1]`.
- To print the city name, access the `city_name` attribute of the object returned by `nearest_city()`.
- To print the country code, access the `country_code` attribute of the object returned by `nearest_city()`.

In [32]:
# Print the nearest city name and country code for every coordinate pair.
for lat, lng in coordinates:
    # Look the city up once and reuse the result for both fields instead of
    # repeating the same nearest-city search for each attribute.
    nearest = citipy.nearest_city(lat, lng)
    print(nearest.city_name, nearest.country_code)

rawson ar
punta arenas cl
hermanus za
nanortalik gl
tura ru
taolanaro mg
campos novos br
mataura pf
hermanus za
tuatapere nz
torres br
talnakh ru
beringovskiy ru
saint-philippe re
mataura pf
mahebourg mu
domoni km
douentza ml
chiknayakanhalli in
atuona pf
hermanus za
torbay ca
punta arenas cl
georgetown sh
merauke id
huarmey pe
kapaa us
taolanaro mg
touros br
vaini to
barrow us
port lincoln au
ilulissat gl
coihaique cl
pittsfield us
puerto ayora ec
puerto ayora ec
yellowknife ca
punta arenas cl
tidore id
ust-bolsheretsk ru
vaini to
sume br
shar kz
san patricio mx
qaanaaq gl
upernavik gl
ihosy mg
saleaula ws
anadyr ru
atuona pf
warqla dz
tiksi ru
rikitea pf
punta arenas cl
zhigansk ru
cape town za
touros br
punta arenas cl
jinka et
aklavik ca
torbay ca
kargasok ru
tasiilaq gl
sangar ru
ponta do sol cv
vaini to
puerto ayora ec
cayenne gf
ahipara nz
yulara au
mataura pf
sharanga ru
tateyama jp
richards bay za
ponta do sol cv
ambon id
arraial do cabo br
caravelas br
san miguel pa
kahului u

In [33]:
# Create a list for holding the cities.
cities = []
# Companion set of the names already stored in `cities`; membership tests on
# a set take constant time, whereas scanning the list grows with its length.
seen_cities = set()

# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # Keep only the first occurrence of each city name so that `cities`
    # stays unique while preserving the order in which cities were found.
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count.
len(cities)

633

- We create a `cities` list to store city names.
- We iterate through the `coordinates`, as in our practice, and retrieve the nearest city using the latitude and longitude pair.
- We add a decision statement with the logical operator `not in` to determine whether the found city has already been added to `cities`. If not, then we'll use the `append()` function to add it. We are doing this because several of the 1,500 latitude and longitude pairs can share the same nearest city, which would produce duplicate entries, and we want to be sure we capture only the unique cities.

**Note**: For each latitude and longitude pair, the citipy module finds the nearest city that has a population of 500 or more.

**Finding**: When you run the code block, you should get slightly more than 500 unique cities. If you get fewer than 500, increase the `size` argument of the `np.random.uniform()` calls.

1. Import our dependencies and initialize counters and an empty list that will hold the weather data.
2. Loop through the cities list.
3. Group the cities in sets of 50 to log the process as we find the weather data for each city.
   1. Two counters will be needed here: one to log the city count from 1 to 50, and another for the sets.
4. Build the `city_url` or endpoint for each city.
5. Log the URL and the record and set numbers.
6. Make an API request for each city.
7. Parse the JSON weather data for the following:
   1. City, country, and date
   2.  Latitude and longitude
   3.  Maximum temperature
   4.  Humidity
   5.  Cloudiness
   6.  Wind speed
8.  Add the data to a list in a dictionary format and then convert the list to a DataFrame.

In [34]:
# Import the time library and the datetime module from the datetime library 
import time
from datetime import datetime

# Import the requests library.
import requests

# Import the API key.
from config import weather_api_key

In [35]:
# Base URL for the OpenWeatherMap current-weather endpoint. Results are
# requested in imperial units, and the API key travels in the query string, so
# the URL uses HTTPS to keep the key from being sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [36]:
# Accumulator that will receive one weather record (a dict) per city.
city_data = list()

# Announce the start of the retrieval log.
print("Beginning Data Retrieval     ", "-----------------------------", sep="\n")

# Logging counters: position within the current set, and the set number.
record_count, set_count = 1, 1

Beginning Data Retrieval     
-----------------------------


In [37]:
# Start from a clean slate so that re-running this cell never appends
# duplicate rows to `city_data` or continues stale record/set numbering.
city_data = []
record_count = 1
set_count = 1

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        # Pause for a minute between sets (presumably to stay within the
        # API's request rate limit -- confirm against the account's plan).
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout keeps a single
        # unresponsive request from stalling the whole loop.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a UTC 'YYYY-MM-DD HH:MM:SS' string.
        # time.gmtime() is used because datetime.utcfromtimestamp() is
        # deprecated in recent Python versions.
        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather["dt"]))
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Network-level problems (timeout, connection error, unreadable body) are
    # reported as such instead of being mislabelled as a missing city.
    except requests.exceptions.RequestException as error:
        print(f"Request failed ({type(error).__name__}). Skipping...")
    # A response without the expected fields means the API did not return
    # weather data for this city, so skip it. Only these data errors are
    # caught; anything else (e.g. KeyboardInterrupt) propagates.
    except (KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing Record 1 of Set 1 | rawson
Processing Record 2 of Set 1 | punta arenas
Processing Record 3 of Set 1 | hermanus
Processing Record 4 of Set 1 | nanortalik
Processing Record 5 of Set 1 | tura
Processing Record 6 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 7 of Set 1 | campos novos
Processing Record 8 of Set 1 | mataura
Processing Record 9 of Set 1 | tuatapere
Processing Record 10 of Set 1 | torres
Processing Record 11 of Set 1 | talnakh
Processing Record 12 of Set 1 | beringovskiy
Processing Record 13 of Set 1 | saint-philippe
Processing Record 14 of Set 1 | mahebourg
Processing Record 15 of Set 1 | domoni
Processing Record 16 of Set 1 | douentza
Processing Record 17 of Set 1 | chiknayakanhalli
Processing Record 18 of Set 1 | atuona
Processing Record 19 of Set 1 | torbay
Processing Record 20 of Set 1 | georgetown
Processing Record 21 of Set 1 | merauke
Processing Record 22 of Set 1 | huarmey
Processing Record 23 of Set 1 | kapaa
Processing Record 24 of Se

Let's break down the code so we understand fully before continuing:

- We create the `for` loop with the `enumerate()` function and reference the index and the city in the list.
- In the conditional statement, we check if the remainder of the index divided by 50 is equal to 0 and if the index is greater than or equal to 50. If the statement is true, then the `set_count` is incremented by 1, the `record_count` is reset to 1, and the loop pauses for 60 seconds with `time.sleep(60)` before starting the next set.
- After the conditional statement, still inside the loop, we create the URL endpoint for each city, as before. However, we are replacing the blank spaces in the city name with plus signs, using `city.replace(" ","+")`, before concatenating it to the URL. This will find the corresponding weather data for the full city name instead of for only the first part of the city name.
- Also, we add a print statement that tells us the record count and set count, and the city that is being processed.
- Then we add one to the record count before the next city is processed.

In [38]:
# Build a DataFrame from the list of per-city dictionaries (one row per
# record) and preview its first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Rawson,-43.3002,-65.1023,59.27,60,69,10.89,AR,2022-01-19 06:21:10
1,Punta Arenas,-53.15,-70.9167,46.51,87,20,6.91,CL,2022-01-19 06:16:44
2,Hermanus,-34.4187,19.2345,65.8,65,6,3.85,ZA,2022-01-19 06:21:10
3,Nanortalik,60.1432,-45.2371,33.12,89,100,20.85,GL,2022-01-19 06:21:10
4,Tura,25.5198,90.2201,73.65,38,31,4.34,IN,2022-01-19 06:21:11
5,Campos Novos,-27.4017,-51.225,61.39,96,28,3.76,BR,2022-01-19 06:21:11
6,Mataura,-46.1927,168.8643,55.47,96,100,7.45,NZ,2022-01-19 06:21:11
7,Tuatapere,-46.1333,167.6833,55.81,66,82,16.37,NZ,2022-01-19 06:21:12
8,Torres,-29.3353,-49.7269,75.83,87,65,2.68,BR,2022-01-19 06:21:12
9,Talnakh,69.4865,88.3972,5.49,94,100,17.83,RU,2022-01-19 06:21:12


In [39]:
# Reorder the columns so the identifying fields (city, country, date) come
# first, followed by the coordinates and then the weather measurements.
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Rawson,AR,2022-01-19 06:21:10,-43.3002,-65.1023,59.27,60,69,10.89
1,Punta Arenas,CL,2022-01-19 06:16:44,-53.1500,-70.9167,46.51,87,20,6.91
2,Hermanus,ZA,2022-01-19 06:21:10,-34.4187,19.2345,65.80,65,6,3.85
3,Nanortalik,GL,2022-01-19 06:21:10,60.1432,-45.2371,33.12,89,100,20.85
4,Tura,IN,2022-01-19 06:21:11,25.5198,90.2201,73.65,38,31,4.34
...,...,...,...,...,...,...,...,...,...
580,Lima,PE,2022-01-19 06:36:12,-12.0432,-77.0282,70.05,78,0,8.05
581,Whitianga,NZ,2022-01-19 06:36:13,-36.8333,175.7000,71.11,69,6,10.13
582,Esna,EG,2022-01-19 06:36:13,25.2934,32.5540,44.62,38,0,9.69
583,Mailsi,PK,2022-01-19 06:36:13,29.8003,72.1758,72.63,22,85,7.67


In [41]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Make sure the target directory exists first; to_csv() does not create
# missing parent directories and would otherwise raise an error.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the city data into a CSV, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")