In [35]:
#Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#Number of random coordinate pairs to generate
sample_count = 1500

#Draw the latitudes first and the longitudes second, each from a uniform
# distribution spanning the full valid range for that axis
lats = np.random.uniform(-90.000, 90.000, sample_count)
lngs = np.random.uniform(-180.000, 180.000, sample_count)

#Pair the values up by position: element i of lats goes with element i of lngs.
#zip() returns a lazy iterator of (lat, lng) tuples, so displaying it shows
# only the zip object itself, not the pairs.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x1d78c751a48>

In [36]:
#Use the citipy module to determine city based on latitude and longitude
from citipy import citipy

#Add the latitudes and longitudes to a list.
#The pairs are rebuilt from the lats and lngs arrays instead of from the
# lat_lngs zip object: a zip object is a one-shot iterator, so running this
# cell a second time would otherwise give an empty list (and zero cities).
coordinates = list(zip(lats, lngs))

#Create a list for holding the cities, plus a set of the names already added
# so the uniqueness check is a hash lookup rather than a scan of the whole list
cities = []
seen_cities = set()

#Identify the nearest city for each latitude and longitude combination
for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name

    #If the city is unique, then we will add it to the cities list
    # (first occurrence wins, so the original discovery order is kept)
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

#Print the city count to confirm sufficient count
len(cities)

625

In [37]:
#Import dependencies
from citipy import citipy
import requests
from config import weather_api_key
from datetime import datetime

#Base URL for the OpenWeatherMap current-weather endpoint, requesting imperial units.
#HTTPS is used because the API key travels in the query string and would
# otherwise be sent over the network in plain text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [30]:
#Create an empty list to hold the weather data
city_data = []

#Seconds to wait for the API on a single request, so that one unresponsive
# call cannot hang the whole retrieval loop
REQUEST_TIMEOUT_SECONDS = 10

#Print the beginning of the logging
print("Beginning Data Retrieval     ")
print("-----------------------------")

#Create counters
record_count = 1
set_count = 1

#Loop through all the cities in our list, using enumerate() to get both the
# index and the city name
for i, city in enumerate(cities):

    #Group cities in sets of 50 for logging purposes: every time the index
    # reaches a non-zero multiple of 50, start a new set and restart the
    # record numbering
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    #Log the record and set numbers and the city. The request URL is
    # deliberately not logged because it contains the API key.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    #Add 1 to the record count before the next city is processed
    record_count += 1

    #Run an API request for the city
    try:
        #Pass the city name through params so requests URL-encodes it
        # (spaces and any special characters) and appends it to the query
        # string that url already carries
        response = requests.get(url, params={"q": city}, timeout=REQUEST_TIMEOUT_SECONDS)
        #Parse the JSON and retrieve data
        city_weather = response.json()
        #Parse out the needed data; a missing key means the API did not
        # return weather data for this city
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']
        #Convert the Unix timestamp to a 'YYYY-MM-DD HH:MM:SS' UTC string
        city_date = datetime.utcfromtimestamp(city_weather['dt']).strftime('%Y-%m-%d %H:%M:%S')
        #Append the city information into city_data list as one dictionary
        # per city
        city_data.append({'City': city.title(),
                          'Lat': city_lat,
                          'Lng': city_lng,
                          'Max Temp': city_max_temp,
                          'Humidity': city_humidity,
                          'Cloudiness': city_clouds,
                          'Wind Speed': city_wind,
                          'Country': city_country,
                          'Date': city_date})
    #Network-level problems (connection errors, timeouts, undecodable
    # responses): skip the city. Only the exception type is printed because
    # the full message can include the request URL and therefore the API key.
    except requests.exceptions.RequestException as exc:
        print(f"Request failed ({type(exc).__name__}). Skipping...")
    #The response did not contain the expected fields: skip the city.
    #Specific exception types are caught (instead of a bare except) so that
    # KeyboardInterrupt and genuine programming errors are not swallowed.
    except (KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

#Indicate that the Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | kloulklubed
Processing Record 2 of Set 1 | new norfolk
Processing Record 3 of Set 1 | guozhen
Processing Record 4 of Set 1 | ushuaia
Processing Record 5 of Set 1 | bay roberts
Processing Record 6 of Set 1 | albany
Processing Record 7 of Set 1 | cape town
Processing Record 8 of Set 1 | hilo
Processing Record 9 of Set 1 | busselton
Processing Record 10 of Set 1 | barrow
Processing Record 11 of Set 1 | sioux lookout
Processing Record 12 of Set 1 | nizhniy kuranakh
Processing Record 13 of Set 1 | aklavik
Processing Record 14 of Set 1 | kapaa
Processing Record 15 of Set 1 | bluff
Processing Record 16 of Set 1 | hermanus
Processing Record 17 of Set 1 | socorro
Processing Record 18 of Set 1 | pevek
Processing Record 19 of Set 1 | bilma
Processing Record 20 of Set 1 | butaritari
Processing Record 21 of Set 1 | buchanan
Processing Record 22 of Set 1 | nikolskoye
Processing Record 23 of Set 1 | rikitea
Pro

In [31]:
#Count the entries in city_data (one dictionary was appended per city whose
# weather data was retrieved)
len(city_data)

548

In [32]:
#Build a Pandas DataFrame from the list of per-city dictionaries
city_data_df = pd.DataFrame(data=city_data)
#Preview the first 10 rows
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Kloulklubed,7.0419,134.2556,81.86,72,100,1.99,PW,2022-08-11 19:17:55
1,New Norfolk,-42.7826,147.0587,43.72,80,92,1.32,AU,2022-08-11 19:17:55
2,Guozhen,34.3659,107.359,80.64,67,23,2.55,CN,2022-08-11 19:17:55
3,Ushuaia,-54.8,-68.3,38.82,87,40,2.3,AR,2022-08-11 19:17:55
4,Bay Roberts,47.5999,-53.2648,62.51,85,99,4.0,CA,2022-08-11 19:16:09
5,Albany,42.6001,-73.9662,86.18,47,85,1.99,US,2022-08-11 19:13:01
6,Cape Town,-33.9258,18.4232,59.88,79,40,10.36,ZA,2022-08-11 19:17:06
7,Hilo,19.7297,-155.09,87.28,90,100,5.75,US,2022-08-11 19:14:29
8,Busselton,-33.65,115.3333,49.24,85,1,9.75,AU,2022-08-11 19:14:35
9,Barrow,71.2906,-156.7887,44.62,87,100,20.71,US,2022-08-11 19:17:56


In [33]:
#Desired left-to-right column layout
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
#Keep every row and select the columns in the new order
city_data_df = city_data_df.loc[:, new_column_order]
#Preview the first 10 rows of the reordered frame
city_data_df.head(n=10)

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Kloulklubed,PW,2022-08-11 19:17:55,7.0419,134.2556,81.86,72,100,1.99
1,New Norfolk,AU,2022-08-11 19:17:55,-42.7826,147.0587,43.72,80,92,1.32
2,Guozhen,CN,2022-08-11 19:17:55,34.3659,107.359,80.64,67,23,2.55
3,Ushuaia,AR,2022-08-11 19:17:55,-54.8,-68.3,38.82,87,40,2.3
4,Bay Roberts,CA,2022-08-11 19:16:09,47.5999,-53.2648,62.51,85,99,4.0
5,Albany,US,2022-08-11 19:13:01,42.6001,-73.9662,86.18,47,85,1.99
6,Cape Town,ZA,2022-08-11 19:17:06,-33.9258,18.4232,59.88,79,40,10.36
7,Hilo,US,2022-08-11 19:14:29,19.7297,-155.09,87.28,90,100,5.75
8,Busselton,AU,2022-08-11 19:14:35,-33.65,115.3333,49.24,85,1,9.75
9,Barrow,US,2022-08-11 19:17:56,71.2906,-156.7887,44.62,87,100,20.71


In [34]:
import os

#Create the output file (CSV)
output_data_file = "weather_data/cities.csv"

#Make sure the destination folder exists: to_csv does not create missing
# directories, so the export would otherwise fail on a fresh checkout
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

#Export the city_data into a CSV, labelling the index column City_ID
city_data_df.to_csv(output_data_file, index_label='City_ID')