In [1]:
# import dependencies
import os
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
## Draw random (latitude, longitude) pairs to seed the city search.

# 1500 latitudes sampled uniformly between -90 and 90 (NumPy float array).
lats = np.random.uniform(-90.0, 90.0, 1500)
# 1500 longitudes sampled uniformly between -180 and 180, same length as `lats`.
lngs = np.random.uniform(-180.0, 180.0, 1500)

# Pair the i-th latitude with the i-th longitude. zip() is lazy and can only be
# iterated once.
lat_lngs = zip(lats, lngs)
# Display the zip object itself (only its repr; the pairs are not expanded).
lat_lngs

<zip at 0x28c532d0b00>

In [3]:
# Materialize the coordinate pairs into a list so they can be iterated more
# than once. The pairs are rebuilt from `lats` and `lngs` instead of being read
# from `lat_lngs`: that zip iterator is exhausted after a single pass, so
# re-running this cell would otherwise silently produce an empty list.
coordinates = list(zip(lats, lngs))

In [4]:
## now we can use the citipy module to find the nearest cities to the random coordinates in our list

from citipy import citipy
# import citipy module

# Look up the nearest city for every (latitude, longitude) pair and keep each
# city name once, in the order in which it is first encountered.
# dict.fromkeys() de-duplicates in a single pass while preserving insertion
# order; the earlier `city not in cities` check re-scanned the whole list for
# every coordinate.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(latitude, longitude).city_name
        for latitude, longitude in coordinates
    )
)

# Show the number of unique cities to confirm the sample is large enough.
len(cities)

625

In [5]:
import requests
# import the requests library

from config import weather_api_key
# import the api key from the config file we created

In [6]:
# Base endpoint for OpenWeatherMap "current weather" calls. Later cells add the
# city-name query (`q=`) to this URL, so it already carries the API key.
# The sample "lat=35&lon=139" coordinates were dropped: they are unrelated to
# the per-city name lookups that follow.
weather_endpoint = "https://api.openweathermap.org/data/2.5/weather?appid="
url = weather_endpoint + weather_api_key

# Show the endpoint with the key masked so the secret is not stored in the
# notebook output.
print(weather_endpoint + "<redacted>")

https://api.openweathermap.org/data/2.5/weather?lat=35&lon=139&appid=<redacted>


In [7]:
# Request the current weather for a single city (Boston) as an example.

# Endpoint for this city: the base URL plus the city-name query parameter.
city_url = url + "&q=" + "Boston"
# Send the GET request; `city_weather` is the HTTP response object. The timeout
# keeps the cell from hanging indefinitely if the server never answers.
city_weather = requests.get(city_url, timeout=10)
# Decode the JSON body and display it as the cell output.
city_weather.json()

{'coord': {'lon': -71.0598, 'lat': 42.3584},
 'weather': [{'id': 804,
   'main': 'Clouds',
   'description': 'overcast clouds',
   'icon': '04d'}],
 'base': 'stations',
 'main': {'temp': 288.06,
  'feels_like': 287.83,
  'temp_min': 286.45,
  'temp_max': 290.45,
  'pressure': 1009,
  'humidity': 85},
 'visibility': 10000,
 'wind': {'speed': 5.66, 'deg': 80},
 'clouds': {'all': 100},
 'dt': 1654209152,
 'sys': {'type': 2,
  'id': 2013408,
  'country': 'US',
  'sunrise': 1654160985,
  'sunset': 1654215304},
 'timezone': -14400,
 'id': 4930956,
 'name': 'Boston',
 'cod': 200}

In [8]:
boston_data = city_weather.json()
# keep the decoded JSON body of the Boston response so the following cells can read fields from it

In [9]:
# Inspect a few fields of the Boston payload: the whole "sys" section, its
# country code, and the "dt" timestamp. The tuple is shown as the cell output.
sys_section = boston_data["sys"]
sys_section, sys_section["country"], boston_data["dt"]

({'type': 2,
  'id': 2013408,
  'country': 'US',
  'sunrise': 1654160985,
  'sunset': 1654215304},
 'US',
 1654209152)

In [10]:
# Pull the individual measurements out of the Boston payload.
coord_section = boston_data["coord"]
lat, lng = coord_section["lat"], coord_section["lon"]

main_section = boston_data["main"]
max_temp, humidity = main_section["temp_max"], main_section["humidity"]

clouds = boston_data["clouds"]["all"]    # value stored under clouds -> all
wind = boston_data["wind"]["speed"]      # value stored under wind -> speed

# Print all six values on one line, separated by spaces.
print(lat, lng, max_temp, humidity, clouds, wind)

42.3584 -71.0598 290.45 85 100 5.66


In [11]:
## Convert the response's Unix timestamp into a UTC date and time.

from datetime import datetime, timezone
# `datetime` performs the conversion; `timezone.utc` pins it to UTC

date = boston_data["dt"]
# read the "dt" timestamp from the JSON data

# datetime.utcfromtimestamp() is deprecated since Python 3.12. Build the value
# with an explicit UTC zone instead, then drop the tzinfo so the displayed
# result is the same naive datetime (year, month, day, hour, minute, second).
datetime.fromtimestamp(date, tz=timezone.utc).replace(tzinfo=None)

datetime.datetime(2022, 6, 2, 22, 32, 32)

In [12]:
# Same conversion as in the previous cell, formatted as a
# 'YYYY-MM-DD HH:MM:SS' string. fromtimestamp(..., tz=timezone.utc) is used
# in place of the deprecated utcfromtimestamp().
datetime.fromtimestamp(date, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

'2022-06-02 22:32:32'

In [13]:
## this bit will request the current weather for each city and collect the results in a list

import time
# imports time dependency

SET_SIZE = 50
# number of cities logged per set
PAUSE_SECONDS = 60
# pause between sets -- presumably to respect the API's rate limit (TODO confirm)
REQUEST_TIMEOUT = 10
# seconds to wait for the server before giving up on a single request

city_data = []
# create an empty list to hold one weather record (dict) per city

print("Beginning Data Retrieval     ")
print("-----------------------------")
# print the beginning of the logging

record_count = 1
set_count = 1
# create and set counters to 1

for i, city in enumerate(cities):
    # loop through all the cities; enumerate() supplies the index and the name

    if i > 0 and i % SET_SIZE == 0:
        # a full set has been processed: start a new set, restart the record
        # numbering, and pause before sending more requests
        set_count += 1
        record_count = 1
        time.sleep(PAUSE_SECONDS)

    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # log the record number, the set number, and the city
    record_count += 1

    try:
        # Request the weather for this city. Passing the name through `params`
        # lets requests URL-encode it (spaces and any other reserved
        # characters); the timeout keeps one stalled connection from blocking
        # the whole run.
        city_weather = requests.get(
            url, params={"q": city}, timeout=REQUEST_TIMEOUT
        ).json()

        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the Unix timestamp to a 'YYYY-MM-DD HH:MM:SS' string in UTC.
        # fromtimestamp(..., tz=timezone.utc) replaces utcfromtimestamp(),
        # which is deprecated since Python 3.12.
        city_date = datetime.fromtimestamp(
            city_weather["dt"], tz=timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')

        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    except (KeyError, TypeError, ValueError):
        # The body was not valid JSON or lacked an expected field, as happens
        # for example when the API does not recognise the city name. Skip it.
        print("City not found. Skipping...")
    except requests.RequestException as error:
        # Network-level failure (timeout, connection error, ...). Only the
        # exception type is printed because the full message can include the
        # request URL, which carries the API key. KeyboardInterrupt and other
        # unexpected errors are no longer swallowed.
        print(f"Request failed ({type(error).__name__}). Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | rikitea
Processing Record 2 of Set 1 | beringovskiy
Processing Record 3 of Set 1 | tiarei
Processing Record 4 of Set 1 | saskylakh
Processing Record 5 of Set 1 | mehran
Processing Record 6 of Set 1 | acapulco
Processing Record 7 of Set 1 | busselton
Processing Record 8 of Set 1 | sedalia
Processing Record 9 of Set 1 | dikson
Processing Record 10 of Set 1 | severo-kurilsk
Processing Record 11 of Set 1 | khatanga
Processing Record 12 of Set 1 | mataura
Processing Record 13 of Set 1 | qaanaaq
Processing Record 14 of Set 1 | bad urach
Processing Record 15 of Set 1 | malente
Processing Record 16 of Set 1 | evensk
Processing Record 17 of Set 1 | new norfolk
Processing Record 18 of Set 1 | sint-laureins
Processing Record 19 of Set 1 | barentsburg
City not found. Skipping...
Processing Record 20 of Set 1 | ushuaia
Processing Record 21 of Set 1 | diglur
Processing Record 22 of Set 1 | nanning
Processing R

Processing Record 35 of Set 4 | okhotsk
Processing Record 36 of Set 4 | raudeberg
Processing Record 37 of Set 4 | sikonge
Processing Record 38 of Set 4 | anadyr
Processing Record 39 of Set 4 | san felipe
Processing Record 40 of Set 4 | tessalit
Processing Record 41 of Set 4 | provideniya
Processing Record 42 of Set 4 | torbay
Processing Record 43 of Set 4 | kamenka
Processing Record 44 of Set 4 | chokurdakh
Processing Record 45 of Set 4 | itacoatiara
Processing Record 46 of Set 4 | amudat
Processing Record 47 of Set 4 | dzaoudzi
Processing Record 48 of Set 4 | caldwell
Processing Record 49 of Set 4 | burica
City not found. Skipping...
Processing Record 50 of Set 4 | pingyin
Processing Record 1 of Set 5 | asau
Processing Record 2 of Set 5 | grand gaube
Processing Record 3 of Set 5 | hue
Processing Record 4 of Set 5 | porto nacional
Processing Record 5 of Set 5 | jamestown
Processing Record 6 of Set 5 | gigmoto
Processing Record 7 of Set 5 | nakhon thai
Processing Record 8 of Set 5 | sar

Processing Record 28 of Set 8 | norman wells
Processing Record 29 of Set 8 | price
Processing Record 30 of Set 8 | mahebourg
Processing Record 31 of Set 8 | adrar
Processing Record 32 of Set 8 | kutum
Processing Record 33 of Set 8 | north battleford
Processing Record 34 of Set 8 | dingle
Processing Record 35 of Set 8 | marcona
City not found. Skipping...
Processing Record 36 of Set 8 | geraldton
Processing Record 37 of Set 8 | rock sound
Processing Record 38 of Set 8 | plaster rock
Processing Record 39 of Set 8 | longyearbyen
Processing Record 40 of Set 8 | tungor
Processing Record 41 of Set 8 | vila franca do campo
Processing Record 42 of Set 8 | kesennuma
City not found. Skipping...
Processing Record 43 of Set 8 | kamina
Processing Record 44 of Set 8 | ucluelet
Processing Record 45 of Set 8 | cockburn town
Processing Record 46 of Set 8 | san miguel
Processing Record 47 of Set 8 | cabra
Processing Record 48 of Set 8 | dalby
Processing Record 49 of Set 8 | avera
Processing Record 50 of

Processing Record 15 of Set 12 | hammerfest
Processing Record 16 of Set 12 | tingrela
City not found. Skipping...
Processing Record 17 of Set 12 | suzun
Processing Record 18 of Set 12 | santo estevao
Processing Record 19 of Set 12 | fairbanks
Processing Record 20 of Set 12 | namibe
Processing Record 21 of Set 12 | fairlie
Processing Record 22 of Set 12 | kangaatsiaq
Processing Record 23 of Set 12 | pangody
Processing Record 24 of Set 12 | porto walter
Processing Record 25 of Set 12 | san policarpo
Processing Record 26 of Set 12 | ketchikan
Processing Record 27 of Set 12 | bago
Processing Record 28 of Set 12 | stornoway
Processing Record 29 of Set 12 | mecca
Processing Record 30 of Set 12 | reshetylivka
Processing Record 31 of Set 12 | caborca
Processing Record 32 of Set 12 | burladingen
Processing Record 33 of Set 12 | guerrero negro
Processing Record 34 of Set 12 | nongan
Processing Record 35 of Set 12 | sonoita
Processing Record 36 of Set 12 | olafsvik
Processing Record 37 of Set 12 

In [14]:
# number of cities for which a weather record was appended to `city_data`
print(len(city_data))

575


In [20]:
# Build a DataFrame from the list of per-city dicts: one row per record, one
# column per dict key.
city_data_df = pd.DataFrame(data=city_data)
# Display the first ten rows of the DataFrame.
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Rikitea,-23.1203,-134.9692,298.34,78,81,6.89,PF,2022-06-02 22:34:43
1,Beringovskiy,63.05,179.3167,278.46,73,18,4.39,RU,2022-06-02 22:34:44
2,Tiarei,-17.5333,-149.3333,299.16,83,100,7.0,PF,2022-06-02 22:34:45
3,Saskylakh,71.9167,114.0833,276.81,93,88,1.47,RU,2022-06-02 22:34:45
4,Mehran,33.1222,46.1646,307.02,7,5,3.3,IR,2022-06-02 22:34:46
5,Acapulco,16.8634,-99.8901,302.05,70,75,5.66,MX,2022-06-02 22:34:47
6,Busselton,-33.65,115.3333,286.83,69,100,2.68,AU,2022-06-02 22:34:47
7,Sedalia,38.7045,-93.2283,296.99,49,40,4.63,US,2022-06-02 22:34:48
8,Dikson,73.5069,80.5464,270.87,87,100,12.9,RU,2022-06-02 22:34:49
9,Severo-Kurilsk,50.6789,156.125,279.78,89,100,3.3,RU,2022-06-02 22:32:16


In [38]:
# Desired column layout: identifying fields first, then the coordinates, then
# the weather measurements.
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
# Select the columns in that order and rebind the name to the reordered frame.
city_data_df = city_data_df.loc[:, new_column_order]
# Display the first five rows to check the new layout.
city_data_df.head()

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Rikitea,PF,2022-06-02 22:34:43,-23.1203,-134.9692,298.34,78,81,6.89
1,Beringovskiy,RU,2022-06-02 22:34:44,63.05,179.3167,278.46,73,18,4.39
2,Tiarei,PF,2022-06-02 22:34:45,-17.5333,-149.3333,299.16,83,100,7.0
3,Saskylakh,RU,2022-06-02 22:34:45,71.9167,114.0833,276.81,93,88,1.47
4,Mehran,IR,2022-06-02 22:34:46,33.1222,46.1646,307.02,7,5,3.3


In [40]:
# Destination of the exported CSV file.
output_data_file = "weather_data/cities.csv"

# DataFrame.to_csv() does not create missing folders, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

# Export the city data DataFrame to CSV; the index is written as a column
# labelled "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")