In [14]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import weather_api_key

In [20]:
# Base endpoint for the OpenWeatherMap current-weather API (imperial units).
# HTTPS is used because the API key is part of the query string and would
# otherwise be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [15]:
# Draw 1,500 random latitudes in [-90, 90) and 1,500 random longitudes in
# [-180, 180). No seed is set, so every run produces a different sample.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
# Lazily pair each latitude with the longitude at the same position.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x7f9d81f52640>

In [16]:
# Pair each latitude with its longitude as a list of (lat, lng) tuples that
# can be iterated as often as needed.
# The list is built straight from the arrays rather than from the `lat_lngs`
# zip object: a zip is a one-shot iterator, so converting it a second time
# (e.g. when this cell is re-run) would silently produce an empty list.
coordinates = list(zip(lats, lngs))

In [17]:
# Look up the nearest city name for every (lat, lng) pair and keep each name
# only once. dict.fromkeys drops repeated names while preserving the order
# in which they were first encountered.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

# Number of unique cities found
len(cities)
    

611

This tells us that the 1,500 random geographic coordinate (GCS) locations produced only 611 unique cities, because many of the random points share the same nearest city. Remember that we were aiming for more than 500 cities; we generated 1,500 GCS locations knowing that almost 70% of the Earth is covered with water, so only the remaining 30% or so of the points were likely to give us a distinct city.

In [27]:
# The cities list can be indexed by position; show the first city name
# as a quick check of its contents.
cities[0]

'bluff'

In [None]:
# Loop through all the cities in our list
# for i in range(len(cities)):
    
#     # Group cities in sets of 50 for logging purposes
#     if (i % 50 == 0 and i >= 50):
#         set_count += 1
#         record_count = 1
        
#     # Create endpoint URL with each city
#     city_url = url + '&q=' + cities[i]

In [None]:
# Indexing with cities[i] as above causes trouble when a city name contains spaces, so we iterate with enumerate() and replace the spaces with "+" when building the URL
# for i, city in enumerate(cities):
#     # Group cities in sets of 50 for logging purposes.
#     if (i % 50 == 0 and i >= 50):
#         set_count += 1
#         record_count = 1
#     # Create endpoint URL with each city.
#     city_url = url + "&q=" + city.replace(" ","+")

#     # Log the URL, record, and set numbers and the city.
#     print(f"Processing Record {record_count} of Set {set_count} | {city}")
#     # Add 1 to the record count.
#     record_count += 1

In [28]:
# Retrieve the current weather for every city in `cities`.
# Each successful lookup appends one dictionary to `city_data`; cities whose
# response lacks the expected fields, or whose request fails, are logged and
# skipped so that one bad lookup does not abort the whole run.
city_data = []
# Print the beginning of the logging
print("Beginning Data Retrieval.     ")
print("------------------------------")

# Counters used only for the progress log: records are numbered within sets of 50.
record_count = 1
set_count = 1

for i, city in enumerate(cities):
    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city (spaces are replaced with "+").
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count (skipped cities still consume a record number).
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout (seconds) keeps a
        # stalled connection from hanging the loop indefinitely.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a "YYYY-MM-DD HH:MM:SS" string (UTC).
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # The response was not valid JSON or lacks the expected fields: skip the city.
    except (KeyError, ValueError):
        print("City not found. Skipping...")
    # Network-level failure (connection error, timeout, ...): skip the city.
    # Only the exception type is printed because the full message can contain
    # the request URL, which includes the API key.
    except requests.exceptions.RequestException as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval.     
------------------------------
Processing Record 1 of Set 1 | bluff
Processing Record 2 of Set 1 | sapucai
Processing Record 3 of Set 1 | katangli
Processing Record 4 of Set 1 | rikitea
Processing Record 5 of Set 1 | saskylakh
Processing Record 6 of Set 1 | albany
Processing Record 7 of Set 1 | pangai
Processing Record 8 of Set 1 | severo-kurilsk
Processing Record 9 of Set 1 | amderma
City not found. Skipping...
Processing Record 10 of Set 1 | busselton
Processing Record 11 of Set 1 | san patricio
Processing Record 12 of Set 1 | bredasdorp
Processing Record 13 of Set 1 | cayenne
Processing Record 14 of Set 1 | tiksi
Processing Record 15 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 16 of Set 1 | porterville
Processing Record 17 of Set 1 | poya
Processing Record 18 of Set 1 | port alfred
Processing Record 19 of Set 1 | thompson
Processing Record 20 of Set 1 | urumqi
Processing Record 21 of Set 1 | atuona
Processing Record 22 of Set 1 |

Processing Record 37 of Set 4 | san andres
Processing Record 38 of Set 4 | miri
Processing Record 39 of Set 4 | pauri
Processing Record 40 of Set 4 | mukhen
Processing Record 41 of Set 4 | idenao
Processing Record 42 of Set 4 | tianpeng
Processing Record 43 of Set 4 | broome
Processing Record 44 of Set 4 | slave lake
Processing Record 45 of Set 4 | alofi
Processing Record 46 of Set 4 | tsihombe
City not found. Skipping...
Processing Record 47 of Set 4 | nha trang
Processing Record 48 of Set 4 | zafra
Processing Record 49 of Set 4 | altamira
Processing Record 50 of Set 4 | rock sound
Processing Record 1 of Set 5 | sao filipe
Processing Record 2 of Set 5 | nikolayevsk-na-amure
Processing Record 3 of Set 5 | necochea
Processing Record 4 of Set 5 | dikson
Processing Record 5 of Set 5 | rantepao
Processing Record 6 of Set 5 | coihaique
Processing Record 7 of Set 5 | tutoia
Processing Record 8 of Set 5 | waingapu
Processing Record 9 of Set 5 | sarkand
Processing Record 10 of Set 5 | dabat
Pr

Processing Record 28 of Set 8 | nileshwar
Processing Record 29 of Set 8 | sanming
Processing Record 30 of Set 8 | impfondo
Processing Record 31 of Set 8 | kuche
City not found. Skipping...
Processing Record 32 of Set 8 | maarianhamina
Processing Record 33 of Set 8 | vila velha
Processing Record 34 of Set 8 | satitoa
City not found. Skipping...
Processing Record 35 of Set 8 | batemans bay
Processing Record 36 of Set 8 | la asuncion
Processing Record 37 of Set 8 | zlatoustovsk
City not found. Skipping...
Processing Record 38 of Set 8 | comodoro rivadavia
Processing Record 39 of Set 8 | georgetown
Processing Record 40 of Set 8 | aden
Processing Record 41 of Set 8 | cape coast
Processing Record 42 of Set 8 | suhut
Processing Record 43 of Set 8 | dingle
Processing Record 44 of Set 8 | linxia
Processing Record 45 of Set 8 | fortuna
Processing Record 46 of Set 8 | turan
Processing Record 47 of Set 8 | tabou
Processing Record 48 of Set 8 | lorengau
Processing Record 49 of Set 8 | la palma
Proc

Processing Record 16 of Set 12 | odweyne
Processing Record 17 of Set 12 | mokhsogollokh
Processing Record 18 of Set 12 | san lawrenz
Processing Record 19 of Set 12 | estelle
Processing Record 20 of Set 12 | presidencia roque saenz pena
Processing Record 21 of Set 12 | zavoronezhskoye
Processing Record 22 of Set 12 | labuan
Processing Record 23 of Set 12 | paciran
Processing Record 24 of Set 12 | oranjemund
Processing Record 25 of Set 12 | los llanos de aridane
Processing Record 26 of Set 12 | becerril
Processing Record 27 of Set 12 | kachiry
Processing Record 28 of Set 12 | wilmington
Processing Record 29 of Set 12 | chutung
City not found. Skipping...
Processing Record 30 of Set 12 | port hedland
Processing Record 31 of Set 12 | kaduqli
Processing Record 32 of Set 12 | trairi
Processing Record 33 of Set 12 | male
Processing Record 34 of Set 12 | pousat
City not found. Skipping...
Processing Record 35 of Set 12 | fevralsk
City not found. Skipping...
Processing Record 36 of Set 12 | abu

In [35]:
# To check that the product is a list of dictionaries
#city_data

## We now want to convert it into a data frame

In [34]:
# Build a DataFrame from city_data, the list of per-city dictionaries:
# each dictionary becomes one row and its keys become the column names.
city_data_df = pd.DataFrame(city_data)
# Display the first 10 rows as a sanity check
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Bluff,-46.6,168.3333,46.8,96,100,8.08,NZ,2021-06-14 19:50:42
1,Sapucai,-25.6667,-56.9167,73.17,56,0,1.45,PY,2021-06-14 19:54:29
2,Katangli,51.7102,143.2326,46.29,92,100,6.04,RU,2021-06-14 19:54:29
3,Rikitea,-23.1203,-134.9692,75.38,77,100,12.86,PF,2021-06-14 19:54:30
4,Saskylakh,71.9167,114.0833,45.7,92,100,13.51,RU,2021-06-14 19:51:27
5,Albany,42.6001,-73.9662,69.31,89,94,1.01,US,2021-06-14 19:51:19
6,Pangai,-19.8,-174.35,77.11,78,40,13.8,TO,2021-06-14 19:54:30
7,Severo-Kurilsk,50.6789,156.125,45.16,87,0,3.27,RU,2021-06-14 19:54:30
8,Busselton,-33.65,115.3333,49.08,75,92,10.69,AU,2021-06-14 19:54:31
9,San Patricio,28.017,-97.5169,96.08,48,9,5.88,US,2021-06-14 19:54:31


## To reorder the columns

In [37]:
# Desired left-to-right column layout: identifying fields first,
# then the coordinates, then the weather measurements.
column_order = [
    'City', 'Country', 'Date',
    'Lat', 'Lng',
    'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',
]
# Keep every row and pick the columns in the new order.
city_data_df = city_data_df.loc[:, column_order]
city_data_df.head(10)

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Bluff,NZ,2021-06-14 19:50:42,-46.6,168.3333,46.8,96,100,8.08
1,Sapucai,PY,2021-06-14 19:54:29,-25.6667,-56.9167,73.17,56,0,1.45
2,Katangli,RU,2021-06-14 19:54:29,51.7102,143.2326,46.29,92,100,6.04
3,Rikitea,PF,2021-06-14 19:54:30,-23.1203,-134.9692,75.38,77,100,12.86
4,Saskylakh,RU,2021-06-14 19:51:27,71.9167,114.0833,45.7,92,100,13.51
5,Albany,US,2021-06-14 19:51:19,42.6001,-73.9662,69.31,89,94,1.01
6,Pangai,TO,2021-06-14 19:54:30,-19.8,-174.35,77.11,78,40,13.8
7,Severo-Kurilsk,RU,2021-06-14 19:54:30,50.6789,156.125,45.16,87,0,3.27
8,Busselton,AU,2021-06-14 19:54:31,-33.65,115.3333,49.08,75,92,10.69
9,San Patricio,US,2021-06-14 19:54:31,28.017,-97.5169,96.08,48,9,5.88


## To save the df as a CSV and make it a dataset in our working folder

In [38]:
# Create the output file CSV
output_data_file = 'weather_data/cities.csv'
# DataFrame.to_csv does not create missing parent folders, so make sure the
# target directory exists first (a no-op when it is already there).
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the city_data into a CSV; the index column is written under the
# header "City_ID".
city_data_df.to_csv(output_data_file, index_label='City_ID')