In [1]:
# Import dependencies

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from citipy import citipy


In [2]:
from config import key_dict

if 'owm' in key_dict:
    owm_key = key_dict['owm']
else:
    owm_key = api_key

In [14]:
# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [15]:
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

596

In [17]:
# Create row to process down the list
row_counter = 1
# Create a dictionary with parameters using api
settings = {"appid": owm_key, "units": "imperial"}
# Call the input URL
base_url = "http://api.openweathermap.org/data/2.5/weather?"
# Create query url
query_url = f"{base_url}appid={settings['appid']}&units={settings['units']}&q="

In [18]:
# set lists for the dataframe
city_two = []
cloudinesses = []
dates = []
humidities = []
lats = []
lngs = []
max_temps = []
wind_speeds = []
countries = []

# set initial count quantities for organization
count_one = 0
set_one = 1

# loops for creating dataframe columns
print("Beginning Data Retrieval\n------------------------------")
for city in cities:
    try:
        response = requests.get(query_url + city.replace(" ","&")).json()
        cloudinesses.append(response['clouds']['all'])
        countries.append(response['sys']['country'])
        dates.append(response['dt'])
        humidities.append(response['main']['humidity'])
        lats.append(response['coord']['lat'])
        lngs.append(response['coord']['lon'])
        max_temps.append(response['main']['temp_max'])
        wind_speeds.append(response['wind']['speed'])
        if count_one > 48:
            count_one = 1
            set_one += 1
            city_two.append(city)
        else:
            count_one += 1
            city_two.append(city)
        print(f"Processing Record {count_one} of Set {set_one} | {city}")
    except Exception:
        print("City not found. Skipping...")
print("------------------------------\nData Retrieval Complete\n------------------------------")

Beginning Data Retrieval
------------------------------
Processing Record 1 of Set 1 | hithadhoo
Processing Record 2 of Set 1 | avarua
Processing Record 3 of Set 1 | butaritari
Processing Record 4 of Set 1 | narsaq
Processing Record 5 of Set 1 | busselton
Processing Record 6 of Set 1 | albany
Processing Record 7 of Set 1 | rikitea
Processing Record 8 of Set 1 | barrow
Processing Record 9 of Set 1 | port alfred
Processing Record 10 of Set 1 | kaeo
Processing Record 11 of Set 1 | ponta do sol
Processing Record 12 of Set 1 | pevek
Processing Record 13 of Set 1 | victoria
Processing Record 14 of Set 1 | bermejillo
Processing Record 15 of Set 1 | beringovskiy
Processing Record 16 of Set 1 | adiake
Processing Record 17 of Set 1 | ribeira grande
Processing Record 18 of Set 1 | mataura
Processing Record 19 of Set 1 | cidreira
Processing Record 20 of Set 1 | padang
Processing Record 21 of Set 1 | ilulissat
Processing Record 22 of Set 1 | ushuaia
Processing Record 23 of Set 1 | lyubech
City not 

Processing Record 40 of Set 4 | rocha
Processing Record 41 of Set 4 | hasaki
Processing Record 42 of Set 4 | dunmore town
Processing Record 43 of Set 4 | dingle
Processing Record 44 of Set 4 | lompoc
Processing Record 45 of Set 4 | nanortalik
City not found. Skipping...
Processing Record 46 of Set 4 | grand river south east
Processing Record 47 of Set 4 | lata
Processing Record 48 of Set 4 | rutana
Processing Record 49 of Set 4 | wawa
City not found. Skipping...
Processing Record 1 of Set 5 | portland
City not found. Skipping...
Processing Record 2 of Set 5 | mabamba
Processing Record 3 of Set 5 | severo-kurilsk
Processing Record 4 of Set 5 | brookhaven
Processing Record 5 of Set 5 | talara
City not found. Skipping...
Processing Record 6 of Set 5 | shelburne
Processing Record 7 of Set 5 | sinjar
Processing Record 8 of Set 5 | andenes
Processing Record 9 of Set 5 | miraflores
Processing Record 10 of Set 5 | shellbrook
Processing Record 11 of Set 5 | merauke
Processing Record 12 of Set 5

City not found. Skipping...
Processing Record 31 of Set 8 | touros
Processing Record 32 of Set 8 | takoradi
Processing Record 33 of Set 8 | cumra
Processing Record 34 of Set 8 | hualmay
Processing Record 35 of Set 8 | bernay
Processing Record 36 of Set 8 | rodrigues alves
City not found. Skipping...
Processing Record 37 of Set 8 | marzuq
Processing Record 38 of Set 8 | mar del plata
City not found. Skipping...
Processing Record 39 of Set 8 | inhambane
Processing Record 40 of Set 8 | shu
Processing Record 41 of Set 8 | bengkulu
Processing Record 42 of Set 8 | aranos
Processing Record 43 of Set 8 | nanchong
Processing Record 44 of Set 8 | alofi
Processing Record 45 of Set 8 | saint-pierre
Processing Record 46 of Set 8 | bathsheba
Processing Record 47 of Set 8 | huilong
Processing Record 48 of Set 8 | buraydah
Processing Record 49 of Set 8 | chicama
Processing Record 1 of Set 9 | talcahuano
Processing Record 2 of Set 9 | igboho
Processing Record 3 of Set 9 | bandundu
City not found. Skipp

In [23]:
# create a dictionary for establishing dataframe
weather_dict = {
    "City":city_two,
    "Cloudiness":cloudinesses,
    "Country":countries,
    "Date":dates,
    "Humidity":humidities,
    "Lat":lats,
    "Lng":lngs,
    "Max Temp":max_temps,
    "Wind Speed":wind_speeds
}

# establish dataframe
weather_df = pd.DataFrame(weather_dict)

In [24]:
weather_df.count()

City          535
Cloudiness    535
Country       535
Date          535
Humidity      535
Lat           535
Lng           535
Max Temp      535
Wind Speed    535
dtype: int64

In [26]:
output_data_file = "output_data/cities.csv"
weather_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,hithadhoo,95,MV,1616294004,74,-0.6000,73.0833,82.96,9.93
1,avarua,25,CK,1616294005,74,-21.2078,-159.7750,84.20,9.22
2,butaritari,33,KI,1616294005,79,3.0707,172.7902,81.95,17.29
3,narsaq,75,GL,1616293531,93,60.9167,-46.0500,32.00,11.50
4,busselton,8,AU,1616294006,40,-33.6500,115.3333,89.01,1.99
...,...,...,...,...,...,...,...,...,...
530,yakhroma,0,RU,1616294206,86,56.3000,37.4833,23.00,8.95
531,puerto leguizamo,0,ES,1616294206,76,36.5939,-6.2330,46.00,8.05
532,vila franca do campo,0,PT,1616294206,78,42.0304,-8.1588,37.00,1.01
533,ipameri,2,BR,1616294207,93,-17.7219,-48.1597,65.77,5.77


In [39]:
# Drop extra samples (500 total samples)

update_weather_df = weather_df.drop(weather_df.index[500:535])

update_weather_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,hithadhoo,95,MV,1616294004,74,-0.6000,73.0833,82.96,9.93
1,avarua,25,CK,1616294005,74,-21.2078,-159.7750,84.20,9.22
2,butaritari,33,KI,1616294005,79,3.0707,172.7902,81.95,17.29
3,narsaq,75,GL,1616293531,93,60.9167,-46.0500,32.00,11.50
4,busselton,8,AU,1616294006,40,-33.6500,115.3333,89.01,1.99
...,...,...,...,...,...,...,...,...,...
495,seminole,90,US,1616294192,87,28.7086,-81.2081,55.99,12.66
496,raudeberg,100,NO,1616294192,85,61.9875,5.1352,36.00,18.01
497,kovdor,100,RU,1616293996,96,67.5662,30.4758,21.51,5.14
498,tecpan,20,GT,1616294193,63,14.7623,-90.9947,64.40,18.41
