In [1]:
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from citipy import citipy
import requests
from api_keys import weather_api_key
import json
import time
import datetime

# Using pycountry to obtain full names of the countries
# source: https://pypi.org/project/pycountry/
import pycountry

# Render DataFrames in full: a limit of None turns off row/column truncation
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# (min, max) bounds, in degrees, for latitude and longitude
range_of_latitude = -90, 90
range_of_longitude = -180, 180

# Output folder: 'Outputs', one level above the current working directory
output_path = os.path.join(os.pardir, 'Outputs')

In [2]:
# Build parallel lists of unique city names and their full country names by
# looking up the city nearest to each of 1500 random coordinates.
cities = []
countries = []

# Set mirror of `cities`: constant-time duplicate check instead of scanning
# the list for every sampled coordinate.
seen_cities = set()

# Random (latitude, longitude) pairs drawn from the bounds defined in the
# configuration cell (range_of_latitude / range_of_longitude).
random_latitudes = np.random.uniform(low=range_of_latitude[0],
                                     high=range_of_latitude[1],
                                     size=1500)
random_longitudes = np.random.uniform(low=range_of_longitude[0],
                                      high=range_of_longitude[1],
                                      size=1500)
# Materialised as a list so the pairs stay available after the loop
# (a bare zip() is exhausted by its first iteration).
random_geographic_locations = list(zip(random_latitudes, random_longitudes))

for latitude, longitude in random_geographic_locations:
    # One lookup per coordinate; city name and country code come from the
    # same result object.
    nearest = citipy.nearest_city(latitude, longitude)
    city = nearest.city_name

    # Cities are de-duplicated by name only (first occurrence wins)
    if city in seen_cities:
        continue
    seen_cities.add(city)
    cities.append(city)

    # Translate the two-letter country code into the full country name.
    # Depending on the pycountry version an unknown code either raises a
    # LookupError or yields None; both cases fall back to 'N/A'.
    try:
        country_record = pycountry.countries.get(alpha_2=nearest.country_code.upper())
    except LookupError:
        country_record = None
    countries.append(country_record.name if country_record is not None else 'N/A')

# A list (not a one-shot zip iterator) so that cells iterating over it can be
# re-run without silently getting an empty sequence.
cities_and_countries = list(zip(cities, countries))

print(len(cities))
print(len(countries))

623
623


In [None]:
# Download the current weather for every city in `cities` from the
# OpenWeatherMap API and collect the successful responses in `weather_df`.

# HTTPS so the API key is not sent in clear text. The trailing '?' is kept so
# any code that appends a raw query string to base_url keeps working.
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Throttling: at most this many requests per window.
# NOTE(review): presumably chosen to stay under a 60 calls/minute quota - confirm.
max_requests_per_window = 59
window_length_in_seconds = 60.0

# Give up on a single request after this many seconds instead of hanging
request_timeout_in_seconds = 10

# Column order of the resulting DataFrame
weather_columns = ['City',
                   'Country',
                   'Country Code',
                   'Date',
                   'Latitude',
                   'Longitude',
                   'Max Temp',
                   'Humidity',
                   'Cloudiness',
                   'Wind Speed']

elapsed_time_in_seconds = 0
record_number = 0
request_counter = 0

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end: DataFrame.append no longer exists in pandas >= 2.0 and growing a frame
# row by row is quadratic.
weather_records = []

start_time = time.time()
print('Session started at: ' + time.ctime() + '\n')

# Iterate over the lists directly so this cell can be re-run at any time
for a_city, a_country in zip(cities, countries):
    record_number = record_number + 1
    record_label = f'Record# {record_number}/{len(cities)} : {a_city.title()}, {a_country}'

    # Throttle BEFORE sending the request.
    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    if elapsed_time_in_seconds >= window_length_in_seconds:
        # The window expired on its own: start a fresh one. Without this
        # reset the limiter would stay disabled after the first slow window.
        start_time = time.time()
        request_counter = 0
    elif request_counter >= max_requests_per_window:
        print('\nSession ended at: ' + time.ctime())
        print(f'{request_counter} requests made in {elapsed_time_in_seconds} secs')
        print('\nWaiting for next minute to begin...')
        time.sleep(window_length_in_seconds - elapsed_time_in_seconds)

        # Reset start_time and request_counter
        start_time = time.time()
        request_counter = 0
        print('\nSession continued at: ' + time.ctime())

    # Query OpenWeatherMap API and update counters. `params` URL-encodes the
    # city name (spaces, accents, '&', ...).
    request_counter = request_counter + 1
    try:
        response = requests.get(base_url,
                                params={'APPID': weather_api_key, 'q': a_city},
                                timeout=request_timeout_in_seconds)
    except requests.RequestException as request_error:
        # Only the exception type is printed: the full message can contain
        # the request URL and therefore the API key.
        print(f'{record_label} : FAILED ({type(request_error).__name__})')
        continue

    if not response.ok:
        if response.status_code == 404:
            print(f'{record_label} : NOT FOUND')
        else:
            # e.g. invalid API key or quota exceeded - not a missing city
            print(f'{record_label} : FAILED (HTTP {response.status_code})')
        continue

    try:
        weather_response = response.json()
        weather_record = {'City': a_city.title(),
                          'Country': a_country,
                          'Country Code': weather_response['sys']['country'],
                          'Date': str(weather_response['dt']),
                          'Latitude': weather_response['coord']['lat'],
                          'Longitude': weather_response['coord']['lon'],
                          'Max Temp': weather_response['main']['temp_max'],
                          'Humidity': weather_response['main']['humidity'],
                          'Cloudiness': weather_response['clouds']['all'],
                          'Wind Speed': weather_response['wind']['speed']}
    except (ValueError, KeyError, TypeError) as parse_error:
        # Body is not JSON or lacks an expected field: skip the city, but say
        # so instead of dropping it silently.
        print(f'{record_label} : SKIPPED (unexpected response: {parse_error!r})')
        continue

    weather_records.append(weather_record)
    print(f'{record_label} : PROCESSED')

# Passing `columns` fixes the column order and yields a correctly shaped
# (empty) frame when no city could be processed.
weather_df = pd.DataFrame(weather_records, columns=weather_columns)

Session started at: Wed Jan 22 16:08:24 2020

Record# 1/623 : Yellowknife, Canada : PROCESSED
Record# 2/623 : Kapaa, United States : PROCESSED
Record# 3/623 : Ostrovnoy, Russian Federation : PROCESSED
Record# 4/623 : Kudahuvadhoo, Maldives : PROCESSED
Record# 5/623 : Geraldton, Australia : PROCESSED
Record# 6/623 : Victoria, Seychelles : PROCESSED
Record# 7/623 : Bredasdorp, South Africa : PROCESSED
Record# 8/623 : Bluff, New Zealand : PROCESSED
Record# 9/623 : Busselton, Australia : PROCESSED
Record# 10/623 : Upernavik, Greenland : PROCESSED
Record# 11/623 : Barentsburg, Svalbard and Jan Mayen : NOT FOUND
Record# 12/623 : Bengkulu, Indonesia : PROCESSED
Record# 13/623 : Miracema Do Tocantins, Brazil : PROCESSED
Record# 14/623 : Vaini, Tonga : PROCESSED
Record# 15/623 : Vila Velha, Brazil : PROCESSED
Record# 16/623 : Ribeira Grande, Portugal : PROCESSED
Record# 17/623 : Vestmanna, Faroe Islands : PROCESSED
Record# 18/623 : Dambulla, Sri Lanka : PROCESSED
Record# 19/623 : San Quintin, M

In [None]:
# Replace the index with a fresh 0..n-1 numbering, discarding the old one
weather_df = weather_df.reset_index(drop=True)

# Count of non-missing values in each column
non_null_counts = weather_df.count()
print(non_null_counts)

# Preview: the first 20 rows
weather_df.iloc[:20]

In [None]:
# Save the collected weather data as CSV without the index column.
# The output folder is created first because to_csv fails when the target
# directory does not exist (e.g. on a fresh checkout).
os.makedirs(output_path, exist_ok=True)
weather_df.to_csv(os.path.join(output_path, 'weather_df.csv'), index=False)