# Weather Mapping - Data Collection

### Dependencies

In [1]:
# Dependencies
import os
import requests
import numpy as np
import pandas as pd
import datetime as dt
from citipy import citipy
from config import OWM_KEY

### Randomly generate at least 1,000 cities around the world

In [2]:
# Draw random geographic coordinates; the seed is fixed so reruns match
np.random.seed(0)
n_coords = 3500

# Latitudes are drawn first, then longitudes, each uniform over its full range
lat = np.random.uniform(low=-90, high=90, size=n_coords)
lon = np.random.uniform(low=-180, high=180, size=n_coords)

# Preview the first five values of each array
lat[:5], lon[:5]

(array([  8.78643071,  38.73408595,  18.49740769,   8.07897294,
        -13.74213612]),
 array([  47.85016042,   -8.40824008,  169.70141582, -163.35849583,
          56.95484869]))

In [3]:
# Look up the nearest city name for every generated coordinate pair
nearest_names = (
    citipy.nearest_city(lat[idx], lon[idx]).city_name
    for idx in range(n_coords)
)

# Drop repeated names; dict.fromkeys keeps the first occurrence of each, in order
cities = list(dict.fromkeys(nearest_names))

len(cities)

1111

### Get weather data for each city

In [4]:
# OpenWeatherMap URL for API calls (current weather, imperial units).
# HTTPS is used so the API key in the query string is not sent in cleartext.
# The city name is appended directly after the trailing '&q='.
url = 'https://api.openweathermap.org/data/2.5/weather?units=imperial'
url += '&appid=' + OWM_KEY + '&q='

# Sample OWM response for the first city.
# Spaces become '+' to match how the collection loop builds its queries, and
# the timeout keeps a stalled connection from hanging the notebook.
response = requests.get(url + cities[0].replace(' ', '+'), timeout=10).json()
response

{'coord': {'lon': 48.4845, 'lat': 8.4054},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 74.7,
  'feels_like': 75.58,
  'temp_min': 74.7,
  'temp_max': 74.7,
  'pressure': 1013,
  'humidity': 79,
  'sea_level': 1013,
  'grnd_level': 961},
 'visibility': 10000,
 'wind': {'speed': 12.19, 'deg': 97, 'gust': 17.78},
 'clouds': {'all': 7},
 'dt': 1618695761,
 'sys': {'country': 'SO', 'sunrise': 1618713317, 'sunset': 1618757706},
 'timezone': 10800,
 'id': 58933,
 'name': 'Garoowe',
 'cod': 200}

In [5]:
# Store collected weather records (one dict per city) and the names of the
# cities whose response did not contain the expected weather fields
weather200, weather404 = [], []

# Print header
print('Starting data collection...\n')
sep = (' ' * 7) + '| ' # col separator
header = 'Num API requests | Cities collected | Cities not found'
print(header)
print('-' * len(header))

# Get weather for each city
for i, city in enumerate(cities):

    # Make request; the timeout stops one stalled connection from hanging the run
    response = requests.get(url + city.replace(' ', '+'), timeout=10).json()

    try:
        # Main weather; a payload missing any of these fields raises a lookup
        # error and the city is recorded as not found
        weather = {
            'City': response['name'],
            'Country': response['sys']['country'],
            'Latitude': response['coord']['lat'],
            'Longitude': response['coord']['lon'],
            # Timezone-aware replacement for the deprecated utcfromtimestamp;
            # the formatted string is still UTC
            'Time': dt.datetime.fromtimestamp(
                response['dt'], tz=dt.timezone.utc
            ).strftime('%Y-%m-%d %H:%M:%S'),
            'Description': response['weather'][0]['description'],
            'Max Temp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Wind Speed': response['wind']['speed'],
            'Cloudiness': response['clouds']['all']
        }

    # Only lookup failures mean "no usable weather data"; anything else
    # (e.g. KeyboardInterrupt) propagates instead of being silently swallowed
    except (KeyError, IndexError, TypeError):
        weather404.append(city) # add city to 404 list

    else:
        # Add rain and snow (1-hour volume). The 'rain' / 'snow' keys can be
        # absent (the sample response has neither), so default to 0.
        for meas in ['Rain', 'Snow']:
            weather[meas] = response.get(meas.lower(), {}).get('1h', 0)

        # Add weather to 200 list
        weather200.append(weather)

    # Print progress every 100 cities and on the last city
    if (i + 1) % 100 == 0 or i == len(cities) - 1:
        print(f'{i + 1:10}', end=sep) # cities requested
        print(f'{len(weather200):10}', end=sep) # cities collected
        print(f'{len(weather404):10}') # cities not found

print('-' * len(header))
print('\nData collection complete.\n')
print('The following cities were not found with the OpenWeatherMap API:')
print(weather404)

Starting data collection...

Num API requests | Cities collected | Cities not found
------------------------------------------------------
       100       |         91       |          9
       200       |        185       |         15
       300       |        277       |         23
       400       |        372       |         28
       500       |        465       |         35
       600       |        557       |         43
       700       |        651       |         49
       800       |        741       |         59
       900       |        833       |         67
      1000       |        925       |         75
      1100       |       1012       |         88
      1111       |       1023       |         88
------------------------------------------------------

Data collection complete.

The following cities were not found with the OpenWeatherMap API:


['mys shmidta',
 'taolanaro',
 'illoqqortoormiut',
 'cam pha',
 'umzimvubu',
 'belushya guba',
 'naftah',
 'babanusah',
 'formoso do araguaia',
 'sentyabrskiy',
 'barentsburg',
 'saleaula',
 'amderma',
 'attawapiskat',
 'parras',
 'mutsamudu',
 'chagda',
 'warqla',
 'kuche',
 'marcona',
 'ust-kamchatsk',
 'koboldo',
 'grand river south east',
 'nizhneyansk',
 'tawzar',
 'linchuan',
 'rolim de moura',
 'palabuhanratu',
 'bolungarvik',
 'samusu',
 'vaitupu',
 'kankavli',
 'lokken verk',
 'malwan',
 'tsihombe',
 'karaul',
 'balasinor',
 'tidore',
 'sug-aksy',
 'qabis',
 'zhitikara',
 'jarjis',
 'karauzyak',
 'solovetskiy',
 'macaboboni',
 'ituni',
 'lolua',
 'jiroft',
 'paradwip',
 'mouzakion',
 'tawkar',
 'halalo',
 'kamenskoye',
 'tumannyy',
 'wulanhaote',
 'ahumada',
 'viksoyri',
 'gorno-chuyskiy',
 'fevralsk',
 'tarudant',
 'satitoa',
 'mocambique',
 'tome-acu',
 'phan rang',
 'tasbuget',
 'chhukha',
 'barawe',
 'canitas',
 'sorvag',
 'dzhusaly',
 'angamacutiro',
 'saryshagan',
 'buri

### Convert data to dataframe

In [6]:
# Build a table from the collected weather records (one row per city)
weather_df = pd.DataFrame(data=weather200)

# Preview the first two rows
weather_df.head(n=2)

Unnamed: 0,City,Country,Latitude,Longitude,Time,Description,Max Temp,Humidity,Wind Speed,Cloudiness,Rain,Snow
0,Garoowe,SO,8.4054,48.4845,2021-04-17 21:42:41,clear sky,74.7,79,12.19,7,0,0
1,Vendas Novas,PT,38.6771,-8.4579,2021-04-17 21:42:41,clear sky,62.6,59,6.91,0,0,0


In [7]:
# Make sure the output directory exists, then build the CSV path inside it
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)
data_path = os.path.join(data_dir, 'weather.csv')
data_path

'data/weather.csv'

In [8]:
# Write the weather table to disk, labelling the index column 'Id'
weather_df.to_csv(data_path, index_label='Id')

# Read the file back and preview it to confirm the save
saved_df = pd.read_csv(data_path)
saved_df.head(2)

Unnamed: 0,Id,City,Country,Latitude,Longitude,Time,Description,Max Temp,Humidity,Wind Speed,Cloudiness,Rain,Snow
0,0,Garoowe,SO,8.4054,48.4845,2021-04-17 21:42:41,clear sky,74.7,79,12.19,7,0,0
1,1,Vendas Novas,PT,38.6771,-8.4579,2021-04-17 21:42:41,clear sky,62.6,59,6.91,0,0,0
