# WeatherPy - Data Collection

### Dependencies

In [1]:
# Dependencies
from citipy import citipy
import datetime as dt
import requests
import numpy as np
import pandas as pd

from config import OWM_API_KEY

### Generate at least 1000 cities around the world randomly

In [3]:
# Draw uniformly distributed latitude/longitude samples.
# The global NumPy seed is fixed so every run yields the same coordinates;
# latitude is drawn before longitude, which fixes the random stream order.
n_coords = 3500
np.random.seed(0)
lat = np.random.uniform(low=-90, high=90, size=n_coords)
lon = np.random.uniform(low=-180, high=180, size=n_coords)

# Preview the first five values of each array
lat[:5], lon[:5]

(array([  8.78643071,  38.73408595,  18.49740769,   8.07897294,
        -13.74213612]),
 array([  47.85016042,   -8.40824008,  169.70141582, -163.35849583,
          56.95484869]))

In [2]:
# Resolve each coordinate pair to the name of its nearest city and keep every
# name once. dict.fromkeys drops repeats while preserving first-seen order,
# so the resulting list matches an append-if-absent loop.
nearest_names = (
    citipy.nearest_city(lat[k], lon[k]).city_name
    for k in range(n_coords)
)
cities = list(dict.fromkeys(nearest_names))

# Number of distinct city names found
len(cities)

1111

### Get weather data for each city

In [3]:
# Openweathermap URL for API calls.
# The key is brought in with `from config import OWM_API_KEY`, so it must be
# referenced by that bare name; no `config` module object is bound here.
# HTTPS is used so the key is not sent in plaintext.
url = 'https://api.openweathermap.org/data/2.5/weather?units=imperial'
url += '&appid=' + OWM_API_KEY + '&q='

# Sample OWM response. Spaces are encoded the same way as in the collection
# loop, and a timeout keeps a stalled connection from hanging the kernel.
response = requests.get(url + cities[0].replace(' ', '+'), timeout=10).json()
response

{'coord': {'lon': 48.4845, 'lat': 8.4054},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 66.99,
  'feels_like': 59.05,
  'temp_min': 66.99,
  'temp_max': 66.99,
  'pressure': 1013,
  'humidity': 64,
  'sea_level': 1013,
  'grnd_level': 960},
 'visibility': 10000,
 'wind': {'speed': 16.51, 'deg': 71},
 'clouds': {'all': 0},
 'dt': 1610225148,
 'sys': {'country': 'SO', 'sunrise': 1610161384, 'sunset': 1610203369},
 'timezone': 10800,
 'id': 58933,
 'name': 'Garoowe',
 'cod': 200}

In [4]:
# Store responses: parsed weather records, and the names of cities for which
# no usable record could be obtained
cities200, cities404 = [], []

# Seconds to wait on one request before giving up on that city
REQUEST_TIMEOUT = 10


def _parse_weather(payload):
    """Flatten one decoded OWM weather response into a single flat record.

    Parameters
    ----------
    payload : dict
        Decoded JSON body of a weather response (see the sample response
        displayed earlier in this notebook for the expected layout).

    Returns
    -------
    dict
        Record with the keys City, Country, Latitude, Longitude, Time,
        Description, Max Temp, Humidity, Wind Speed, Cloudiness, Rain, Snow.

    Raises
    ------
    KeyError, IndexError, TypeError
        If a required field is missing or has an unexpected shape, as is
        the case for a body that does not describe a city.
    """
    record = {
        'City': payload['name'],
        'Country': payload['sys']['country'],
        'Latitude': payload['coord']['lat'],
        'Longitude': payload['coord']['lon'],
        # Timezone-aware conversion; yields the same UTC string as the
        # deprecated datetime.utcfromtimestamp
        'Time': dt.datetime.fromtimestamp(
            payload['dt'], tz=dt.timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S'),
        'Description': payload['weather'][0]['description'],
        'Max Temp': payload['main']['temp_max'],
        'Humidity': payload['main']['humidity'],
        'Wind Speed': payload['wind']['speed'],
        'Cloudiness': payload['clouds']['all']
    }

    # Rain and snow blocks are optional; a missing block or a missing
    # '1h' entry is recorded as 0
    for column, key in (('Rain', 'rain'), ('Snow', 'snow')):
        block = payload.get(key)
        record[column] = block.get('1h', 0) if isinstance(block, dict) else 0

    return record


# Print progress
print('Starting data collection...\n')
header = 'Num API requests | Cities collected | Cities not found'
print(header)
print('-' * len(header))

# Get weather for each city
for n_requests, city in enumerate(cities, start=1):

    try:
        # Make request
        response = requests.get(url + city.replace(' ', '+'), timeout=REQUEST_TIMEOUT)

        # Weather was found for city: add record to 200 list
        cities200.append(_parse_weather(response.json()))

    # Only expected failures are caught (network error, non-JSON body,
    # missing fields), so KeyboardInterrupt can still stop a long run.
    except (requests.RequestException, ValueError, LookupError, TypeError):

        # Add city to 404 list
        cities404.append(city)

    # Print progress every 100 cities and on the last city
    if n_requests % 100 == 0 or n_requests == len(cities):
        sep = (' ' * 7) + '| ' # col separator
        print(f'{n_requests:10}', end=sep) # num API requests
        print(f'{len(cities200):10}', end=sep) # cities collected
        print(f'{len(cities404):10}') # cities not found

print('-' * len(header))
print('\nData collection complete.')

Starting data collection...

Num API requests | Cities collected | Cities not found
------------------------------------------------------
       100       |         91       |          9
       200       |        185       |         15
       300       |        277       |         23
       400       |        372       |         28
       500       |        465       |         35
       600       |        557       |         43
       700       |        651       |         49
       800       |        741       |         59
       900       |        833       |         67
      1000       |        925       |         75
      1100       |       1012       |         88
      1111       |       1023       |         88
------------------------------------------------------

Data collection complete.


### Convert data to dataframe

In [5]:
# Create dataframe for weather data (one row per collected city record)
weather_df = pd.DataFrame(cities200)

# info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
weather_df.info()
weather_df.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1023 entries, 0 to 1022
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   City         1023 non-null   object 
 1   Country      1023 non-null   object 
 2   Latitude     1023 non-null   float64
 3   Longitude    1023 non-null   float64
 4   Time         1023 non-null   object 
 5   Description  1023 non-null   object 
 6   Max Temp     1023 non-null   float64
 7   Humidity     1023 non-null   int64  
 8   Wind Speed   1023 non-null   float64
 9   Cloudiness   1023 non-null   int64  
 10  Rain         1023 non-null   float64
 11  Snow         1023 non-null   float64
dtypes: float64(6), int64(2), object(4)
memory usage: 96.0+ KB
None


Unnamed: 0,City,Country,Latitude,Longitude,Time,Description,Max Temp,Humidity,Wind Speed,Cloudiness,Rain,Snow
0,Garoowe,SO,8.4054,48.4845,2021-01-09 20:45:48,clear sky,66.99,64,16.51,0,0.0,0.0
1,Vendas Novas,PT,38.6771,-8.4579,2021-01-09 20:45:49,few clouds,43.0,65,11.5,20,0.0,0.0


In [7]:
# Persist the collected weather table, writing the index as an 'ID' column
csv_path = 'data/weather.csv'
weather_df.to_csv(csv_path, index_label='ID')

# Read the file back and show the first two rows to confirm what was written
saved_preview = pd.read_csv(csv_path).head(2)
saved_preview

Unnamed: 0,ID,City,Country,Latitude,Longitude,Time,Description,Max Temp,Humidity,Wind Speed,Cloudiness,Rain,Snow
0,0,Garoowe,SO,8.4054,48.4845,2021-01-09 20:45:48,clear sky,66.99,64,16.51,0,0.0,0.0
1,1,Vendas Novas,PT,38.6771,-8.4579,2021-01-09 20:45:49,few clouds,43.0,65,11.5,20,0.0,0.0
