In [1]:
# import dependencies
import random
import pandas as pd
import matplotlib.pyplot as plt
from citipy import citipy
import csv
import openweathermapy.core as owm
import json
import os
from config import api_key
from urllib.error import HTTPError

In [2]:
# Build 2000 random (latitude, longitude) pairs that are later mapped to their
# nearest cities. A dedicated, seeded generator makes Restart & Run All produce
# the same coordinates every time without touching the global `random` state.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)
# all latitudes are drawn first, then all longitudes
latitude = [rng.uniform(-90, 90) for _ in range(2000)]
longitude = [rng.uniform(-180, 180) for _ in range(2000)]
coordinates = list(zip(latitude, longitude))
# empty containers for the nearest-city objects, the city names and the
# country codes
city_data = []
cities = []
country = []


In [3]:
# Look up the nearest known city for every coordinate pair with citipy.
# The list is rebuilt from scratch (instead of appended to) so re-running this
# cell never leaves duplicate entries behind.
city_data = [citipy.nearest_city(lat, lon) for lat, lon in coordinates]


In [4]:
# Collect the name and the country code of every citipy result.
# Both lists are rebuilt from scratch so re-running this cell does not append
# the same cities a second time.
cities = [nearest.city_name for nearest in city_data]
country = [nearest.country_code for nearest in city_data]
    


In [5]:
# (empty cell) the citipy lookup and the list of cities are already built in the preceding cells

In [6]:
# Combine the city names and their country codes into a pandas DataFrame
cities_df = pd.DataFrame(dict(Name=cities, Country=country))

# Preview the first rows as a sanity check
cities_df.head()

Unnamed: 0,Country,Name
0,ca,la ronge
1,us,pacific grove
2,ar,ushuaia
3,ru,tyulgan
4,mx,acapulco


In [7]:
# Keep only the first occurrence of each city name, then renumber the rows
cities_df = (
    cities_df
    .drop_duplicates(subset=['Name'], keep="first")
    .reset_index(drop=True)
)
cities_df.head(35)

Unnamed: 0,Country,Name
0,ca,la ronge
1,us,pacific grove
2,ar,ushuaia
3,ru,tyulgan
4,mx,acapulco
5,no,roald
6,au,busselton
7,kr,seoul
8,mu,mahebourg
9,nz,bluff


In [8]:
# how many unique city names remain
len(cities_df.index)

792

In [9]:
# Keyword arguments shared by every Open Weather Map request:
# imperial units plus the API key imported from config
settings = dict(units="imperial", appid=api_key)

In [10]:
# Spot-check the Open Weather Map response for a single city (row label 33)
test = owm.get_current(cities_df.loc[33, 'Name'], **settings)
test

{'base': 'stations',
 'clouds': {'all': 1},
 'cod': 200,
 'coord': {'lat': 30.37, 'lon': -88.56},
 'dt': 1527868740,
 'id': 4440397,
 'main': {'humidity': 59,
  'pressure': 1016,
  'temp': 85.64,
  'temp_max': 89.6,
  'temp_min': 80.6},
 'name': 'Pascagoula',
 'sys': {'country': 'US',
  'id': 1699,
  'message': 0.0047,
  'sunrise': 1527850357,
  'sunset': 1527900722,
  'type': 1},
 'visibility': 16093,
 'weather': [{'description': 'light rain',
   'icon': '10d',
   'id': 500,
   'main': 'Rain'}],
 'wind': {'deg': 203.5, 'speed': 4.18}}

In [11]:
# Start with an empty list that will collect the JSON response of each city.
# (This rebinds city_data, which previously held the citipy results.)
city_data = list()

In [12]:
# Request the current weather of every city from Open Weather Map and keep the
# responses that come back. Cities OWM cannot find are skipped: they stay in
# cities_df and are simply absent from city_data.
# Starting from an empty list keeps the cell safe to re-run.
city_data = []

for city in cities_df['Name']:
    try:
        weather_data = owm.get_current(city, **settings)
    except HTTPError as err:
        if err.code == 404:
            # 404 is the "not found" status, i.e. OWM does not know this city
            print(f"{city} is not in OWM database.")
        else:
            # any other status (e.g. rejected API key, rate limit, server
            # error) is not a missing city, so do not report it as one
            print(f"{city} could not be retrieved (HTTP {err.code}: {err.reason}).")
    else:
        city_data.append(weather_data)

bengkulu is not in OWM database.
illoqqortoormiut is not in OWM database.
morvi is not in OWM database.
asau is not in OWM database.
taolanaro is not in OWM database.
urdzhar is not in OWM database.
mys shmidta is not in OWM database.
nizhneyansk is not in OWM database.
bargal is not in OWM database.
quixada is not in OWM database.
barentsburg is not in OWM database.
umzimvubu is not in OWM database.
falealupo is not in OWM database.
attawapiskat is not in OWM database.
belushya guba is not in OWM database.
vicuna is not in OWM database.
olafsvik is not in OWM database.
grand river south east is not in OWM database.
tumannyy is not in OWM database.
camana is not in OWM database.
tsihombe is not in OWM database.
kazalinsk is not in OWM database.
marmaron is not in OWM database.
sentyabrskiy is not in OWM database.
korla is not in OWM database.
barawe is not in OWM database.
samusu is not in OWM database.
ndende is not in OWM database.
longlac is not in OWM database.
galiwinku is not in 

In [13]:
# number of cities for which a weather response was collected
len(city_data)

719

In [14]:
# Persist the collected responses as pretty-printed JSON with sorted keys
with open('city_data.json', 'w') as json_file:
    json_file.write(json.dumps(city_data, sort_keys=True, indent=4))

In [16]:
# Pull the fields of interest out of every OWM response, one list per column
city = [report['name'] for report in city_data]
country = [report['sys']['country'] for report in city_data]
latitude = [report['coord']['lat'] for report in city_data]
longitude = [report['coord']['lon'] for report in city_data]
temperature = [report['main']['temp'] for report in city_data]
humidity = [report['main']['humidity'] for report in city_data]
cloudiness = [report['clouds']['all'] for report in city_data]
ws = [report['wind']['speed'] for report in city_data]

# Assemble the columns into a single DataFrame (insertion order is kept)
final_cities_df = pd.DataFrame(dict([
    ('Name', city),
    ('Country', country),
    ('Latitude', latitude),
    ('Longitude', longitude),
    ('Temperature', temperature),
    ('Humidity', humidity),
    ("% Cloudiness", cloudiness),
    ('Wind Speed', ws),
]))

final_cities_df.head(20)

Unnamed: 0,% Cloudiness,Country,Humidity,Latitude,Longitude,Name,Temperature,Wind Speed
0,90,CA,76,55.1,-105.3,La Ronge,48.2,13.87
1,1,US,67,36.62,-121.92,Pacific Grove,56.28,4.7
2,40,AR,65,-54.81,-68.31,Ushuaia,45.59,33.33
3,0,RU,62,52.34,56.17,Tyulgan,40.71,11.12
4,75,MX,66,16.86,-99.88,Acapulco,89.6,5.82
5,0,NO,93,62.58,6.12,Roald,68.97,11.41
6,0,AU,100,-33.64,115.35,Busselton,53.58,6.08
7,0,KR,63,37.57,126.98,Seoul,64.38,2.24
8,40,MU,88,-20.41,57.7,Mahebourg,69.8,2.24
9,0,AU,79,-23.58,149.07,Bluff,41.61,7.2


In [18]:
# Put the columns into presentation order
final_cities_df = final_cities_df.loc[:, [
    "Name",
    'Country',
    'Latitude',
    'Longitude',
    "Temperature",
    'Humidity',
    "Wind Speed",
    '% Cloudiness',
]]
final_cities_df.head()

Unnamed: 0,Name,Country,Latitude,Longitude,Temperature,Humidity,Wind Speed,% Cloudiness
0,La Ronge,CA,55.1,-105.3,48.2,76,13.87,90
1,Pacific Grove,US,36.62,-121.92,56.28,67,4.7,1
2,Ushuaia,AR,-54.81,-68.31,45.59,65,33.33,40
3,Tyulgan,RU,52.34,56.17,40.71,62,11.12,0
4,Acapulco,MX,16.86,-99.88,89.6,66,5.82,75
