# Deliverable 1 - Collecting Datapoints From an API

In [1]:
#Import Dependencies
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import WEATHER_API_KEY

In [2]:
# Base endpoint for OpenWeatherMap current-weather lookups (imperial units).
# HTTPS is used because the API key is part of the query string and would
# otherwise travel in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + WEATHER_API_KEY

In [3]:
# Number of random (latitude, longitude) pairs to draw.
num_locs = 2000

# Sample uniformly over the full latitude and longitude ranges. Both sizes are
# driven by num_locs so the sample count only has to be changed in one place.
lats = np.random.uniform(-90, 90, size=num_locs)
lngs = np.random.uniform(-180, 180, size=num_locs)

# Pair the coordinates up. zip() returns a one-shot iterator, which is why the
# cell output is just the object's repr.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x7fa820d9be00>

In [4]:
# Materialise the coordinate pairs as a list of (lat, lng) tuples.
# The pairs are rebuilt from lats/lngs rather than by draining a previously
# created zip iterator: an iterator can only be consumed once, so draining it
# would make a second run of this cell silently produce an empty list.
coordinates = list(zip(lats, lngs))

In [5]:
# Resolve every coordinate pair to its nearest city and keep each city once.
# dict.fromkeys de-duplicates while preserving first-seen order and uses hash
# lookups, whereas testing `city not in cities` against a list rescans the
# whole list for every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

# Sanity checks:
print (f"Initial count of cities: {len(cities)}")

Initial count of cities: 728


In [6]:

# Base URL for the OpenWeather API call. HTTPS keeps the API key, which is part
# of the query string, from being sent in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + WEATHER_API_KEY

# Seconds to wait on a single request before treating the city as failed.
REQUEST_TIMEOUT = 10

# Failures that mean "no usable weather record for this city": network/HTTP
# problems, a response body that is not JSON, or JSON that lacks one of the
# fields read below. Listing them explicitly (instead of a bare `except:`)
# lets KeyboardInterrupt through, so a long run can still be stopped by hand.
SKIPPABLE_ERRORS = (
    requests.exceptions.RequestException,
    KeyError,
    IndexError,
    TypeError,
    ValueError,
)

print("Beginning Data Retrieval")
print("---------------------------\n")

# Create an empty list to hold the weather data.
city_data = []

# Counters used only for the progress log (records are logged in sets of 50).
record_count = 1
set_count = 1

# Loop over all cities.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1

    print (f"Processing record {record_count} of set {set_count} | {city}")
    record_count += 1

    try:
        # One request per city. Passing the name through `params` lets
        # requests URL-encode it (spaces, accents, punctuation) instead of
        # hand-replacing spaces with "+".
        response = requests.get(base_url, params={"q": city}, timeout=REQUEST_TIMEOUT)
        city_weather = response.json()

        # Parse out the needed data.
        record = {"City": city,
                  "Lat": city_weather["coord"]["lat"],
                  "Lng": city_weather["coord"]["lon"],
                  "Max Temp": city_weather["main"]["temp_max"],
                  "Humidity": city_weather["main"]["humidity"],
                  "Cloudiness": city_weather["clouds"]["all"],
                  "Wind Speed": city_weather["wind"]["speed"],
                  "Country": city_weather["sys"]["country"],
                  "Current Description": city_weather["weather"][0]["description"]}

    # Skip cities whose lookup failed.
    except SKIPPABLE_ERRORS:
        print("City not found. Skipping...")
        continue

    # NOTE(review): each city is recorded twice - first with the raw name, then
    # with the title-cased name - because that is the shape of city_data this
    # cell has always produced and the de-duplication cells further down were
    # written against it. Both rows now come from the same response instead of
    # two separate API calls. Confirm the clean-up cells tolerate single rows
    # before collapsing this to one append.
    city_data.append(record)
    city_data.append({**record, "City": city.title()})

#Indicate Data Loading complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval
---------------------------

Processing record 1 of set 1 | tuatapere
Processing record 2 of set 1 | butaritari
Processing record 3 of set 1 | ugento
Processing record 4 of set 1 | bredasdorp
Processing record 5 of set 1 | podporozhye
Processing record 6 of set 1 | ust-nera
Processing record 7 of set 1 | severo-kurilsk
Processing record 8 of set 1 | barrow
Processing record 9 of set 1 | hilo
Processing record 10 of set 1 | brigantine
Processing record 11 of set 1 | melfi
Processing record 12 of set 1 | rikitea
Processing record 13 of set 1 | cherskiy
Processing record 14 of set 1 | lorengau
Processing record 15 of set 1 | georgetown
Processing record 16 of set 1 | wittenheim
Processing record 17 of set 1 | mataura
Processing record 18 of set 1 | kavieng
Processing record 19 of set 1 | illoqqortoormiut
City not found. Skipping...
Processing record 20 of set 1 | albany
Processing record 21 of set 1 | rio gallegos
Processing record 22 of set 1 | klaksvik
Processin

Processing record 40 of set 4 | brae
Processing record 41 of set 4 | ambon
Processing record 42 of set 4 | rongcheng
Processing record 43 of set 4 | kamaishi
Processing record 44 of set 4 | vestmannaeyjar
Processing record 45 of set 4 | taunggyi
Processing record 46 of set 4 | roanoke rapids
Processing record 47 of set 4 | eten
Processing record 48 of set 4 | tocopilla
Processing record 49 of set 4 | lavrentiya
Processing record 50 of set 4 | tigzirt
Processing record 1 of set 5 | lebu
Processing record 2 of set 5 | saint george
Processing record 3 of set 5 | bahia blanca
Processing record 4 of set 5 | mecca
Processing record 5 of set 5 | zavyalovo
Processing record 6 of set 5 | hattiesburg
Processing record 7 of set 5 | kondinskoye
Processing record 8 of set 5 | karratha
Processing record 9 of set 5 | novooleksiyivka
Processing record 10 of set 5 | the valley
Processing record 11 of set 5 | ribeira grande
Processing record 12 of set 5 | anantapur
Processing record 13 of set 5 | whyall

Processing record 32 of set 8 | vaitupu
City not found. Skipping...
Processing record 33 of set 8 | sawakin
Processing record 34 of set 8 | takoradi
Processing record 35 of set 8 | mouila
Processing record 36 of set 8 | ahuimanu
Processing record 37 of set 8 | qrendi
Processing record 38 of set 8 | mrirt
City not found. Skipping...
Processing record 39 of set 8 | sao raimundo das mangabeiras
Processing record 40 of set 8 | zhigansk
Processing record 41 of set 8 | lata
Processing record 42 of set 8 | maceio
Processing record 43 of set 8 | cessnock
Processing record 44 of set 8 | grand river south east
City not found. Skipping...
Processing record 45 of set 8 | yar-sale
Processing record 46 of set 8 | palu
Processing record 47 of set 8 | kavaratti
Processing record 48 of set 8 | tabiauea
City not found. Skipping...
Processing record 49 of set 8 | denpasar
Processing record 50 of set 8 | botwood
Processing record 1 of set 9 | nhamunda
Processing record 2 of set 9 | port blair
Processing r

Processing record 21 of set 12 | presidencia roque saenz pena
Processing record 22 of set 12 | dunedin
Processing record 23 of set 12 | nyurba
Processing record 24 of set 12 | dongkan
Processing record 25 of set 12 | iracoubo
Processing record 26 of set 12 | akyab
Processing record 27 of set 12 | agadir
Processing record 28 of set 12 | mantua
Processing record 29 of set 12 | tome-acu
City not found. Skipping...
Processing record 30 of set 12 | kushiro
Processing record 31 of set 12 | sulangan
Processing record 32 of set 12 | minab
Processing record 33 of set 12 | kidal
Processing record 34 of set 12 | saint-gabriel
Processing record 35 of set 12 | naryan-mar
Processing record 36 of set 12 | danilovka
Processing record 37 of set 12 | kapit
Processing record 38 of set 12 | vao
Processing record 39 of set 12 | ordzhonikidze
Processing record 40 of set 12 | viedma
Processing record 41 of set 12 | port moresby
Processing record 42 of set 12 | havre-saint-pierre
Processing record 43 of set 1

In [7]:
# Number of weather records collected. Comparing this with the initial city
# count printed earlier gives a sense of how many cities were skipped.
len(city_data)

1366

In [9]:
# Turn the list of per-city dictionaries into a pandas DataFrame and preview
# the first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Current Description
0,tuatapere,-46.1333,167.6833,54.12,60,39,8.97,NZ,scattered clouds
1,Tuatapere,-46.1333,167.6833,54.12,60,39,8.97,NZ,scattered clouds
2,butaritari,3.0707,172.7902,82.71,71,84,15.77,KI,broken clouds
3,Butaritari,3.0707,172.7902,82.71,71,84,15.77,KI,broken clouds
4,ugento,39.9263,18.1574,56.7,65,100,26.66,IT,overcast clouds
5,Ugento,39.9263,18.1574,56.7,65,100,26.66,IT,overcast clouds
6,bredasdorp,-34.5322,20.0403,50.29,90,100,4.41,ZA,overcast clouds
7,Bredasdorp,-34.5322,20.0403,50.29,90,100,4.41,ZA,overcast clouds
8,podporozhye,60.9112,34.1706,34.63,94,9,2.8,RU,clear sky
9,Podporozhye,60.9112,34.1706,34.63,94,9,2.8,RU,clear sky


In [30]:
# Put the identifying columns (city, country, coordinates) first, followed by
# the weather measurements and the text description.
new_col_order = [
    "City",
    "Country",
    "Lat",
    "Lng",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
    "Current Description",
]
city_data_df = city_data_df.loc[:, new_col_order]
city_data_df.head(n=10)


Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
0,tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds
1,Tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds
2,butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds
3,Butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds
4,ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds
5,Ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds
6,bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds
7,Bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds
8,podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky
9,Podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky


In [31]:
#Working to clean data and remove duplicates
# Flag a row as a duplicate when an earlier row has the same coordinates.
# Both Lat and Lng are compared: latitude alone would also flag distinct
# cities that merely lie on the same latitude.
city_data_df["is_duplicate"] = city_data_df.duplicated(subset=["Lat", "Lng"])

city_data_df.head(10)

Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description,is_duplicate
0,tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds,False
1,Tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds,True
2,butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds,False
3,Butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds,True
4,ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds,False
5,Ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds,True
6,bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds,False
7,Bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds,True
8,podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky,False
9,Podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky,True


In [40]:
# Dropping duplicates: keep exactly one row per (Lat, Lng) location.
# keep="last" retains the last row (in frame order) for each location. Unlike
# keeping only the rows whose is_duplicate flag is True, this does not discard
# a location that happens to have a single row. Rebinding the name instead of
# using inplace=True keeps the cell safe to re-run.
city_data_df = city_data_df.drop_duplicates(subset=["Lat", "Lng"], keep="last")
city_data_df.head()

Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description,is_duplicate
1,Tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds,True
3,Butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds,True
5,Ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds,True
7,Bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds,True
9,Podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky,True


In [41]:
# Remove the helper column now that de-duplication is done.
# `columns=` replaces the positional axis argument, which pandas deprecated
# (it emits a FutureWarning) and no longer accepts in 2.x. errors="ignore"
# lets the cell be re-run after the column is already gone.
city_data_df = city_data_df.drop(columns="is_duplicate", errors="ignore")
city_data_df.head()

  city_data_df = city_data_df.drop('is_duplicate', 1)


Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
1,Tuatapere,NZ,-46.1333,167.6833,54.12,60,39,8.97,scattered clouds
3,Butaritari,KI,3.0707,172.7902,82.71,71,84,15.77,broken clouds
5,Ugento,IT,39.9263,18.1574,56.7,65,100,26.66,overcast clouds
7,Bredasdorp,ZA,-34.5322,20.0403,50.29,90,100,4.41,overcast clouds
9,Podporozhye,RU,60.9112,34.1706,34.63,94,9,2.8,clear sky


In [42]:
#Creating output file path
output_path = "Weather_Database/WeatherPy_Database.csv"

# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

#Exporting city_data
city_data_df.to_csv(output_path, index_label = "City_ID")

In [43]:
# Both names hold the same CSV path that the export cell writes to.
file_name = file_name_output = "Weather_Database/WeatherPy_Database.csv"