In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key

from api_keys import weather_api_key

# Incorporated citipy to determine city using latitude and longitude

from citipy import citipy

# Destination path for the exported city weather table

output_data_file = "output_data/cities.csv"

# (low, high) bounds used when drawing random latitudes and longitudes

lat_range, lng_range = (-90, 90), (-180, 180)

In [13]:
# Draw 1500 random (lat, lng) pairs across the configured ranges

lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination, keeping only the
# first occurrence of each name. `seen_cities` gives constant-time duplicate
# checks, while `cities` stays a list so discovery order is preserved.

cities = []
seen_cities = set()

for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list

    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count

len(cities)

629

In [5]:
# Base URL for the OpenWeatherMap current-weather API call. The key is taken
# from the imported `weather_api_key` instead of being embedded in the
# notebook, and the request goes over HTTPS so the key is not sent in clear text.

url = f"https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID={weather_api_key}"

# Seconds to wait for a response before giving up on a city

request_timeout = 10

# Empty list for holding the city data

city_data = []

# Print to logger

print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters

record_count = 1
set_count = 1

# Loop through all the cities in the list

for i, city in enumerate(cities):

    # Group cities in sets of 50 in order to log them; numbering restarts at 1
    # so every set is labelled the same way as the first one

    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    # Log the record number, set number, and city name

    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count

    record_count += 1

    # Run an API request for each of the cities

    try:

        # Passing the city through `params` lets requests URL-encode names
        # that contain spaces or other special characters

        response = requests.get(url, params={"q": city}, timeout=request_timeout)

        # Retrieve data from JSON

        city_weather = response.json()

        # Parse out the coordinates, max temp, humidity, cloudiness, wind
        # speed, country, and timestamp

        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]

        # Append the City information into city_data list

        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city when the request fails or the response lacks the expected
    # fields (KeyError). Only these errors are caught so that a
    # KeyboardInterrupt can still stop the loop.

    except (requests.exceptions.RequestException, KeyError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete

print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")
        

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | dingzhou
Processing Record 2 of Set 1 | rikitea
Processing Record 3 of Set 1 | yellowknife
Processing Record 4 of Set 1 | manado
Processing Record 5 of Set 1 | sentyabrskiy
City not found. Skipping...
Processing Record 6 of Set 1 | kaspiysk
Processing Record 7 of Set 1 | schermbeck
Processing Record 8 of Set 1 | lebu
Processing Record 9 of Set 1 | joshimath
Processing Record 10 of Set 1 | lompoc
Processing Record 11 of Set 1 | bluff
Processing Record 12 of Set 1 | ostrovnoy
Processing Record 13 of Set 1 | saint-philippe
Processing Record 14 of Set 1 | wokha
Processing Record 15 of Set 1 | manono
Processing Record 16 of Set 1 | barrow
Processing Record 17 of Set 1 | chuy
Processing Record 18 of Set 1 | arona
Processing Record 19 of Set 1 | saint george
Processing Record 20 of Set 1 | saint-augustin
Processing Record 21 of Set 1 | tuktoyaktuk
Processing Record 22 of Set 1 | hobart
Processing Record

Processing Record 36 of Set 4 | kalaleh
Processing Record 37 of Set 4 | codrington
Processing Record 38 of Set 4 | san jose
Processing Record 39 of Set 4 | manicore
Processing Record 40 of Set 4 | vegreville
Processing Record 41 of Set 4 | tasiilaq
Processing Record 42 of Set 4 | newala
Processing Record 43 of Set 4 | port-gentil
Processing Record 44 of Set 4 | husavik
Processing Record 45 of Set 4 | coria
Processing Record 46 of Set 4 | touros
Processing Record 47 of Set 4 | victoria
Processing Record 48 of Set 4 | curaca
Processing Record 49 of Set 4 | umm lajj
Processing Record 0 of Set 5 | coronado
Processing Record 1 of Set 5 | mar del plata
Processing Record 2 of Set 5 | padang
Processing Record 3 of Set 5 | flinders
Processing Record 4 of Set 5 | baruun-urt
Processing Record 5 of Set 5 | harnosand
Processing Record 6 of Set 5 | mayor pablo lagerenza
Processing Record 7 of Set 5 | namtsy
Processing Record 8 of Set 5 | viedma
Processing Record 9 of Set 5 | lazaro cardenas
Processi

Processing Record 26 of Set 8 | fare
Processing Record 27 of Set 8 | aykhal
Processing Record 28 of Set 8 | ancud
Processing Record 29 of Set 8 | lasa
Processing Record 30 of Set 8 | fallon
Processing Record 31 of Set 8 | kjollefjord
Processing Record 32 of Set 8 | fukue
Processing Record 33 of Set 8 | jaguaruna
Processing Record 34 of Set 8 | maragogi
Processing Record 35 of Set 8 | biak
Processing Record 36 of Set 8 | kamenskoye
City not found. Skipping...
Processing Record 37 of Set 8 | deputatskiy
Processing Record 38 of Set 8 | barda
Processing Record 39 of Set 8 | dorgos
Processing Record 40 of Set 8 | abha
Processing Record 41 of Set 8 | yatou
Processing Record 42 of Set 8 | khasan
Processing Record 43 of Set 8 | axim
Processing Record 44 of Set 8 | mezen
Processing Record 45 of Set 8 | coquimbo
Processing Record 46 of Set 8 | zhigansk
Processing Record 47 of Set 8 | wanning
Processing Record 48 of Set 8 | thilogne
City not found. Skipping...
Processing Record 49 of Set 8 | dzhu

Processing Record 15 of Set 12 | santa cruz
Processing Record 16 of Set 12 | ecroignard
Processing Record 17 of Set 12 | pueblo
Processing Record 18 of Set 12 | carbondale
Processing Record 19 of Set 12 | ahuimanu
Processing Record 20 of Set 12 | dickinson
Processing Record 21 of Set 12 | igra
Processing Record 22 of Set 12 | bure
Processing Record 23 of Set 12 | bowen
Processing Record 24 of Set 12 | galiwinku
City not found. Skipping...
Processing Record 25 of Set 12 | blackwater
Processing Record 26 of Set 12 | ankang
Processing Record 27 of Set 12 | altamira
Processing Record 28 of Set 12 | turukhansk
Processing Record 29 of Set 12 | kuche
City not found. Skipping...
Processing Record 30 of Set 12 | donghai
Processing Record 31 of Set 12 | manzanillo
Processing Record 32 of Set 12 | breves
Processing Record 33 of Set 12 | rio pardo
Processing Record 34 of Set 12 | marovoay
Processing Record 35 of Set 12 | ostersund
Processing Record 36 of Set 12 | baculin
Processing Record 37 of Se

In [6]:
 # Build a DataFrame from the list of per-city dictionaries

city_data_df = pd.DataFrame(data=city_data)

# Non-null record count for each column

city_data_df.count()

City          541
Lat           541
Lng           541
Max Temp      541
Humidity      541
Cloudiness    541
Wind Speed    541
Country       541
Date          541
dtype: int64

In [7]:
 # Display the first rows of the city DataFrame (city_data_df is the frame
# built from the API results)

city_data_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,dingzhou,38.5131,114.9956,48.31,65,100,5.82,CN,1635205439
1,rikitea,-23.1203,-134.9692,75.45,72,69,18.92,PF,1635205439
2,yellowknife,62.456,-114.3525,42.42,94,90,1.99,CA,1635205439
3,manado,1.487,124.8455,83.17,83,20,1.14,ID,1635205440
4,kaspiysk,42.8816,47.6392,44.78,81,90,11.01,RU,1635205440


In [8]:
# Summary statistics for the numeric columns of the city DataFrame

city_data_df.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,541.0,541.0,541.0,541.0,541.0,541.0,541.0
mean,19.018219,19.440757,60.342717,74.645102,58.275416,7.469242,1635206000.0
std,32.747177,91.380311,20.459238,18.632601,37.622195,5.681098,123.0501
min,-54.8,-175.2,-22.65,10.0,0.0,0.0,1635205000.0
25%,-7.95,-61.3003,47.23,65.0,20.0,3.09,1635205000.0
50%,21.4447,21.8865,64.54,78.0,70.0,6.15,1635206000.0
75%,46.5389,104.8887,77.31,89.0,95.0,9.95,1635206000.0
max,78.2186,179.3167,91.17,100.0,100.0,35.48,1635206000.0


In [9]:
 # Get the indices of cities that have humidity over 100%

dirty_city_data = city_data_df[city_data_df["Humidity"] > 100].index

dirty_city_data

Int64Index([], dtype='int64')

In [10]:
 # Make a new DataFrame equal to the city data with all humidity outliers dropped by index.
# `drop` returns a new DataFrame by default, so city_data_df is left untouched and the
# filtered copy is called "clean_city_data".

clean_city_data = city_data_df.drop(index=dirty_city_data)
clean_city_data.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,dingzhou,38.5131,114.9956,48.31,65,100,5.82,CN,1635205439
1,rikitea,-23.1203,-134.9692,75.45,72,69,18.92,PF,1635205439
2,yellowknife,62.456,-114.3525,42.42,94,90,1.99,CA,1635205439
3,manado,1.487,124.8455,83.17,83,20,1.14,ID,1635205440
4,kaspiysk,42.8816,47.6392,44.78,81,90,11.01,RU,1635205440


In [12]:
# Pull the fields of interest out of the cleaned frame as individual Series

lats, max_temps, humidity, cloudiness, wind_speed = (
    clean_city_data[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

# Write the cleaned table to CSV, labelling the index column "City_ID"

clean_city_data.to_csv(path_or_buf=output_data_file, index_label="City_ID")