In [12]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path for the collected city weather table (CSV)
output_data_file = "../output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

In [2]:
# Sample random coordinates: latitudes first, then longitudes, so the random
# stream is consumed in the same order on every run
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=15) #size=1500
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=15) #size=1500
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every (lat, lng) pair. Dict keys discard
# repeated names while keeping the order in which each city first appeared.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Report how many distinct cities were found
print(len(cities))

13


In [3]:
# Build the weather table: one row per city, plus blank placeholder columns
# (empty strings) that the retrieval step fills in later
placeholder_columns = ['Lat', 'Lng', 'MaxTemp', 'Humidity',
                       'Cloudiness', 'WindSpeed', 'Country', 'Date']
weather_df = pd.DataFrame({'City': cities}).assign(
    **{column: "" for column in placeholder_columns}
)


Unnamed: 0,City,Lat,Lng,MaxTemp,Humidity,Cloudiness,WindSpeed,Country,Date
0,albany,,,,,,,,
1,port elizabeth,,,,,,,,
2,severo-kurilsk,,,,,,,,
3,puerto ayora,,,,,,,,
4,hirado,,,,,,,,


In [25]:
# Set units to imperial
units = 'imperial'

# The endpoint is identical for every city, so define it once outside the loop
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Iterate over the rows in the df and fill the columns.
# For each city: request its current weather, pause one second between calls,
# then copy the fields of interest into that city's row. Cities whose request
# fails or whose response lacks the expected fields keep their blank placeholders.
print("Beginning data retrieval")
print("-------------------------")
for index, row in weather_df.iterrows():
    city = row['City']

    # Let requests build the query string so city names containing spaces or
    # special characters are URL-encoded correctly
    query = {'q': city, 'appid': weather_api_key, 'units': units}

    try:
        # timeout keeps a stalled connection from hanging the whole run
        response = requests.get(base_url, params=query, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Network failure, timeout, or a non-JSON body. Only the exception type
        # is printed because the full message can contain the URL (and API key).
        print(f"Request for {city} failed ({type(error).__name__}). Skipping...")
        response = None

    # Pause between calls regardless of the outcome
    time.sleep(1)

    if response is None:
        continue

    print(f"Processing record {index}|{city}")
    try:
        # Extract every field before writing anything, so a missing key
        # cannot leave the row half-filled
        record = {
            'Lat': response['coord']['lat'],
            'Lng': response['coord']['lon'],
            'MaxTemp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'WindSpeed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'Date': response['dt'],
        }
    except (KeyError, TypeError):
        # Expected fields are absent or the payload has an unexpected shape
        # (e.g. an error reply for an unknown city)
        print(f"{city} not found. Skipping...")
        continue

    for column, value in record.items():
        weather_df.loc[index, column] = value

weather_df

Beginning data retrieval
-------------------------
Processing record 0|albany
Processing record 1|port elizabeth
Processing record 2|severo-kurilsk
Processing record 3|puerto ayora
Processing record 4|hirado
Processing record 5|jeremie
Processing record 6|kaeo
Processing record 7|rio grande
Processing record 8|hermanus
Processing record 9|lovozero
Processing record 10|qaanaaq
Processing record 11|saskylakh
Processing record 12|torbay


Unnamed: 0,City,Lat,Lng,MaxTemp,Humidity,Cloudiness,WindSpeed,Country,Date
0,albany,42.6001,-73.9662,18.0,50,19,1.01,US,1612763404
1,port elizabeth,-33.918,25.5701,66.2,77,0,17.27,ZA,1612763577
2,severo-kurilsk,50.6789,156.125,24.57,92,92,17.54,RU,1612763579
3,puerto ayora,-0.7393,-90.3518,77.0,95,83,4.18,EC,1612763580
4,hirado,33.3597,129.553,51.8,35,40,20.71,JP,1612763584
5,jeremie,18.65,-74.1167,76.15,76,27,5.66,HT,1612763586
6,kaeo,-35.1,173.783,72.0,72,84,5.99,NZ,1612763588
7,rio grande,-32.035,-52.0986,63.0,93,19,9.95,BR,1612763589
8,hermanus,-34.4187,19.2345,61.0,90,25,1.99,ZA,1612763590
9,lovozero,68.005,35.0117,14.72,96,100,17.63,RU,1612763592


In [19]:
# Write the weather table to the configured CSV path
weather_df.to_csv(path_or_buf=output_data_file)

# Show the first five rows as the cell output
weather_df.head(n=5)

City          object
Lat           object
Lng           object
MaxTemp       object
Humidity      object
Cloudiness    object
WindSpeed     object
Country       object
Date          object
dtype: object

In [26]:
# Convert to floats
# Rows for cities that were skipped during retrieval still hold the ""
# placeholder; errors="coerce" turns those into NaN instead of raising.
# Assigning with weather_df[column] = ... replaces the column outright, so the
# new float dtype sticks (an in-place .loc[:, column] write can keep the old
# object dtype on recent pandas versions).
float_columns = ["Lat", "Lng", "MaxTemp", "Humidity", "Cloudiness", "WindSpeed", "Date"]
for column in float_columns:
    weather_df[column] = pd.to_numeric(weather_df[column], errors="coerce").astype('float')


In [30]:
# Columns to summarise (everything except the City and Country labels)
stat_columns = ['Lat', 'Lng', 'MaxTemp', 'Humidity', 'Cloudiness', 'WindSpeed', 'Date']
summary_stats = weather_df.loc[:, stat_columns]

# Descriptive statistics for the selected columns, shown as the cell output
summary_stats.describe(include='all')

Unnamed: 0,Lat,Lng,MaxTemp,Humidity,Cloudiness,WindSpeed,Date
count,13.0,13.0,13.0,13.0,13.0,13.0,13.0
mean,21.088462,18.517931,41.366923,78.615385,55.153846,10.440769,1612764000.0
std,43.520926,96.436612,32.901214,18.048723,36.610213,6.969859,84.86211
min,-35.1,-90.3518,-34.65,35.0,0.0,1.01,1612763000.0
25%,-32.035,-69.3632,18.0,76.0,25.0,5.66,1612764000.0
50%,33.3597,19.2345,51.8,81.0,40.0,9.66,1612764000.0
75%,50.6789,114.0833,66.2,92.0,90.0,17.54,1612764000.0
max,77.484,173.7833,77.0,96.0,100.0,20.71,1612764000.0
