In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json 

# Import API key
import api_keys

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV output file
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitudes and longitudes, in degrees
lat_range, lng_range = (-90, 90), (-180, 180)


## Generate Cities List

In [None]:
# Draw 1500 random coordinate pairs inside the bounds declared in the setup
# cell (lat_range / lng_range) instead of repeating the limits as literals.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# `cities` keeps the first-seen order that later cells iterate over, while
# `seen_cities` makes the duplicate check constant-time instead of a list scan.
cities = []
seen_cities = set()
for lat_value, lng_value in lat_lngs:
    city = citipy.nearest_city(lat_value, lng_value).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm it is large enough
len(cities)

## Perform API Calls

In [None]:
# OpenWeatherMap API Key
api_key = api_keys.api_key

# Starting URL for Weather Map API Call
base_url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID="
url = base_url + api_key

# Same URL with the key masked. Only this version is printed, so the key does
# not end up in the saved notebook output.
masked_url = base_url + "<hidden>"

# Seconds to wait for a response before giving up on a city
request_timeout = 10


def _lookup(payload, *keys):
    """Walk `payload` through `keys` and return the value found there.

    Returns np.nan when any level is missing or is not subscriptable, so a
    failed lookup produces a row that the later dropna() step can discard.
    """
    node = payload
    for key in keys:
        try:
            node = node[key]
        except (KeyError, IndexError, TypeError):
            return np.nan
    return node


# One list per output column; every list receives exactly one entry per city
lat = []
lng = []
temp = []
humidity = []
clouds = []
wind_speed = []
date = []
country = []
city_list = []

print("---------------------")

# NOTE(review): calls are not throttled. If the API plan enforces a request
# quota, rejected calls will show up as NaN rows - confirm against the plan.
for city in cities:
    try:
        # Passing the city through `params` lets requests percent-encode it,
        # so names containing spaces or reserved characters stay intact.
        response = requests.get(url, params={"q": city}, timeout=request_timeout)
        weather_json = response.json()
    except (requests.RequestException, ValueError) as error:
        # Only the exception type is printed: the message of a requests
        # error can contain the full URL, API key included.
        print("Request failed for " + city + " (" + type(error).__name__ + ")")
        weather_json = {}

    # Missing fields become NaN so all lists stay the same length
    lat.append(_lookup(weather_json, "coord", "lat"))
    lng.append(_lookup(weather_json, "coord", "lon"))
    temp.append(_lookup(weather_json, "main", "temp"))
    humidity.append(_lookup(weather_json, "main", "humidity"))
    clouds.append(_lookup(weather_json, "clouds", "all"))
    wind_speed.append(_lookup(weather_json, "wind", "speed"))
    date.append(_lookup(weather_json, "dt"))
    country.append(_lookup(weather_json, "sys", "country"))
    city_list.append(_lookup(weather_json, "name"))

    # Print processing log
    if isinstance(weather_json, dict) and "name" in weather_json and "id" in weather_json:
        city_name = weather_json["name"]
        city_id = weather_json["id"]

        print("Processing " + str(city_name) + " | City ID: " + str(city_id))
        print(masked_url + "&q=" + city)
    else:
        print("Skipping.. no city information")

print("====================================================")




In [None]:
# Uncomment the line below to pretty-print the most recent API response and see where each required field is located
#print(json.dumps(weather_json,indent=4))

In [None]:
# Assemble the per-city lists into a single table; the key order below
# determines the column order of the data frame.
weather_df = pd.DataFrame(
    {
        "City": city_list,
        "Country": country,
        "Date": date,
        "Latitude": lat,
        "Longitude": lng,
        "Temperature": temp,
        "Humidity": humidity,
        "Cloudiness": clouds,
        "Wind Speed": wind_speed,
    }
)
weather_df.head()


In [None]:
# Columns kept in the final table, in output order. Selecting them explicitly
# leaves out the "index" column that reset_index() inserts.
report_columns = [
    "City",
    "Country",
    "Date",
    "Latitude",
    "Longitude",
    "Temperature",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
]

# Discard every row that has at least one NaN, then renumber the rows from 0
cleaned_weather_df = weather_df.dropna(axis=0, how="any").reset_index()
new_weather_df = cleaned_weather_df[report_columns]

# Show floats with two decimals so the date is not displayed in scientific form
pd.set_option("display.float_format", '{:.2f}'.format)

# Write the cleaned table to disk
new_weather_df.to_csv("../output_data/cities.csv")

# Preview the first ten rows
new_weather_df.head(10)

In [None]:
# Turn the Unix timestamp of the first row into a readable local-time string;
# the plot cells embed it in their titles.
first_timestamp = new_weather_df.loc[0, "Date"]
date = time.ctime(first_timestamp)

# Marker styling shared by the scatter plots
color, edgecolor, alpha = "salmon", "black", 1

In [None]:
# Latitude vs. temperature scatter, drawn on the current axes
marker_style = {"facecolor": color, "edgecolor": edgecolor, "alpha": alpha}
ax = plt.gca()
ax.scatter(new_weather_df["Latitude"], new_weather_df["Temperature"], **marker_style)

# Fixed axis limits, labels and a light grid
ax.set_ylim(0, 120)
ax.set_xlim(-80, 100)
ax.set_title(f"City Latitude vs. Temperature ({date})")
ax.set_xlabel("Latitude")
ax.set_ylabel("Temperature (F)")
ax.grid(alpha=0.5)

# Save the figure to disk
plt.savefig("../Images/temperature.png")

In [None]:
# Latitude vs. humidity scatter, drawn on the current axes
marker_style = {"facecolor": color, "edgecolor": edgecolor, "alpha": alpha}
ax = plt.gca()
ax.scatter(new_weather_df["Latitude"], new_weather_df["Humidity"], **marker_style)

# Fixed axis limits, labels and a light grid
ax.set_ylim(-20, 120)
ax.set_xlim(-80, 100)
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.set_title(f"City Latitude vs. Humidity ({date})")
ax.grid(alpha=0.5)

# Save the figure to disk
plt.savefig("../Images/humidity.png")

In [None]:
#zooming into part of the plot for observations
plt.scatter(
    new_weather_df["Latitude"],
    new_weather_df["Humidity"], 
    facecolor=color, 
    edgecolor=edgecolor,
    alpha=alpha)
plt.ylim(20,120)
plt.xlim(-20,20)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs. Humidity (Zoomed In for observations around equator)")
plt.grid(alpha=0.5)

In [None]:
# Latitude vs. cloudiness scatter, drawn on the current axes
marker_style = {"facecolor": color, "edgecolor": edgecolor, "alpha": alpha}
ax = plt.gca()
ax.scatter(new_weather_df["Latitude"], new_weather_df["Cloudiness"], **marker_style)

# Fixed axis limits, labels and a light grid
ax.set_ylim(-20, 120)
ax.set_xlim(-80, 100)
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title(f"City Latitude vs. Cloudiness ({date})")
ax.grid(alpha=0.5)

# Save the figure to disk
plt.savefig("../Images/cloudiness.png")

In [None]:
# Latitude vs. wind speed scatter, drawn on the current axes
marker_style = {"facecolor": color, "edgecolor": edgecolor, "alpha": alpha}
ax = plt.gca()
ax.scatter(new_weather_df["Latitude"], new_weather_df["Wind Speed"], **marker_style)

# Fixed axis limits, labels and a light grid
ax.set_ylim(-5, 40)
ax.set_xlim(-80, 100)
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")
ax.set_title(f"City Latitude vs. Wind Speed ({date})")
ax.grid(alpha=0.5)

# Save the figure to disk
plt.savefig("../Images/windspeed.png")