In [1]:
#Analysis based on Information Dated December 2020
#What's the weather like as we approach the equator?

#Trend 1 : Sample cities near the equator are likely to have higher maximum temperature (between 70 and 90 degrees). Sample cities between -20 and 20 latitudes are likely to have higher maximum temperatures than those that are not. Sample cities are more likely to have lower maximum temperature as (1) latitude decreases from -20 and (2) latitude increases from 20.
#Trend 2 : There seems to be little correlation between cloudiness and windspeed and distance from the equator (i.e., latitude) for the sample cities. Also note that sample cities' wind speed seem to be concentrated between 0 and 10 mph.
#Trend 3 : Sample cities near the equator are likely to have higher humidity (between 60 and 100%). Also note that sample cities between 40 and 80 latitude tend to have high humidity (between 70% and 100%).

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import openweathermapy.core as owm

# Import API key
from api_key import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [3]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

600

In [4]:
# Create DataFrame
cities_df = pd.DataFrame({"City": cities})

# Create columns in DataFrame
cities_df["Cloudiness"] = ""
cities_df["Country"] = ""
cities_df["Date"] = ""
cities_df["Humidity"] = ""
cities_df["Lat"] = ""
cities_df["Lng"] = ""
cities_df["Max Temp"] = ""
cities_df["Wind Speed"] = ""

print("Beginning Data Retrieval")
print ("--------------------------------")
#set counters for printing status
counter = 1
max_counter = len(cities_df["City"])

# Create settings dictionary with information we're interested in
settings = {"units": "imperial", "appid": weather_api_key}

# Loop through the cities_df and run a search for each city for the columns
for index, row in cities_df.iterrows():
    city = row['City']
    print(f"Processing Record {counter} of {max_counter} | {city}")
    counter = counter + 1
    try:
        current_weather = owm.get_current(city, **settings)
        cities_df.loc[index, "Cloudiness"] = current_weather['clouds']['all']
        cities_df.loc[index, "Country"] = current_weather['sys']['country']
        cities_df.loc[index, "Date"] = current_weather['dt']
        cities_df.loc[index, "Humidity"] = current_weather['main']['humidity']
        cities_df.loc[index, "Lat"] = current_weather['coord']['lat']
        cities_df.loc[index, "Lng"] = current_weather['coord']['lon']
        cities_df.loc[index, "Max Temp"] = current_weather['main']["temp_max"]
        cities_df.loc[index, "Wind Speed"] = current_weather['wind']["speed"]        
    except:
        print ("City not found. Skipping...")
    
    time.sleep(0.5)

Beginning Data Retrieval
--------------------------------
Processing Record 1 of 600 | saskylakh
Processing Record 2 of 600 | hithadhoo
Processing Record 3 of 600 | punta arenas
Processing Record 4 of 600 | ostrovnoy
Processing Record 5 of 600 | suoyarvi
Processing Record 6 of 600 | mount isa
Processing Record 7 of 600 | albany
Processing Record 8 of 600 | rikitea
Processing Record 9 of 600 | lata
Processing Record 10 of 600 | leningradskiy
Processing Record 11 of 600 | port-gentil
Processing Record 12 of 600 | provideniya
Processing Record 13 of 600 | arraial do cabo
Processing Record 14 of 600 | batemans bay
Processing Record 15 of 600 | taolanaro
City not found. Skipping...
Processing Record 16 of 600 | barrow
Processing Record 17 of 600 | zhanaozen
Processing Record 18 of 600 | bubaque
Processing Record 19 of 600 | hermanus
Processing Record 20 of 600 | lagoa
Processing Record 21 of 600 | busselton
Processing Record 22 of 600 | caiaponia
Processing Record 23 of 600 | alpena
Process

Processing Record 194 of 600 | tsihombe
City not found. Skipping...
Processing Record 195 of 600 | davenda
Processing Record 196 of 600 | matina
Processing Record 197 of 600 | kaitangata
Processing Record 198 of 600 | jiddah
City not found. Skipping...
Processing Record 199 of 600 | saint-pierre
Processing Record 200 of 600 | luderitz
Processing Record 201 of 600 | henties bay
Processing Record 202 of 600 | esperance
Processing Record 203 of 600 | gornopravdinsk
Processing Record 204 of 600 | port macquarie
Processing Record 205 of 600 | narsaq
Processing Record 206 of 600 | mamou
Processing Record 207 of 600 | atar
Processing Record 208 of 600 | catarman
Processing Record 209 of 600 | sao miguel do araguaia
Processing Record 210 of 600 | dangriga
Processing Record 211 of 600 | illoqqortoormiut
City not found. Skipping...
Processing Record 212 of 600 | calvinia
Processing Record 213 of 600 | angoche
Processing Record 214 of 600 | coahuayana
Processing Record 215 of 600 | magadan
Proces

Processing Record 370 of 600 | coihaique
City not found. Skipping...
Processing Record 371 of 600 | san isidro
City not found. Skipping...
Processing Record 372 of 600 | vardo
City not found. Skipping...
Processing Record 373 of 600 | sadovoye
City not found. Skipping...
Processing Record 374 of 600 | sovetskiy
City not found. Skipping...
Processing Record 375 of 600 | balabac
City not found. Skipping...
Processing Record 376 of 600 | khani
City not found. Skipping...
Processing Record 377 of 600 | khatanga
City not found. Skipping...
Processing Record 378 of 600 | kirakira
City not found. Skipping...
Processing Record 379 of 600 | fortuna
City not found. Skipping...
Processing Record 380 of 600 | galbshtadt
City not found. Skipping...
Processing Record 381 of 600 | shangzhi
City not found. Skipping...
Processing Record 382 of 600 | borama
City not found. Skipping...
Processing Record 383 of 600 | chancay
City not found. Skipping...
Processing Record 384 of 600 | olafsvik
City not foun

Processing Record 490 of 600 | vanavara
City not found. Skipping...
Processing Record 491 of 600 | srednekolymsk
City not found. Skipping...
Processing Record 492 of 600 | padampur
City not found. Skipping...
Processing Record 493 of 600 | richmond
City not found. Skipping...
Processing Record 494 of 600 | chimore
City not found. Skipping...
Processing Record 495 of 600 | smiltene
City not found. Skipping...
Processing Record 496 of 600 | poli
City not found. Skipping...
Processing Record 497 of 600 | hovd
City not found. Skipping...
Processing Record 498 of 600 | garissa
City not found. Skipping...
Processing Record 499 of 600 | fort saint james
City not found. Skipping...
Processing Record 500 of 600 | coshocton
City not found. Skipping...
Processing Record 501 of 600 | kholm
City not found. Skipping...
Processing Record 502 of 600 | panorama
City not found. Skipping...
Processing Record 503 of 600 | ratnapura
City not found. Skipping...
Processing Record 504 of 600 | cacapava do sul

In [None]:
#create a copy of the data frame
cities_df_clean = cities_df

# clean data frame
cities_df_clean.replace('', np.nan, inplace=True)
cities_df_clean = cities_df_clean.dropna(how='any', inplace=False)
cities_df_clean = cities_df_clean.reset_index(drop = True)
cities_df_clean.count()

In [6]:
#save to CSV
cities_df_clean.to_csv(output_data_file)

#display data frame
cities_df_clean.head()

FileNotFoundError: [Errno 2] No such file or directory: 'output_data/cities.csv'

In [None]:
x_axis = cities_df_clean["Lat"]
y_axis = cities_df_clean["Max Temp"]

plt.figure(figsize = [10,5])
plt.scatter(x_axis, y_axis, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
date_code = cities_df_clean["Date"][0]
date = str(time.strftime("%D", time.localtime(int(date_code))))
title = f"City Latitude vs. Max Temperature ({date})"
plt.title(title, fontsize=14, fontweight="bold")
plt.grid(which = 'major', axis = "both")
plt.xlabel("Latitude")
plt.xlim(-80,80)
plt.ylabel("Max Temperature (F)")

# Save the Figure
plt.tight_layout()
plt.savefig("output_data/MaxTemp_Lat.png")

# Show the Figure
plt.show()

In [None]:
x_axis = cities_df_clean["Lat"]
y_axis = cities_df_clean["Humidity"]

plt.figure(figsize = [10,5])
plt.scatter(x_axis, y_axis, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
date_code = cities_df_clean["Date"][0]
date = str(time.strftime("%D", time.localtime(int(date_code))))
title = f"City Latitude vs.Humidity ({date})"
plt.title(title, fontsize=14, fontweight="bold")
plt.grid(which = 'major', axis = "both")
plt.xlabel("Latitude")
plt.xlim(-80,80)
plt.ylabel("Humidity (%)")

# Save the Figure
plt.tight_layout()
plt.savefig("output_data/Humidity_Lat.png")

# Show the Figure
plt.show()

In [None]:
x_axis = cities_df_clean["Lat"]
y_axis = cities_df_clean["Cloudiness"]

plt.figure(figsize = [10,5])
plt.scatter(x_axis, y_axis, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
date_code = cities_df_clean["Date"][0]
date = str(time.strftime("%D", time.localtime(int(date_code))))
title = f"City Latitude vs.Cloudiness ({date})"
plt.title(title, fontsize=14, fontweight="bold")
plt.grid(which = 'major', axis = "both")
plt.xlabel("Latitude")
plt.xlim(-80,80)
plt.ylabel("Cloudiness (%)")

# Save the Figure
plt.tight_layout()
plt.savefig("output_data/Cloudiness_Lat.png")

# Show the Figure
plt.show()

In [None]:
x_axis = cities_df_clean["Lat"]
y_axis = cities_df_clean["Wind Speed"]

plt.figure(figsize = [10,5])
plt.scatter(x_axis, y_axis, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)
date_code = cities_df_clean["Date"][0]
date = str(time.strftime("%D", time.localtime(int(date_code))))
title = f"City Latitude vs.Wind Speed ({date})"
plt.title(title, fontsize=14, fontweight="bold")
plt.grid(which = 'major', axis = "both")
plt.xlabel("Latitude")
plt.xlim(-80,80)
plt.ylabel("Wind Speed (mph)")

# Save the Figure
plt.tight_layout()
plt.savefig("output_data/WindSpeed_Lat.png")

# Show the Figure
plt.show()