In [1]:
# Import the dependencies.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from citipy import citipy
from scipy.stats import linregress

In [2]:
# Draw 1,500 random latitudes and 1,500 random longitudes, each uniformly
# distributed over its valid range.
lats = np.random.uniform(-90.000, 90.000, size=1500)
lngs = np.random.uniform(-180.000, 180.000, size=1500)
# Pair them up lazily; note that a zip object can only be iterated once.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x125d8bb1808>

In [3]:
# Practice data: five hand-picked (latitude, longitude) pairs.
practice_pairs = [
    (25.12903645, -67.59741259),
    (25.92017388, 11.09532135),
    (26.62509167, 74.84233102),
    (-59.98969384, -76.89176677),
    (37.30571269, -61.13376282),
]
# Split into separate latitude (x) and longitude (y) lists, then pair them
# back up lazily.
x = [lat for lat, _ in practice_pairs]
y = [lng for _, lng in practice_pairs]
coordinates = zip(x, y)

In [4]:
# Print each practice latitude/longitude pair on its own line.
for lat, lng in coordinates:
    print(lat, lng)

25.12903645 -67.59741259
25.92017388 11.09532135
26.62509167 74.84233102
-59.98969384 -76.89176677
37.30571269 -61.13376282


In [5]:
# Add the latitudes and longitudes to a list. Zipping the arrays here, instead
# of consuming the one-shot lat_lngs iterator, keeps this cell repeatable:
# re-running it no longer silently produces an empty list.
coordinates = list(zip(lats, lngs))

In [6]:
# Print the nearest city name and its country code for each coordinate pair.
for coordinate in coordinates:
    # Look the city up once and reuse the result for both fields.
    nearest = citipy.nearest_city(coordinate[0], coordinate[1])
    print(nearest.city_name, nearest.country_code)

tasiilaq gl
kamenskoye ru
nurota uz
miraflores co
lebu cl
whyalla au
vaitupu wf
mataura pf
cape town za
gold coast au
promyshlennovskiy ru
nhulunbuy au
severo-kurilsk ru
new norfolk au
tevriz ru
kirakira sb
guerrero negro mx
cabo san lucas mx
vila velha br
mataura pf
ancud cl
jamestown sh
gaozhou cn
tasiilaq gl
constitucion cl
saint george bm
bam ir
bogyiszlo hu
new norfolk au
kapaa us
ayorou ne
chumikan ru
san antonio cl
pisco pe
kirakira sb
hami cn
taolanaro mg
illoqqortoormiut gl
bosaso so
upernavik gl
turukhansk ru
bambous virieux mu
cidreira br
huntington us
tual id
taolanaro mg
rikitea pf
atuona pf
katsuura jp
tuktoyaktuk ca
luganville vu
inderborskiy kz
east london za
aklavik ca
fossombrone it
butaritari ki
taldan ru
puerto el triunfo sv
broadstairs gb
rikitea pf
pontian kecil my
rikitea pf
puro ph
hermanus za
tuy hoa vn
bethel us
narsaq gl
busselton au
tiksi ru
mataura pf
severo-kurilsk ru
lagoa pt
albany au
attawapiskat ca
ambilobe mg
albany au
bredasdorp za
hermanus za
ushuai

longyearbyen sj
galgani sd
avera pf
zhigansk ru
saint-philippe re
vysokogornyy ru
san patricio mx
kapaa us
samusu ws
georgetown sh
kapaa us
cayenne gf
new norfolk au
mogoytuy ru
busselton au
coahuayana mx
yellowknife ca
lorengau pg
hilo us
saldanha za
diu in
hermanus za
hilo us
vaini to
cape town za
tsiroanomandidy mg
peniche pt
belushya guba ru
waingapu id
port alfred za
rikitea pf
mataura pf
panama city us
chuy uy
chokurdakh ru
jibuti dj
butaritari ki
te anau nz
tutoia br
hasaki jp
sao filipe cv
attawapiskat ca
ust-kamchatsk ru
isangel vu
punta arenas cl
winneba gh
ponta do sol cv
port alfred za
kahului us
ponta do sol cv
mar del plata ar
puerto suarez bo
atuona pf
siocon ph
lolua tv
geraldton au
matamoros mx
rikitea pf
mulanje mw
rikitea pf
provideniya ru
rikitea pf
sioux lookout ca
ambon id
tumannyy ru
luwuk id
northam au
aklavik ca
chuy uy
port blair in
sao filipe cv
hilo us
malanje ao
vegreville ca
jamestown sh
launceston au
taguatinga br
castro cl
rikitea pf
norman wells ca
mar 

In [7]:
# Create a list for holding the cities, in first-seen order.
cities = []
# Companion set so the uniqueness check is constant-time instead of a scan of
# the whole list for every coordinate.
seen_cities = set()
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)

629

In [8]:
# Import the time library and the datetime module from the datetime library 
import time
from datetime import datetime

In [9]:
# Accumulator for one weather record (a dict) per successfully retrieved city.
city_data = []

# Announce the start of the retrieval log.
print("Beginning Data Retrieval     ", "-----------------------------", sep="\n")

# Both logging counters start at 1: the record number within the current set
# and the set number itself.
record_count = set_count = 1

Beginning Data Retrieval     
-----------------------------


In [10]:
# Import the requests library.
import requests

# Import the API key.
from config import weather_api_key

In [11]:
# Base endpoint for current weather in imperial units. HTTPS is used so the
# API key in the query string is not sent over the network in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key


In [12]:
# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes, pausing 60 seconds
    # before each new set.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1
    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout keeps one stalled
        # connection from hanging the whole run.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # A response without the expected fields (or without a JSON body) is
    # treated as an unknown city and skipped. Catching specific exceptions,
    # rather than using a bare except, lets Ctrl-C still interrupt the loop.
    except (KeyError, ValueError):
        print("City not found. Skipping...")
    # Network failures are skipped too but reported separately. Only the
    # exception type is printed, because the message can include the request
    # URL, which carries the API key.
    except requests.RequestException as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing Record 1 of Set 1 | tasiilaq
Processing Record 2 of Set 1 | kamenskoye
City not found. Skipping...
Processing Record 3 of Set 1 | nurota
Processing Record 4 of Set 1 | miraflores
Processing Record 5 of Set 1 | lebu
Processing Record 6 of Set 1 | whyalla
Processing Record 7 of Set 1 | vaitupu
City not found. Skipping...
Processing Record 8 of Set 1 | mataura
Processing Record 9 of Set 1 | cape town
Processing Record 10 of Set 1 | gold coast
Processing Record 11 of Set 1 | promyshlennovskiy
Processing Record 12 of Set 1 | nhulunbuy
Processing Record 13 of Set 1 | severo-kurilsk
Processing Record 14 of Set 1 | new norfolk
Processing Record 15 of Set 1 | tevriz
Processing Record 16 of Set 1 | kirakira
Processing Record 17 of Set 1 | guerrero negro
Processing Record 18 of Set 1 | cabo san lucas
Processing Record 19 of Set 1 | vila velha
Processing Record 20 of Set 1 | ancud
Processing Record 21 of Set 1 | jamestown
Processing Record 22 of Set 1 | gaozhou
Processing Record 23 of S

Processing Record 39 of Set 4 | axim
Processing Record 40 of Set 4 | khotynets
Processing Record 41 of Set 4 | harper
Processing Record 42 of Set 4 | adrar
Processing Record 43 of Set 4 | nanortalik
Processing Record 44 of Set 4 | khawhai
Processing Record 45 of Set 4 | dikson
Processing Record 46 of Set 4 | xiaoweizhai
Processing Record 47 of Set 4 | ostersund
Processing Record 48 of Set 4 | kharp
Processing Record 49 of Set 4 | atasu
Processing Record 50 of Set 4 | longyearbyen
Processing Record 1 of Set 5 | perigueux
Processing Record 2 of Set 5 | mount gambier
Processing Record 3 of Set 5 | kaitangata
Processing Record 4 of Set 5 | atar
Processing Record 5 of Set 5 | kruisfontein
Processing Record 6 of Set 5 | port elizabeth
Processing Record 7 of Set 5 | ewa beach
Processing Record 8 of Set 5 | wajima
Processing Record 9 of Set 5 | carbonia
Processing Record 10 of Set 5 | dibulla
Processing Record 11 of Set 5 | clyde river
Processing Record 12 of Set 5 | rzeszow
Processing Record 

KeyboardInterrupt: 

In [None]:
# Convert the list of dictionaries to a Pandas DataFrame. Naming the columns
# pins their order and keeps them present even when no city was retrieved, so
# later column lookups do not fail on an empty result.
city_data_df = pd.DataFrame(
    city_data,
    columns=["City", "Lat", "Lng", "Max Temp", "Humidity",
             "Cloudiness", "Wind Speed", "Country", "Date"],
)
city_data_df.head(10)

In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Make sure the target directory exists; to_csv does not create it.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the City_Data into a CSV, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")

In [None]:
# Pull the columns used for plotting out of the DataFrame, one Series each.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

In [None]:
# Import the time module.
import time
# Get the current time as a float number of seconds since the epoch
# (a timestamp, not a calendar date) and display it.
today = time.time()
today

In [None]:
# Import time module
import time

# Scatter plot: latitude on the x-axis, max temperature on the y-axis.
plt.scatter(x=lats, y=max_temps,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (ending with the current date in the locale's format), axis labels,
# and grid.
plt.title(f"City Latitude vs. Max Temperature {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig1.png")
plt.show()

In [None]:
# Scatter plot: latitude on the x-axis, humidity on the y-axis.
plt.scatter(x=lats, y=humidity,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (ending with the current date in the locale's format), axis labels,
# and grid.
plt.title(f"City Latitude vs. Humidity {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig2.png")
plt.show()

In [None]:
# Scatter plot: latitude on the x-axis, cloudiness on the y-axis.
plt.scatter(x=lats, y=cloudiness,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (ending with the current date in the locale's format), axis labels,
# and grid.
plt.title(f"City Latitude vs. Cloudiness (%) {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig3.png")
plt.show()

In [None]:
# Scatter plot: latitude on the x-axis, wind speed on the y-axis.
plt.scatter(x=lats, y=wind_speed,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (ending with the current date in the locale's format), axis labels,
# and grid.
plt.title(f"City Latitude vs. Wind Speed {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig4.png")
plt.show()

In [None]:
# Inputs for this stand-alone regression example. They were not defined
# anywhere before this cell, so it raised NameError on a fresh kernel.
# NOTE(review): latitude vs. max temperature over all cities is assumed from
# the axis labels below -- confirm this is the intended data.
x_values = city_data_df["Lat"]
y_values = city_data_df["Max Temp"]
title = "Linear Regression of Maximum Temperature on Latitude"

# Perform linear regression.
(slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

# Calculate the regression line "y values" from the slope and intercept.
regress_values = x_values * slope + intercept

# Get the equation of the line.
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Create a scatter plot of the x and y values.
plt.scatter(x_values,y_values)
# Plot the regression line with the x-values and the y coordinates based on the intercept and slope.
plt.plot(x_values,regress_values,"r")
# Annotate the text for the line equation and add its coordinates.
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
plt.title(title)
plt.xlabel('Latitude')
plt.ylabel('Temp')
plt.show()

In [None]:
# Import linregress
from scipy.stats import linregress

# Create a function to perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    """Scatter-plot y against x with a fitted straight line and its equation.

    Args:
        x_values: 1-D numeric data for the x-axis (Series, array, list or
            tuple); the x-axis is labelled 'Latitude'.
        y_values: 1-D numeric data for the y-axis, same length as x_values.
        title: Text for the plot title.
        y_label: Text for the y-axis label.
        text_coordinates: (x, y) position passed to plt.annotate for the
            equation text.

    Returns:
        None. The figure is displayed with plt.show().
    """
    # Coerce x to an array so the arithmetic below also works for plain
    # lists and tuples, not only for Series/arrays.
    x_array = np.asarray(x_values)

    # Run regression on hemisphere weather data; only the slope and
    # intercept of the fit are needed.
    fit = linregress(x_array, y_values)

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_array * fit.slope + fit.intercept
    # Get the equation of the line, rounded to two decimals.
    line_eq = "y = " + str(round(fit.slope,2)) + "x + " + str(round(fit.intercept,2))
    # Create a scatter plot and plot the regression line in red.
    plt.scatter(x_array, y_values)
    plt.plot(x_array, regress_values, "r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Preview the first rows whose latitude is zero or greater.
city_data_df[city_data_df["Lat"] >= 0].head()

In [None]:
# Split the cities by hemisphere; a latitude of exactly 0 goes to the north.
in_north = city_data_df["Lat"] >= 0
in_south = city_data_df["Lat"] < 0
northern_hemi_df = city_data_df.loc[in_north]
southern_hemi_df = city_data_df.loc[in_south]

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function. The title uses an explicit "\n" line break; a backslash
# continuation inside the string literal would embed the next line's leading
# spaces in the title.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \nfor Maximum Temperature', 'Max Temp',(10,40))

In [None]:
# Southern Hemisphere: regress maximum temperature on latitude.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Plot the data with the fitted line; the equation text goes at (-50, 90).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \nfor Maximum Temperature',
    y_label='Max Temp',
    text_coordinates=(-50, 90),
)

In [None]:
# Northern Hemisphere: regress humidity on latitude.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
# Plot the data with the fitted line; the equation text goes at (40, 10).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \nfor % Humidity',
    y_label='% Humidity',
    text_coordinates=(40, 10),
)

In [None]:
# Southern Hemisphere: regress humidity on latitude.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
# Plot the data with the fitted line; the equation text goes at (-50, 15).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \nfor % Humidity',
    y_label='% Humidity',
    text_coordinates=(-50, 15),
)

In [None]:
# Northern Hemisphere: regress cloudiness on latitude.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Plot the data with the fitted line; the equation text goes at (10, 55).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \nfor % Cloudiness',
    y_label='% Cloudiness',
    text_coordinates=(10, 55),
)

In [None]:
# Southern Hemisphere: regress cloudiness on latitude.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Plot the data with the fitted line; the equation text goes at (-50, 60).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \nfor % Cloudiness',
    y_label='% Cloudiness',
    text_coordinates=(-50, 60),
)

In [None]:
# Northern Hemisphere: regress wind speed on latitude.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Plot the data with the fitted line; the equation text goes at (40, 35).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \nfor Wind Speed',
    y_label='Wind Speed',
    text_coordinates=(40, 35),
)

In [None]:
# Southern Hemisphere: regress wind speed on latitude.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Plot the data with the fitted line; the equation text goes at (-50, 35).
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \nfor Wind Speed',
    y_label='Wind Speed',
    text_coordinates=(-50, 35),
)