In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import time
import requests
from citipy import citipy

# Import the API key.
from config import weather_api_key

In [2]:
# Draw 1,500 latitudes and 1,500 longitudes uniformly at random and pair
# them element-wise. zip() returns a lazy iterator, not a list.
lats = np.random.uniform(-90.000, 90.000, 1500)
lngs = np.random.uniform(-180.000, 180.000, 1500)
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x7f8a26b3b690>

In [3]:
# Add the latitudes and longitudes to a list of (lat, lng) tuples.
# The list is built from a fresh zip() of the source arrays rather than from
# the shared `lat_lngs` iterator: a zip object can be consumed only once, so
# converting `lat_lngs` again (for example when this cell is re-run) would
# silently produce an empty list.
coordinates = list(zip(lats, lngs))

In [4]:
# NOTE: a zip object is a single-use iterator. Once it has been consumed
# (for example by list()), iterating over it again yields nothing, so converting
# the same zip object to a list a second time produces an empty list.
coordinates

[(-31.05254276608275, 102.62119992295521),
 (54.191528412216854, 7.8115794158131),
 (67.86682694290369, 47.42926677009481),
 (-32.119132672943095, -82.51367651889609),
 (65.52221191546016, -33.933340468083856),
 (-49.88560582687573, -83.41968955915263),
 (37.70355743932946, -6.2110877614708215),
 (82.81902198450351, -104.31024765972327),
 (-49.72076664603474, 115.91286751692803),
 (-0.9857828950691498, 79.80793389356376),
 (61.98562064387502, 91.64786235606022),
 (-32.61625458289341, -108.39759051408953),
 (50.2363588181521, 40.67793742409236),
 (-31.454844111326544, 56.24552618562029),
 (-84.04096401544344, -77.46410660439976),
 (-1.6487138746447272, -142.98447618019247),
 (-49.490478794539115, -7.077135485821202),
 (54.09519769230039, 135.03487589418273),
 (17.04410228288613, -12.81238721763259),
 (-36.24608444854676, 103.33812466368835),
 (47.81257546654359, -161.87671804005436),
 (-77.9584703379432, 56.64997662184129),
 (82.19030999459562, -173.84346936509155),
 (46.936187472928225

In [5]:
# Create a list for holding the cities, in first-seen order.
cities = []
# Companion set used for duplicate checks: set membership is O(1), whereas
# testing `city not in cities` rescans the whole list for every coordinate.
seen_cities = set()
# Identify the nearest city for each latitude and longitude combination.
for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)

634

In [6]:
# Display the list of unique city names.
cities

['geraldton',
 'jever',
 'leshukonskoye',
 'lebu',
 'tasiilaq',
 'castro',
 'la algaba',
 'yellowknife',
 'albany',
 'viligili',
 'teya',
 'rikitea',
 'nizhniy mamon',
 'saint-joseph',
 'ushuaia',
 'atuona',
 'cape town',
 'chumikan',
 'thilogne',
 'busselton',
 'bethel',
 'east london',
 'mys shmidta',
 'iskateley',
 'tiksi',
 'qaanaaq',
 'barentsburg',
 'bonfim',
 'isabela',
 'san felipe',
 'deputatskiy',
 'neka',
 'bluff',
 'georgetown',
 'tsihombe',
 'umm lajj',
 'kahului',
 'general pico',
 'thinadhoo',
 'narsaq',
 'cidreira',
 'paamiut',
 'nanortalik',
 'san quintin',
 'new norfolk',
 'butaritari',
 'port alfred',
 'taolanaro',
 'sosva',
 'puebloviejo',
 'san policarpo',
 'punta arenas',
 'ponta do sol',
 'airai',
 'yerbogachen',
 'raditsa-krylovka',
 'saint-philippe',
 'tuatapere',
 'katherine',
 'cabo san lucas',
 'juneau',
 'marsaxlokk',
 'mataura',
 'severo-kurilsk',
 'ahipara',
 'kavieng',
 'sabha',
 'saskylakh',
 'kieta',
 'khatanga',
 'bilma',
 'avarua',
 'vangaindrano',
 

In [7]:
# Starting URL for Weather Map API Call.
# HTTPS is used so the API key carried in the query string is not sent in
# clear text over the network.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [8]:
# Create an empty list to hold the weather data (one dictionary per city).
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        # Pause for a minute between sets (presumably to stay under the
        # API's request-rate limit -- confirm against the account's plan).
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout keeps a single
        # stalled connection from hanging the whole loop.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a UTC date string. time.gmtime gives
        # the same result as the deprecated datetime.utcfromtimestamp.
        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather["dt"]))
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If the request fails, the body is not valid JSON, or the payload lacks
    # the expected fields, skip the city. Only these errors are caught so that
    # KeyboardInterrupt and genuine programming errors are not swallowed.
    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | geraldton
Processing Record 2 of Set 1 | jever
Processing Record 3 of Set 1 | leshukonskoye
Processing Record 4 of Set 1 | lebu
Processing Record 5 of Set 1 | tasiilaq
Processing Record 6 of Set 1 | castro
Processing Record 7 of Set 1 | la algaba
Processing Record 8 of Set 1 | yellowknife
Processing Record 9 of Set 1 | albany
Processing Record 10 of Set 1 | viligili
City not found. Skipping...
Processing Record 11 of Set 1 | teya
Processing Record 12 of Set 1 | rikitea
Processing Record 13 of Set 1 | nizhniy mamon
Processing Record 14 of Set 1 | saint-joseph
Processing Record 15 of Set 1 | ushuaia
Processing Record 16 of Set 1 | atuona
Processing Record 17 of Set 1 | cape town
Processing Record 18 of Set 1 | chumikan
Processing Record 19 of Set 1 | thilogne
City not found. Skipping...
Processing Record 20 of Set 1 | busselton
Processing Record 21 of Set 1 | bethel
Processing Record 22 of Set 1 | 

Processing Record 39 of Set 4 | kumluca
Processing Record 40 of Set 4 | laguna
Processing Record 41 of Set 4 | palabuhanratu
City not found. Skipping...
Processing Record 42 of Set 4 | sao joao da barra
Processing Record 43 of Set 4 | monrovia
Processing Record 44 of Set 4 | cockburn town
Processing Record 45 of Set 4 | lahaina
Processing Record 46 of Set 4 | poya
Processing Record 47 of Set 4 | mackay
Processing Record 48 of Set 4 | pevek
Processing Record 49 of Set 4 | chinhoyi
Processing Record 50 of Set 4 | jablah
Processing Record 1 of Set 5 | dikson
Processing Record 2 of Set 5 | hereford
Processing Record 3 of Set 5 | meulaboh
Processing Record 4 of Set 5 | kapaa
Processing Record 5 of Set 5 | constitucion
Processing Record 6 of Set 5 | peno
Processing Record 7 of Set 5 | carutapera
Processing Record 8 of Set 5 | luderitz
Processing Record 9 of Set 5 | saleaula
City not found. Skipping...
Processing Record 10 of Set 5 | tual
Processing Record 11 of Set 5 | ambon
Processing Recor

Processing Record 23 of Set 8 | gizo
Processing Record 24 of Set 8 | fez
Processing Record 25 of Set 8 | lasa
Processing Record 26 of Set 8 | sigulda
Processing Record 27 of Set 8 | taksimo
Processing Record 28 of Set 8 | bargal
City not found. Skipping...
Processing Record 29 of Set 8 | coihaique
Processing Record 30 of Set 8 | verkhoyansk
Processing Record 31 of Set 8 | waipawa
Processing Record 32 of Set 8 | sinkat
City not found. Skipping...
Processing Record 33 of Set 8 | maraba
Processing Record 34 of Set 8 | evensk
Processing Record 35 of Set 8 | masuguru
Processing Record 36 of Set 8 | dolores
Processing Record 37 of Set 8 | serenje
Processing Record 38 of Set 8 | briancon
Processing Record 39 of Set 8 | ayr
Processing Record 40 of Set 8 | novopavlovka
Processing Record 41 of Set 8 | coquimbo
Processing Record 42 of Set 8 | haines junction
Processing Record 43 of Set 8 | zhezkazgan
Processing Record 44 of Set 8 | norman wells
Processing Record 45 of Set 8 | flin flon
Processing

Processing Record 15 of Set 12 | manggar
Processing Record 16 of Set 12 | green valley
Processing Record 17 of Set 12 | oriximina
Processing Record 18 of Set 12 | kayerkan
Processing Record 19 of Set 12 | aqtobe
Processing Record 20 of Set 12 | nome
Processing Record 21 of Set 12 | kolomak
Processing Record 22 of Set 12 | lumphat
Processing Record 23 of Set 12 | brokopondo
Processing Record 24 of Set 12 | te anau
Processing Record 25 of Set 12 | salisbury
Processing Record 26 of Set 12 | liverpool
Processing Record 27 of Set 12 | mutiscua
Processing Record 28 of Set 12 | chepo
Processing Record 29 of Set 12 | mbuyapey
Processing Record 30 of Set 12 | novosokolniki
Processing Record 31 of Set 12 | malingin
Processing Record 32 of Set 12 | chipinge
Processing Record 33 of Set 12 | ivybridge
Processing Record 34 of Set 12 | calvinia
Processing Record 35 of Set 12 | tautira
Processing Record 36 of Set 12 | grajau
Processing Record 37 of Set 12 | pionerskiy
Processing Record 38 of Set 12 | 

In [None]:
# Confirm how many cities returned weather data (city_data holds one
# dictionary per successfully retrieved city).
len(city_data)

In [None]:
# Build a Pandas DataFrame from the list of per-city dictionaries and
# preview its first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

In [None]:
# Desired left-to-right order of the DataFrame columns.
column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]

In [None]:
# Reorder the DataFrame columns to match column_order and preview the result.
city_data_df = city_data_df.loc[:, column_order]
city_data_df.head()

In [None]:
# Path of the CSV file that receives the city weather data.
output_data_file = "../World_Weather_Analysis/cities.csv"
# Write the DataFrame to that file, labelling the index column "City_ID".
city_data_df.to_csv(path_or_buf=output_data_file, index_label="City_ID")

In [None]:
# Pull out the DataFrame columns used by the plots, one Series per variable.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column_name]
    for column_name in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

In [None]:
# Scatter each city's maximum temperature against its latitude.
plt.scatter(lats, max_temps,
            edgecolor="black", linewidths=1, marker="o", alpha=0.8, label="Cities")

# Label the axes, title the chart with the current date, and add a grid.
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title(f"City Latitude vs. Max Temperature {time.strftime('%x')}")
plt.grid(True)

# Write the figure to disk before displaying it.
plt.savefig("../World_Weather_Analysis/Fig1.png")

# Show plot.
plt.show()

In [None]:
# Scatter each city's humidity against its latitude.
plt.scatter(lats, humidity,
            edgecolor="black", linewidths=1, marker="o", alpha=0.8, label="Cities")

# Label the axes, title the chart with the current date, and add a grid.
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs. Humidity {time.strftime('%x')}")
plt.grid(True)

# Write the figure to disk before displaying it.
plt.savefig("../World_Weather_Analysis/Fig2.png")

# Show plot.
plt.show()

In [None]:
# Scatter each city's cloudiness against its latitude.
plt.scatter(lats, cloudiness,
            edgecolor="black", linewidths=1, marker="o", alpha=0.8, label="Cities")

# Label the axes, title the chart with the current date, and add a grid.
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f"City Latitude vs. Cloudiness (%) {time.strftime('%x')}")
plt.grid(True)

# Write the figure to disk before displaying it.
plt.savefig("../World_Weather_Analysis/Fig3.png")

# Show plot.
plt.show()

In [None]:
# Scatter each city's wind speed against its latitude.
plt.scatter(lats, wind_speed,
            edgecolor="black", linewidths=1, marker="o", alpha=0.8, label="Cities")

# Label the axes, title the chart with the current date, and add a grid.
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"City Latitude vs. Wind Speed {time.strftime('%x')}")
plt.grid(True)

# Write the figure to disk before displaying it.
plt.savefig("../World_Weather_Analysis/Fig4.png")

# Show plot.
plt.show()

In [None]:
# Import linregress
from scipy.stats import linregress

# Create a function to perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates,
                           x_label='Latitude'):
    """Scatter-plot the data with its linear-regression line and equation.

    Args:
        x_values: Values for the x-axis (latitudes in this notebook).
        y_values: Values for the y-axis, paired with ``x_values``.
        title: Title shown above the plot.
        y_label: Label for the y-axis.
        text_coordinates: Position passed to ``plt.annotate`` for the
            line-equation text.
        x_label: Label for the x-axis. Defaults to ``'Latitude'``, the value
            that was previously hard-coded.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Run regression on hemisphere weather data; only the slope and the
    # intercept are needed to draw and describe the line.
    regression = linregress(x_values, y_values)
    slope, intercept = regression.slope, regression.intercept

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept
    # Get the equation of the line. The sign is taken from the rounded
    # intercept so a negative value reads "x - 3.1" rather than "x + -3.1".
    intercept_rounded = round(intercept, 2)
    sign = "-" if intercept_rounded < 0 else "+"
    line_eq = ("y = " + str(round(slope, 2)) + "x " + sign + " "
               + str(abs(intercept_rounded)))
    # Create a scatter plot and plot the regression line.
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Split the cities by hemisphere: latitude 0 and above is treated as
# northern, anything below 0 as southern.
northern_hemi_df = city_data_df[city_data_df["Lat"].ge(0)]
southern_hemi_df = city_data_df[city_data_df["Lat"].lt(0)]

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function.
# The title is built from adjacent string literals: a backslash continuation
# inside a single literal embeds the next line's indentation in the title.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \n'
                       'for Maximum Temperature', 'Max Temp',(10,-40))

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Call the function.
# The title is built from adjacent string literals: a backslash continuation
# inside a single literal embeds the next line's indentation in the title.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n'
                       'for Maximum Temperature', 'Max Temp',(-50,45))

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
# Call the function.
# The title is built from adjacent string literals: a backslash continuation
# inside a single literal embeds the next line's indentation in the title.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \n'
                       'for % Humidity', '% Humidity',(40,10))

In [None]:
# Plot the data.

# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
# Call the function.
# The two-line title is written as adjacent string literals; a quoted string
# cannot span a physical line break.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n'
                       'for % Humidity', '% Humidity',(-50,15))

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Call the function.
# The title names the Northern Hemisphere to match the data being plotted,
# and the equation is anchored at a non-negative latitude so that it falls
# inside the plotted x-range. The two-line title is written as adjacent
# string literals; a quoted string cannot span a physical line break.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \n'
                       'for % Cloudiness', '% Cloudiness',(40,10))

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Call the function.
# The two-line title is written as adjacent string literals; a quoted string
# cannot span a physical line break.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n'
                       'for % Cloudiness', '% Cloudiness',(-50,60))

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Call the function.
# The two-line title is written as adjacent string literals; a quoted string
# cannot span a physical line break.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \n'
                       'for Wind Speed', 'Wind Speed',(40,35))

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Call the function.
# The two-line title is written as adjacent string literals; a quoted string
# cannot span a physical line break.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n'
                       'for Wind Speed', 'Wind Speed',(-50,35))