In [1]:
# Import the dependencies
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy
from scipy.stats import linregress

from config2 import weather_api_key

In [2]:
# Create a set of random latitude and longitude combinations.
# No random seed is set, so every run draws a different sample.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)

# Materialise the pairs as a list: a bare zip() is a one-shot iterator, so any
# cell that consumed it could not be re-run without silently getting no pairs.
lat_lngs = list(zip(lats, lngs))

In [3]:
# Create a list for holding the cities (unique names, in first-seen order)
cities = []
coordinates = list(lat_lngs)

# Names already collected; a set keeps the duplicate check constant-time
# instead of scanning the growing list for every coordinate pair.
seen_cities = set()

# Identify the nearest city for each latitude and longitude combination
for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name
    if city in seen_cities:
        continue
    seen_cities.add(city)
    cities.append(city)

# Number of distinct cities found
len(cities)

638

In [4]:
# Base endpoint URL for the current-weather API (imperial units); the city
# query is appended per request. HTTPS is used so the API key embedded in the
# query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [5]:
# Create an empty list to hold the weather data (one dict per retrieved city)
city_data = []

# Seconds to wait for each API response before giving up on that city;
# without a timeout a single stalled connection would hang the whole loop.
REQUEST_TIMEOUT = 10

# Print the beginning of the logging.
print("Beginning Data Retrieval   ")
print("---------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities.
    try:
        city_weather = requests.get(city_url, timeout=REQUEST_TIMEOUT).json()

        # Parse out the needed data. A response without these keys raises
        # KeyError, which is handled below as "not found".
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the Unix timestamp to a "YYYY-MM-DD HH:MM:SS" UTC string
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime("%Y-%m-%d %H:%M:%S")

        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Long": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})
    # Skip cities whose request failed or whose payload is unusable, but do not
    # use a bare except: that would also swallow KeyboardInterrupt and make the
    # long-running loop impossible to stop.
    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
        print(f"City {city} not found. Skipping...")

# Indicate that data loading is complete.
print("---------------------------")
print("Data Retrieval Complete    ")
print("---------------------------")

Beginning Data Retrieval   
---------------------------
Processing Record 1 of Set 1 | hambantota
Processing Record 2 of Set 1 | vaini
Processing Record 3 of Set 1 | albany
Processing Record 4 of Set 1 | chizhou
Processing Record 5 of Set 1 | jalaun
Processing Record 6 of Set 1 | tonota
Processing Record 7 of Set 1 | mildura
Processing Record 8 of Set 1 | solnechnyy
Processing Record 9 of Set 1 | upernavik
Processing Record 10 of Set 1 | mar del plata
Processing Record 11 of Set 1 | khatanga
Processing Record 12 of Set 1 | yellowknife
Processing Record 13 of Set 1 | avera
Processing Record 14 of Set 1 | cape town
Processing Record 15 of Set 1 | lebu
Processing Record 16 of Set 1 | bambous virieux
Processing Record 17 of Set 1 | barrow
Processing Record 18 of Set 1 | codrington
Processing Record 19 of Set 1 | auki
Processing Record 20 of Set 1 | thompson
Processing Record 21 of Set 1 | nikolskoye
Processing Record 22 of Set 1 | trapani
Processing Record 23 of Set 1 | yumen
Processing Re

Processing Record 33 of Set 4 | tshikapa
Processing Record 34 of Set 4 | tumannyy
City tumannyy not found. Skipping...
Processing Record 35 of Set 4 | bourail
Processing Record 36 of Set 4 | east london
Processing Record 37 of Set 4 | berlevag
Processing Record 38 of Set 4 | sentyabrskiy
City sentyabrskiy not found. Skipping...
Processing Record 39 of Set 4 | kjopsvik
Processing Record 40 of Set 4 | hue
Processing Record 41 of Set 4 | airai
Processing Record 42 of Set 4 | cap malheureux
Processing Record 43 of Set 4 | hasaki
Processing Record 44 of Set 4 | twin falls
Processing Record 45 of Set 4 | lahij
Processing Record 46 of Set 4 | buinsk
Processing Record 47 of Set 4 | evensk
Processing Record 48 of Set 4 | suzu
City suzu not found. Skipping...
Processing Record 49 of Set 4 | waingapu
Processing Record 50 of Set 4 | new norfolk
Processing Record 1 of Set 5 | diffa
Processing Record 2 of Set 5 | cidreira
Processing Record 3 of Set 5 | zamora
Processing Record 4 of Set 5 | itaituba


Processing Record 19 of Set 8 | darab
Processing Record 20 of Set 8 | san jose
Processing Record 21 of Set 8 | tucuman
Processing Record 22 of Set 8 | hihifo
City hihifo not found. Skipping...
Processing Record 23 of Set 8 | baoning
Processing Record 24 of Set 8 | jaciara
Processing Record 25 of Set 8 | riyaq
City riyaq not found. Skipping...
Processing Record 26 of Set 8 | road town
Processing Record 27 of Set 8 | torbay
Processing Record 28 of Set 8 | emba
Processing Record 29 of Set 8 | requena
Processing Record 30 of Set 8 | pouembout
Processing Record 31 of Set 8 | rio gallegos
Processing Record 32 of Set 8 | palatka
Processing Record 33 of Set 8 | port blair
Processing Record 34 of Set 8 | bubaque
Processing Record 35 of Set 8 | boyolangu
Processing Record 36 of Set 8 | brewster
Processing Record 37 of Set 8 | wloszczowa
Processing Record 38 of Set 8 | srednekolymsk
Processing Record 39 of Set 8 | vidalia
Processing Record 40 of Set 8 | pampa
Processing Record 41 of Set 8 | kharp

Processing Record 7 of Set 12 | alwar
Processing Record 8 of Set 12 | fairlie
Processing Record 9 of Set 12 | mayumba
Processing Record 10 of Set 12 | haines junction
Processing Record 11 of Set 12 | rivera
Processing Record 12 of Set 12 | santa eulalia del rio
City santa eulalia del rio not found. Skipping...
Processing Record 13 of Set 12 | sosnovo-ozerskoye
Processing Record 14 of Set 12 | bahia blanca
Processing Record 15 of Set 12 | iracoubo
Processing Record 16 of Set 12 | salalah
Processing Record 17 of Set 12 | dangriga
Processing Record 18 of Set 12 | korla
Processing Record 19 of Set 12 | pangoa
Processing Record 20 of Set 12 | batavia
Processing Record 21 of Set 12 | dawei
Processing Record 22 of Set 12 | riyadh
Processing Record 23 of Set 12 | satitoa
City satitoa not found. Skipping...
Processing Record 24 of Set 12 | dostpur
Processing Record 25 of Set 12 | broken hill
Processing Record 26 of Set 12 | ballina
Processing Record 27 of Set 12 | pangody
Processing Record 28 o

In [6]:
# Build a Pandas DataFrame from the list of per-city dictionaries (one row per dict)
city_data_df = pd.DataFrame(data=city_data)
city_data_df

Unnamed: 0,City,Lat,Long,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Hambantota,6.12,81.12,78.80,94,20,1.12,LK,2020-04-05 18:49:00
1,Vaini,-21.20,-175.20,75.20,78,75,4.70,TO,2020-04-05 18:49:00
2,Albany,42.60,-73.97,61.00,55,90,3.36,US,2020-04-05 18:46:07
3,Chizhou,30.66,117.48,51.22,68,97,2.15,CN,2020-04-05 18:49:00
4,Jalaun,26.15,79.34,82.33,19,0,4.52,IN,2020-04-05 18:49:00
...,...,...,...,...,...,...,...,...,...
576,Dingle,11.00,122.67,77.83,80,2,8.66,PH,2020-04-05 18:50:05
577,Pandamatenga,-18.53,25.63,74.35,41,92,2.46,BW,2020-04-05 18:50:05
578,Singkang,2.10,102.80,77.00,88,20,2.24,MY,2020-04-05 18:50:05
579,Ust-Nera,64.57,143.20,8.10,84,98,3.96,RU,2020-04-05 18:50:06


In [7]:
# Put the identifying columns first, then the coordinates, then the measurements.
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Long",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
city_data_df = city_data_df.loc[:, new_column_order]

In [None]:
# Display the DataFrame (its columns were reordered in the previous cell).
city_data_df 

In [None]:
# Path of the output file (CSV)
output_data_file = "weather_data/cities.csv"

# Make sure the target directory exists; to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

# Export the city data into a CSV, labelling the index column "City_ID"
city_data_df.to_csv(output_data_file, index_label="City_ID")

In [None]:
# Pull out the columns used by the scatter plots, one Series per plotted quantity.
# Note: this rebinds `lats`, which earlier held the random latitudes.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

In [None]:
# Format the current local date as a string using the locale's date representation (%x).
today = time.strftime("%x", time.localtime())
today

In [None]:
# Build the scatter plot for latitude vs. max temperature
plt.scatter(
    lats,
    max_temps,
    edgecolor="black",
    linewidths=1,
    marker="o",
    alpha=0.8,
    label="Cities",
)

# Incorporate the other graph properties (the title carries the current date)
plt.title("City Latitude vs. Max Temperature " + time.strftime("%x"))
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
# Pass a real boolean; the grid visibility parameter is documented as a bool,
# not the string "True".
plt.grid(True)

# Save the figure (must happen before show(), which finishes the figure)
plt.savefig("weather_data/Fig1.png")

# Show plot
plt.show()

In [None]:
# Build the scatter plot for latitude vs. humidity
plt.scatter(
    lats,
    humidity,
    edgecolor="black",
    linewidths=1,
    marker="o",
    alpha=0.8,
    label="Cities",
)

# Incorporate the other graph properties (the title carries the current date)
plt.title("City Latitude vs. Humidity " + time.strftime("%x"))
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
# Pass a real boolean; the grid visibility parameter is documented as a bool,
# not the string "True".
plt.grid(True)

# Save the figure (must happen before show(), which finishes the figure)
plt.savefig("weather_data/Fig2.png")

# Show plot
plt.show()

In [None]:
# Build the scatter plot for latitude vs. cloudiness
plt.scatter(
    lats,
    cloudiness,
    edgecolor="black",
    linewidths=1,
    marker="o",
    alpha=0.8,
    label="Cities",
)

# Incorporate the other graph properties (the title carries the current date)
plt.title("City Latitude vs. Cloudiness " + time.strftime("%x"))
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
# Pass a real boolean; the grid visibility parameter is documented as a bool,
# not the string "True".
plt.grid(True)

# Save the figure (must happen before show(), which finishes the figure)
plt.savefig("weather_data/Fig3.png")

# Show plot
plt.show()

In [None]:
# Build the scatter plot for latitude vs. wind speed
plt.scatter(
    lats,
    wind_speed,
    edgecolor="black",
    linewidths=1,
    marker="o",
    alpha=0.8,
    label="Cities",
)

# Incorporate the other graph properties (the title carries the current date).
# The title previously said "Cloudiness", a copy-paste slip from the prior cell.
plt.title("City Latitude vs. Wind Speed " + time.strftime("%x"))
plt.ylabel("Wind speed (mph)")
plt.xlabel("Latitude")
# Pass a real boolean; the grid visibility parameter is documented as a bool,
# not the string "True".
plt.grid(True)

# Save the figure (must happen before show(), which finishes the figure)
plt.savefig("weather_data/Fig4.png")

# Show plot
plt.show()

In [None]:
# Create a function to perform linear regression on the weather data and plot a regression line and the equation with the data

def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    """Scatter-plot the data with its least-squares line and the line's equation.

    Args:
        x_values: Values for the x axis (labelled "Latitude"); must support
            element-wise arithmetic with scalars, e.g. a pandas Series.
        y_values: Values for the y axis, the same length as ``x_values``.
        title: Title of the plot.
        y_label: Label for the y axis.
        text_coordinates: ``(x, y)`` position, in data coordinates, at which
            the line equation is annotated.

    Returns:
        None. The figure is drawn with ``plt.show()``.
    """
    # Run the regression; only the slope and intercept are used below.
    (slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept

    # Build the equation text. The sign is taken from the rounded intercept so
    # a negative intercept reads "y = 2.0x - 1.0" rather than "y = 2.0x + -1.0".
    slope_rounded = round(float(slope), 2)
    intercept_rounded = round(float(intercept), 2)
    sign = "-" if intercept_rounded < 0 else "+"
    line_eq = "y = " + str(slope_rounded) + "x " + sign + " " + str(abs(intercept_rounded))

    # Create a scatter plot and plot the regression line in red.
    plt.scatter(x_values, y_values)
    plt.plot(x_values, regress_values, "r")

    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Split the cities by hemisphere: latitude 0 and above is northern, below 0 is southern.
# (The "nothern" spelling is kept because later cells refer to this name.)
nothern_hemi_df = city_data_df[city_data_df["Lat"].ge(0)]
southern_hemi_df = city_data_df[city_data_df["Lat"].lt(0)]

In [None]:
# Northern Hemisphere: regress maximum temperature on latitude
x_values, y_values = nothern_hemi_df["Lat"], nothern_hemi_df["Max Temp"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Northern Hemisphere\n for Maximum Temperature",
    y_label="Max Temp",
    text_coordinates=(0, 2),
)

In [None]:
# Southern Hemisphere: regress maximum temperature on latitude
x_values, y_values = southern_hemi_df["Lat"], southern_hemi_df["Max Temp"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Southern Hemisphere\n for Maximum Temperature",
    y_label="Max Temp",
    text_coordinates=(-55, 90),
)

In [None]:
# Northern Hemisphere: regress humidity on latitude
x_values, y_values = nothern_hemi_df["Lat"], nothern_hemi_df["Humidity"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Northern Hemisphere\n for Humidity",
    y_label="% Humidity",
    text_coordinates=(40, 10),
)

In [None]:
# Southern Hemisphere: regress humidity on latitude
x_values, y_values = southern_hemi_df["Lat"], southern_hemi_df["Humidity"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Southern Hemisphere\n for Humidity",
    y_label="% Humidity",
    text_coordinates=(-55, 20),
)

In [None]:
# Northern Hemisphere: regress cloudiness on latitude
x_values, y_values = nothern_hemi_df["Lat"], nothern_hemi_df["Cloudiness"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Northern Hemisphere\n for Cloudiness",
    y_label="Cloudiness",
    text_coordinates=(0, 2),
)

In [None]:
# Southern Hemisphere: regress cloudiness on latitude
x_values, y_values = southern_hemi_df["Lat"], southern_hemi_df["Cloudiness"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Southern Hemisphere\n for Cloudiness",
    y_label="Cloudiness",
    text_coordinates=(-55, 90),
)

In [None]:
# Northern Hemisphere: regress wind speed on latitude
x_values, y_values = nothern_hemi_df["Lat"], nothern_hemi_df["Wind Speed"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Northern Hemisphere\n for Wind Speed",
    y_label="Wind Speed (mph)",
    text_coordinates=(0, 40),
)

In [None]:
# Southern Hemisphere: regress wind speed on latitude
x_values, y_values = southern_hemi_df["Lat"], southern_hemi_df["Wind Speed"]

# Draw the scatter plot with the fitted line and its equation
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on the Southern Hemisphere\n for Wind Speed",
    y_label="Wind Speed (mph)",
    text_coordinates=(-30, 30),
)