In [1]:
#Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from citipy import citipy 
import requests
from config import weather_api_key
import time
from datetime import datetime


In [2]:
#Draw 15 random latitudes in [-90, 90) and 15 random longitudes in [-180, 180),
#then pair them up element by element.
lats = np.random.uniform(low=-90, high=90, size=15)
lngs = np.random.uniform(low=-180, high=180, size=15)

#NOTE: zip() returns a one-shot iterator; it can only be consumed once.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x262af621348>

In [3]:
#Materialize the (latitude, longitude) pairs into a list.
#NOTE: this drains the lat_lngs iterator, so re-running this cell without
#re-running the cell that builds lat_lngs produces an empty list.
coordinates = [*lat_lngs]

In [4]:
#Identify the nearest city for each latitude and longitude combo and keep
#only the first occurrence of each city name.
#dict.fromkeys preserves insertion order, so the resulting list is the same
#as an "append if not already in the list" loop, but without re-scanning the
#list for every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

#Print the city count
len(cities)


15

In [5]:
#Show the unique city names collected above
cities


['longyearbyen',
 'taolanaro',
 'moussoro',
 'thompson',
 'hermanus',
 'hobart',
 'hilo',
 'cape town',
 'salalah',
 'pimentel',
 'avon park',
 'ushuaia',
 'punta arenas',
 'alice town',
 'castro']

In [6]:
#Create an empty list that will hold one record (dict) per retrieved city
city_data = []

#Create the base URL. HTTPS is used so the API key carried in the query
#string is not sent over the network in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

#Print the beginning of logging
print("Beginning Data Retrieval     ")
print("-----------------------------")

#Create counters: position within the current set, and the set number
record_count = 1
set_count = 1


Beginning Data Retrieval     
-----------------------------


In [7]:
# Loop through all the cities in our list, request the current weather for
# each one and append a record to city_data. Cities whose request or parsing
# fails are logged and skipped.
for i, city in enumerate(cities):

    #Group cities in sets of 50; pause for a minute before starting each new
    #set (presumably to stay under the API's rate limit - confirm against the plan in use)
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    #Create endpoint URL with each city
    city_url = url + "&q=" + city.replace(" ","+")

    #Log the record and set number and the city (the URL is deliberately not
    #printed because it contains the API key)
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    record_count += 1

    #Run an API request for each of the cities
    try:
        #Request and parse the JSON; the timeout keeps one stalled connection
        #from hanging the whole run
        city_weather = requests.get(city_url, timeout=10).json()

        #Pull out the needed data
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']

        #Convert the UNIX timestamp to a readable UTC date string
        city_date = datetime.utcfromtimestamp(city_weather['dt']).strftime('%Y-%m-%d %H:%M:%S')

        #Append the city info
        city_data.append({"City" : city.title(),
                         "Lat" : city_lat,
                         "Lng" : city_lng,
                         "Max Temp" : city_max_temp,
                         "Humidity" : city_humidity,
                         "Clouds" : city_clouds,
                         "Wind Speed" : city_wind,
                         "Country" : city_country,
                         "Date" : city_date})

    #If an error occurs, skip the city. Catching Exception (rather than a bare
    #except) still covers request, JSON and missing-key failures, but lets
    #KeyboardInterrupt / SystemExit stop the loop.
    except Exception:
        print("City not found. Skipping...")

#Indicate that the data retrieval has finished
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing Record 1 of Set 1 | longyearbyen
Processing Record 2 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 3 of Set 1 | moussoro
Processing Record 4 of Set 1 | thompson
Processing Record 5 of Set 1 | hermanus
Processing Record 6 of Set 1 | hobart
Processing Record 7 of Set 1 | hilo
Processing Record 8 of Set 1 | cape town
Processing Record 9 of Set 1 | salalah
Processing Record 10 of Set 1 | pimentel
Processing Record 11 of Set 1 | avon park
Processing Record 12 of Set 1 | ushuaia
Processing Record 13 of Set 1 | punta arenas
Processing Record 14 of Set 1 | alice town
Processing Record 15 of Set 1 | castro
-----------------------------
Data Retrieval Complete      
-----------------------------


In [9]:
#Inspect the raw JSON of the last response parsed by the retrieval loop
city_weather


{'coord': {'lon': -50.0119, 'lat': -24.7911},
 'weather': [{'id': 803,
   'main': 'Clouds',
   'description': 'broken clouds',
   'icon': '04n'}],
 'base': 'stations',
 'main': {'temp': 64.72,
  'feels_like': 65.5,
  'temp_min': 64.72,
  'temp_max': 64.72,
  'pressure': 1012,
  'humidity': 98,
  'sea_level': 1012,
  'grnd_level': 901},
 'visibility': 10000,
 'wind': {'speed': 3.78, 'deg': 326, 'gust': 9.69},
 'clouds': {'all': 64},
 'dt': 1644025035,
 'sys': {'country': 'BR', 'sunrise': 1643965156, 'sunset': 1644012519},
 'timezone': -10800,
 'id': 3466704,
 'name': 'Castro',
 'cod': 200}

In [20]:
#Look up the text description of the first entry in that response's "weather" list
city_weather["weather"][0]["description"]

'broken clouds'

In [None]:
#Turn the list of per-city records into a DataFrame (one row per city)
city_data_df = pd.DataFrame(data=city_data)
city_data_df

In [None]:
#Reorder the columns so the identifying fields come first
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Clouds", "Wind Speed",
]
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df

In [None]:
#Name of the CSV file to write
output_data_file = "cities.csv"

#Export the city data to that CSV, labelling the index column "City_ID"
city_data_df.to_csv(path_or_buf=output_data_file, index_label="City_ID")

In [None]:
#Pull out the columns used by the plots below.
#NOTE: `lats` is rebound here and no longer refers to the random latitudes
#generated at the top of the notebook.
lats, max_temps, humidity, clouds, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Clouds", "Wind Speed")
)



In [None]:
#Draw the max temp scatter plot.
#`time` is already imported in the notebook's first cell, so it is not
#re-imported here; it only supplies the date stamp in the title.
fig, ax = plt.subplots()
ax.scatter(lats, max_temps,
           edgecolor = "black",
           linewidths = 1,
           marker = "o",
           alpha = .8,
           label = "Cities"
          )
ax.set_ylabel("Max Temp(F)")
ax.set_xlabel("Latitude")
ax.set_title("City Latitude vs Max Temp " + time.strftime("%x"))
ax.grid()

#Save it
fig.savefig("Lat_vs_MAxTemp.png")

#Show it
plt.show()

In [None]:
#Scatter of humidity against latitude, stamped with today's date
fig, ax = plt.subplots()
ax.scatter(lats, humidity,
           edgecolor = "black",
           linewidths = 1,
           marker = "o",
           alpha = .8,
           label = "Cities"
          )
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity")
ax.set_title("City Latitude vs Humidity " + time.strftime("%x"))
ax.grid()

#Write the figure to disk
fig.savefig("Lat_vs_Humidity.png")

#Render the figure
plt.show()

In [None]:
#Scatter of cloudiness against latitude, stamped with today's date
fig, ax = plt.subplots()
ax.scatter(lats, clouds,
           edgecolor = "black",
           linewidths = 1,
           marker = "o",
           alpha = .8,
           label = "Cities"
          )
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_title("City Latitude vs Cloudiness (%) " + time.strftime("%x"))
ax.grid()

#Write the figure to disk
fig.savefig("Lat_vs_Cloudiness.png")

#Render the figure
plt.show()

In [None]:
#Scatter of wind speed against latitude, stamped with today's date
fig, ax = plt.subplots()
ax.scatter(lats, wind_speed,
           edgecolor = "black",
           linewidths = 1,
           marker = "o",
           alpha = .8,
           label = "Cities"
          )
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")
ax.set_title("City Latitude vs Wind Speed " + time.strftime("%x"))
ax.grid()

#Write the figure to disk
fig.savefig("Lat_vs_Wind_Speed.png")

#Render the figure
plt.show()

In [None]:
#Import linregress 
from scipy.stats import linregress

#Helper that fits a regression line to the weather data and plots the
#points, the fitted line and the line's equation
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    """Scatter y_values against x_values with a fitted regression line.

    Parameters:
        x_values: x data passed to linregress; it is also multiplied by the
            fitted slope, so it must support arithmetic with a scalar
            (the notebook passes a latitude column).
        y_values: y data passed to linregress and plotted on the y axis.
        title: text for the plot title.
        y_label: text for the y-axis label (the x axis is always "Latitude").
        text_coordinates: position handed to plt.annotate for the equation text.

    Shows the figure and prints the r-value; nothing is returned.
    """
    #Fit the line; only slope, intercept and r-value are needed
    fit = linregress(x_values, y_values)

    #Predicted y for every x on the fitted line
    regress_values = x_values * fit.slope + fit.intercept

    #Equation text with both coefficients rounded to two decimals
    line_eq = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

    #Points first, then the fitted line in red on top
    plt.scatter(x_values, y_values)
    plt.plot(x_values, regress_values, "r")

    #Equation annotation, title and axis labels
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel("Latitude")
    plt.ylabel(y_label)
    plt.show()
    print(f"r-value is {fit.rvalue}")

In [None]:
#Split the cities by hemisphere: latitude 0 and above counts as northern,
#anything below 0 as southern
northern_hemi_df = city_data_df[city_data_df['Lat'] >= 0]
southern_hemi_df = city_data_df[city_data_df['Lat'] < 0]


In [None]:
#Linear regression for the northern hemisphere: latitude vs max temp
#(title spelling corrected: "Hemisphere")
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Max Temp']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Northern Hemisphere \n for Max Temperature",
                       "Max Temp", (10,-40))

In [None]:
#Linear regression for the southern hemisphere: latitude vs max temp
#(title spelling corrected: "Hemisphere")
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Max Temp']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Southern Hemisphere \n for Max Temperature",
                       "Max Temp", (-40,45))

In [None]:
#Linear regression for the northern hemisphere: latitude vs humidity
#(title spelling corrected: "Hemisphere")
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Humidity']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Northern Hemisphere \n for % Humidity",
                       "% Humidity", (40,0))

In [None]:
#Linear regression for the southern hemisphere: latitude vs humidity
#(title spelling corrected: "Hemisphere")
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Humidity']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Southern Hemisphere \n for % Humidity",
                       "% Humidity", (-57,13))


In [None]:
#Linear regression for the northern hemisphere: latitude vs cloudiness
#(title spelling corrected: "Hemisphere")
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Clouds']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Northern Hemisphere \n for % Cloudiness",
                       "% Cloudiness", (40,0))

In [None]:
#Linear regression for the southern hemisphere: latitude vs cloudiness
#(title spelling corrected: "Hemisphere")
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Clouds']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Southern Hemisphere \n for % Cloudiness",
                       "% Cloudiness", (-55,22))

In [None]:
#Linear regression for the northern hemisphere: latitude vs wind speed
#(title spelling corrected: "Hemisphere")
x_values = northern_hemi_df['Lat']
y_values = northern_hemi_df['Wind Speed']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Northern Hemisphere \n for Wind Speed",
                       "Wind Speed", (0,40))

In [None]:
#Linear regression for the southern hemisphere: latitude vs wind speed
#(title spelling corrected: "Hemisphere")
x_values = southern_hemi_df['Lat']
y_values = southern_hemi_df['Wind Speed']

plot_linear_regression(x_values, y_values,
                       "Linear Regression on Southern Hemisphere \n for Wind Speed",
                       "Wind Speed", (-55,30))