In [1]:
# Dependencies and Setup

# visualizations
import matplotlib.pyplot as plt
import seaborn as sns

#data science - standard library
import pandas as pd
import numpy as np

#requests
import requests
from pprint import pprint
import time

#regression
from scipy.stats import linregress
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV): path the gathered weather DataFrame is written to with df.to_csv
output_data_file = "Lane_cities.csv"

# (low, high) bounds for latitude and longitude; these are the limits handed to
# np.random.uniform when the random coordinate pairs are drawn
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# Draw 1500 random latitudes, then 1500 random longitudes, uniformly within the
# configured (low, high) bounds, and pair them up positionally.
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Resolve every coordinate pair to its nearest city and keep each city name
# only the first time it appears, preserving discovery order.
cities = []
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name
    if city in cities:
        continue
    cities.append(city)

# Show the number of unique cities to confirm the sample is large enough
len(cities)

646

In [3]:
# Peek at the first five city names as a quick sanity check
cities_sub = cities[:5]
cities_sub

['busselton', 'misratah', 'qaanaaq', 'hobart', 'butaritari']

In [4]:
# Query the OpenWeatherMap current-weather endpoint once per entry in `cities`,
# collect the parsed fields into parallel lists, and assemble them into `df`.

#init lists to hold parsed data (parallel: one entry per successfully parsed city)
lats = []
longs = []
temps = []
humid = []
windSpeed = []
cloudiness = []
cities_found = []
countries = []

# Request settings that do not change between iterations
base_url = "https://api.openweathermap.org/data/2.5/weather"  # https keeps the API key off the wire in clear text
units = "imperial"
request_timeout = 10  # seconds; without a timeout one stalled connection hangs the whole cell

for i, city in enumerate(cities):
    # Let requests build the query string so city names containing spaces or
    # reserved characters (&, #, ...) are percent-encoded correctly.
    query = {"q": city, "units": units, "appid": weather_api_key}

    #make the request
    try:
        response = requests.get(base_url, params=query, timeout=request_timeout)
    except requests.exceptions.RequestException as e:
        # Report and move on so one network failure does not discard everything
        # gathered so far. Only the exception type is printed because the full
        # message can embed the request URL, which contains the API key.
        print(f"Request failed for city {city}: {type(e).__name__}")
        response = None

    #error check
    if response is None:
        pass  # already reported above; still fall through to progress + pacing
    elif response.status_code == 200:
        try:
            # Decoding sits inside the try so a malformed body is reported
            # for this city instead of aborting the loop.
            data = response.json()

            lat = data["coord"]["lat"]
            long = data["coord"]["lon"]
            temp = data["main"]["temp"]
            humidity = data["main"]["humidity"]
            speed = data["wind"]["speed"]
            clouds = data["clouds"]["all"]
            city_name = data["name"]
            country = data["sys"]["country"]

            #save the data (only after every field was read, so the lists stay aligned)
            lats.append(lat)
            longs.append(long)
            temps.append(temp)
            humid.append(humidity)
            windSpeed.append(speed)
            cloudiness.append(clouds)
            cities_found.append(city_name)
            countries.append(country)

        except Exception as e:
            print(f"Through exception for city {city}: {e}")

    elif response.status_code == 404:
        print(f"Missing data in API for {city}")
    else:
        print(response.status_code)
        print("The API is broken.")

    #print every 5
    if i % 5 == 0:
        print(f"Data gathered for: {i} of {len(cities)}")

    #sleep for requests: pause after EVERY request (previously this ran only on
    #every 5th iteration because it was nested under the progress check)
    time.sleep(1)

# make the dataframe (one column per collected list, in the original column order)
df = pd.DataFrame(
    {
        "City": cities_found,
        "Country": countries,
        "Latitude": lats,
        "Longitude": longs,
        "Temperature": temps,
        "Humidity": humid,
        "Wind Speed": windSpeed,
        "Cloudiness": cloudiness,
    }
)

df.head(10)

Data gathered for: 0 of 646
Data gathered for: 5 of 646
Data gathered for: 10 of 646
Missing data in API for phnum penh
Data gathered for: 15 of 646
Data gathered for: 20 of 646
Missing data in API for samusu
Data gathered for: 25 of 646
Missing data in API for marcona
Data gathered for: 30 of 646
Data gathered for: 35 of 646
Missing data in API for amderma
Data gathered for: 40 of 646
Data gathered for: 45 of 646
Missing data in API for illoqqortoormiut
Missing data in API for mys shmidta
Data gathered for: 50 of 646
Data gathered for: 55 of 646
Missing data in API for tumannyy
Data gathered for: 60 of 646
Data gathered for: 65 of 646
Data gathered for: 70 of 646
Data gathered for: 75 of 646
Data gathered for: 80 of 646
Data gathered for: 85 of 646
Data gathered for: 90 of 646
Data gathered for: 95 of 646
Data gathered for: 100 of 646
Data gathered for: 105 of 646
Data gathered for: 110 of 646
Missing data in API for witrivier
Data gathered for: 115 of 646
Missing data in API for malw

Unnamed: 0,City,Country,Latitude,Longitude,Temperature,Humidity,Wind Speed,Cloudiness
0,Busselton,AU,-33.65,115.33,57.61,98,3.15,38
1,Misratah,LY,32.38,15.09,69.39,65,10.96,1
2,Qaanaaq,GL,77.48,-69.36,-12.33,70,8.9,0
3,Hobart,AU,-42.88,147.33,58.15,54,5.82,20
4,Butaritari,KI,3.07,172.79,81.34,72,11.63,92
5,Gisborne,NZ,-38.65,178.0,73.99,53,8.01,64
6,Faanui,PF,-16.48,-151.75,79.2,77,10.54,76
7,Albany,US,42.6,-73.97,57.76,54,3.76,90
8,Punta Arenas,CL,-53.15,-70.92,57.09,58,32.21,40
9,Fortuna,US,40.6,-124.16,53.76,62,17.22,75


In [5]:
# Persist the gathered weather table as CSV, leaving the row index out of the file
df.to_csv(path_or_buf=output_data_file, index=False)

In [6]:
# Summarize the DataFrame: entry count, per-column non-null counts, and dtypes
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 587 entries, 0 to 586
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   City         587 non-null    object 
 1   Country      587 non-null    object 
 2   Latitude     587 non-null    float64
 3   Longitude    587 non-null    float64
 4   Temperature  587 non-null    float64
 5   Humidity     587 non-null    int64  
 6   Wind Speed   587 non-null    float64
 7   Cloudiness   587 non-null    int64  
dtypes: float64(4), int64(2), object(2)
memory usage: 36.8+ KB
