In [1]:
#import dependencies

# standard library
import os
import random
import sys
import time
from datetime import datetime

# third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

# local: the parent directory is added to the import path before importing
# config (presumably where config.py lives -- confirm against the repo layout)
sys.path.append("..")
from config import weather_api_key

In [None]:
# Draw 1,500 random latitudes and longitudes and pair them up.
#
# The pairs are materialized into a list rather than kept as a bare zip
# iterator: a zip object can only be consumed once, so any cell that iterates
# lat_lngs a second time (e.g. after a re-run) would silently get nothing.

lats = np.random.uniform(-90, 90, size=1500)

lngs = np.random.uniform(-180, 180, size=1500)

lat_lngs = list(zip(lats, lngs))

# preview the first few (lat, lng) pairs instead of the whole list
lat_lngs[:5]

In [None]:
# Collect the (lat, lng) pairs into a concrete list so they can be looped over.
# NOTE: if lat_lngs is a one-shot iterator, running this cell a second time
# without regenerating it produces an empty list.

coordinates = [*lat_lngs]

In [None]:
# Resolve every coordinate pair to its nearest city and keep each city once.
#
# dict.fromkeys de-duplicates in a single pass while preserving first-seen
# order; it replaces the repeated linear `city not in cities` scans on a list.

nearest_city_names = (
    citipy.nearest_city(lat, lng).city_name
    for lat, lng in coordinates
)

cities = list(dict.fromkeys(nearest_city_names))

#print the city count to confirm sufficient count
len(cities)

In [None]:
# Base endpoint for the OpenWeatherMap weather API (imperial units).
# HTTPS is used so the API key in the query string is not sent in clear text.
url = f"https://api.openweathermap.org/data/2.5/weather?units=Imperial&appid={weather_api_key}"

In [None]:
# Query the weather API once per city and collect one record (dict) per city
# that returns a usable payload. Cities are logged in sets of 50, with a
# 60-second pause between sets.

#empty list for weather data
city_data = []

# print beginning of logging
print("Beginning Data Retrieval        ")
print("--------------------------------")

#create counters
record_count = 1
set_count = 1

#loop through cities in our list
for i, city in enumerate(cities):

    #group cities in sets of 50 for logging (plus sleep60 to prevent timeout err)
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1
        time.sleep(60)

    #create endpoint url with each city (concat with replace fxn, repl space w plus)
    city_url = url + "&q=" + city.replace(" ", "+")

    #log record and set numbers/city (the url is not printed: it contains the API key)
    print(f"Processing record {record_count} of Set {set_count} | {city}")
    #add 1 to record count
    record_count += 1

    #run api requests for each city
    try:
        # Parse the JSON and retrieve data. The timeout keeps a stalled
        # connection from blocking the whole run indefinitely.
        city_weather = requests.get(city_url, timeout=10).json()

        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the date to ISO standard.
        # NOTE(review): datetime.utcfromtimestamp is deprecated as of Python 3.12;
        # consider datetime.fromtimestamp(ts, tz=timezone.utc) when upgrading.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')

        # Append the city information into city_data list. dict for each city where key value pairs are from weather parsed data
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    except KeyError:
        # The payload lacks one of the expected keys; treated here as
        # "no weather data for this city".
        print("City not found. Skipping...")
    except Exception as err:
        # Network/JSON/other failures: keep the run going, but unlike a bare
        # `except:` this still lets KeyboardInterrupt and SystemExit through.
        # Only the exception type is printed because the exception message may
        # embed the request URL, which includes the API key.
        print(f"Request failed ({type(err).__name__}). Skipping...")

#Indicate Data Loading is complete (flush w margin)

print("--------------------------------")
print("Data Retrieval Complete         ")
print("--------------------------------")

In [None]:
#convert array of dicts into dataframe

city_data_df = pd.DataFrame(city_data)

# NOTE: with default notebook settings only a cell's last expression is
# rendered, so this preview is evaluated but not shown.
city_data_df.head(10)

# Summary statistics must come from the DataFrame: city_data is a plain list
# and has no .describe() method (the original call raised AttributeError).
city_data_df.describe()

In [None]:
# Rearrange the columns: identifying fields first, then the coordinates,
# then the weather measurements.

new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]

city_data_clean_df = city_data_df.loc[:, new_column_order]

city_data_clean_df

In [None]:
#create output file

output_data_file = "weather_data/cities.csv"

# to_csv does not create missing folders, so make sure the target directory
# exists before writing (skipped when the path has no directory component)
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

#export df into csv, labelling the index column City_ID
city_data_clean_df.to_csv(output_data_file, index_label="City_ID")