In [1]:
# dependencies and setup
import pandas as pd
import requests
import os
import csv
import json
import re 
from config import weather_api_key

# import list of latitude/longitude
locations_file = os.path.join('..','Resources','CitiesWGeolocation.csv')
# export list of latitude/longitude average temperature and average number of sunny days to data file 
weather_file = os.path.join('..','Resources','weather.csv')

In [2]:
# locations_df = pd.read_csv('../Resources/CitiesWGeolocation.csv')
locations_df = pd.read_csv(locations_file)
locations_df

Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,0,Lexington,Kentucky,United States,38.046407,-84.497039
1,1,San Diego,California,United States,32.717420,-117.162773
2,2,Cook Islands​,,Cook Islands​,-19.996972,-157.785871
3,3,Park City,Utah,United States,40.646092,-111.497996
4,4,Newcastle Upon Tyne,England,United Kingdom,54.973847,-1.613157
...,...,...,...,...,...,...
215,232,Beirut,,Lebanon,33.895920,35.478430
216,233,Zurich,,Switzerland,47.374449,8.541042
217,234,Geneva,,Switzerland,46.201756,6.146601
218,235,Valletta,,Malta,35.898982,14.513676


In [3]:
# create variables and initialize parameters and authentication to data
latlng = ""
url_month = 0
day_count = 0
temp_total = float()
sun_total = float()
data_dict = {}
data_list = []
historical_date_list = []
url_list= []

# set up key for API auth
api_key = weather_api_key

# imperial,Farenheit
units = 'f'

# average for 24 hour period
interval = '24'

# hourly split of data 1=on 0=off
hourly = 0

# set up base url and query parameters
baseUrl = 'http://api.weatherstack.com/historical?'

# url = baseUrl +parms
# url_list.append(url)

In [4]:
# confirm number of locations and records to write
(f'Number of Locations: {locations_df.shape[0]}, with a total number of writes to df: {(locations_df.shape[0])*1}.')

'Number of Locations: 220, with a total number of writes to df: 220.'

In [5]:
# calculate dates for historical data, create url, append to url list
url_list.clear()  # start off fresh
# new parms for api call   
    # format the dates: 4 - 12 are last year historical, months 1 - 3 are this year historical
mon_list = ['04','05','06','07','08','09','10','11','12','01','02','03']
year_list = ['2022', '2023']

# read the prospective honeymoon geolocations data file 
for i in range(locations_df.shape[0]):
    lat = locations_df['Latitude'][i]
    lng = locations_df['Longitude'][i]
    latlng = str(lat) + ',' + str(lng)

    # process each month's data for each location
    for mon in mon_list:
        if mon in['01','02','03']:
            year = year_list[1]
        else:
            year = year_list[0]
            # can only use 8 days per month because of API limitations
        historical_date_start = f'{year}-{mon}-10'
        historical_date_end = f'{year}-{mon}-18'
       # print(historical_date_start, historical_date_end )        
        parms = 'access_key='+ api_key +'&query='+ latlng +\
              '&historical_date_start=' + historical_date_start + '&historical_date_end=' + historical_date_end + ''    
        # append url list for later calls to api
        url = baseUrl + parms
#         print(url)
#         print(latlng)
        url_list.append(url)

In [6]:
# what keys are available
# historical_data['2023-03-01'].keys()

In [7]:
# # set up for API call and return json response
data_list.clear() # start fresh

# call out to the api to get the data
#   for each day, for each month, for each location
for i in range(len(url_list)):
    url = url_list[i]
    
    # keep track of the month we are pulling data for
    p = ("-(\d{2})-")
    match = re.search(p, url)
    if match:
        url_month = match.group(1)
           
    #return data from the API call
    response = requests.get(url)
    data = response.json()    
    
    # get the latitude and longitude to begin creating a dictionary to append to list
    latitude = data['location']['lat']
    longitude = data['location']['lon']
    day_count = 0
    # dig in to the future dashboard filter information
    historical_data = data['historical']                
    for key in historical_data: 
        historical_key = historical_data[key]
        day_count += 1
        temp_total += historical_data[key]['avgtemp']
        sun_total += historical_data[key]['sunhour']
    # month average data
    temp_avg = round((temp_total/day_count),2)
    sun_avg = round((sun_total/day_count),2)
    
    # send data to dictionary        
    data_dict = {
        'Latitude' : data['location']['lat'],
        'Longitude' : data['location']['lon'],
        'Month' : url_month,
        'Average Temperature' : temp_avg,
        'Average No Sunny Days' : sun_avg
    }
    # set up dictionary append to list to create DataFrame
    data_list.append(data_dict)

    # clear averages to prepare for next month processing
    temp_total = 0.00
    sun_total = 0.00
    day_count = 0
    temp_avg = 0.00
    sun_avg = 0.00        

In [13]:
weather_df = pd.DataFrame(data_list)
weather_df.head(13)

Unnamed: 0,Latitude,Longitude,Month,Average Temperature,Average No Sunny Days
0,38.05,-84.459,4,11.33,9.08
1,38.05,-84.459,5,19.56,10.08
2,38.05,-84.459,6,23.67,12.53
3,38.05,-84.459,7,24.22,12.53
4,38.05,-84.459,8,21.67,9.49
5,38.05,-84.459,9,20.67,10.38
6,38.05,-84.459,10,12.67,9.92
7,38.05,-84.459,11,3.78,7.44
8,38.05,-84.459,12,4.44,7.8
9,38.05,-84.459,1,5.44,8.04


In [14]:
# confirm all records process successfully or note the issue
forecasted_records = ((locations_df.shape[0])*12)
actual_records = weather_df.shape[0]

if forecasted_records > actual_records:
    print(f'There are {forecasted_records-actual_records} missing records. Try Again!!')
elif actual_records > forecasted_records:
    print(f'There are {actual_records-forecasted_records} extra records. Try Again!!')
else:
    print(f'{weather_df.shape[0]} records have processed. Congratulations!!')

2640 records have processed. Congratulations!!


In [10]:
# send new weather DataFrame to csv file
weather_df.to_csv(weather_file)