In [1]:
# dependencies and setup
import pandas as pd
import requests
import os
import csv
import json
import re 
import config
from config import weather_api_key
from datetime import date

# Data files, all kept in ../Resources relative to this notebook:
#   locations_file - input:  cities with latitude/longitude
#   weather_file   - output: per-location monthly temperature / sun averages
#   month_file     - output: serial month id -> month name lookup
locations_file, weather_file, month_file = (
    os.path.join('..', 'Resources', file_name)
    for file_name in ('CitiesWGeolocation2.csv', 'weather.csv', 'month.csv')
)

In [2]:
# load the candidate locations (one row per city, with Latitude/Longitude
# and the LocationID that the weather rows are keyed on)
locations_df = pd.read_csv(filepath_or_buffer=locations_file)
locations_df

Unnamed: 0.1,Unnamed: 0,LocationID,City,State/Province,Country,Latitude,Longitude,CountryID
0,0,0,Lexington,Kentucky,United States,38.046407,-84.497039,82
1,26,26,Zanzibar,Tanzania,Tanzania,-6.166491,39.207431,75
2,71,71,Kaua'i,Hawaii,United States,39.78373,-100.445882,82
3,227,227,Reykjavik,,Iceland,64.145981,-21.942237,31


In [3]:
# ---- working state shared by the cells below ----
latlng = ""
url_month = 0
day_count = 0
temp_total = 0.0
sun_total = 0.0
weather_dict, month_dict = {}, {}
weather_list, month_list = [], []
historical_date_list, url_list = [], []
month_text_list = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# ---- reporting period: the calendar year before today's date ----
date_today = date.today()
url_year = date_today.year - 1

# ---- API authentication ----
api_key = weather_api_key

# ---- API options ----
# NOTE(review): units, interval and hourly are defined here but the request
# URLs built later in this notebook never add them to the query string.
units = 'f'        # imperial, Fahrenheit
interval = '24'    # average for 24 hour period
hourly = 0         # hourly split of data 1=on 0=off

# ---- endpoint; the query parameters are appended per request ----
baseUrl = 'http://api.weatherstack.com/historical?'

In [4]:
# Build the request URLs: one per location per month of url_year.
# url_list layout: [location_id, url_01, ..., url_12, location_id, url_01, ...]
# - each location id entry marks where that location's twelve URLs begin.
# month_list gets one {Year, Month, Month_text} record per month, filled in
# while the first location is processed.
url_list.clear()    # start off fresh
month_list.clear()

for i in range(len(locations_df)):
    lat = locations_df.loc[i, 'Latitude']
    lng = locations_df.loc[i, 'Longitude']
    latlng = ','.join([str(lat), str(lng)])
    location_id = locations_df.loc[i, 'LocationID']
    url_list.append(location_id)

    # months are numbered 1..12
    for mon in range(1, 13):
        url_month = f'{mon:02d}'  # two-digit string, e.g. '03'

        # IF SHORT ON API CALLS, limit the dates (15 days in middle of month to average)
        historical_date_start = f'{url_year}-{url_month}-09'
        historical_date_end = f'{url_year}-{url_month}-23'

        parms = (
            f'access_key={api_key}'
            f'&query={latlng}'
            f'&historical_date_start={historical_date_start}'
            f'&historical_date_end={historical_date_end}'
        )

        # queue the url for the later calls to the api
        url = baseUrl + parms
        url_list.append(url)

        # the month lookup only needs to be built once
        if i == 0:
            month_dict = {
                'Year': url_year,
                'Month': mon,
                'Month_text': month_text_list[mon - 1],
            }
            month_list.append(month_dict)


In [5]:
# one row per month; month_id is a copy of the row index (0 = first month)
month_df = pd.DataFrame(month_list)
month_df = month_df.assign(month_id=month_df.index)
month_df.info()
month_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        12 non-null     int64 
 1   Month       12 non-null     int64 
 2   Month_text  12 non-null     object
 3   month_id    12 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 512.0+ bytes


Unnamed: 0,Year,Month,Month_text,month_id
0,2022,1,January,0
1,2022,2,February,1
2,2022,3,March,2
3,2022,4,April,3
4,2022,5,May,4
5,2022,6,June,5
6,2022,7,July,6
7,2022,8,August,7
8,2022,9,September,8
9,2022,10,October,9


In [6]:
# what keys are available
# historical_data['2023-03-01'].keys()

In [7]:
# Call the API for every queued URL and reduce each response to one row:
# the average temperature and average sun hours over the days returned
# for that location and month.
weather_list.clear()  # start fresh so re-running the cell does not duplicate rows

# the first "-NN-" in a request URL is the month of historical_date_start
month_pattern = re.compile(r"-(\d{2})-")
location_id = None

for entry in url_list:
    # url_list interleaves location ids with that location's request URLs;
    # any entry that is not a string is the id for the URLs that follow.
    # (Testing `entry in range(...)` on a string scans the whole range.)
    if not isinstance(entry, str):
        location_id = entry
        continue

    # keep track of the month we are pulling data for
    match = month_pattern.search(entry)
    if match is None:
        # do not echo the URL in messages: it carries the API access key
        raise ValueError(f'No month found in the request URL for location {location_id}')
    url_month = match.group(1)

    # return data from the API call; the timeout keeps a stalled
    # connection from hanging the notebook
    response = requests.get(entry, timeout=30)
    if not response.ok:
        raise RuntimeError(
            f'HTTP {response.status_code} for location {location_id}, month {url_month}'
        )
    data = response.json()

    # a payload without 'historical' is treated as a failed request
    if 'historical' not in data:
        # presumably failures are described under an 'error' key - TODO confirm
        api_error = data.get('error', data)
        raise RuntimeError(
            f'No historical data for location {location_id}, month {url_month}: {api_error}'
        )

    historical_data = data['historical']
    if not historical_data:
        # nothing to average; skip rather than divide by zero
        print(f'No daily records for location {location_id}, month {url_month}; skipped')
        continue

    # totals start at zero for every month so nothing carries over from an
    # earlier (possibly interrupted) run
    day_count = len(historical_data)
    temp_total = 0.0
    sun_total = 0.0
    for day in historical_data.values():
        temp_total += day['avgtemp']
        sun_total += day['sunhour']

    # month average data
    # The mean temperature is converted with the Celsius -> Fahrenheit formula
    # (assumes the API returns Celsius, as the request URLs built in this
    # notebook carry no units parameter).
    temp_avg = round(((temp_total / day_count) * 9 / 5) + 32, 2)
    sun_avg = round(sun_total / day_count, 2)

    # one record per location/month; the list becomes a DataFrame later
    weather_dict = {
        'LocationID': location_id,
        'Year': url_year,
        'Month': url_month,
        'Average Temperature': temp_avg,
        'Average No Sunny Days': sun_avg,
    }
    weather_list.append(weather_dict)

In [8]:
# collect the per-month records into a frame; Month arrives as a two-digit
# string and is cast to int so it can be merged with the month frame
weather_df = pd.DataFrame(weather_list).astype({'Month': int})
weather_df.info()
weather_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   LocationID             48 non-null     int64  
 1   Year                   48 non-null     int64  
 2   Month                  48 non-null     int32  
 3   Average Temperature    48 non-null     float64
 4   Average No Sunny Days  48 non-null     float64
dtypes: float64(2), int32(1), int64(2)
memory usage: 1.8 KB


Unnamed: 0,LocationID,Year,Month,Average Temperature,Average No Sunny Days
0,0,2022,1,28.28,5.75
1,0,2022,2,39.32,8.61
2,0,2022,3,46.52,8.89
3,0,2022,4,52.88,9.51
4,0,2022,5,67.52,10.19
5,0,2022,6,73.64,13.25
6,0,2022,7,76.76,11.63
7,0,2022,8,71.96,9.43
8,0,2022,9,70.04,10.81
9,0,2022,10,53.6,10.39


In [9]:
# attach month_id (and the month name) to each weather row by month number;
# both frames carry a Year column, so the result has Year_x and Year_y
weather_merged_df = weather_df.merge(month_df, how='outer', on='Month')
weather_merged_df

Unnamed: 0,LocationID,Year_x,Month,Average Temperature,Average No Sunny Days,Year_y,Month_text,month_id
0,0,2022,1,28.28,5.75,2022,January,0
1,26,2022,1,80.96,8.55,2022,January,0
2,71,2022,1,35.48,8.01,2022,January,0
3,227,2022,1,36.44,2.31,2022,January,0
4,0,2022,2,39.32,8.61,2022,February,1
5,26,2022,2,80.72,8.37,2022,February,1
6,71,2022,2,36.2,9.81,2022,February,1
7,227,2022,2,26.48,4.29,2022,February,1
8,0,2022,3,46.52,8.89,2022,March,2
9,26,2022,3,82.04,8.94,2022,March,2


In [10]:
# weather_df.info(), df.info()

In [11]:
# clean the weather file to match DBD

# merged column name -> output column name
# (kept out of a variable called `dict`, which would hide the builtin for
# every cell that runs afterwards)
column_renames = {
    'month_id': 'MonthID',
    'Year_x': 'Year',
    'Average Temperature': 'Temp',
    'Average No Sunny Days': 'Sun',
}

# output column order
cols = ['LocationID', 'Year', 'MonthID', 'Sun', 'Temp']

# drop the merge leftovers, rename, reorder and label the index; chained so
# the cell builds its result from weather_merged_df without mutating anything
weather_clean_df = (
    weather_merged_df
    .drop(columns=['Year_y', 'Month_text', 'Month'])
    .rename(columns=column_renames)
    .reindex(columns=cols)
    .rename_axis('WeatherID')
)
weather_clean_df

Unnamed: 0_level_0,LocationID,Year,MonthID,Sun,Temp
WeatherID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,2022,0,5.75,28.28
1,26,2022,0,8.55,80.96
2,71,2022,0,8.01,35.48
3,227,2022,0,2.31,36.44
4,0,2022,1,8.61,39.32
5,26,2022,1,8.37,80.72
6,71,2022,1,9.81,36.2
7,227,2022,1,4.29,26.48
8,0,2022,2,8.89,46.52
9,26,2022,2,8.94,82.04


In [12]:
# clean the month file to match DBD: keep only the month name (as 'Month')
# and expose the row index as MonthID
month_df = (
    month_df
    .drop(columns=['Year', 'Month', 'month_id'])
    .rename(columns={'Month_text': 'Month'})
    .rename_axis('MonthID')
)
month_df

Unnamed: 0_level_0,Month
MonthID,Unnamed: 1_level_1
0,January
1,February
2,March
3,April
4,May
5,June
6,July
7,August
8,September
9,October


In [13]:
# write the cleaned month lookup to month_file (index is written too)
month_df.to_csv(path_or_buf=month_file)

In [14]:
# write the cleaned weather frame to weather_file (index is written too)
weather_clean_df.to_csv(path_or_buf=weather_file)