# Introduction

- Overview
  - This notebook supplements the primary one, focusing on retrieving historical weather data for the Scrooge Building in 2018. The data is sourced from https://home.openweathermap.org/.
  - The downloaded historical weather JSON files will be stored in the `./weather` directory.

  - Further details regarding the importance and the usage of this data source will be described in the main notebook.

- Notes
  - To run the code properly, we need an API key from the data provider, available at https://home.openweathermap.org/api_keys.
  - This notebook can be skipped entirely by extracting the weather.zip file into the ./weather directory instead.

# Settings 

In [1]:
!which python
!python --version

input_dir = "./"
weather_dir = f"{input_dir}/weather"
!mkdir -p $weather_dir

# scrooge_metadata.parquet
# 	            in.weather_file_latitude	in.weather_file_longitude
# bldg_scrooge	42.15714285714286	        -71.15857142857143

lat = 42.15714285714286 # 
lon = -71.15857142857143

API_key = "api_key"
api_template_url = "https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={dt}&appid={API_key}"


/home/motoki/miniconda3/envs/py39_holiday/bin/python
Python 3.9.18


# Utilities

In [2]:
import pandas as pd
import numpy as np
import os

import requests
import json
from tqdm import tqdm

import time
import datetime

def download_json_from_api(api_url, output_file_path, timeout=30):
    """Fetch JSON from ``api_url`` and save it to ``output_file_path``.

    The payload is first written to ``<output_file_path>.tmp`` and then moved
    into place, so an interrupted or failed write never leaves a truncated
    file at the final path (which a later "skip existing" check would
    mistake for a finished download).

    Args:
        api_url: Full request URL.
        output_file_path: Destination of the pretty-printed JSON file.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        0 on success, the HTTP status code when the response is not 200,
        or -1 when any exception occurred (the error is printed).
    """
    tmp_file_path = f"{output_file_path}.tmp"
    try:
        # NOTE(review): Content-Type on a body-less GET was presumably meant
        # to be an Accept header; kept unchanged - confirm with the provider.
        headers = {
            'Content-Type': 'application/vnd.api+json',
        }
        response = requests.get(api_url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to download JSON data. Status code: {response.status_code}")
            return response.status_code

        json_data = response.json()
        with open(tmp_file_path, 'w') as file:
            json.dump(json_data, file, indent=2)
        # Atomic move: the final path only ever holds a complete file.
        os.replace(tmp_file_path, output_file_path)
        return 0

    except Exception as e:
        # Deliberately broad: callers rely on an integer code, not an exception.
        print(f"An error occurred: {e}")
        return -1

    finally:
        # Best-effort cleanup of a leftover temp file. After a successful
        # os.replace the file is already gone, hence the ignored OSError.
        try:
            os.remove(tmp_file_path)
        except OSError:
            pass

def download_json_from_api_retry(api_url, output_file_path, 
                                 nb_tries=5, skip_existing=True,
                                 backoff_seconds=1.0):
    """Download ``api_url`` to ``output_file_path``, retrying on failure.

    Args:
        api_url: Full request URL.
        output_file_path: Destination JSON file.
        nb_tries: Maximum number of download attempts.
        skip_existing: When true and ``output_file_path`` already exists,
            nothing is downloaded and 0 is returned.
        backoff_seconds: Base pause between attempts; the pause grows
            linearly (1x, 2x, 3x, ...). Use 0 to retry immediately.

    Returns:
        0 on success (or when skipped), otherwise the code of the last
        attempt as returned by ``download_json_from_api``. Returns -1 when
        ``nb_tries`` allows no attempt at all.
    """
    if skip_existing and os.path.exists(output_file_path):
        return 0

    # Start from a failure code so that "no attempt made" is never reported
    # as success.
    code = -1
    for attempt in range(nb_tries):
        code = download_json_from_api(api_url, output_file_path)
        if code == 0:
            return code

        # Client errors (bad key, bad request, not found, ...) will not fix
        # themselves; only 408 (timeout) and 429 (rate limit) are worth a retry.
        if 400 <= code < 500 and code not in (408, 429):
            break

        # Pause before the next attempt, but not after the final one.
        if attempt < nb_tries - 1:
            time.sleep(backoff_seconds * (attempt + 1))
    return code

def get_unix_timestamp(str_timestamp, tzinfo=None):
    """Convert a ``'YYYY-MM-DD HH:MM:SS'`` string to a Unix timestamp.

    Args:
        str_timestamp: Timestamp text in the format ``%Y-%m-%d %H:%M:%S``.
        tzinfo: Optional ``datetime.tzinfo`` the text is expressed in (for
            example ``datetime.timezone.utc``). When ``None`` the text is
            interpreted in the local timezone of the machine running the
            code, so the result differs between machines.

    Returns:
        Whole seconds since the Unix epoch, as an ``int``.

    Raises:
        ValueError: If ``str_timestamp`` does not match the expected format.
    """
    parsed = datetime.datetime.strptime(str_timestamp, '%Y-%m-%d %H:%M:%S')
    if tzinfo is not None:
        # Attach the zone without shifting the wall-clock fields.
        parsed = parsed.replace(tzinfo=tzinfo)
    return int(parsed.timestamp())

# Execution

In [3]:
# Every calendar day of 2018, from the first to the last (both inclusive).
d1, d2 = datetime.date(2018, 1, 1), datetime.date(2018, 12, 31)
days = [
    datetime.date.fromordinal(ordinal)
    for ordinal in range(d1.toordinal(), d2.toordinal() + 1)
]

# A date's ISO format is YYYY-MM-DD, which is the form used downstream.
all_dates = list(map(datetime.date.isoformat, days))
len(all_dates), all_dates[:2], all_dates[-2:]

(365, ['2018-01-01', '2018-01-02'], ['2018-12-30', '2018-12-31'])

In [4]:
# Quarter-hour slots from 17:00:00 through 23:45:00. Generated already
# sorted and unique, so no set()/sort() pass is needed.
weather_hours = [
    f"{hour:02d}:{minute:02d}:00"
    for hour in range(17, 24)
    for minute in (0, 15, 30, 45)
]

print(f"Running for {len(weather_hours)} timestamps (per day)")
print(weather_hours)

# (timestamp, code) for every request that did not end with code 0, so that
# failures are reported instead of being silently dropped.
failed_downloads = []

for weather_date in tqdm(all_dates):
    for weather_hour in weather_hours:
        str_timestamp = weather_date + " " + weather_hour
        # get_unix_timestamp already returns an int.
        dt = get_unix_timestamp(str_timestamp)
        api_url = api_template_url.format(lat=lat, lon=lon, dt=dt, API_key=API_key)

        # Compact form for the file name, e.g. 20180101_170000.
        file_stamp = str_timestamp.replace("-", "").replace(":", "").replace(" ", "_")
        output_filename = f"{weather_dir}/scrooge_{file_stamp}_{dt}.json"
        status = download_json_from_api_retry(api_url, output_filename)
        if status != 0:
            failed_downloads.append((str_timestamp, status))

if failed_downloads:
    print(f"{len(failed_downloads)} download(s) failed; first few (timestamp, code): {failed_downloads[:5]}")


Running for 28 timestamps (per day)
['17:00:00', '17:15:00', '17:30:00', '17:45:00', '18:00:00', '18:15:00', '18:30:00', '18:45:00', '19:00:00', '19:15:00', '19:30:00', '19:45:00', '20:00:00', '20:15:00', '20:30:00', '20:45:00', '21:00:00', '21:15:00', '21:30:00', '21:45:00', '22:00:00', '22:15:00', '22:30:00', '22:45:00', '23:00:00', '23:15:00', '23:30:00', '23:45:00']


100%|████████████████████████████████████████████████████████| 365/365 [00:00<00:00, 836.21it/s]


# Next steps
- Run the main notebook