# Test functions for forecaster.py

In [None]:

import json
import pandas as pd
import numpy as np
import pathlib
import boto3
from pathlib import Path
import os
import subprocess
import io
import pickle
import matplotlib as plt
import importlib
import logging 
import requests

# Default figure size for plots drawn from this notebook.
# NOTE(review): `plt` is bound to the top-level `matplotlib` package by the import above
# (not `matplotlib.pyplot`); `rcParams` exists on it, but pyplot functions would not.
plt.rcParams["figure.figsize"] = (16,10)
# Region identifiers; used below as dict keys and as column-name prefixes (e.g. `NSW1_Greenness`).
REGIONIDS = ['NSW1', 'QLD1', 'SA1', 'TAS1', 'VIC1']


In [None]:
%load_ext autoreload
%autoreload 2
import forecaster
import get_features
import get_greenness
import aemo
# Choose the inference backend: the real `inference` module is imported only when
# xgboost is installed; otherwise the mock implementation is bound to the same name.
# `import importlib` alone does not guarantee that the `importlib.util` submodule is
# loaded, so import it explicitly before calling `find_spec`.
import importlib.util

if importlib.util.find_spec('xgboost') is not None:
    import inference
else:
    logging.info("Running with mock inference because xgboost library not present. Results will be random; won't be saved to S3.")
    import mock_inference as inference  # use for testing to skip inference


### Run the whole forecaster

In [None]:
# Run the forecaster entry point and keep whatever it returns for inspection.
a = forecaster.main()

### Run get_features()

In [None]:
# Collect the current inputs; the result of get_features() is unpacked into five values here.
features, weather, recent_prices, gen_by_fuel_month, recent_greenness = get_features.get_features()
# Display the features.
features


In [None]:
# List feature names containing a given substring. The filter string is empty as written,
# which matches every key; put a fragment between the quotes to narrow the list.
[x for x in features.keys() if '' in x]

In [None]:
# Save the current features to a local JSON file.
with open('test_features.json', 'w') as features_file:
    features_file.write(json.dumps(features))

### Run get_greenness()

In [None]:
# Get the DUID list from aemo and pass it to get_greenness().
from aemo import get_duids
duids = get_duids()
# Note: this rebinds `recent_greenness`, which the get_features() cell above also assigns.
greenness_features, greenness_inputs, recent_greenness = get_greenness.get_greenness(duids)
# Second name for the same object, used by the inspection cells below.
greenness_month = recent_greenness

In [None]:
# Display the greenness data returned by get_greenness().
greenness_month

In [None]:
# Maximum per column (presumably a DataFrame with columns such as `NSW1_Greenness` -- confirm).
greenness_month.max()

### Temp - highest_ever_greenness

In [None]:
# Index label at which each column reaches its maximum (presumably a timestamp -- confirm).
greenness_month.idxmax()

In [None]:
# Manually seed the stored per-region record greenness. 'utc' is left blank in this version.
body = {
    'NSW1': {'value': 74, 'utc': ''},
    'QLD1': {'value': 70, 'utc': ''},
    'SA1': {'value': 100, 'utc': ''},
    'TAS1': {'value': 100, 'utc': ''},
    'VIC1': {'value': 73,'utc': ''}, # was 70 from dataset8
}

# NOTE: running this overwrites the `highest_ever_greenness` object in the
# `greenforecast.forecaster` bucket with the literal values above.
s3 = boto3.resource('s3')
bucket = s3.Bucket('greenforecast.forecaster')
bucket.put_object(Key='highest_ever_greenness', Body=json.dumps(body))


In [None]:
# Read the stored record back. Note `s3` is bound to a low-level client here,
# whereas the seeding cell binds the same name to a resource.
s3 = boto3.client('s3')
response = s3.get_object(Bucket='greenforecast.forecaster', Key='highest_ever_greenness')


In [None]:
# Parse the response body as JSON and display it.
low = json.load(response['Body'])
low

In [None]:
# Replacement record with a timestamp per region, each converted by nem_time_to_utc_string.
# NOTE: nem_time_to_utc_string is only bound in a later cell of this notebook
# (as forecaster.nem_time_to_utc_string); run that cell first or this raises NameError.
low = {
    'NSW1': {'value': 73, 'utc': nem_time_to_utc_string(pd.Timestamp('2022-10-28 10:50:00'))},
    'QLD1': {'value': 64, 'utc': nem_time_to_utc_string(pd.Timestamp('2022-12-06 12:30:00'))},
    'SA1': {'value': 100, 'utc': nem_time_to_utc_string(pd.Timestamp('2016-09-28 16:35:00'))},
    'TAS1': {'value': 100, 'utc': nem_time_to_utc_string(pd.Timestamp('2013-05-03 07:25:00'))},
    'VIC1': {'value': 72, 'utc': nem_time_to_utc_string(pd.Timestamp('2022-10-30 12:30:00'))},
}

In [None]:
# Manually adjust the NSW1 record value in `low`.
low['NSW1']['value'] = 74

In [None]:
# Upload `low` under the `highest_ever_greenness` key (replaces the stored object).
bucket = boto3.resource('s3').Bucket('greenforecast.forecaster')
bucket.put_object(Key='highest_ever_greenness', Body=json.dumps(low))

In [None]:
# Module-level alias so cells in this notebook can call the converter unqualified.
nem_time_to_utc_string = forecaster.nem_time_to_utc_string

def highest_ever_greenness(recent_greenness):
    """Get and return the record max greenness.

    Reads stored state from S3, checks if new data has anything higher, writes the
    record back to S3 only when at least one region set a new record, and returns
    the dict result. Example format:
    {'NSW1': {'value': 71, 'utc': '2022-11-20T01:00:00+00:00'},
     'QLD1': {'value': 70, 'utc': '2022-11-20T01:00:00+00:00'},
     'SA1': {'value': 100, 'utc': '2022-11-20T01:00:00+00:00'},
     'TAS1': {'value': 100, 'utc': '2022-11-20T01:00:00+00:00'},
     'VIC1': {'value': 73, 'utc': '2022-11-20T01:00:00+00:00'}}

    Args:
        recent_greenness: object whose ``.max()`` and ``.idxmax()`` results can be
            indexed by ``f'{region}_Greenness'`` for every region in REGIONIDS
            (presumably a time-indexed DataFrame -- TODO confirm).

    Returns:
        The (possibly updated) record dict, keyed by region id.
    """
    bucket_name = 'greenforecast.forecaster'
    object_key = 'highest_ever_greenness'

    # grab current highest-ever from s3
    s3 = boto3.client('s3')
    response = s3.get_object(Bucket=bucket_name, Key=object_key)
    highest = json.load(response['Body'])

    # compare against the new data, update if found a new winner
    highest_recent = recent_greenness.max()
    highest_recent_idx = recent_greenness.idxmax()
    updated = False
    for region in REGIONIDS:
        column = f'{region}_Greenness'
        # Values are stored rounded; requiring a margin of more than 0.5 means the
        # rounded candidate is strictly higher than an integer stored value.
        if highest_recent[column] > highest[region]['value']+0.5:
            print(f"New Highest Greenness for {region}: {highest_recent[column]}")
            highest[region]['value'] = int(np.round(highest_recent[column]))
            highest[region]['utc'] = nem_time_to_utc_string(highest_recent_idx[column])
            updated = True

    # write back to s3 only if something changed; otherwise the stored object is already identical
    if updated:
        s3.put_object(Bucket=bucket_name, Key=object_key, Body=json.dumps(highest))

    return highest
highest_ever_greenness(recent_greenness)

### Test Interpolate_forecasts
assumes we have `features` from above

In [None]:
# One price model and one greenness model per region, named "<region>_<target>"
FORECASTS_TO_MAKE = [f"{region}_{target}" for region in REGIONIDS for target in ['Price', 'Greenness']]

# Current Brisbane time rounded to the nearest hour, with the timezone info dropped
now_brisbane = pd.Timestamp.now(tz='Australia/Brisbane')
base_time = now_brisbane.round('H').tz_localize(tz=None)

# make forecasts: one make_forecast() call per model name, in list order
forecasts = {name: inference.make_forecast(name, features) for name in FORECASTS_TO_MAKE}

forecast_data, forecast_gen_by_fuel = forecaster.interpolate_forecasts(base_time, forecasts)
forecast_data

### Download `latest_forecasts.json` from S3 to website folder
Overwrites `../website/latest_forecasts.json`

In [None]:
# Download first and fail loudly on an HTTP error. Only open (and thereby truncate)
# the local file once a good response is in hand, so a failed download never wipes
# the existing copy or replaces it with an error page.
r = requests.get('https://greenforecast.au/latest_forecasts.json', timeout=30)
r.raise_for_status()
with open('../website/latest_forecasts.json', 'w') as f:
    f.write(r.text)

### Download `latest_forecasts.json` from test S3 bucket
Overwrites `../website/latest_forecasts.json`

In [None]:
# Download from the test bucket first and fail loudly on an HTTP error. Only open
# (and thereby truncate) the local file once a good response is in hand, so a failed
# download never wipes the existing copy or replaces it with an error page.
r = requests.get('https://s3.ap-southeast-2.amazonaws.com/greenforecast.test/latest_forecasts.json', timeout=30)
r.raise_for_status()
with open('../website/latest_forecasts.json', 'w') as f:
    f.write(r.text)

-----------

### Get latest file from greenforecast.history

In [None]:
# Ask the AWS CLI for the key of the most recently modified object in the history bucket.
# The command is passed as an argument list (no shell), so it does not depend on
# shell quoting rules.
get_key = [
    'aws', 's3api', 'list-objects-v2',
    '--bucket', 'greenforecast.history',
    '--query', 'sort_by(Contents, &LastModified)[-1].Key',
    '--output=text',
]
# check=True raises CalledProcessError if the CLI fails, instead of carrying on with an empty key.
key = subprocess.run(get_key, capture_output=True, check=True)
# strip() removes the trailing line ending whatever its form (LF or CRLF).
key = key.stdout.decode('utf-8').strip()

# Fetch that object and parse it as JSON.
s3_client = boto3.client('s3')
res = s3_client.get_object(Bucket='greenforecast.history', Key=key)
features_and_predictions = json.load(res['Body'])

# Note: this rebinds the notebook-level `features` to the historical snapshot.
features = features_and_predictions['features']
predictions = features_and_predictions['predictions']
# `inteprolated` (sic) is kept for any cell that already uses it; `interpolated` is the correctly spelled alias.
interpolated = inteprolated = features_and_predictions['forecasts']

In [None]:
# Show only the features whose name mentions QLD1, ordered by name.
{name: features[name] for name in sorted(features) if 'QLD1' in name}

### Get Mock Data

In [None]:
# Load mock forecasts from a local JSON file so other code can be tested against them.
mock_latest_forecasts = json.loads(Path('test_latest_forecasts.json').read_text())
forecasts = mock_latest_forecasts['forecasts']

# Load the pickled 4-tuple of recent data from a local file.
fixture = pickle.loads(Path('test_get_recent_data.pkl').read_bytes())
base_time, features, recent_prices, recent_greenness = fixture


In [None]:
def test_get_features():
    """Smoke-test get_features(): fetch the data, print the feature count and the formatted weather."""
    # base_time is the (rounded) time when this forecast was made... will actually be a little past the hour.
    base_time = pd.Timestamp.now(tz='Australia/Brisbane').round('H').tz_localize(tz=None)

    # collect all the current data. Only the first two results are used here, so the
    # remainder is star-unpacked: this works whether get_features() returns four
    # values or five (it is unpacked into five elsewhere in this notebook).
    features, weather_data, *_ = get_features.get_features()
    print(f"got {len(features)} features")

    # NOTE(review): format_weather is not defined or imported in the visible part of
    # this notebook -- presumably forecaster.format_weather is intended; confirm.
    print(format_weather(base_time, weather_data))

### test get_recent_data()
`test_get_recent_data.pkl` is written by get_features.py if that's run directly (`python get_features.py`)

In [None]:
# Load the pickled inputs (a 4-tuple) from the local fixture file.
with open('test_get_recent_data.pkl', 'rb') as f:
    base_time, features, recent_prices, recent_greenness = pickle.load(f)

# Run get_recent_data() on the loaded inputs and display the result.
df = forecaster.get_recent_data(base_time, features, recent_prices, recent_greenness)
df

### Check greenness_last_day.csv on S3 (OBSOLETE)

In [None]:
# Get the last 24h of Greenness because it's missing from greenness_month
# get file from S3
s3_client = boto3.client('s3')
res = s3_client.get_object(Bucket='greenforecast.au', Key='greenness_last_day.csv')
# SETTLEMENTDATE is parsed as dates and used as the index.
greenness_last_day = pd.read_csv(res['Body'], index_col='SETTLEMENTDATE', parse_dates=['SETTLEMENTDATE'])
greenness_last_day

### Maximums

In [None]:
# Load the pickled inputs (a 3-tuple) from the local fixture file.
with open('test_day_maxs.pkl', 'rb') as f:
    base_time, past_data, forecast_data = pickle.load(f)
# NOTE(review): day_maxs is not defined or imported in the visible part of this
# notebook -- presumably it lives in forecaster; confirm before running.
day_maxs(base_time, past_data, forecast_data)

# Temp