## Example - Using External Forecaster

External Trainer Loop
- Create External Trainer class instance (ET)
- Populate with data and train models

Forecast Wrapper Loop
- Create Forecaster Wrapper class instance (FW) that uses ET models
- Pass FW inputs of ET training date and data pickle path
- Run forecaster in FW using imported models
- Add SCADA and weather data to FW to update training dataset

Interactions between Loops
- Trigger retraining in FW
- FW writes retraining date in outputs
- Demonstrate ET response to FW retraining flag

### Import forecaster libraries from current directory

In [1]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np
import json
import traceback
import time
import json
import random
from datetime import datetime, timedelta
from pprint import pprint

import fcLib
from fcWrapper import ForecasterWrapper, ForecasterTrainer

### Define functions for generating data

In [2]:
def example_scada_random():
    """Return a JSON string of example battery SCADA readings.

    Three devices (``irn0`` .. ``irn2``) each report the same set of battery
    parameters. Every parameter carries a fixed value except ``soc``, which
    is drawn at random (rounded to two decimals) for each device. The table
    has the columns ``name``, ``value``, ``valid`` and ``error`` and is
    serialized with ``DataFrame.to_json()``.
    """
    # Fixed readings per parameter; the 'soc' entry is only a placeholder
    # because it is replaced by a random draw for every device below.
    static_values = {'soc': 99, 'pmax': 1e3, 'emax': 5e3,
                     'echa': 0.96, 'edis': 0.96, 'smin': 0.1, 'smax': 1.0}
    n_devices = 3

    records = []
    for param, fixed in static_values.items():
        for dev in range(n_devices):
            if param == 'soc':
                reading = round(random.uniform(0.25, 0.9), 2)
            else:
                reading = fixed
            # every reading is flagged valid (1) with an empty error string
            records.append([f'irn{dev}_battery-{param}', reading, 1, ''])

    # object dtype keeps the columns untyped, as in a frame filled row by row
    frame = pd.DataFrame(records,
                         columns=['name', 'value', 'valid', 'error'],
                         dtype=object)
    return frame.to_json()

def example_weather_forecast_random():
    """Return the example weather forecast, randomly rescaled, as JSON.

    Reads ``example_weatherforecast.csv`` from the current working directory
    (first column used as the index), multiplies every numeric column by a
    single random factor drawn from [0.95, 1.05], and returns the result of
    ``DataFrame.to_json()``.
    """
    csv_path = os.path.abspath(
        os.path.join(os.getcwd(), 'example_weatherforecast.csv'))
    forecast = pd.read_csv(csv_path, index_col=0)

    # one factor per call, applied uniformly to all numeric columns
    factor = random.uniform(0.95, 1.05)
    numeric_cols = forecast.select_dtypes(include=['number']).columns
    forecast[numeric_cols] = forecast[numeric_cols] * factor

    return forecast.to_json()

### Define Custom Forecaster List

In [3]:
# candidate forecaster definitions provided by fcLib
fcList = fcLib.forecaster_list

# pick four arbitrary candidates by position and package the selection
# as a JSON string in a single step
fcListUser = json.dumps([fcList[idx] for idx in (0, 1, 2, 6)])

In [4]:
# names of the forecaster candidates at positions 0, 1, 2 and 6
# (left as the final expression so the notebook displays the list)
[fcList[idx]['name'] for idx in (0, 1, 2, 6)]

['extra_trees_pipeline',
 'random_forest_pipeline',
 'multi_layer_perceptron',
 'random_forest']

### Forecast Wrapper Loop: Initialization

Initialize forecast wrapper loop using external forecast trainer. No data has been collected yet, so no forecasters have been trained. Wrapper collects data and stores in dataManager class.

If 'last-trainall' input field is None, Wrapper will not attempt to load trained models from external pickle file.

Once sufficient data has been collected, the wrapper writes a retrain trigger to the 'date-trainall' output field.

In [5]:
# File locations where the two loops interact. In the config below the
# wrapper receives trainingDataPath as 'backupPath' and trainingModelPath
# as 'externalTrainerPath'.
trainingDataPath = './example_trainer_data_history.csv'
trainingModelPath = 'example_external_models.pkl'

# instantiate forecast framework wrapper
n2 = ForecasterWrapper()

# Start of the synthetic record. Parsed once, outside the loop, because it
# is identical for every row.
baseTime = datetime.strptime('2023-07-01 00:00:00', '%Y-%m-%d %H:%M:%S')

# create initial training data row by row (one hourly sample per iteration)
for rr in range(0, 160):

    newTime = baseTime + timedelta(hours=rr)

    # Unix time for the sample. datetime.timestamp() is used instead of
    # strftime('%s'): '%s' is a C-library extension that Python does not
    # document and that is unavailable on some platforms (e.g. Windows).
    # For a naive datetime both interpret the value as local time.
    newTimeUnix = int(newTime.timestamp())
    newTime = newTime.strftime('%Y-%m-%d %H:%M:%S')

    # a fresh input dict is built for every sample
    newInput = {
        'forecaster-list': None,

        'time': newTimeUnix,

        'data-timestamp': newTime,
        'scada-data': example_scada_random(),
        'weather-data': example_weather_forecast_random(),

        'config': {
            'targetName': 'irn0_battery-edis',
            'backupPath': trainingDataPath,
            'useExternalTrainer': True,
            'externalTrainerPath': trainingModelPath,
            'trainingInterval': 1*24*60*60,  # set retraining interval to 1 day
            'min_samples': 150  # minimum samples before training dataset written to file
        },
        # None: no externally trained models exist yet at this stage
        'last-trainall': None
    }

    n2.input = newInput

    # process the new sample with the wrapper
    n2.compute()

# outputs and status message of the last compute() call
pprint(n2.output)
print(n2.msg)

# duration should be full compute time

{'date-trainall': '2023-07-07 05:00:00',
 'duration': 0.022462129592895508,
 'model-summary': {'lastTrainedAll': 'None', 'lastTrainedBest': 'None'},
 'output-data': None}
INFO: Externally trained model not yet avialable. No prediction generated.


### External Trainer Loop: Initialization

In [6]:
# retrain trigger date published by the wrapper loop
trainDate = n2.output['date-trainall']

# assemble the input for the external trainer step by step
newInputs1 = {'forecaster-list': fcListUser, 'time': newTimeUnix}
newInputs1['config'] = {
    'targetName': 'irn0_battery-edis',
    'inputPath': trainingDataPath,
    'externalTrainerPath': trainingModelPath,
    'isExternalTrainer': True,
}
newInputs1['date-trainall'] = trainDate

# instantiate the external forecaster trainer
n1 = ForecasterTrainer()

# read data from file & train models
n1.input = newInputs1
n1.compute()

# show summary data of trained model, then status message and training date
pprint(n1.output)
print(n1.msg, n1.trainingDate, sep='\n')

{'duration': 2.021742105484009,
 'last-trainall': '2023-07-07 15:00:00',
 'model-summary': {'bestModelName': 'random_forest',
                   'bestScore': 0.0,
                   'bestScoreAdj': 1.263157894736842,
                   'bestScoreMse': 2.7733391199176196e-30,
                   'bestScoreRmse': 1.6653345369377348e-15,
                   'lastTrainedAll': '2023-07-07 15:00:00',
                   'lastTrainedBest': '2023-07-07 15:00:00'},
 'output-data': None}
Done.
2023-07-07 15:00:00


### External Trainer Loop: Re-running without new trigger


In [7]:
# the wrapper's trigger date is read again; it is unchanged since the last
# trainer run because the wrapper has not run in between
trainDate = n2.output['date-trainall']

# same trainer input as before, with 'time' moved one hour forward
newInputs1 = {'forecaster-list': fcListUser, 'time': newTimeUnix + 60*60}
newInputs1['config'] = {
    'targetName': 'irn0_battery-edis',
    'inputPath': trainingDataPath,
    'externalTrainerPath': trainingModelPath,
    'isExternalTrainer': True,
}
newInputs1['date-trainall'] = trainDate

# re-run the existing trainer instance with the updated input
n1.input = newInputs1
n1.compute()

# show summary data of trained model, then status message and training date
pprint(n1.output)
print(n1.msg, n1.trainingDate, sep='\n')

{'duration': 2.0531840324401855,
 'last-trainall': '2023-07-07 16:00:00',
 'model-summary': {'bestModelName': 'random_forest',
                   'bestScore': 0.0,
                   'bestScoreAdj': 1.263157894736842,
                   'bestScoreMse': 2.7733391199176196e-30,
                   'bestScoreRmse': 1.6653345369377348e-15,
                   'lastTrainedAll': '2023-07-07 15:00:00',
                   'lastTrainedBest': '2023-07-07 16:00:00'},
 'output-data': None}
Done.
2023-07-07 15:00:00


### Forecast Wrapper Loop: Load External Forecasters

Passing last-trainall output from External Trainer loop allows Wrapper to load trained models from pickle file

Note: forecaster predictions currently throwing errors, needs to be resolved in fcLib

In [8]:
# 'last-trainall' date reported by the external trainer loop
trainDateLast = n1.output['last-trainall']

# wrapper input that carries no new SCADA or weather data, only the
# trainer's last-trainall date
newInputs2 = {'forecaster-list': None, 'time': newTimeUnix}
newInputs2['config'] = {
    'targetName': 'irn0_battery-edis',
    'backupPath': trainingDataPath,
    'useExternalTrainer': True,
    'externalTrainerPath': trainingModelPath,
    'trainingInterval': 1*24*60*60,  # set retraining interval to 1 day
    'min_samples': 150,  # minimum samples before training dataset written to file
}
newInputs2['last-trainall'] = trainDateLast

# store inputs to wrapper
n2.input = newInputs2

# run the wrapper with the new inputs
n2.compute()

pprint(n2.output)

{'date-trainall': '2023-07-07 05:00:00',
 'duration': 0.00015497207641601562,
 'model-summary': {'lastTrainedAll': 'None', 'lastTrainedBest': 'None'},
 'output-data': None}


### Forecast Wrapper Loop: Trigger Retraining

As additional time passes and more data is collected by the Wrapper Loop, retraining should be triggered. This causes the new data to be written to the backup file and a new datetime to be passed in the outputs.

In [9]:
# remember the trigger date currently published, for later comparison
retrainDatePrev = n2.output['date-trainall']

# New samples continue from the last timestamp held by the wrapper's data
# manager. Parsed once, outside the loop, because it is loop-invariant.
baseTimeStart = str(n2.dataManager.dataDf.index[-1])
baseTime = datetime.strptime(baseTimeStart, '%Y-%m-%d %H:%M:%S')

# append additional data row by row (one hourly sample per iteration)
for rr in range(1, 160):

    # set timestamp index relative to the end of the existing dataset
    newTime = baseTime + timedelta(hours=rr)

    # Unix time for the sample. datetime.timestamp() is used instead of
    # strftime('%s'): '%s' is a C-library extension that Python does not
    # document and that is unavailable on some platforms (e.g. Windows).
    # For a naive datetime both interpret the value as local time.
    newTimeUnix = int(newTime.timestamp())
    newTime = newTime.strftime('%Y-%m-%d %H:%M:%S')

    # a fresh input dict is built for every sample
    newInput = {
        'forecaster-list': None,

        'time': newTimeUnix,

        'data-timestamp': newTime,
        'scada-data': example_scada_random(),
        'weather-data': example_weather_forecast_random(),

        'config': {
            'targetName': 'irn0_battery-edis',
            'backupPath': trainingDataPath,
            'useExternalTrainer': True,
            'externalTrainerPath': trainingModelPath,
            'trainingInterval': 1*24*60*60,  # set retraining interval to 1 day
            'min_samples': 150  # minimum samples before training dataset written to file
        },
        # date of the external trainer's last run, taken from its outputs
        'last-trainall': trainDateLast
    }

    n2.input = newInput

    # process the new sample with the wrapper
    n2.compute()

# outputs and status message of the last compute() call
pprint(n2.output)
print(n2.msg)

print(f"new retraining timestamp: {n2.output['date-trainall']}")

{'date-trainall': '2023-07-08 15:00:00',
 'duration': 0.036850929260253906,
 'model-summary': {'bestModelName': 'random_forest',
                   'bestScore': 0.0,
                   'bestScoreAdj': 1.263157894736842,
                   'bestScoreMse': 2.7733391199176196e-30,
                   'bestScoreRmse': 1.6653345369377348e-15,
                   'lastTrainedAll': '2023-07-07 15:00:00',
                   'lastTrainedBest': 'None'},
 'output-data': '{"y":{"1689314400000":0.96,"1689318000000":0.96,"1689321600000":0.96,"1689325200000":0.96,"1689328800000":0.96,"1689332400000":0.96,"1689336000000":0.96,"1689339600000":0.96,"1689343200000":0.96,"1689346800000":0.96,"1689350400000":0.96,"1689354000000":0.96,"1689357600000":0.96,"1689361200000":0.96,"1689364800000":0.96,"1689368400000":0.96,"1689372000000":0.96,"1689375600000":0.96,"1689379200000":0.96,"1689382800000":0.96,"1689386400000":0.96,"1689390000000":0.96,"1689393600000":0.96,"1689397200000":0.96}}'}
Done.
new retraining timestamp: 2023-07-08 15:00:00

In [10]:
# compare the trigger date saved before the loop with the current one
print(
    f"previous retraining timestamp: {retrainDatePrev}",
    f"updated retraining timestamp: {n2.output['date-trainall']}",
    sep='\n',
)

previous retraining timestamp: 2023-07-07 05:00:00
updated retraining timestamp: 2023-07-08 15:00:00


### External Trainer Loop: Responding to Retraining Trigger

In [11]:
# snapshot of the trainer's dataset size and training date before re-running
samplesPrev = n1.dataManager.dataDf.shape[0]
traingingDatePrev = str(n1.framework.trainingDate)

# updated retrain trigger date published by the wrapper loop
trainDate = n2.output['date-trainall']

# assemble the input for the external trainer step by step
newInputs1 = {'forecaster-list': fcListUser, 'time': newTimeUnix}
newInputs1['config'] = {
    'targetName': 'irn0_battery-edis',
    'inputPath': trainingDataPath,
    'externalTrainerPath': trainingModelPath,
    'isExternalTrainer': True,
}
newInputs1['date-trainall'] = trainDate

# read data from file & train models
n1.input = newInputs1
n1.compute()

# same two quantities after the run
samplesNew = n1.dataManager.dataDf.shape[0]
traingingDateNew = str(n1.framework.trainingDate)

# report what changed
print(
    f"Previous model training date: {traingingDatePrev}",
    f"Updated model training date: {traingingDateNew}",
    f"Training dataset changed from {samplesPrev} obs to {samplesNew}",
    sep='\n',
)


Previous model training date: 2023-07-07 16:00:00
Updated model training date: 2023-07-14 06:00:00
Training dataset changed from 150 obs to 184
