## Forecaster Data Management Use Cases

This notebook illustrates how to input data into the forecast wrapper using the following methods:

1. direct definition of X (pandas df) and y (pandas series)
2. read from .CSV file (simple)
3. read from .CSV file (unpack hourly forecast into additional observations) [Not Working]
4. empty initial data - add observations incrementally (simple)
5. empty initial data - add observations incrementally (combine SCADA & weather, unpack hourly)
7. add values to methods 1/2/3 [Not Working]
8. backup record [Debug]

### Import forecaster libraries from current directory

In [1]:
import os
import sys
import json
import pandas as pd
import matplotlib.pyplot as plt
import random
import time

from datetime import datetime, timedelta

import fcLib
from fcSelector import ForecasterFramework
from fcWrapper import ForecasterWrapper

### Generate a list of forecaster candidates from models in fcLib

In [2]:
# full catalogue of forecaster candidates to evaluate
fcList = fcLib.forecaster_list

# keep four arbitrarily chosen candidates (selected by position) and
# package the selection as a JSON string
fcListUser = json.dumps([fcList[ii] for ii in (0, 1, 2, 6)])

### Functions for generating dummy data

In [3]:
# function for generating example scada observation
def example_scada():
    """Return a static example SCADA observation as a JSON string.

    Seven battery parameters are reported for each of three batteries
    (``irn0`` .. ``irn2``), giving 21 rows with the columns
    ``name``, ``value``, ``valid`` and ``error``.
    """
    batteryParams = {'soc': 0.5, 'pmax': 1e3, 'emax': 5e3,
                     'echa': 0.96, 'edis': 0.96, 'smin': 0.1, 'smax': 1.0}

    # one row per (parameter, battery) pair, in parameter-major order;
    # every reading is flagged valid (1) with an empty error text
    rows = [
        [f'irn{unit}_battery-{name}', val, 1, '']
        for name, val in batteryParams.items()
        for unit in range(3)
    ]

    frame = pd.DataFrame(index=range(len(rows)),
                         columns=['name', 'value', 'valid', 'error'])
    for pos, row in enumerate(rows):
        frame.loc[pos, :] = row

    return frame.to_json()

def example_scada_random():
    """Return an example SCADA observation with random SOCs as a JSON string.

    Same layout as a static reading (seven parameters for batteries
    ``irn0`` .. ``irn2``; columns ``name``, ``value``, ``valid``, ``error``),
    except that each battery's ``soc`` is drawn uniformly from [0.25, 0.9]
    and rounded to two decimals.
    """
    # the 'soc' entry is only a placeholder; it is replaced per battery below
    batteryParams = {'soc': 99, 'pmax': 1e3, 'emax': 5e3,
                     'echa': 0.96, 'edis': 0.96, 'smin': 0.1, 'smax': 1.0}

    rows = []
    for name, val in batteryParams.items():
        for unit in range(3):
            # a fresh random value for every battery's SOC, static otherwise
            reading = round(random.uniform(0.25, 0.9), 2) if name == 'soc' else val
            rows.append([f'irn{unit}_battery-{name}', reading, 1, ''])

    frame = pd.DataFrame(index=range(len(rows)),
                         columns=['name', 'value', 'valid', 'error'])
    for pos, row in enumerate(rows):
        frame.loc[pos, :] = row

    return frame.to_json()

def example_weather_forecast_random():
    """Return a randomly scaled example weather forecast as a JSON string.

    Reads ``example_weatherforecast.csv`` from the current working directory
    (first column used as index), multiplies every numeric column by one
    common random factor drawn uniformly from [0.95, 1.05] and serialises
    the result with ``DataFrame.to_json``.
    """
    csvPath = os.path.abspath('example_weatherforecast.csv')
    forecast = pd.read_csv(csvPath, index_col=0)

    # a single scale factor is shared by all numeric columns of this record
    factor = random.uniform(0.95, 1.05)
    numericCols = forecast.select_dtypes(include=['number']).columns
    forecast[numericCols] *= factor

    return forecast.to_json()

### Case. Read Training Data from File, Add Similar Records via SCADA/weather

In [None]:
# metaData for the loaded csv file, one entry per column:
# every battery parameter of the three batteries (irn0..irn2) is marked as
# not time-series indexed
csvMetaData = {
    f'irn{unit}_battery-{par}': {'tsIndexed': False}
    for par in ('soc', 'pmax', 'emax', 'echa', 'edis', 'smin', 'smax')
    for unit in range(3)
}
csvMetaData['dataValid'] = {'tsIndexed': False}

# weather columns are time-series indexed with 17 entries each
# (presumably the length of the hourly forecast -- confirm against the data manager)
csvMetaData.update(
    (col, {'tsIndexed': True, 'tsCount': 17})
    for col in ('temp_air', 'ghi', 'dni', 'dhi')
)



# inputs for the initial call: the training data is read from a .csv file
# whose columns are described by csvMetaData
# (the target column name is currently required)
newInputs = {
    'forecaster-list': fcListUser,
    'training-data': dict(
        targetName='irn0_battery-edis',
        unpackHourly=False,
        inputPath='./example_scada_data_history.csv',
        # entry currently disabled: 'columnList': list(data.columns)
        csvMetaData=csvMetaData,
    ),
}

# instantiate the forecast framework wrapper, hand over the inputs and run it
n1 = ForecasterWrapper()
n1.input = newInputs
n1.compute()

# report the size of the data set now held by the data manager
trainingDataSize = n1.dataManager.dataDf.shape
print('Read training data from file')
print(f'Training data has {trainingDataSize[0]} observations of {trainingDataSize[1]} fields.')


# timestamp of the first hypothetical new record; parsed once because it
# does not change between iterations
baseTime = datetime.strptime('2018-08-04 14:00:00', '%Y-%m-%d %H:%M:%S')

# add five new records one by one, spaced one hour apart
for rr in range(5):

    # timestamp of this record, formatted as a string for the wrapper input
    newTime = (baseTime + timedelta(hours=rr)).strftime('%Y-%m-%d %H:%M:%S')

    # each record combines a random SCADA reading with a randomly scaled
    # weather forecast
    newInput2 = {
        'forecaster-list': fcListUser,

        'data-timestamp': newTime,
        'scada-data': example_scada_random(),
        'weather-data': example_weather_forecast_random(),

        'training-data': {
            'targetName': 'irn0_battery-edis',
            'unpackHourly': True
        }
    }

    n1.input = newInput2

    # run compute method to train and predict
    n1.compute()

# report the size of the data set after the additions
trainingDataSize = n1.dataManager.dataDf.shape
print('Additional records added.')
print(f'Training data has {trainingDataSize[0]} observations of {trainingDataSize[1]} fields.')

### Data Manager should infer metaData based on new additions, but may fail in some cases

In [None]:
# inputs for the initial call: the training data is read from a .csv file;
# no 'csvMetaData' (and no 'columnList') entry is passed in this case
# (the target column name is currently required)
newInputs = {
    'forecaster-list': fcListUser,
    'training-data': dict(
        targetName='irn0_battery-edis',
        unpackHourly=False,
        inputPath='./example_scada_data_history.csv',
        # entries currently disabled:
        #   'columnList': list(data.columns)
        #   'csvMetaData': csvMetaData
    ),
}

# instantiate the forecast framework wrapper, hand over the inputs and run it
n1 = ForecasterWrapper()
n1.input = newInputs
n1.compute()


# timestamp of the first hypothetical new record; parsed once because it
# does not change between iterations
baseTime = datetime.strptime('2018-08-04 14:00:00', '%Y-%m-%d %H:%M:%S')

# add five new records one by one, spaced one hour apart
for rr in range(5):

    # timestamp of this record, formatted as a string for the wrapper input
    newTime = (baseTime + timedelta(hours=rr)).strftime('%Y-%m-%d %H:%M:%S')

    # each record combines a random SCADA reading with a randomly scaled
    # weather forecast
    newInput2 = {
        'forecaster-list': fcListUser,

        'data-timestamp': newTime,
        'scada-data': example_scada_random(),
        'weather-data': example_weather_forecast_random(),

        'training-data': {
            'targetName': 'irn0_battery-edis',
            'unpackHourly': True
        }
    }

    n1.input = newInput2

    # run compute method to train and predict
    n1.compute()

# show the metaData held by the data manager after the additions
print(n1.dataManager.dataMeta)

### Case - Initialize from .csv and add SCADA record in single call

In [None]:
# instantiate forecast framework wrapper
n1 = ForecasterWrapper()

# offset (in hours) of the new record from the base timestamp
rr = 0

# timestamp of the new record, formatted as a string for the wrapper input
baseTime = datetime.strptime('2018-08-04 14:00:00', '%Y-%m-%d %H:%M:%S')
newTime = (baseTime + timedelta(hours=rr)).strftime('%Y-%m-%d %H:%M:%S')

# a single input carries both the path of the .csv file with the historic
# training data and one new SCADA/weather record
# (the target column name is currently required)
newInput = {
    'forecaster-list': fcListUser,

    'data-timestamp': newTime,
    'scada-data': example_scada_random(),
    'weather-data': example_weather_forecast_random(),

    'training-data': {
        'targetName': 'irn0_battery-edis',
        'inputPath': './example_scada_data_history.csv',
        'unpackHourly': True
    }
}

n1.input = newInput

# run compute method to train and predict
n1.compute()

# report the size and the metaData of the resulting data set
trainingDataSize = n1.dataManager.dataDf.shape
print('Initialized from input file while adding new record.')
print(f'Training data has {trainingDataSize[0]} observations of {trainingDataSize[1]} fields.')

print(n1.dataManager.dataMeta)