## Current Python Version

Ensure the Python version running in the Jupyter kernel matches the expected version.

In [None]:
# from platform import python_version

# print(python_version())


List the installed packages; they should match the ones in the Pipfile (and their dependencies).

In [None]:
# pip list

## Internal Module Imports
Code required to make the project's internally defined modules importable in the Jupyter notebook environment.

In [None]:
import os
import sys
# Absolute path of the parent of the current working directory; the project's
# internal packages are imported from there.
module_path = os.path.abspath(os.pardir)

# Register the path only once so that re-running this cell does not pile up
# duplicate entries in sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
from dto import (
    COST_PRESETS, 
    ElectricalConstants,
    ElectricalSystemSpecs, 
    EpsilonSpecs,
    GeneratorSpecs, 
    LoadSpecs, 
    NodeStatePower, 
    SystemHistory, 
    )
    

Declare the reward function to be used in training.
The reward function is not strictly necessary, as default values are already defined in the model adapters. The option to specify it in the notebook is there to facilitate experimentation with the reward function.

In [None]:
def rewardFn(outputDifferentialFromOpt):
    """Score how close an output is to the optimum.

    The reward blends two bell-shaped terms of the form ``2**(-x**2 / k)``:

    * a wide "base" term (``k = 100``), which halves at a differential of 10;
    * a narrow "peak" term (``k = 2``), which halves at a differential of sqrt(2).

    They are combined with weights 1 and 9 (out of 10), so the reward is 1.0
    at a differential of 0 and is symmetric in the sign of the differential.

    Args:
        outputDifferentialFromOpt: Difference between the output and the optimum.

    Returns:
        A ``(reward, components)`` tuple, where ``components`` is a dict with
        the keys ``'base'``, ``'peak'`` and ``'total'``.
    """
    squaredDifferential = outputDifferentialFromOpt**2

    wideTerm = 2**(-squaredDifferential/100)
    narrowTerm = 2**(-squaredDifferential/2)
    blendedReward = (wideTerm + 9*narrowTerm)/10

    components = {'base': wideTerm, 'peak': narrowTerm, 'total': blendedReward}
    return blendedReward, components


Instantiate the model adapter
See `./app/learning/model_adapater.py` for the structure of the class. There are a few already implemented in `./app/learning`.

Please note that the signature of the reward function must match that of the default value in the model adapter

In [None]:
# The adapter class is imported under the generic alias `ModelAdapter`; presumably this
# is so another adapter can be swapped in by editing only this import — confirm.
from learning.cost_single import ModelAdapterCostSingle as ModelAdapter
# Pass the reward function defined in this notebook through the adapter's `rewardFn` argument.
modelAdapter = ModelAdapter(rewardFn=rewardFn)

Declare learning parameters to be used in the experiment

In [None]:
from learning import LearningParams

# Configure the experiment. The constructed object is not assigned to a name: the testing
# cell calls `LearningParams()` with no arguments to read `electricalSystemSpecs` and
# `modelName` back, so the class presumably keeps these values as shared state.
# NOTE(review): confirm that LearningParams behaves as a singleton.
LearningParams(
    gamma=0.9,   # Discount factor (gamma)
    tau=0.001, # Target network mixing factor (tau)
    epsilonSpecs = EpsilonSpecs( # Epsilon explore/exploit control
        thresholdProgress = 0.6, # Share of steps at which the decay change should happen (presumably a 0-1 fraction — confirm)
        thresholdValue = 0.5, # Epsilon value at which the decay change happens
        finalValue = 0.0001, # Epsilon value at the end of the experiment
    ),
    numEpisodes=100, # Number of learning episodes to run
    maxSteps=100,   # Number of steps per learning episode
    bufferSize=500, # Experience buffer size
    traceLength=20,     # Number of steps each sampled episode should contain
    batchSize=10,     # Number of episodes sampled from the experience buffer
    updateInterval=4, # Run an update cycle every N steps
    electricalSystemSpecs = ElectricalSystemSpecs(
        # One load (L1) supplied by three generators (G1-G3) that differ only in cost profile.
        loads=[LoadSpecs(id_="L1", basePower=3.0, noiseLevel=0.01)],
        generators=[
            GeneratorSpecs(id_="G1", basePower=1.0, costProfile=COST_PRESETS.COAL_2, minPower=0.5, maxPower=3.0, noiseLevel=0.005),
            GeneratorSpecs(id_="G2", basePower=1.0, costProfile=COST_PRESETS.OIL_2, minPower=0.5, maxPower=3.0, noiseLevel=0.005),
            GeneratorSpecs(id_="G3", basePower=1.0, costProfile=COST_PRESETS.OIL_ALTERNATE_2, minPower=0.5, maxPower=3.0, noiseLevel=0.005),
        ],
        shouldTrackOptimalCost=True,
    ),
    modelName='Template-Experiment'
)

# Setting droop to +infinity essentially removes the droop component from the power update
# equation, so the system reacts as if droop were disabled.
# To enable this, uncomment the line below (it needs `import math`, which no visible cell provides).
# ElectricalConstants().droop = math.inf

Train the model

In [None]:
%%time
# Train the agents through the model adapter instantiated earlier in the notebook.
# NOTE(review): `ModelTrainer` is not imported in any cell visible in this notebook; unless it
# is brought into scope elsewhere, this cell raises NameError on a fresh kernel. Confirm its
# module and add the import.
allAgents = ModelTrainer(modelAdapter).trainAgents()

Run a testing episode with the trained model

In [None]:
# Number of steps to run in the test episode.
STEPS_TO_TEST = 300

# The system specs and model name are read back from LearningParams(), called with no
# arguments, so the test uses the same configuration declared for training.
# NOTE(review): `ModelTester` is not imported in any cell visible in this notebook; unless it
# is brought into scope elsewhere, this cell raises NameError on a fresh kernel. Confirm its
# module and add the import.
electricalSystem, rewards = ModelTester(modelAdapter).testAgents(
    electricalSystemSpecs=LearningParams().electricalSystemSpecs,
    modelName=LearningParams().modelName,
    stepsToTest=STEPS_TO_TEST,
)

In [None]:
import matplotlib.pyplot as plt
from learning.learning_state import LearningState
from plots import plotAll, plotExperimentRewardProgression

Plot the reward progression of the training process

In [None]:
# The reward history is read from LearningState().model.allRewards
# (presumably populated while training ran — confirm).
plotExperimentRewardProgression(LearningState().model.allRewards)

Plot the default list of graphs of the test episode (for more individual plots, see `./app/plots`).

plotAll(electricalSystem.systemHistory)

In [None]:
from plots import plotRewardComponents

# `rewards` is the second value returned by `testAgents(...)` in the testing cell. This loop
# used to iterate over `rewardComponents`, a name that is never defined in this notebook and
# therefore raised NameError on a fresh kernel.
# NOTE(review): assumes `rewards` maps each model name to its reward components — confirm
# against ModelTester.testAgents.
for modelName, rewardComp in rewards.items():
    print(modelName)
    plotRewardComponents(rewardComp)

Plot all graphs related to the electrical system state during the model testing

In [None]:
# `electricalSystem` is the first value returned by the testAgents(...) call in the testing cell.
plotAll(electricalSystem.systemHistory)