In [1]:
# Load libs:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from glob import glob
from os.path import dirname, abspath, join, exists
from clearml import Task
from amplify.data import DataGenerator, DataSplit

In [2]:
# ClearML experiment tracking: Task.init registers this notebook run as a task
# named "predict_power_gen_initial_model" under the "amplify" project.
task = Task.init(project_name="amplify", task_name="predict_power_gen_initial_model")

ClearML Task: created new task id=02fcd2d3b5cd4e71bfac9d2687b968a6
ClearML results page: https://app.clear.ml/projects/f0e0b096252d4c18b9005e580afc5bce/experiments/02fcd2d3b5cd4e71bfac9d2687b968a6/output/log


## Load Data

In [3]:
# Load the dataset via the project's DataGenerator (its log output reports the
# building and weather data being loaded and merged), then preview the first
# rows to check the feature and target columns.
xy_data = DataGenerator().load_data()
xy_data.head()

ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring
Info: Successfully loaded building data!
Info: Successfully loaded weather data!
Successfully merged Building and Weather Data


Unnamed: 0,temp,clouds_all,azimuth,irradiance,day_of_week,True Power (kW) solar,True Power (kW) usage
2021-10-19 04:00:00+00:00,11.18,75.0,297.679945,0.0,2,1.0,11.3
2021-10-19 05:00:00+00:00,9.51,20.0,7.143008,0.0,2,1.0,13.2
2021-10-19 06:00:00+00:00,9.28,20.0,34.060157,0.0,2,1.0,12.7
2021-10-19 07:00:00+00:00,9.28,20.0,34.060157,0.0,2,1.0,17.2
2021-10-19 08:00:00+00:00,9.28,20.0,34.060157,0.0,2,1.0,17.0


## Data Processing

```
1. Make xy data into a set of series of shape [N, M, F], where N = number of examples; M = number of timestamps; F = number of features.
2. Train-Test-Validation Split. Split off at 0.8, 0.1, 0.1 ratio for now. Shuffle the split.
3. Separate x and y from the dataset. The last two columns (solar generation and power usage) form y; the remaining feature columns form x.
```

In [4]:
# Hyperparameters for windowing the time series
SERIES = 48  # passed to DataSplit as series_length: number of timesteps per example
STRIDE = 1  # passed to DataSplit as stride; presumably the step between consecutive windows -- TODO confirm

In [5]:
# Each returned split is indexed as (x, y); y holds both targets
# (solar generation and power usage), e.g.
#      train_ds[0] = x_train,
#      train_ds[1] = y_train
train_ds, val_ds, test_ds = DataSplit(xy_data, series_length=SERIES, stride=STRIDE).split_data()

In [6]:
# Unpack inputs (x) and targets (y) from each split
x_train, y_train = train_ds[0], train_ds[1]  # training
x_val, y_val = val_ds[0], val_ds[1]  # validation
x_test, y_test = test_ds[0], test_ds[1]  # test

In [7]:
# Sanity check: expect (examples, SERIES, number of input features)
print("x_train shape: ", x_train.shape)

x_train shape:  (1556, 48, 5)


In [8]:
# Sanity check: expect (examples, SERIES, 2) -- solar and usage targets stacked on the last axis
print("y_train shape: ", y_train.shape)

y_train shape:  (1556, 48, 2)


In [9]:
def split_solar_power_y(y_data):
    """Split a stacked target array into separate solar and power-usage targets.

    Args:
        y_data: 3-D array indexed as [example, timestep, channel]. The
            second-to-last channel is taken as solar generation and the last
            channel as power usage.

    Returns:
        Tuple ``(y_solar, y_power)`` of float32 arrays. Each keeps the first
        two axes of ``y_data``; when ``y_data`` has at least two channels,
        each has a trailing channel axis of length 1.
    """
    # Range slices (rather than integer indices) keep the channel axis in place.
    solar_channel = slice(-2, -1)
    power_channel = slice(-1, None)
    return (
        y_data[:, :, solar_channel].astype("float32"),
        y_data[:, :, power_channel].astype("float32"),
    )

In [10]:
# Separate solar generation from power usage in every split, so a model can be
# trained on a single target type
(
    (y_solar_train, y_power_train),
    (y_solar_val, y_power_val),
    (y_solar_test, y_power_test),
) = (
    split_solar_power_y(y_train),
    split_solar_power_y(y_val),
    split_solar_power_y(y_test),
)

In [11]:
# Verify the target split: both arrays should be (examples, SERIES, 1)
print("y_power_train shape: ", y_power_train.shape)
print("y_solar_train shape: ", y_solar_train.shape)

y_power_train shape:  (1556, 48, 1)
y_solar_train shape:  (1556, 48, 1)


### Train Normalizer on XTrain

In [12]:
from keras.layers import Normalization

In [13]:
# Fit the normalization layer's statistics on the training inputs only
# (validation and test inputs are not passed to adapt()).
norm_layer = Normalization()
norm_layer.adapt(x_train)

2022-01-30 18:47:20.897776: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Test Models

In [14]:
# below import also includes tf/keras stuff
from amplify.test_models import *

## 1. Simple LSTM On Solar Gen

In [15]:
# hyperparameters for the solar-generation LSTM
NUM_LSTM_LAYER=20  # passed as n_layer; NOTE(review): unclear whether this is a layer count or units per layer -- confirm in SimpleLSTM_1
SERIES_FT = 5  # passed as n_series_ft; matches the 5 input features of x_train
SERIES_OUT = 1  # passed as n_series_out; matches the single channel of the solar target
BATCH  = 4  # batch_size given to SetHyperParam
EPOCHS = 30  # epoch given to SetHyperParam
activation_fn = 'tanh'  # passed as activation_f

In [16]:
# create model: receives the normalization layer adapted on x_train together
# with the window length (SERIES), input feature count, and output size
lstm_model_1 = SimpleLSTM_1(
    norm_layer=norm_layer,
    n_layer=NUM_LSTM_LAYER,
    n_series_len=SERIES,
    n_series_ft=SERIES_FT,
    n_series_out=SERIES_OUT,
    activation_f=activation_fn
)

# set hyper param: epoch count and batch size for this model
lstm_model_1.SetHyperParam(
    epoch=EPOCHS,
    batch_size=BATCH
)

ClearML Monitor: Could not detect iteration reporting, falling back to iterations as seconds-from-start
