In [1]:
"""
    Following tutorial: https://www.tensorflow.org/tutorials/structured_data/imbalanced_data
"""

import itertools
import os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

from bclassification.utils_base import (
    print_class_weights,
    compute_weight_bias,
    plot_metrics,
)
from bclassification.utils_fc import (
    create_datasets,
    print_dataset,
    plot_cm,
    plot_roc,
    describe_results,
)
from experience import load_experience
from lib.constants import Constants as Const
from lib.data_utils import (
    make_dir,
    env_pf,
    create_results_dir,
    save_dict_to_file,
)
from lib.tf_utils import (
    print_variables,
    ResidulaFCBlock,
    MatthewsCorrelationCoefficient,
)
from lib.visualizer import Visualizer, pprint

# Instantiated for its side effects only; the returned object is discarded.
Visualizer()

# Which recorded experience to load.
agent_name, case_name = "agent-mip", "l2rpn_2019_art"
env_dc = True
verbose = False

# Input (experience) and output (results) directories under Const.RESULTS_DIR.
experience_dir = make_dir(os.path.join(Const.RESULTS_DIR, "performance-aug"))
results_dir = make_dir(os.path.join(Const.RESULTS_DIR, "_bc-fc"))

# One results sub-directory per case / env_pf(env_dc) combination.
case_tag = f"{case_name}-{env_pf(env_dc)}"
case_results_dir = make_dir(os.path.join(results_dir, case_tag))

case, collector = load_experience(
    case_name,
    agent_name,
    experience_dir,
    env_dc=env_dc,
)


L2RPN_2019_ART (dc)


--------------------------------------------------------------------------------
                                        Loading Experience
--------------------------------------------------------------------------------
    - Loading chronics:                 ./results/performance-aug/l2rpn_2019_art-dc/agent-mip-chronic-****
    - Number of loaded chronics:        99


In [6]:
"""
    Parameters
"""

# Reproducibility
random_seed = 0

# Data: split fractions and down-sampling rate
test_frac, val_frac = 0.1, 0.1
downsampling_rate = 0.2

# Data: window sizes forwarded to create_datasets
n_window_targets, n_window_history, n_window_forecasts = 20, 2, 1

# Data: optional feature groups forwarded to create_datasets
use_forecasts = use_actions = True

# Model
model_type = "res"  # "fc" or "res"
dropout_rate, l2_reg = 0.2, 0.0001
n_hidden, n_hidden_layers = 512, 8

# Training
learning_rate = 0.001
n_batch, n_epochs = 512, 200

# Prediction: decision threshold shared by all thresholded metrics
threshold = 0.5

In [7]:
%%capture cap --no-stderr
"""
    Datasets
"""

# Seed both NumPy's and TensorFlow's global generators before building data.
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Dataset options, all taken from the parameter cell.
dataset_kwargs = dict(
    n_window_targets=n_window_targets,
    n_window_history=n_window_history,
    n_window_forecasts=n_window_forecasts,
    use_actions=use_actions,
    use_forecasts=use_forecasts,
    downsampling_rate=downsampling_rate,
)

X, Y, mask_targets, X_all, Y_all = create_datasets(case, collector, **dataset_kwargs)

In [8]:
# Persist every array returned by create_datasets in one compressed archive.
# The file name encodes the history / forecast window sizes.
data_path = os.path.join(
    case_results_dir, f"fc-data-h{n_window_history}-f{n_window_forecasts}"
)
data_arrays = {
    "X_all": X_all,
    "Y_all": Y_all,
    "X": X,
    "Y": Y,
    "mask_targets": mask_targets,
}
np.savez_compressed(data_path, **data_arrays)

In [15]:
%%capture cap --no-stderr

# Train / validation split of the (down-sampled) data.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=val_frac, random_state=random_seed
)

# Test set = the validation split plus a random sample of the rows of X_all
# that are NOT selected by `mask_targets`.
# NOTE(review): `test_frac` is not used in the visible cells; the sampling
# rate below is the literal that was hard-coded here. The draw relies on the
# global NumPy RNG state, so re-running this cell alone changes the sample.
test_neg_rate = 0.08
mask_test_neg = np.logical_and(
    ~mask_targets,
    # Builtin `bool` replaces the `np.bool` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    np.random.binomial(1, test_neg_rate, mask_targets.size).astype(bool),
)
X_test = np.concatenate((X_val, X_all[mask_test_neg, :]))
Y_test = np.concatenate((Y_val, Y_all[mask_test_neg]))

# NOTE(review): weights and bias are computed from Y (train + validation).
class_weight, initial_bias = compute_weight_bias(Y)

print_dataset(X_all, Y_all, "All data")
print_dataset(X, Y, "Data")
print_dataset(X_train, Y_train, "Train")
print_dataset(X_val, Y_val, "Validation")
print_dataset(X_test, Y_test, "Test")
print_class_weights(class_weight)
pprint("Initial bias:", "{:.4f}".format(float(initial_bias)))

In [16]:
# Replay the output that the preceding `%%capture cap` cell stored in `cap`.
cap.show()

    - All data:                         X, Y	       (589367, 560), (589367,)
        - Positive labels:              1.02 %
        - Negative labels:              98.98 %
    - Data:                             X, Y	       (103215, 560), (103215,)
        - Positive labels:              5.84 %
        - Negative labels:              94.16 %
    - Train:                            X, Y	        (92893, 560), (92893,)
        - Positive labels:              5.82 %
        - Negative labels:              94.18 %
    - Validation:                       X, Y	        (10322, 560), (10322,)
        - Positive labels:              6.03 %
        - Negative labels:              93.97 %
    - Test:                             X, Y	        (49414, 560), (49414,)
        - Positive labels:              1.26 %
        - Negative labels:              98.74 %
Class                                   Weight
    - 0                                 0.53103
    - 1                                 8.55562


In [17]:
"""
    Model
"""

# Confusion-matrix counters share the same constructor signature.
confusion_counters = (
    (tf.keras.metrics.TruePositives, "tp"),
    (tf.keras.metrics.FalsePositives, "fp"),
    (tf.keras.metrics.TrueNegatives, "tn"),
    (tf.keras.metrics.FalseNegatives, "fn"),
)

# All metrics are evaluated at the same decision threshold.
metrics = [
    *(
        counter_cls(thresholds=threshold, name=counter_name)
        for counter_cls, counter_name in confusion_counters
    ),
    tf.keras.metrics.BinaryAccuracy(threshold=threshold, name="accuracy"),
    tf.keras.metrics.Precision(thresholds=threshold, name="precision"),
    tf.keras.metrics.Recall(thresholds=threshold, name="recall"),
    MatthewsCorrelationCoefficient(threshold=threshold, name="mcc"),
]

# Layer keyword arguments for L2 regularisation; empty when disabled.
kwargs_reg = {}
if l2_reg > 0:
    kwargs_reg = {
        reg_name: tf.keras.regularizers.L2(l2=l2_reg)
        for reg_name in ("kernel_regularizer", "bias_regularizer")
    }

input_dim = X.shape[-1]

tf.random.set_seed(random_seed)

# "fc" stacks plain Dense layers; any other model_type uses residual blocks,
# with half as many blocks as n_hidden_layers.
is_fc = model_type == "fc"
hidden_block_cls = tf.keras.layers.Dense if is_fc else ResidulaFCBlock
n_hidden_blocks = n_hidden_layers if is_fc else n_hidden_layers // 2

# Flat list [block, dropout, block, dropout, ...]; the hidden layers are
# instantiated before the input/output layers, as in the original cell.
hidden_layers = [
    layer
    for _ in range(n_hidden_blocks)
    for layer in (
        hidden_block_cls(n_hidden, activation="relu", **kwargs_reg),
        tf.keras.layers.Dropout(dropout_rate),
    )
]

model_layers = [
    tf.keras.layers.Dense(
        n_hidden, activation="relu", input_shape=(input_dim,), **kwargs_reg
    ),
    tf.keras.layers.Dropout(dropout_rate),
    *hidden_layers,
]
if not is_fc:
    # The residual variant has one additional Dropout before the output layer.
    model_layers.append(tf.keras.layers.Dropout(dropout_rate))

# Sigmoid output whose bias starts at `initial_bias`.
model_layers.append(
    tf.keras.layers.Dense(
        1,
        activation="sigmoid",
        bias_initializer=tf.keras.initializers.Constant(initial_bias),
        **kwargs_reg,
    )
)

model = tf.keras.Sequential(model_layers)

# Binary cross-entropy objective optimised with Adam.
# `learning_rate=` replaces the deprecated `lr=` keyword of the optimizer.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=metrics,
)

# Per-run output directory and a checkpoint manager keeping the last 5 saves.
model_dir = create_results_dir(case_results_dir, model_name=model_type)
checkpoint_path = os.path.join(model_dir, "ckpts")
ckpt = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

pprint("Model directory:", model_dir)

# Resume from the most recent checkpoint in this directory, if there is one.
latest_ckpt = ckpt_manager.latest_checkpoint
if latest_ckpt:
    ckpt.restore(latest_ckpt)
    pprint("Restoring checkpoint from:", latest_ckpt)

# Record every tunable parameter of this run next to the model outputs.
# `n_window_forecasts` is included: it is passed to create_datasets but was
# missing from the saved parameters.
save_dict_to_file(
    {
        "random_seed": random_seed,
        "test_frac": test_frac,
        "val_frac": val_frac,
        "downsampling_rate": downsampling_rate,
        "n_window_targets": n_window_targets,
        "n_window_history": n_window_history,
        "n_window_forecasts": n_window_forecasts,
        "use_forecasts": use_forecasts,
        "use_actions": use_actions,
        "model_type": model_type,
        "dropout_rate": dropout_rate,
        "l2_reg": l2_reg,
        "n_hidden": n_hidden,
        "n_hidden_layers": n_hidden_layers,
        "learning_rate": learning_rate,
        "n_batch": n_batch,
        "n_epochs": n_epochs,
        "threshold": threshold,
    },
    os.path.join(model_dir, "params.txt"),
)

Model directory:                        ./results\_bc-fc\l2rpn_2019_art-dc\2020-10-06_18-43-29_res


In [18]:
# Append the stdout captured in `cap` to this run's log file.
log_path = os.path.join(model_dir, "log.txt")
with open(log_path, "a") as log_file:
    log_file.write(cap.stdout)

In [None]:
"""
    Training
"""
tensorboard_path = os.path.join(model_dir, "logs")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_path, write_graph=False, write_images=False, update_freq="epoch"
)
# Stop when the validation MCC has not improved for 20 epochs and roll back to
# the best weights. MCC is higher-is-better, so the direction is given
# explicitly instead of relying on Keras inferring it from the metric name.
early_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_mcc", mode="max", patience=20, restore_best_weights=True
)

print(f"    - TensorBoard cmd:\ttensorboard --logdir={tensorboard_path}")

# `early_callback` was previously created but never handed to `fit`, so early
# stopping had no effect; it is now registered alongside TensorBoard.
training = model.fit(
    X_train,
    Y_train,
    epochs=n_epochs,
    batch_size=n_batch,
    class_weight=class_weight,
    validation_data=(X_val, Y_val),
    callbacks=[tensorboard_callback, early_callback],
    verbose=4,  # kept as-is; the recorded output shows one "Epoch i/n" line per epoch
)

# Save the trained model/optimizer state through the checkpoint manager.
ckpt_save_path = ckpt_manager.save()
pprint("    - Saving checkpoint to:", ckpt_save_path)

    - TensorBoard cmd:	tensorboard --logdir=./results\_bc-fc\l2rpn_2019_art-dc\2020-10-06_18-43-29_res\logs
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200


In [None]:
"""
    Results
"""

# List the trainable variables, then plot the training history.
trainable_vars = model.trainable_variables
print_variables(trainable_vars)
plot_metrics(training, Y_train, Y_val, save_dir=model_dir)

In [None]:
%%capture cap --no-stderr

# The four evaluated splits, in reporting order. `describe_names` are the
# labels passed to describe_results; `plot_titles` label the figures.
describe_names = ("Train", "Validation", "Test", "All")
plot_titles = ("Training", "Validation", "Test", "All")
eval_inputs = (X_train, X_val, X_test, X_all)
eval_labels = (Y_train, Y_val, Y_test, Y_all)

# Metric values per split (same order as model.metrics_names).
results_train, results_val, results_test, results_all = [
    model.evaluate(X_split, Y_split, batch_size=n_batch, verbose=0)
    for X_split, Y_split in zip(eval_inputs, eval_labels)
]
eval_results = (results_train, results_val, results_test, results_all)

# Model outputs per split. `verbose=0` is now passed for every split (it was
# only set for the "All" split) so that no progress output ends up in the
# captured log regardless of the library's default verbosity.
Y_train_pred, Y_val_pred, Y_test_pred, Y_all_pred = [
    model.predict(X_split, batch_size=n_batch, verbose=0) for X_split in eval_inputs
]
eval_preds = (Y_train_pred, Y_val_pred, Y_test_pred, Y_all_pred)

for describe_name, split_results, Y_split in zip(
    describe_names, eval_results, eval_labels
):
    describe_results(model.metrics_names, split_results, Y_split, name=describe_name)

for plot_title, Y_split, Y_split_pred in zip(plot_titles, eval_labels, eval_preds):
    plot_cm(Y_split, Y_split_pred, plot_title, save_dir=model_dir)

# One ROC figure with a (title, labels, predictions) entry per split.
plot_roc(
    list(zip(plot_titles, eval_labels, eval_preds)),
    save_dir=model_dir,
)

In [None]:
# Replay the evaluation output that the preceding `%%capture cap` cell stored in `cap`.
cap.show()

In [None]:
# Append the stdout captured in `cap` to this run's log file.
log_path = os.path.join(model_dir, "log.txt")
with open(log_path, "a") as log_file:
    log_file.write(cap.stdout)