In [2]:
import itertools
import os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

from experience import load_experience
from bclassification.utils_base import (
    print_class_weights,
    compute_weight_bias,
    print_dataset,
    plot_metrics,
    plot_cm,
    plot_roc,
    describe_results,
)
from lib.constants import Constants as Const
from lib.data_utils import (
    make_dir,
    env_pf,
    create_results_dir,
    save_dict_to_file,
)
from lib.tf_utils import (
    print_variables,
    ResidulaFCBlock,
    MatthewsCorrelationCoefficient,
)
from lib.visualizer import Visualizer, pprint

# Instantiated for its side effects only; the instance is discarded.
# NOTE(review): presumably configures global plotting style — confirm in lib.visualizer.
Visualizer()

# Input data directory (pre-built datasets) and root directory for results.
# NOTE(review): make_dir presumably creates the directory and returns its path — confirm.
experience_dir = make_dir(os.path.join(Const.EXPERIENCE_DIR, "data-aug"))
results_dir = make_dir(os.path.join(Const.RESULTS_DIR, "il"))

# Identifiers of the agent/case whose experience is loaded further below.
agent_name = "agent-mip"
case_name = "l2rpn_2019_art"
env_dc = True
verbose = False

# Per-case results directory, named "<case_name>-<env_pf(env_dc)>".
case_results_dir = make_dir(os.path.join(results_dir, f"{case_name}-{env_pf(env_dc)}"))

In [3]:
"""
    Parameters
"""

# Seed used for NumPy, TensorFlow, the train/val/test splits and PCA.
random_seed = 1

# Dataset selection: these values are baked into the dataset file name
# loaded in the "Dataset" cell and are forwarded to create_dataset below.
input_mode = "structured"
label_mode = "dn"

n_window_targets = 12  # 0 or 12
n_window_history = 1
downsampling_rate = 0.10
n_window_forecasts = 1
use_actions = True
feature_scaling = True
batch_normalization = False  # Append a BatchNormalization layer after the hidden layers

# Augmentation: number of noisy copies of the positive training samples and
# the standard deviation of the Gaussian noise (disabled when either is 0).
n_perturb = 0
scale_perturb = 0.02

# Fractions passed to train_test_split. The test split is taken from what
# remains after the validation split.
val_frac = 0.10
test_frac = 0.10

# Model
model_type = "res"  # "fc" or "linear"; any other value builds the residual model
dropout_rate = 0.1
l1_reg = 0  # L1 takes precedence over L2 when both are > 0
l2_reg = 0
n_hidden = 512
n_hidden_layers = 2
threshold = 0.50  # Decision threshold used by the thresholded metrics and plots
pos_scaling = 1  # Extra multiplier applied to the positive class weight

# Training
learning_rate = 1e-5
n_batch = 512
n_epochs = 300

In [4]:
def perturb(X, Y, scale, n):
    """Create ``n`` noisy copies of ``X`` and repeat the labels ``Y`` to match.

    Zero-mean Gaussian noise with standard deviation ``scale`` is added to the
    leading "flows and injections" columns of every observation window
    (``n_window_history + 1`` windows) and, when ``n_window_forecasts > 0``,
    to the trailing forecast columns. All other columns are copied unchanged.

    The column layout depends on the notebook-level ``input_mode``
    ("binary" or "structured"); ``n_window_history`` and
    ``n_window_forecasts`` are likewise read from the notebook namespace.
    The grid sizes below are hard-coded.
    NOTE(review): they presumably match the ``l2rpn_2019_art`` case — confirm
    before using this with another environment.

    Args:
        X: 2-D array of samples (rows) by features (columns).
        Y: 1-D array of labels aligned with the rows of ``X``.
        scale: Standard deviation of the Gaussian noise.
        n: Number of perturbed copies to generate.

    Returns:
        Tuple ``(X_p, Y_p)`` with ``n * len(X)`` rows; empty arrays when
        ``n <= 0``.

    Raises:
        ValueError: If ``input_mode`` is neither "binary" nor "structured".
    """
    n_gen = 5
    n_load = 11
    n_line = 20
    n_sub = 14

    # n_first: leading columns of each observation that get perturbed
    #          (flows and injections).
    # s:       total number of columns in one observation.
    if input_mode == "binary":
        n_first = 3 * n_gen + n_load + 2 * n_line
        s = n_first + (4 * n_line + 2 * n_gen + 2 * n_load) + n_sub + 2 * n_line
    elif input_mode == "structured":
        n_first = 2 * (n_gen + n_load) + 2 * n_gen + 5 * n_line
        s = n_first + n_sub + 2 * n_line
    else:
        # Previously this fell through to an unbound-variable error.
        raise ValueError(
            f"Unsupported input_mode {input_mode!r}; expected 'binary' or 'structured'."
        )

    if n <= 0:
        # np.vstack([]) would raise; return empty arrays of compatible shape.
        return X[:0].copy(), Y[:0].copy()

    n_rows = X.shape[0]
    x_p = []
    for _ in range(n):
        x = X.copy()

        # Perturb the flows/injections of every stacked observation window.
        for h in range(n_window_history + 1):
            start = h * s
            stop = start + n_first
            noise = np.random.normal(loc=0.0, scale=scale, size=(n_rows, n_first))
            x[:, start:stop] = x[:, start:stop] + noise

        # Perturb the forecast columns stored at the end of the feature vector.
        if n_window_forecasts > 0:
            n_last = n_window_forecasts * (n_gen + n_load)
            noise = np.random.normal(loc=0.0, scale=scale, size=(n_rows, n_last))
            x[:, -n_last:] = x[:, -n_last:] + noise

        x_p.append(x)

    return np.vstack(x_p), np.hstack([Y] * n)

In [5]:
%%capture cap --no-stderr
"""
    Dataset
"""

# Seed NumPy and TensorFlow so the random draws below are repeatable.
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# The dataset file name encodes the parameters it was generated with.
tar_str = f"w{n_window_targets}"
dr_str = str(int(100 * downsampling_rate))
f_str = str(n_window_forecasts)
h_str = str(n_window_history)

file_name = f"fc-d{dr_str}-h{h_str}-f{f_str}-{tar_str}-{input_mode}.npz"
file_path = os.path.join(experience_dir, file_name)

# X_all/Y_all hold every sample; mask_targets selects the subset used for
# training, validation and testing.
data = np.load(file_path)
X_all = data["X_all"]
Y_all = data["Y_all"]
mask_targets = data["mask_targets"]
X = X_all[mask_targets, :]
Y = Y_all[mask_targets]

class_weight, initial_bias = compute_weight_bias(Y)
# The computed output bias is deliberately discarded; the output layer
# starts from a zero bias.
initial_bias = 0
class_weight[1] = class_weight[1] * pos_scaling

# Split off the validation set first, then carve the test set out of the
# remaining training data.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=val_frac, random_state=random_seed
)

X_train, X_test, Y_train, Y_test = train_test_split(
    X_train, Y_train, test_size=test_frac, random_state=random_seed
)

# Augment by adding random noise and upsampling the positive class
if scale_perturb > 0 and n_perturb > 0:
    indices_pos = np.equal(Y_train, 1)
    X_train_perturb, Y_train_perturb = perturb(
        X_train[indices_pos, :], Y_train[indices_pos], scale=scale_perturb, n=n_perturb
    )
    X_train = np.vstack((X_train, X_train_perturb))
    Y_train = np.hstack((Y_train, Y_train_perturb))

# "Test-All": the test split plus a random sample of the rows that were
# excluded by mask_targets. The builtin `bool` replaces the `np.bool` alias,
# which was removed from NumPy.
# NOTE(review): the sampling probability is val_frac, not test_frac — confirm intent.
mask_test_neg = np.logical_and(
    ~mask_targets,
    np.random.binomial(1, val_frac, mask_targets.size).astype(bool),
)
X_test_all = np.concatenate((X_test, X_all[mask_test_neg, :]))
Y_test_all = np.concatenate((Y_test, Y_all[mask_test_neg]))

print_dataset(X_all, Y_all, "All data")
print_dataset(X, Y, "Data")
print_dataset(X_train, Y_train, "Train")
print_dataset(X_val, Y_val, "Validation")
print_dataset(X_test, Y_test, "Test")
print_dataset(X_test_all, Y_test_all, "Test-All")
print_class_weights(class_weight)
pprint("Initial bias:", "{:.4f}".format(float(initial_bias)))

model_dir = create_results_dir(case_results_dir, model_name=model_type)

# Drop the references to the full arrays; only the splits are needed from here on.
del data
del X_all
del Y_all

In [6]:
"""
    Model
"""

# Metrics reported during training and evaluation. The confusion-matrix
# counts share one constructor signature, so they are built from a table;
# the remaining metrics are spelled out because their arguments differ.
metrics = [
    *(
        metric_cls(thresholds=threshold, name=metric_name)
        for metric_cls, metric_name in (
            (tf.keras.metrics.TruePositives, "tp"),
            (tf.keras.metrics.FalsePositives, "fp"),
            (tf.keras.metrics.TrueNegatives, "tn"),
            (tf.keras.metrics.FalseNegatives, "fn"),
        )
    ),
    tf.keras.metrics.BinaryAccuracy(threshold=threshold, name="accuracy"),
    tf.keras.metrics.Precision(thresholds=threshold, name="precision"),
    tf.keras.metrics.Recall(thresholds=threshold, name="recall"),
    MatthewsCorrelationCoefficient(threshold=threshold, name="mcc"),
    tf.keras.metrics.AUC(name="auc"),
]

# Keyword arguments forwarded to every Dense/residual layer. Kernels and
# biases each get their own regularizer instance; L1 wins when both
# coefficients are positive, and no regularization is applied otherwise.
kwargs_reg = {}
if l1_reg > 0:
    kwargs_reg = {
        reg_key: tf.keras.regularizers.L1(l1_reg)
        for reg_key in ("kernel_regularizer", "bias_regularizer")
    }
elif l2_reg > 0:
    kwargs_reg = {
        reg_key: tf.keras.regularizers.L2(l2=l2_reg)
        for reg_key in ("kernel_regularizer", "bias_regularizer")
    }

# Number of input features (last axis of the selected dataset).
input_dim = X.shape[-1]

# Re-seed TensorFlow right before the layers are created.
tf.random.set_seed(random_seed)
if model_type == "fc":
    # Fully connected model: an input Dense+Dropout pair followed by
    # n_hidden_layers further Dense+Dropout pairs and a sigmoid output unit.
    hidden_layers = [
        (
            tf.keras.layers.Dense(n_hidden, activation="relu", **kwargs_reg),
            tf.keras.layers.Dropout(dropout_rate),
        )
        for _ in range(n_hidden_layers)
    ]
    # Flatten the list of (Dense, Dropout) pairs into a single layer list.
    hidden_layers = list(itertools.chain(*hidden_layers))

    # A single BatchNormalization layer is appended after all hidden layers.
    if batch_normalization:
        hidden_layers = hidden_layers + [tf.keras.layers.BatchNormalization()]
    
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Dense(
                n_hidden, activation="relu", input_shape=(input_dim,), **kwargs_reg
            ),
            tf.keras.layers.Dropout(dropout_rate),
            *hidden_layers,
            tf.keras.layers.Dense(
                1,
                activation="sigmoid",
                bias_initializer=tf.keras.initializers.Constant(initial_bias),
                **kwargs_reg,
            ),
        ]
    )

elif model_type == "linear":
    # Logistic regression: a single sigmoid unit applied directly to the inputs.
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Dense(
                1,
                activation="sigmoid",
                bias_initializer=tf.keras.initializers.Constant(initial_bias), input_shape=(input_dim,),
                **kwargs_reg,
            ),
        ]
    )
else:
    # Residual model (used for "res" and for any unrecognized model_type).
    # n_hidden_layers // 2 residual blocks are created.
    # NOTE(review): presumably each ResidulaFCBlock holds two layers — confirm in lib.tf_utils.
    hidden_layers = [
        (
            ResidulaFCBlock(n_hidden, activation="relu", **kwargs_reg),
            tf.keras.layers.Dropout(dropout_rate),
        )
        for _ in range(n_hidden_layers // 2)
    ]
    
    # Flatten the list of (block, Dropout) pairs into a single layer list.
    hidden_layers = list(itertools.chain(*hidden_layers))

    # A single BatchNormalization layer is appended after all residual blocks.
    if batch_normalization:
        hidden_layers = hidden_layers + [tf.keras.layers.BatchNormalization()]
    
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Dense(
                n_hidden, activation="relu", input_shape=(input_dim,), **kwargs_reg
            ),
            tf.keras.layers.Dropout(dropout_rate),
            *hidden_layers,
            # Extra Dropout before the output, in addition to the one that
            # already ends hidden_layers.
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(
                1,
                activation="sigmoid",
                bias_initializer=tf.keras.initializers.Constant(initial_bias),
                **kwargs_reg,
            ),
        ]
    )

# Binary cross-entropy on the sigmoid output, optimized with Adam.
# The step size is passed as `learning_rate`: the legacy `lr` alias is
# deprecated in TF 2.x optimizers and rejected by newer Keras releases.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=metrics,
)

# Track both the model and its optimizer so a restore resumes training
# state; at most five checkpoints are kept under <model_dir>/ckpts.
checkpoint_path = os.path.join(model_dir, "ckpts")
ckpt = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

pprint("Model directory:", model_dir)

# Resume from the newest checkpoint when one already exists in model_dir.
# NOTE(review): model_dir comes from create_results_dir, which appears to
# produce a fresh timestamped directory per run — confirm whether this
# branch is ever taken.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    pprint("Restoring checkpoint from:", ckpt_manager.latest_checkpoint)
    
    
# Record every tunable parameter next to the model artifacts so the run can
# be reproduced. `test_frac` and `n_window_forecasts` are included as well:
# they determine the test split and the dataset file that was loaded.
save_dict_to_file(
    {
        "random_seed": random_seed,
        "input_mode": input_mode,
        "label_mode": label_mode,
        "val_frac": val_frac,
        "test_frac": test_frac,
        "downsampling_rate": downsampling_rate,
        "n_window_targets": n_window_targets,
        "n_window_history": n_window_history,
        "n_window_forecasts": n_window_forecasts,
        "use_actions": use_actions,
        "feature_scaling": feature_scaling,
        "n_perturb": n_perturb,
        "scale_perturb": scale_perturb,
        "model_type": model_type,
        "dropout_rate": dropout_rate,
        "l1_reg": l1_reg,
        "l2_reg": l2_reg,
        "n_hidden": n_hidden,
        "n_hidden_layers": n_hidden_layers,
        "learning_rate": learning_rate,
        "n_batch": n_batch,
        "n_epochs": n_epochs,
        "threshold": threshold,
        "pos_scaling": pos_scaling,
        "batch_normalization": batch_normalization,
    },
    os.path.join(model_dir, "params.txt"),
)

Model directory:                        ./results\il\l2rpn_2019_art-dc\2020-11-04_15-54-52_res


In [7]:
# Replay the output captured by the most recent `%%capture cap` cell, then
# append the same text to the run's log file.
cap.show()

log_path = os.path.join(model_dir, "log.txt")
with open(log_path, "a") as log_file:
    log_file.write(cap.stdout)

    - All data:                         X, Y	       (740873, 412), (740873,)
        - Positive labels:              1.03 %
        - Negative labels:              98.97 %

    - Data:                             X, Y	        (72947, 412), (72947,)
        - Positive labels:              10.45 %
        - Negative labels:              89.55 %

    - Train:                            X, Y	        (59086, 412), (59086,)
        - Positive labels:              10.33 %
        - Negative labels:              89.67 %

    - Validation:                       X, Y	         (7295, 412), (7295,)
        - Positive labels:              11.60 %
        - Negative labels:              88.40 %

    - Test:                             X, Y	         (6566, 412), (6566,)
        - Positive labels:              10.31 %
        - Negative labels:              89.69 %

    - Test-All:                         X, Y	        (73063, 412), (73063,)
        - Positive labels:              0.93 %
        - Nega

In [None]:
"""
    Training
"""
# tensorboard_path = os.path.join(model_dir, "logs")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(
#     log_dir=tensorboard_path, write_graph=False, write_images=False, update_freq="epoch"
# )

# print(f"tensorboard --logdir={tensorboard_path}")

# Fit on the training split with class weighting; the validation split is
# evaluated after every epoch. The returned history feeds plot_metrics.
training = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    class_weight=class_weight,
    batch_size=n_batch,
    epochs=n_epochs,
    verbose=1,
    # callbacks=[tensorboard_callback],
)

# Persist the trained weights and optimizer state.
ckpt_save_path = ckpt_manager.save()
pprint("    - Saving checkpoint to:", ckpt_save_path)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300


Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300


Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300


Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300


Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300


Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300


Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300


Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300


Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300


Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300


Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300


Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300


Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300

In [None]:
%%capture cap --no-stderr
"""
    Results
"""

# Reload the newest checkpoint (when there is one) before evaluating.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    pprint("Restoring checkpoint from:", ckpt_manager.latest_checkpoint)

print_variables(model.trainable_variables)
plot_metrics(training, Y_train, Y_val, save_dir=model_dir)

# Aggregate metrics per split (evaluated in the order train, val, test, test-all).
results_train, results_val, results_test, results_test_all = (
    model.evaluate(split_X, split_Y, batch_size=n_batch, verbose=0)
    for split_X, split_Y in (
        (X_train, Y_train),
        (X_val, Y_val),
        (X_test, Y_test),
        (X_test_all, Y_test_all),
    )
)

# Predicted probabilities per split, in the same order.
Y_train_pred, Y_val_pred, Y_test_pred, Y_test_all_pred = (
    model.predict(split_X, batch_size=n_batch)
    for split_X in (X_train, X_val, X_test, X_test_all)
)

for split_name, split_results, split_Y in (
    ("Train", results_train, Y_train),
    ("Validation", results_val, Y_val),
    ("Test", results_test, Y_test),
    ("Test-all", results_test_all, Y_test_all),
):
    describe_results(model.metrics_names, split_results, split_Y, name=split_name)

# Confusion matrices are drawn for the three disjoint splits only.
for split_title, split_Y, split_pred in (
    ("Training", Y_train, Y_train_pred),
    ("Validation", Y_val, Y_val_pred),
    ("Test", Y_test, Y_test_pred),
):
    plot_cm(split_Y, split_pred, split_title, save_dir=model_dir)

# ROC curves for all four evaluation sets on one figure.
roc_inputs = [
    ("Training", Y_train, Y_train_pred),
    ("Validation", Y_val, Y_val_pred),
    ("Test", Y_test, Y_test_pred),
    ("Test-all", Y_test_all, Y_test_all_pred),
]
plot_roc(roc_inputs, save_dir=model_dir)

In [None]:
# Replay the captured evaluation output, then append it to the run's log file.
cap.show()

log_path = os.path.join(model_dir, "log.txt")
with open(log_path, "a") as log_file:
    log_file.write(cap.stdout)

In [None]:
import matplotlib.pyplot as plt

from bclassification.utils_fcn import create_dataset

def plot_preds(t, y, y_pred, rhos, threshold, chronic_idx=None):
    """Plot true positives and predicted probabilities over time.

    Draws bars at the time steps where ``y == 1``, the predicted probability
    in green below ``threshold`` and in red above it, and a horizontal red
    line at ``threshold``. When ``rhos`` is given it is added on a twin axis.
    Figures are saved into the notebook-level ``model_dir`` only when both
    ``model_dir`` is truthy and ``chronic_idx`` is not ``None``: once before
    and once after the ``rhos`` overlay (suffix ``-rhos``).

    Args:
        t: Array of time steps (x axis).
        y: Array of true labels aligned with ``t``.
        y_pred: Array of predicted probabilities aligned with ``t``.
        rhos: Values to overlay on a secondary axis, or ``None``.
        threshold: Decision threshold separating the two colors.
        chronic_idx: Index used in the saved file names, or ``None``.
    """
    width, height = Const.FIG_SIZE[0] * 2, Const.FIG_SIZE[1]
    fig, ax = plt.subplots(figsize=(width, height))
    save_figures = model_dir and chronic_idx is not None

    # True positive labels as bars.
    is_positive = np.equal(y, 1)
    ax.bar(t[is_positive], y[is_positive], label=r"$y$", color="tab:blue", lw=1.5)

    # Masking hides the complementary part of the curve in each color.
    below_threshold = np.ma.masked_where(np.greater_equal(y_pred, threshold), y_pred)
    above_threshold = np.ma.masked_where(np.less_equal(y_pred, threshold), y_pred)
    ax.plot(t, below_threshold, "tab:green", t, above_threshold, "tab:red")

    # Horizontal line marking the decision threshold.
    ax.plot(t, np.ones_like(t) * threshold, label=r"$y$", color="tab:red")

    ax.set_xlabel("Time step t")
    ax.set_ylabel(r"$P(y = 1)$")

    # Clip long chronics to the first 2000 steps; None keeps the current limit.
    right_limit = 2000 if ax.get_xlim()[-1] > 2000 else None
    ax.set_xlim(left=-10, right=right_limit)

    fig.tight_layout()
    if save_figures:
        fig.savefig(os.path.join(model_dir, "test-y-step-{:04}".format(chronic_idx)))

    if rhos is not None:
        # Secondary axis sharing the y-range of the probability axis.
        rho_ax = ax.twinx()
        rho_ax.set_ylabel(r"$\rho^\mathrm{max}$")
        rho_ax.plot(t, rhos, label=r"$y$", lw=Const.LW, color="tab:orange")
        rho_ax.set_ylim(*ax.get_ylim())

    fig.tight_layout()
    if save_figures:
        fig.savefig(os.path.join(model_dir, "test-y-step-{:04}-rhos".format(chronic_idx)))


# Load the sample experience and rebuild its dataset with the same
# settings that were used for the training data.
sample_experience_dir = make_dir(os.path.join(Const.EXPERIENCE_DIR, "data-aug-sample"))
case, collector = load_experience(case_name, agent_name, sample_experience_dir, env_dc=env_dc)

dataset_options = {
    "input_mode": input_mode,
    "label_mode": label_mode,
    "n_window_history": n_window_history,
    "n_window_targets": n_window_targets,
    "downsampling_rate": downsampling_rate,
    "n_window_forecasts": n_window_forecasts,
    "use_actions": use_actions,
    "feature_scaling": feature_scaling,
}

# Only the last two of the five returned values are used here.
_, _, _, X_all_sample, Y_all_sample = create_dataset(case, collector, **dataset_options)

In [None]:
# The sample dataset stores the chronics back to back; walk through it one
# chronic at a time and plot the predictions against the labels.
start_idx = 0
for chronic_idx, chronic_len in zip(collector.chronic_ids, collector.chronic_lengths):
    chronic_rows = slice(start_idx, start_idx + chronic_len)
    X_chronic = X_all_sample[chronic_rows, :]
    Y_chronic = Y_all_sample[chronic_rows]
    Y_chronic_pred = model.predict(X_chronic, batch_size=n_batch).flatten()

    t = np.arange(chronic_len)

    # Maximum rho per observation; the final observation is skipped.
    chronic_obses = collector.data[chronic_idx]["obses"][:-1]
    rhos = [np.max(obs.rho) for obs in chronic_obses]

    plot_preds(t, Y_chronic, Y_chronic_pred, rhos, threshold, chronic_idx)

    start_idx += chronic_len

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE

def plot_scatter(ax, data, color_label, marker_label=None, x_label=None, y_label=None, ax_title=None):
    """Scatter ``data[x_label]`` against ``data[y_label]``, one color per class.

    Each distinct value of ``data[color_label]`` gets a color from
    ``Const.COLORS`` (cycled when there are more values than colors). When
    ``marker_label`` is given, points are further split by that column:
    value ``"1"`` is drawn as a large "+", ``"0"`` as a hollow translucent
    circle and anything else as a small "+".

    Args:
        ax: Matplotlib axes to draw on.
        data: DataFrame holding the plotted columns.
        color_label: Column whose values select the color.
        marker_label: Optional column whose values select the marker.
        x_label: Column for the x axis; also used as the axis label.
        y_label: Column for the y axis; also used as the axis label.
        ax_title: Optional axes title.
    """
    palette = Const.COLORS
    color_values = sorted(data[color_label].unique())

    for color_pos, color_val in enumerate(color_values):
        color = palette[color_pos % len(palette)]
        has_color = data[color_label] == color_val

        if not marker_label:
            subset = data[has_color]
            ax.scatter(subset[x_label], subset[y_label], label=str(color_val), s=30, marker="+", c=color)
            continue

        for marker_val in sorted(data[marker_label].unique()):
            subset = data[np.logical_and(has_color, data[marker_label] == marker_val)]
            if subset.shape[0] == 0:
                continue

            # Marker style per marker value; unknown values use the small "+".
            style = {
                "1": {"s": 50, "marker": "+", "facecolors": "none", "c": color},
                "0": {"s": 30, "marker": "o", "facecolors": "none", "edgecolors": color, "alpha": 0.5},
            }.get(marker_val, {"s": 30, "marker": "+", "c": color})
            ax.scatter(subset[x_label], subset[y_label], label=str(color_val), **style)

    # A legend is only shown for a handful of classes without marker splitting.
    if not marker_label and len(color_values) < 4:
        ax.legend(color_values)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    if ax_title:
        ax.set_title(ax_title)

"""
    TSNE

    Following: https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b
"""

# Cache file for the 2-D embedding, named after the dataset parameters.
# The original name referenced `ft_str`, which is never defined in this
# notebook (NameError on a fresh kernel). The target-window tag `tar_str`
# from the dataset cell is used instead, mirroring the dataset file name.
# NOTE(review): rename any previously cached tsne-*.npz files accordingly.
file_name = f"tsne-d{dr_str}-h{h_str}-f{f_str}-{tar_str}-{input_mode}.npz"
file_path = os.path.join(experience_dir, file_name)


if os.path.exists(file_path):
    pprint("Loading:", file_path)
    tsne_results = np.load(file_path)["tsne_results"]
else:
    # Reduce to 100 principal components first, then embed with t-SNE.
    # Both steps are seeded so a recomputed embedding is reproducible.
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter`; adjust when upgrading.
    tsne = TSNE(n_components=2, verbose=1, perplexity=30, n_iter=300, random_state=random_seed)
    red = PCA(n_components=100, random_state=random_seed)

    X_red = red.fit_transform(X)
    pprint("    - Explained variance:", red.explained_variance_ratio_.sum())
    tsne_results = tsne.fit_transform(X_red)
    np.savez_compressed(
        file_path,
        tsne_results=tsne_results,
    )
    pprint("Saving:", file_path)

# Embedding coordinates together with the true label (as a string class).
data = pd.DataFrame(
    {
        "tsne-1": tsne_results[:, 0],
        "tsne-2": tsne_results[:, 1],
        "y": Y.astype(int).astype(str),
    }
)

# Same two-stage split (and seed) as the one applied to X and Y.
data_train, data_val = train_test_split(
    data, test_size=val_frac, random_state=random_seed
)
data_train, data_test = train_test_split(
    data_train, test_size=test_frac, random_state=random_seed
)

# One scatter plot per split, colored by the true label.
for fig_name, split_data in (
    ("tsne-X-train", data_train),
    ("tsne-X-val", data_val),
    ("tsne-X-test", data_test),
):
    fig, ax = plt.subplots(figsize=Const.FIG_SIZE)
    plot_scatter(ax, split_data, color_label="y", x_label="tsne-1", y_label="tsne-2")
    fig.savefig(os.path.join(model_dir, fig_name))


# Hard predictions at the decision threshold for every selected sample.
Y_pred = np.greater(model.predict(X, batch_size=n_batch), threshold).astype(int).flatten()

# Mark false negatives (true label 1, predicted 0) as a third class "2".
# A single .loc assignment is used because chained indexing
# (data["y"][mask] = ...) may write to a temporary copy.
false_negative_mask = np.logical_and(np.not_equal(Y, Y_pred), np.equal(Y, 1))
data.loc[false_negative_mask, "y"] = "2"

# Same two-stage split (and seed) as the one applied to X and Y.
data_train, data_val = train_test_split(
    data, test_size=val_frac, random_state=random_seed
)

data_train, data_test = train_test_split(
    data_train, test_size=test_frac, random_state=random_seed
)

# The training figure is saved as "tsne-X-labels-train" so it no longer
# overwrites the label-only "tsne-X-train" plot and matches the val/test names.
fig_name = "tsne-X-labels-train"
fig, ax = plt.subplots(figsize=Const.FIG_SIZE)
plot_scatter(ax, data_train, color_label="y", x_label="tsne-1", y_label="tsne-2")
fig.savefig(os.path.join(model_dir, fig_name))

fig_name = "tsne-X-labels-val"
fig, ax = plt.subplots(figsize=Const.FIG_SIZE)
plot_scatter(ax, data_val, color_label="y", x_label="tsne-1", y_label="tsne-2")
fig.savefig(os.path.join(model_dir, fig_name))

fig_name = "tsne-X-labels-test"
fig, ax = plt.subplots(figsize=Const.FIG_SIZE)
plot_scatter(ax, data_test, color_label="y", x_label="tsne-1", y_label="tsne-2")
fig.savefig(os.path.join(model_dir, fig_name))