In [None]:
import math
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# Seed both random number generators up front so repeated runs start identically.
np.random.seed(0)
torch.manual_seed(0)

# --- Windowing and optimisation hyper-parameters ---
input_window, output_window = 19, 1
batch_size = 1
start_lr = 0.00001
scheduler_gamma = 0.87
epochs = 10

from sklearn.preprocessing import RobustScaler

# scaler = StandardScaler()
scaler = RobustScaler()

# Descriptive labels for this run; `target` also names the measurement column
# that is read from the experiment tables later on.
data_folder, data_file, target = "run475", "A2.csv", "DOT"

feature_size = 1
# When True the loss is calculated over every position of a window;
# when False only over the last `output_window` (predicted) positions.
calculate_loss_over_all_values = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Emit the run banner in one write; the text is the same as printing line by line.
# print("Loss: " + loss)
print(
    "\n".join(
        [
            f"data_folder: {data_folder}",
            f"data_file: {data_file}",
            f"target: {target}",
            f"input_window: {input_window}",
            f"output_window: {output_window}",
            f"batch_size: {batch_size}",
            f"start_lr: {start_lr}",
            f"epochs: {epochs}",
            f"feature_size: {feature_size}",
            "Loss RMSE",
            "===================================",
        ]
    )
)

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-square error, computed as ``sqrt(MSELoss(yhat, y))``.

    Args:
        eps: Lower bound applied to the mean squared error before the square
            root is taken. Values at or above ``eps`` are returned unchanged.
    """

    def __init__(self, eps=1e-12):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        # The derivative of sqrt is infinite at 0, so an exact match would turn
        # the (zero) MSE gradient into NaN (inf * 0) and poison every parameter
        # on the next optimizer step. Clamping yields a zero gradient instead.
        return torch.sqrt(self.mse(yhat, y).clamp_min(self.eps))


criterion = RMSELoss()

In [None]:
# Load the experiment archive; it is indexed by run id and then experiment id below.
# NOTE(security): unpickling can execute arbitrary code - only load this file
# when it comes from a trusted source.
data = pd.read_pickle("kiwi_experiments_and_run_355.pk")

In [None]:
# Run analysed throughout the rest of the notebook.
run_id = 482

# List every run id available in the archive.
runs = data.keys()
print(runs)

In [None]:
# Experiments of the selected run used as train (1), validation (2) and test (3) sets.
experiment_id_1 = 16282
experiment_id_2 = 16283
experiment_id_3 = 16284

# Index with `run_id` instead of repeating the literal so that changing the run
# also changes which experiments are listed here.
experiments = data[run_id].keys()
print(experiments)

In [None]:
# Pull the five tables of the first experiment into individually named variables.
(
    metadata,
    setpoints,
    measurements_reactor,
    measurements_array,
    measurements_aggregated,
) = (
    data[run_id][experiment_id_1][table_name]
    for table_name in (
        "metadata",
        "setpoints",
        "measurements_reactor",
        "measurements_array",
        "measurements_aggregated",
    )
)
# print(data[run_id][experiment_id].keys())

In [None]:
# Plain-text dump of the first experiment's metadata entry.
print(data[run_id][experiment_id_1]["metadata"])

In [None]:
# Wrap the metadata in a DataFrame and display it.
df_metadata = pd.DataFrame(data=metadata)
df_metadata

In [None]:
# Wrap the setpoints in a DataFrame and display it.
df_setpoints = pd.DataFrame(data=setpoints)
df_setpoints

In [None]:
# Summary statistics of the setpoints table.
df_setpoints.describe()

In [None]:
# Wrap the reactor measurements in a DataFrame and display it.
df_measurements_reactor = pd.DataFrame(data=measurements_reactor)
df_measurements_reactor

In [None]:
# Summary statistics of the reactor measurements.
df_measurements_reactor.describe()

In [None]:
# Wrap the array measurements in a DataFrame and display it.
df_measurements_array = pd.DataFrame(data=measurements_array)
df_measurements_array

In [None]:
# Summary statistics of the array measurements. Left as the cell's last
# expression so the notebook renders it as a table, matching the other
# `describe()` cells, instead of printing plain text.
df_measurements_array.describe()

In [None]:
# Number of missing values in each column of the array measurements.
df_measurements_array.isnull().sum()

In [None]:
# Total number of elements (rows x columns) in the table, not the row count.
df_measurements_array.size

In [None]:
# Wrap the aggregated measurements in a DataFrame and display it.
df_measurements_aggregated = pd.DataFrame(data=measurements_aggregated)
df_measurements_aggregated

In [None]:
# Summary statistics of the aggregated measurements. Left as the cell's last
# expression so the notebook renders it as a table, matching the other
# `describe()` cells, instead of printing plain text.
df_measurements_aggregated.describe()

In [None]:
# Total element count (rows x columns), followed by the number of missing
# values per column of the aggregated measurements.
print(df_measurements_aggregated.size)
print(df_measurements_aggregated.isnull().sum())

In [None]:
# plt.figure(figsize=(12,8))
# plt.grid(True, which='both')
# plt.plot(df_measurements_aggregated['DOT'])
# plt.show()

In [None]:
# %matplotlib inline
# plt.figure(figsize=(24,4))
# plt.grid(True)
# plt.style.use('seaborn-whitegrid')
# plt.ylim([80,100])
# plt.plot(df_measurements_aggregated['DOT'], 'ob')
# plt.style.use('default')
# plt.title("DOT")
# plt.show()

In [None]:
# Close gaps in the DOT column: carry the last observation forward, then
# back-fill whatever is still missing at the very start of the series.
# `ffill()` / `bfill()` are used because `fillna(method=...)` is deprecated in pandas.
fill_DOT = df_measurements_aggregated["DOT"].ffill().bfill()

In [None]:
# Target series of the train / validation / test experiments, exactly as stored
# in the archive (gaps are filled in a later cell).
train_set, valid_set, test_set = (
    data[run_id][experiment_id]["measurements_aggregated"][target]
    for experiment_id in (experiment_id_1, experiment_id_2, experiment_id_3)
)

# Matching cumulated glucose feed volume series for the same three experiments.
train_control, valid_control, test_control = (
    data[run_id][experiment_id]["measurements_aggregated"][
        "Cumulated_feed_volume_glucose"
    ]
    for experiment_id in (experiment_id_1, experiment_id_2, experiment_id_3)
)

In [None]:
# print(train_set)
# print(valid_set)
# print(test_set)

In [None]:
# Gap-free copies of the target series for the three experiments: forward-fill
# first, then back-fill anything still missing at the start of a series.
# `ffill()` / `bfill()` are used because `fillna(method=...)` is deprecated in pandas.
train_set_fill, valid_set_fill, test_set_fill = (
    data[run_id][experiment_id]["measurements_aggregated"][target].ffill().bfill()
    for experiment_id in (experiment_id_1, experiment_id_2, experiment_id_3)
)

In [None]:
# %matplotlib inline
# plt.figure(figsize=(20,4))
# plt.grid(True)
# plt.plot(test_set_fill, marker="o", markersize=4, color='g')
# plt.style.use('default')
# plt.title("DOT")
# plt.show()

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position signals to a sequence-first tensor.

    Args:
        d_model: Size of the encoding (last) dimension. Odd sizes are supported.
        max_len: Number of positions for which an encoding is precomputed.
    """

    def __init__(self, d_model, max_len=1000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # NOTE(review): the frequency base is 1000 here, whereas the formulation
        # in "Attention Is All You Need" uses 10000. Kept unchanged so the
        # encoding stays the same - confirm this is intentional.
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(1000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one cosine column fewer than sine columns,
        # so only the first d_model // 2 frequencies are used for the cosines.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # (max_len, d_model) -> (max_len, 1, d_model): broadcasts over the batch axis.
        pe = pe.unsqueeze(1)
        # Registered as a buffer: moves with the module but is not a trainable parameter.
        self.register_buffer("pe", pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions.

        ``x`` is indexed sequence-first; its trailing dimensions must broadcast
        against ``(seq_len, 1, d_model)``.
        """
        return x + self.pe[: x.size(0), :]


class Transformer_TS(nn.Module):
    """Transformer encoder followed by a 1-D U-Net and a linear read-out.

    The input is sequence-first, ``(seq_len, batch, features)``, where the
    feature axis must broadcast against ``nhidden`` (the positional encoding
    is added by broadcasting). After the encoder, the sequence axis is used
    as the channel axis of the U-Net, so ``seq_len`` must equal
    ``in_channels``. The result has shape ``(out_channels, batch, 1)``.

    Args:
        nhidden: Model width of the encoder, and the length axis of the U-Net.
            The four stride-2 poolings and four stride-2 transposed
            convolutions only line up for the skip concatenations when this
            is divisible by 16.
        num_layers: Number of encoder layers.
        nhead: Number of attention heads.
        dropout: Dropout rate inside the encoder layer.
        in_channels: Channel count entering the U-Net (the input sequence length).
        out_channels: Channel count leaving the U-Net (the forecast length).
    """

    def __init__(
        self,
        nhidden=1024,
        num_layers=1,
        nhead=8,
        dropout=0.1,
        in_channels=input_window,
        out_channels=output_window,
    ):
        super().__init__()
        self.model_type = "Transformer for TS"

        # Cached causal attention mask, rebuilt in forward() when stale.
        self.src_mask = None

        # The sub-modules are created in a fixed order on purpose: with a
        # seeded RNG the creation order determines the initial weights.
        self.pos_encoder = PositionalEncoding(nhidden)

        # NOTE(review): kept as an attribute so existing state_dict keys stay
        # valid; presumably the encoder below works on its own copy - confirm.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=nhidden, nhead=nhead, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_layers
        )
        self.decoder = nn.Linear(nhidden, 1)
        self.init_weights()

        # Contracting path: each block halves the length axis.
        self.conv1 = self.contract_block(in_channels, 48, 3, 1)
        self.conv2 = self.contract_block(48, 96, 3, 1)
        self.conv3 = self.contract_block(96, 192, 3, 1)
        self.conv4 = self.contract_block(192, 384, 3, 1)

        # Expanding path: each block doubles the length axis; inputs after the
        # first are the previous output concatenated with the matching skip.
        self.upconv4 = self.expand_block(384, 192, 3, 1)
        self.upconv3 = self.expand_block(192 * 2, 96, 3, 1)
        self.upconv2 = self.expand_block(96 * 2, 48, 3, 1)
        self.upconv1 = self.expand_block(48 * 2, out_channels, 3, 1)

    def init_weights(self):
        """Zero the read-out bias and draw its weights uniformly from [-0.1, 0.1]."""
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Run encoder, U-Net and read-out on ``src`` (sequence-first)."""
        seq_len = len(src)
        # Rebuild the cached mask when the sequence length or the device changed.
        if (
            self.src_mask is None
            or self.src_mask.size(0) != seq_len
            or self.src_mask.device != src.device
        ):
            self.src_mask = self._generate_square_subsequent_mask(seq_len).to(
                src.device
            )

        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)

        # u-net block
        # (seq, batch, hidden) -> (batch, seq, hidden). This must be an axis
        # swap: a reshape to the same target shape only reinterprets memory and
        # mixes values from different samples as soon as batch > 1.
        output = output.transpose(0, 1)

        conv1 = self.conv1(output)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)

        upconv4 = self.upconv4(conv4)
        upconv3 = self.upconv3(torch.cat([upconv4, conv3], 1))
        upconv2 = self.upconv2(torch.cat([upconv3, conv2], 1))
        upconv1 = self.upconv1(torch.cat([upconv2, conv1], 1))

        # (batch, out_channels, hidden) -> (out_channels, batch, hidden)
        upconv1 = upconv1.transpose(0, 1)
        # u-net block

        output = self.decoder(upconv1)
        return output

    def _generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0 on/below the diagonal, -inf above it."""
        return torch.triu(torch.full((sz, sz), float("-inf")), diagonal=1)

    def contract_block(self, in_channels, out_channels, kernel_size, padding):
        """Build a down-sampling stage: two convolutions, stride-2 max-pool, dropout.

        NOTE(review): there is no normalisation or non-linearity between the
        convolutions (BatchNorm1d / ReLU are disabled in this model).
        """
        contract = nn.Sequential(
            torch.nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
            ),
            torch.nn.Conv1d(
                out_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
            ),
            torch.nn.MaxPool1d(kernel_size=3, stride=2, padding=1),
            torch.nn.Dropout(p=0.1),
        )

        return contract

    def expand_block(self, in_channels, out_channels, kernel_size, padding):
        """Build an up-sampling stage: two convolutions, stride-2 transposed conv, dropout.

        NOTE(review): there is no normalisation or non-linearity between the
        convolutions (BatchNorm1d / ReLU are disabled in this model).
        """
        expand = nn.Sequential(
            torch.nn.Conv1d(
                in_channels, out_channels, kernel_size, stride=1, padding=padding
            ),
            torch.nn.Conv1d(
                out_channels, out_channels, kernel_size, stride=1, padding=padding
            ),
            torch.nn.ConvTranspose1d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1,
            ),
            torch.nn.Dropout(p=0.1),
        )
        return expand

In [None]:
def create_inout_sequences(input_data, input_window, horizon=None):
    """Slice a 1-D series into overlapping (input, label) training windows.

    For every start index ``i`` in ``range(len(input_data) - input_window)``
    the label is ``input_data[i : i + input_window]`` and the input is the same
    window with its last ``horizon`` values replaced by zeros.

    Args:
        input_data: 1-D sequence of numbers (array, list or Series).
        input_window: Length of each window.
        horizon: Number of trailing positions to blank out in the input.
            Defaults to the module-level ``output_window``.

    Returns:
        A float32 tensor of shape ``(n_windows, 2, input_window)`` where index 0
        on the middle axis is the input and index 1 is the label. It has zero
        windows when the series is not longer than ``input_window``.

    Raises:
        ValueError: If ``horizon`` is negative or larger than ``input_window``.
    """
    if horizon is None:
        horizon = output_window
    if not 0 <= horizon <= input_window:
        raise ValueError("horizon must be between 0 and input_window")

    values = np.asarray(input_data, dtype=np.float32)
    n_windows = len(values) - input_window
    if n_windows <= 0:
        return torch.empty((0, 2, input_window), dtype=torch.float32)

    # Assemble everything in one NumPy array first: handing a Python list of
    # small arrays to the tensor constructor is very slow and triggers a warning.
    labels = np.stack([values[i : i + input_window] for i in range(n_windows)])
    inputs = labels.copy()
    inputs[:, input_window - horizon :] = 0.0
    return torch.from_numpy(np.stack([inputs, labels], axis=1))

In [None]:
def get_data(dataset=data_file, target=target):
    """Scale the three gap-filled series and cut them into windowed tensors.

    The module-level scaler is fitted on the training series only and then
    applied unchanged to the validation and test series. Each series is
    windowed with ``create_inout_sequences``, its last ``output_window``
    windows are dropped, and the result is moved to ``device``.

    Note: ``dataset`` and ``target`` are accepted but not used; the data comes
    from the module-level ``*_set_fill`` series.

    Returns:
        Tuple ``(train, valid, test)`` of window tensors.
    """

    def _as_column(series):
        # The scaler expects a 2-D array with a single feature column.
        return series.to_numpy().reshape(-1, 1)

    def _to_windows(scaled_values):
        windows = create_inout_sequences(scaled_values, input_window)
        return windows[:-output_window].to(device)

    train_scaled = scaler.fit_transform(_as_column(train_set_fill)).reshape(-1)
    valid_scaled = scaler.transform(_as_column(valid_set_fill)).reshape(-1)
    test_scaled = scaler.transform(_as_column(test_set_fill)).reshape(-1)

    return (
        _to_windows(train_scaled),
        _to_windows(valid_scaled),
        _to_windows(test_scaled),
    )

In [None]:
def get_batch(source, i, batch_size, feature_size=feature_size):
    """Return one batch of windows from ``source`` in sequence-first layout.

    Takes up to ``batch_size`` windows starting at index ``i``; the final
    element of ``source`` is never included. Inputs (slot 0 of each window)
    and labels (slot 1) are each stacked and then split into ``input_window``
    chunks that are stacked along a new leading axis.

    NOTE(review): ``feature_size`` is passed as the ``dim`` argument of
    ``chunk``, so this presumably only behaves as intended when it is 1.

    Returns:
        Tuple ``(observation, target)``.
    """
    remaining = len(source) - 1 - i
    window_slice = source[i : i + min(batch_size, remaining)]

    def _sequence_first(slot):
        rows = torch.stack([pair[slot] for pair in window_slice])
        return torch.stack(rows.chunk(input_window, feature_size))

    return _sequence_first(0), _sequence_first(1)

In [None]:
def train(train_data):
    """Run one optimisation pass over ``train_data`` and log progress.

    Uses the module-level ``model``, ``optimizer``, ``scheduler``,
    ``criterion``, ``batch_size`` and ``feature_size``, and reads ``epoch`` /
    ``epochs`` for the log line. Gradients are clipped to a norm of 0.5
    before every optimizer step.

    Args:
        train_data: Tensor of training windows as produced by ``get_data``.
    """
    model.train()  # Turn on the train mode
    total_loss = 0.0
    start_time = time.time()

    n_batches = len(train_data) // batch_size
    # Log about twice per pass. Clamped to 1 because very small datasets would
    # otherwise give an interval of 0 and a modulo-by-zero below.
    log_interval = max(1, int(len(train_data) / batch_size / 2))

    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
        data, targets = get_batch(train_data, i, batch_size, feature_size)
        optimizer.zero_grad()
        output = model(data)

        if calculate_loss_over_all_values:
            loss = criterion(output, targets)
        else:
            # Score only the trailing, predicted positions.
            loss = criterion(output[-output_window:], targets[-output_window:])

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print(
                "| epoch {:3d}/{} | {:5d}/{:5d} batches | "
                "lr {:02.10f} | {:6.2f} ms | "
                "loss {:5.5f}".format(
                    epoch,
                    epochs,
                    batch,
                    n_batches,
                    scheduler.get_last_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss,
                )
            )
            # Restart the running statistics for the next logging window.
            total_loss = 0
            start_time = time.time()

In [None]:
def evaluate(eval_model, data_source):
    """Evaluate ``eval_model`` on ``data_source`` one window at a time.

    Args:
        eval_model: Model to evaluate; it is switched to eval mode.
        data_source: Tensor of windows as produced by ``get_data``.

    Returns:
        Tuple ``(mean_loss, outputs)``: the sample-weighted mean of the
        per-batch losses over the windows that were actually evaluated, and
        the list of raw model outputs (one per batch). ``mean_loss`` is NaN
        when no window could be evaluated.
    """
    eval_model.eval()
    total_loss = 0.0
    n_samples = 0
    eval_batch_size = 1
    outputs = []
    with torch.no_grad():
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i, eval_batch_size, feature_size)
            output = eval_model(data)
            outputs.append(output)
            if calculate_loss_over_all_values:
                batch_loss = criterion(output, targets)
            else:
                # Score only the trailing, predicted positions.
                batch_loss = criterion(
                    output[-output_window:], targets[-output_window:]
                )
            samples_in_batch = len(data[0])
            total_loss += samples_in_batch * batch_loss.cpu().item()
            n_samples += samples_in_batch

    # Normalise by the number of samples that entered the sum: the loop skips
    # the final element, so dividing by len(data_source) would understate the mean.
    if n_samples == 0:
        return float("nan"), outputs
    return total_loss / n_samples, outputs

In [None]:
# Windowed tensors for the three splits (already on `device`).
train_data, val_data, test_data = get_data()

model = Transformer_TS().to(device)

lr = start_lr
lrs = []

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# Multiply the learning rate by `scheduler_gamma` every 2 epochs.
# `step_size` is documented as an integer number of epochs, so pass an int.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=2, gamma=scheduler_gamma
)
# scheduler1 = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
# scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

best_val_loss = float("inf")
future_horizon = output_window

In [None]:
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(train_data)

    # Evaluate on every epoch so the loss logged below always belongs to the
    # current epoch. (Every fifth epoch used to be reserved for the
    # plot_and_loss / predict_future helpers, which are disabled; on those
    # epochs no loss was computed and the previous value was reported again.)
    val_loss, _ = evaluate(model, val_data)

    print("-" * 100)

    print(
        "| end of epoch {:3d}/{} | time: {:5.2f}s | valid loss {:5.5f}".format(
            epoch, epochs, (time.time() - epoch_start_time), val_loss
        )
    )
    print("-" * 100)
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

# Final score on the held-out test experiment; the raw outputs are kept for plotting.
print("evaluate test_data:")
rmse, predicted_test = evaluate(model, test_data)
print(rmse)

In [None]:
def convert_to_np_array(predicted_test):
    """Collect the ``[0][0][0]`` element of every prediction into a column vector.

    Args:
        predicted_test: Sequence of tensors with at least three dimensions.

    Returns:
        NumPy array of shape ``(len(predicted_test), 1)``.
    """
    first_values = [
        prediction[0][0][0].cpu().item() for prediction in predicted_test
    ]
    return np.array(first_values).reshape(-1, 1)

In [None]:
# Turn the list of per-window output tensors into an (n, 1) NumPy column.
# The guard keeps this cell safe to re-run: after the first run the name
# already holds the array, and converting it a second time would fail.
if not isinstance(predicted_test, np.ndarray):
    predicted_test = convert_to_np_array(predicted_test)

In [None]:
# Undo the scaling so the predictions are back in the units of the target series.
inversed = scaler.inverse_transform(predicted_test)

In [None]:
# print(inversed)
# Sanity check: the inverse transform must not change the number of predictions.
print(len(inversed))
print(len(predicted_test))

In [None]:
# Ground truth vs. predictions for the held-out test experiment.
# Select the style before the figure exists so it applies to the whole figure.
plt.style.use("default")
fig, ax = plt.subplots(figsize=(20, 4))
ax.grid(True)
ax.set_title(target)
ax.plot(
    test_set_fill, marker="o", markersize=4, color="g", label="groundtruth values"
)

# Window i covers positions i .. i + input_window - 1 and its first forecast
# position is i + input_window - output_window, so the predictions have to be
# drawn at that offset rather than starting at the beginning of the series.
first_pred = input_window - output_window
pred_index = test_set_fill.index[first_pred : first_pred + len(inversed)]
ax.plot(
    pred_index, inversed, marker="o", markersize=4, color="b", label="predicted values"
)
ax.legend(loc="best")

# Create the output folder on demand; saving fails if it does not exist.
out_path = Path(
    "graph/run_id_{},experiment_id_{},target_{}.png".format(
        run_id, experiment_id_3, target
    )
)
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)
plt.show()