In [1]:
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import tqdm

from data_preparation import *
from util import *

In [2]:
df = load_preprocessed_dataset(remove_duplicates=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1638 entries, 1909 to 768
Data columns (total 31 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   storageRegion        1638 non-null   object        
 1   storageProvider      1638 non-null   object        
 2   functionId           1638 non-null   object        
 3   functionName         1638 non-null   object        
 4   functionType         1638 non-null   object        
 5   RTT                  1638 non-null   float64       
 6   loopCounter          1638 non-null   float64       
 7   maxLoopCounter       1638 non-null   float64       
 8   startTime            1638 non-null   datetime64[ns]
 9   endTime              1638 non-null   datetime64[ns]
 10  upAll                1638 non-null   float64       
 11  downAll              1638 non-null   float64       
 12  numberDownloadFiles  1638 non-null   int64         
 13  sizeDownloadInMB     1638 non-null  

In [3]:
input_cols = get_function_related_cols() + get_storage_related_cols() + get_time_related_cols() + get_concurrency_related_cols()
output_col_rtt = 'RTT'
group_col = 'kFoldGroupEnc'

In [4]:
X_train, y_train, groups_train, X_test, y_test, _, df_test = train_test_split_with_criterion(
    lambda x: (x['wfType'] == 'bwa' and x['functionProvider'] == 'AWS'), df, input_cols, output_col_rtt, group_col)

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
def measure(model, plot=True,X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test):
    torch.manual_seed(0)
    np.random.seed(0)
    model.to(device=device)
    loss_fn = nn.MSELoss()  # mean square error
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    n_epochs = 500  # number of epochs to run
    batch_size = 10  # size of each batch
    batch_start = torch.arange(0, len(X_train), batch_size)

    # Hold the best model
    best_mse = np.inf  # init to infinity
    best_weights = None
    history = []
    torch_X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
    torch_y_train = torch.tensor(y_train, dtype=torch.float32, device=device).reshape(-1, 1)
    torch_X_test = torch.tensor(X_test, dtype=torch.float32, device=device)
    torch_y_test = torch.tensor(y_test, dtype=torch.float32, device=device).reshape(-1, 1)

    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = torch_X_train[start:start + batch_size]
                y_batch = torch_y_train[start:start + batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                # update weights
                optimizer.step()
                # print progress
                bar.set_postfix(mse=float(loss))
        # evaluate accuracy at end of each epoch
        model.eval()

        y_pred = model(torch_X_test)
        mse = loss_fn(y_pred, torch_y_test)
        mse = float(mse)
        history.append(mse)
        if mse < best_mse:
            best_mse = mse
            best_weights = copy.deepcopy(model.state_dict())

    # restore model and return best accuracy
    y_pred = model(torch_X_test)


    rmse = np.sqrt(s_m.mean_squared_error(torch_y_test.detach().cpu().numpy(),
                                  y_pred.detach().cpu()))

    mae = s_m.mean_absolute_error(torch_y_test.detach().cpu().numpy(), y_pred.detach().cpu().numpy())
    mape = s_m.mean_absolute_percentage_error(torch_y_test.detach().cpu().numpy(),
                                                            y_pred.detach().cpu().numpy())
    return rmse, mae, mape

In [6]:
class NN(nn.Module):

    def append_linear_layer_stack(self, in_size, out_size, activation, dropout):
        new_layer = nn.Sequential(nn.Linear(in_size, out_size),
                                  activation,
                                  nn.Dropout1d(0.1) if dropout else nn.Identity())
        self.layer_stack.append(new_layer)

    def __init__(self, activation, hidden_width=200, hidden_depth = 2, dropout=False, input_dim=len(input_cols)):
        super(NN, self).__init__()

        self.layer_stack = []

        self.append_linear_layer_stack(input_dim, hidden_width, activation, dropout)
        for _ in range(hidden_depth - 1):
            self.append_linear_layer_stack(hidden_width, hidden_width, activation, dropout)

        self.layer_stack.append(nn.Linear(hidden_width, 1))

        self.layers = nn.Sequential(*self.layer_stack)

    def forward(self, x):
        return self.layers.forward(x)


In [10]:
def eval_num_layers(num_layers=2):
    print(num_layers, " layers")
    for layer in [10, 50, 100, 500, 1000, 2000]:
        for activationName, activation in [("ReLU", nn.ReLU()), ("Sigmoid", nn.Sigmoid())]:
            for dropouts in [False, True]:
                rmse, mae, mape = measure(NN(activation, layer, num_layers, dropouts))
                method = activationName
                if dropouts:
                    method += " + dropouts"
                metric = "{rmse:.2f} & {mae:.2f} & {mape:.2f} \\\\"

                print(tuple([layer for i in range(num_layers)]), " & ", method + " & " , metric.format(rmse=rmse, mae=mae, mape=mape))
        print("\\midrule")

In [8]:
eval_num_layers(2)

2  layers
(10, 10)  &  ReLU &  23.77 & 21.90 & 3.77 \\
(10, 10)  &  ReLU + dropouts &  10.76 & 8.68 & 0.64 \\
(10, 10)  &  Sigmoid &  16.01 & 14.62 & 2.59 \\
(10, 10)  &  Sigmoid + dropouts &  15.24 & 13.78 & 2.47 \\
\midrule
(50, 50)  &  ReLU &  28.03 & 27.07 & 4.35 \\
(50, 50)  &  ReLU + dropouts &  21.32 & 20.02 & 3.40 \\
(50, 50)  &  Sigmoid &  30.57 & 29.85 & 4.66 \\
(50, 50)  &  Sigmoid + dropouts &  27.49 & 26.69 & 4.23 \\
\midrule
(100, 100)  &  ReLU &  28.04 & 27.12 & 4.35 \\
(100, 100)  &  ReLU + dropouts &  21.11 & 19.95 & 3.35 \\
(100, 100)  &  Sigmoid &  30.85 & 30.13 & 4.70 \\
(100, 100)  &  Sigmoid + dropouts &  27.61 & 26.81 & 4.25 \\
\midrule
(500, 500)  &  ReLU &  23.81 & 22.80 & 3.73 \\
(500, 500)  &  ReLU + dropouts &  12.27 & 10.61 & 2.02 \\
(500, 500)  &  Sigmoid &  30.91 & 30.20 & 4.71 \\
(500, 500)  &  Sigmoid + dropouts &  26.97 & 26.15 & 4.16 \\
\midrule
(1000, 1000)  &  ReLU &  21.69 & 20.66 & 3.42 \\
(1000, 1000)  &  ReLU + dropouts &  10.70 & 9.10 & 1.78 \\

In [11]:
eval_num_layers(3)

3  layers
(10, 10, 10)  &  ReLU &  28.03 & 26.74 & 4.41 \\
(10, 10, 10)  &  ReLU + dropouts &  18.45 & 16.68 & 3.00 \\
(10, 10, 10)  &  Sigmoid &  16.85 & 15.52 & 2.71 \\
(10, 10, 10)  &  Sigmoid + dropouts &  16.03 & 14.64 & 2.59 \\
\midrule
(50, 50, 50)  &  ReLU &  27.47 & 26.40 & 4.29 \\
(50, 50, 50)  &  ReLU + dropouts &  18.23 & 16.75 & 2.95 \\
(50, 50, 50)  &  Sigmoid &  30.55 & 29.83 & 4.66 \\
(50, 50, 50)  &  Sigmoid + dropouts &  27.54 & 26.74 & 4.24 \\
\midrule
(100, 100, 100)  &  ReLU &  27.22 & 26.13 & 4.26 \\
(100, 100, 100)  &  ReLU + dropouts &  17.78 & 16.27 & 2.88 \\
(100, 100, 100)  &  Sigmoid &  30.84 & 30.12 & 4.70 \\
(100, 100, 100)  &  Sigmoid + dropouts &  27.72 & 26.92 & 4.27 \\
\midrule
(500, 500, 500)  &  ReLU &  30.81 & 30.10 & 4.70 \\
(500, 500, 500)  &  ReLU + dropouts &  26.92 & 26.10 & 4.15 \\
(500, 500, 500)  &  Sigmoid &  30.90 & 30.19 & 4.71 \\
(500, 500, 500)  &  Sigmoid + dropouts &  27.06 & 26.25 & 4.17 \\
\midrule
(1000, 1000, 1000)  &  ReLU &  30.

In [13]:
eval_num_layers(4)

4  layers
(10, 10, 10, 10)  &  ReLU &  28.67 & 27.57 & 4.47 \\
(10, 10, 10, 10)  &  ReLU + dropouts &  15.44 & 13.60 & 2.54 \\
(10, 10, 10, 10)  &  Sigmoid &  16.24 & 14.87 & 2.62 \\
(10, 10, 10, 10)  &  Sigmoid + dropouts &  15.47 & 14.03 & 2.51 \\
\midrule
(50, 50, 50, 50)  &  ReLU &  28.35 & 27.46 & 4.39 \\
(50, 50, 50, 50)  &  ReLU + dropouts &  15.26 & 13.70 & 2.49 \\
(50, 50, 50, 50)  &  Sigmoid &  30.55 & 29.83 & 4.66 \\
(50, 50, 50, 50)  &  Sigmoid + dropouts &  27.50 & 26.70 & 4.24 \\
\midrule
(100, 100, 100, 100)  &  ReLU &  28.26 & 27.35 & 4.38 \\
(100, 100, 100, 100)  &  ReLU + dropouts &  14.90 & 13.32 & 2.43 \\
(100, 100, 100, 100)  &  Sigmoid &  30.84 & 30.12 & 4.70 \\
(100, 100, 100, 100)  &  Sigmoid + dropouts &  27.64 & 26.84 & 4.25 \\
\midrule
(500, 500, 500, 500)  &  ReLU &  30.61 & 29.89 & 4.67 \\
(500, 500, 500, 500)  &  ReLU + dropouts &  25.42 & 24.55 & 3.94 \\
(500, 500, 500, 500)  &  Sigmoid &  30.90 & 30.19 & 4.71 \\
(500, 500, 500, 500)  &  Sigmoid + dropout

In [12]:
eval_num_layers(1)

1  layers
(10,)  &  ReLU &  30.14 & 28.20 & 4.75 \\
(10,)  &  ReLU + dropouts &  24.38 & 21.98 & 3.90 \\
(10,)  &  Sigmoid &  6.61 & 5.61 & 0.92 \\
(10,)  &  Sigmoid + dropouts &  6.60 & 5.58 & 0.90 \\
\midrule
(50,)  &  ReLU &  10.59 & 8.92 & 0.85 \\
(50,)  &  ReLU + dropouts &  7.91 & 6.28 & 0.58 \\
(50,)  &  Sigmoid &  27.45 & 26.64 & 4.23 \\
(50,)  &  Sigmoid + dropouts &  24.66 & 23.76 & 3.83 \\
\midrule
(100,)  &  ReLU &  13.59 & 12.07 & 1.82 \\
(100,)  &  ReLU + dropouts &  9.68 & 7.53 & 1.00 \\
(100,)  &  Sigmoid &  30.32 & 29.59 & 4.63 \\
(100,)  &  Sigmoid + dropouts &  26.61 & 25.78 & 4.11 \\
\midrule
(500,)  &  ReLU &  36.88 & 29.67 & 4.69 \\
(500,)  &  ReLU + dropouts &  86.39 & 72.03 & 7.52 \\
(500,)  &  Sigmoid &  30.89 & 30.18 & 4.71 \\
(500,)  &  Sigmoid + dropouts &  26.76 & 25.93 & 4.13 \\
\midrule
(1000,)  &  ReLU &  22.48 & 18.80 & 3.05 \\
(1000,)  &  ReLU + dropouts &  26.28 & 21.97 & 3.92 \\
(1000,)  &  Sigmoid &  30.95 & 30.24 & 4.72 \\
(1000,)  &  Sigmoid + dro