<a href="https://colab.research.google.com/github/max-seeli/ai-model-runtime-prediction/blob/main/parameter_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dep

In [1]:
!pip install torch-geometric
!pip install einops

Collecting torch-geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/661.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.7/661.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910454 sha256=0365d232c27cc98333f37b4821be5dcc913c0ae5e7aeb691bcdfc2c0d8e9a9c6
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308

In [2]:
import shutil
import os
import csv

from google.colab import drive

from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn

from torch_geometric import nn as gnn

from torch.nn import Linear, ReLU, Dropout
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from einops import reduce, repeat, rearrange

from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import ParameterGrid

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Load Data

In [4]:
# unzipped archive in drive
splits = ["train", "valid", "test"]

layout_nlp_default = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/nlp/default'
layout_nlp_random = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/nlp/random'
layout_xla_default = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/xla/default'
layout_xla_random = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/xla/random'

tile_xla= '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/tile/xla'

In [5]:
def load_data_to_df(directory, split):

    path = os.path.join(directory, split)
    files = [os.path.join(path, file) for file in os.listdir(path)]
    data_list = []
    for file in tqdm(files):
        data = dict(np.load(file))
        data_list.append(data)

    return pd.DataFrame(data_list)

In [6]:
from torch_geometric.data import Dataset, Data
from typing import Literal

class RuntimeDataset(Dataset):

    def __init__(self, data, mode: Literal["tile", "layout", "layout_all"]):
            super().__init__()

            self.mode = mode

            if self.mode == "layout_all":

              '''
              Approach:
              For every graph add c number of graphs to the dataset where every one of these graphs has node features that are the concat node features.
              (nodes that don't have additional features are padded with zeros)
              This has implications for training: we are now predicting a scalar value for each graph configuration (can not really train with ranking loss anymore)
              '''

              self.dataset = pd.DataFrame(columns=['graph_id', 'node_feat', 'node_opcode', 'edge_index', 'config_runtime'])

              for index, row in data.iterrows():

                # tensors to large, not memory efficient enough

                c = row['config_runtime'].shape[0]

                node_feat = torch.tensor(row['node_feat'], dtype=torch.float32) # (n, 140)
                node_config_feat = torch.tensor(row['node_config_feat'], dtype=torch.float32) # (c, nc, 18)

                node_feat = repeat(node_feat, 'n f -> r n f', r = c) # repeats every node c times (c, n, 140)
                node = torch.zeros((c, node_feat.shape[1], 18)) # (c, n, 18)

                node = torch.cat((node_feat, node), dim=2)

                for j in range(node_config_feat.shape[1]):
                  node[:,j,18:] = node_config_feat[:,j,:]

                for i in range(c):

                  graph = {'graph_id': index,
                           'node_feat': node[i],
                           'node_opcode': row['node_opcode'],
                           'edge_index': row['edge_index'],
                           'config_runtime': row['config_runtime'][i]
                           }
                  self.dataset.append(graph, ignore_index=True)

            else:
              self.dataset = data
              self.mode = mode


    def len(self):
        return len(self.dataset)

    def get(self, index):
        data_row = self.dataset.loc[index]
        if self.mode == "tile":
          normalized_runtime = torch.tensor(data_row['config_runtime'] / data_row['config_runtime_normalizers'], dtype=torch.float32)
          return Data(
            node_feat=torch.tensor(data_row['node_feat'], dtype=torch.float32),
            edge_index=torch.tensor(data_row['edge_index'], dtype=torch.long).t().contiguous(),
            node_opcode=torch.tensor(data_row['node_opcode'], dtype=torch.int32),
            config_feat=torch.tensor(data_row['config_feat'], dtype=torch.float32),
            y=normalized_runtime, # TODO: rename
            number_configs=torch.tensor([len(data_row['config_runtime'])]) # needed to match config_feat to the corresponding graph in the batch
          )


        else:

          data_row = self.dataset.loc[index]

          return Data(
            graph_id = torch.tensor(data_row['graph_id'], dtype=torch.long),
            node_feat=data_row['node_feat'],
            edge_index=torch.tensor(data_row['edge_index'], dtype=torch.long).t().contiguous(),
            node_opcode=torch.tensor(data_row['node_opcode'], dtype=torch.int32),
            config_runtime=torch.tensor(data_row['config_runtime'], dtype=torch.float32),
            node_config_ids = torch.tensor(data_row['node_config_ids'], dtype=torch.long)
          )

In [7]:
from torch_geometric.loader import DataLoader

def runtime_data_loader(dataset: RuntimeDataset, batch_size=32, shuffle=True):
    '''
       dataset: RuntimeDataset, containing data
    '''

    data_loader = DataLoader(dataset, batch_size, shuffle=True)
    return data_loader

## Models

In [8]:
from torch_geometric.data import Batch
"""
TODO: add regularisation
      make the linear layer variable
      batch normalization
"""
class Tile_GNN(nn.Module):

    def __init__(self, len_opcode_embedd, hidden_dim_conv, output_dim_conv, num_layers_conv, dropout_conv,
                                          hidden_dim_lin, num_layers_lin, dropout_lin):
        super(Tile_GNN, self).__init__()

        self.len_opcode_embedd = len_opcode_embedd
        self.hidden_dim_conv = hidden_dim_conv
        self.output_dim_conv = output_dim_conv
        self.num_layers_conv = num_layers_conv
        self.dropout_conv = 0 if dropout_conv is None else dropout_conv

        self.hidden_dim_lin = hidden_dim_lin
        self.num_layers_lin = num_layers_lin
        self.dropout_lin = dropout_lin
        self.dropout_lin = 0 if dropout_lin is None else dropout_lin

        # Dropout
        self.conv_dropout = nn.Dropout(p=self.dropout_conv)
        self.lin_dropout = nn.Dropout(p=self.dropout_lin)
        # Embeddings
        self.embedding_layer = nn.Embedding(num_embeddings = 120, embedding_dim=self.len_opcode_embedd)

        # GNN
        input_dim = self.len_opcode_embedd + 140
        layers = nn.ModuleList()
        layers.append(GCNConv(input_dim, self.hidden_dim_conv))
        for _ in range(self.num_layers_conv - 2): # TODO list comp, ReLU
            layers.append(GCNConv(self.hidden_dim_conv, self.hidden_dim_conv))
        layers.append(GCNConv(self.hidden_dim_conv,self.output_dim_conv))

        self.conv = layers

        # Linear
        lin_layers = [nn.Linear(self.output_dim_conv + 24, self.hidden_dim_lin), nn.ReLU(), self.lin_dropout]
        for i in range(self.num_layers_lin - 2):
          lin_layers.append(nn.Linear(self.hidden_dim_lin, self.hidden_dim_lin))
          lin_layers.append(nn.ReLU())
          lin_layers.append(self.lin_dropout)
        lin_layers.append(nn.Linear(self.hidden_dim_lin, 1))

        self.linear = nn.Sequential(*lin_layers)

    def forward(self, data):

        opcode_embedd = self.embedding_layer(data['node_opcode']) # (n,) -> (n,len_opcode_embedd)

        x = torch.cat((opcode_embedd, data['node_feat']), dim=1) # [(n, len_opcode_embedd), (n,140)] -> (n, len_opcode_embedd + 140 + 18)

        for layer in self.conv:
          x = layer(x, data['edge_index'])
          x = torch.relu(x)
          x = self.conv_dropout(x)

        # differ two cases:
        # 1) batched data used for training
        # 2) single graph Data object used for inference

        if isinstance(data, Batch):

          '''
          The geometric data loader will take batch_size number of graphs. Then it will take all nodes in all these graphs and fuse them together into one graph.
          On this fuesed graph it will perform the convolution to calculate all the node embeddings at once.
          To apply the linear layer we have to seperate out all the graphs out of the batch again.
          '''

          # tensor used to store config predictions for each graph
          configs = torch.empty(0,1).to(device)

          # used to retrieve the config_feat tensors for each graph
          total = 0

          for graph_ind in range(data.num_graphs):

            # using a mask to gather all nodes that belong to the graph_ind-th graph
            node_indices = (data.batch == graph_ind).nonzero(as_tuple=True)[0]
            # first dimension is the number of nodes in the graph with index graph_ind, second dimensions is the feature dimension of the convolution (n, output_dim)
            graph_nodes = x[node_indices]

            # reduce node embeedings to get a graph embedding
            temp = reduce(graph_nodes, 'n f -> f', 'mean')

            # number of configurations for the graph_ind-th graph
            c = data.number_configs[graph_ind]

            # config_feat for the graph_ind-th graph
            graph_config_feat = data.config_feat[total:total+c]

            total += c

            # concatinating graph embedding with config_feat
            temp = repeat(temp, 'f -> r f', r=c)
            temp = torch.cat((temp, graph_config_feat), dim=1)


            # apply linear layer to tensor with shape (c, output_dim+24)
            temp = self.linear(temp)

            # add calculated runtimes to configs
            configs = torch.cat((configs, temp), dim=0)

          configs = rearrange(configs, 'f 1 -> f')

          return configs

        # only working with one graph object

        else:

          x = reduce(x, 'n f -> f', 'mean') # (n, output_dim) -> (output_dim, )

          x = repeat(x, 'f -> r f', r=len(data['config_feat'])) # (output_dim,) -> (c, output_dim)

          x = torch.cat((x, data['config_feat']), dim=1) # [(x, output_dim), (c,24)] -> (c, output_dim + 24)

          x = self.linear(x)

          x = rearrange(x, 'f 1 -> f')

        return x

## Training Loops

In [9]:
def MSE_training_tile(model: Tile_GNN, dataloader: DataLoader, epochs: int, lr=0.01):
    '''
    Training model using MSE
    '''

    print(type(model))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    #optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)


    loss_fn = torch.nn.MSELoss()

    model.to(device)
    model.train()

    for epoch in tqdm(range(epochs)):
        for batch, data in enumerate(dataloader):
            data = data.to(device)
            optimizer.zero_grad()
            x_pred = model(data)
            #print(f"shapes: {x_pred.shape}, {data['y'].shape}")
            loss = loss_fn(x_pred, data['y'])/len(data['y'])
            loss.backward()
            optimizer.step()
            #if batch % 200 == 0:
            #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, batch+1, len(dataloader), loss.item()))

In [10]:
# implementaion of ListNET Loss (see: https://arxiv.org/pdf/1911.09798v2.pdf)

def ListNet_training_tile(model: Tile_GNN, dataloader: DataLoader, epochs: int, lr=0.01):
    '''
    Training model using MSE
    '''

    print(type(model))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    #optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)


    model.to(device)
    model.train()

    for epoch in tqdm(range(epochs)):
        for batch, data in enumerate(dataloader):
            data = data.to(device)
            optimizer.zero_grad()
            x_pred = model(data)

            # predicted distribution, need to negate since lower ist better

            pred_distribution = F.softmax(-x_pred, dim=0)

            label_distribution = F.softmax(-data['y'], dim=0)

            listnet_loss = -torch.sum(label_distribution * torch.log(pred_distribution))

            listnet_loss.backward()
            optimizer.step()

## Validation

In [11]:
from torch_geometric.data import Data

def validate_model_tiles(model, dataset):


    # Make predictions
    predictions = []
    model.to(device)
    model.eval()

    for tile in tqdm(dataset):
        tile.to(device)
        out = model(tile)
        predictions.append(torch.sort(out).indices)

    # Calculate score
    score = 0.0

    for i, tile in tqdm(enumerate(dataset), total=len(dataset)):
        best_prediction = min([dataset[i]["y"][pred_ind] for pred_ind in predictions[i][:5]])
        best_total = min(dataset[i]["y"])
        score += 2.0 - best_prediction / best_total

    avg_score = score / len(dataset)
    print("Score:", avg_score)
    return avg_score

## Parameter Search

In [12]:
df_train = load_data_to_df(tile_xla, "train")
df_valid = load_data_to_df(tile_xla, "valid")

100%|██████████| 5709/5709 [02:34<00:00, 36.89it/s] 
100%|██████████| 673/673 [00:10<00:00, 62.50it/s] 


In [13]:
train_dataset = RuntimeDataset(df_train, mode="tile")
valid_dataset = RuntimeDataset(df_valid, mode="tile")
data_loader = runtime_data_loader(train_dataset, batch_size=16)
len(data_loader)

357

In [16]:
# lists of parameters to optimize for Tile model:
param_list = {
  'len_opcode_embedd': [12],
  'hidden_dim_conv': [64],
  'output_dim_conv': [32],
  'num_layers_conv': [5],
  'hidden_dim_lin': [20, 48],
  'num_layers_lin': [2,4,6],
  'optimizer': ['Adam'], # TODO implement
  'lr': [0.01],
  'dropout_conv': [0,0.3],
  'dropout_lin': [0,0.3],
  'loss_fn': ['MSE']
}

para_grid = ParameterGrid(param_list)

In [17]:
results = pd.DataFrame(columns=['score', *param_list.keys(), 'epochs', 'number of parameters', 'validation'])

In [None]:
EPOCHS = 40
VALIDATION = "split"

for i, params in enumerate(para_grid):
  # define model
  model = Tile_GNN(len_opcode_embedd=params['len_opcode_embedd'], hidden_dim_conv=params['hidden_dim_conv'], output_dim_conv=params['output_dim_conv'], num_layers_conv=params['num_layers_conv'], dropout_conv=params['dropout_conv'],
                   hidden_dim_lin=params['hidden_dim_lin'], num_layers_lin=params['num_layers_lin'], dropout_lin=params['dropout_lin'])

  num_para = sum(p.numel() for p in model.parameters())
  score = None

  # train model
  if params['loss_fn'] == 'MSE':
    MSE_training_tile(model, data_loader, epochs=EPOCHS, lr=params['lr'])
  elif params['loss_fn'] == 'ListNET':
    ListNet_training_tile(model, data_loader, epochs=EPOCHS, lr=params['lr'])

  if VALIDATION == 'split':
    score = validate_model_tiles(model, valid_dataset)

  elif VALIDATION == 'cross':
    pass

  res = {
    'len_opcode_embedd': params['len_opcode_embedd'],
    'hidden_dim_conv': params['hidden_dim_conv'],
    'output_dim_conv': params['output_dim_conv'],
    'num_layers_conv': params['num_layers_conv'],
    'hidden_dim_lin': params['hidden_dim_lin'],
    'num_layers_lin': params['num_layers_lin'],
    'optimizer': params['optimizer'],
    'lr': params['lr'],
    'dropout_conv': params['dropout_conv'],
    'dropout_lin': params['dropout_lin'],
    'loss_fn': params['loss_fn'],
    'score': score,
    'epochs': EPOCHS,
    'number of parameters': num_para,
    'validation': VALIDATION
  }
  print(res)
  results = pd.concat([results, pd.DataFrame([res])], ignore_index=True)
  print(f"[{i+1}]/[{len(para_grid)}] configs tested")


<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:14<00:00, 15.35s/it]
100%|██████████| 673/673 [00:03<00:00, 196.56it/s]
100%|██████████| 673/673 [00:06<00:00, 97.47it/s] 


Score: tensor(0.7717)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.7717), 'epochs': 40, 'number of parameters': 26953, 'validation': 'split'}
[1]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:18<00:00, 19.96s/it]
100%|██████████| 673/673 [00:03<00:00, 190.82it/s]
100%|██████████| 673/673 [00:06<00:00, 100.62it/s]


Score: tensor(-1.5095)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(-1.5095), 'epochs': 40, 'number of parameters': 27793, 'validation': 'split'}
[2]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [14:40<00:00, 22.01s/it]
100%|██████████| 673/673 [00:03<00:00, 180.66it/s]
100%|██████████| 673/673 [00:06<00:00, 96.17it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 28633, 'validation': 'split'}
[3]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:34<00:00, 15.85s/it]
100%|██████████| 673/673 [00:03<00:00, 196.61it/s]
100%|██████████| 673/673 [00:06<00:00, 98.41it/s] 


Score: tensor(0.6001)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.6001), 'epochs': 40, 'number of parameters': 28577, 'validation': 'split'}
[4]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:32<00:00, 20.31s/it]
100%|██████████| 673/673 [00:03<00:00, 189.00it/s]
100%|██████████| 673/673 [00:06<00:00, 97.14it/s] 


Score: tensor(0.7146)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.7146), 'epochs': 40, 'number of parameters': 33281, 'validation': 'split'}
[5]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [14:56<00:00, 22.42s/it]
100%|██████████| 673/673 [00:03<00:00, 181.52it/s]
100%|██████████| 673/673 [00:06<00:00, 96.26it/s] 


Score: tensor(0.8212)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.8212), 'epochs': 40, 'number of parameters': 37985, 'validation': 'split'}
[6]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:36<00:00, 15.92s/it]
100%|██████████| 673/673 [00:03<00:00, 194.64it/s]
100%|██████████| 673/673 [00:07<00:00, 95.35it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 26953, 'validation': 'split'}
[7]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:35<00:00, 20.39s/it]
100%|██████████| 673/673 [00:03<00:00, 192.08it/s]
100%|██████████| 673/673 [00:07<00:00, 94.01it/s] 


Score: tensor(0.6437)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.6437), 'epochs': 40, 'number of parameters': 27793, 'validation': 'split'}
[8]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [15:09<00:00, 22.73s/it]
100%|██████████| 673/673 [00:03<00:00, 185.57it/s]
100%|██████████| 673/673 [00:07<00:00, 95.74it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 28633, 'validation': 'split'}
[9]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:27<00:00, 15.70s/it]
100%|██████████| 673/673 [00:03<00:00, 197.98it/s]
100%|██████████| 673/673 [00:06<00:00, 96.71it/s] 


Score: tensor(0.0772)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.0772), 'epochs': 40, 'number of parameters': 28577, 'validation': 'split'}
[10]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:57<00:00, 20.94s/it]
100%|██████████| 673/673 [00:03<00:00, 191.58it/s]
100%|██████████| 673/673 [00:07<00:00, 95.99it/s] 


Score: tensor(0.4532)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.4532), 'epochs': 40, 'number of parameters': 33281, 'validation': 'split'}
[11]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [15:38<00:00, 23.46s/it]
100%|██████████| 673/673 [00:03<00:00, 182.00it/s]
100%|██████████| 673/673 [00:06<00:00, 97.20it/s] 


Score: tensor(0.7573)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.7573), 'epochs': 40, 'number of parameters': 37985, 'validation': 'split'}
[12]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:17<00:00, 15.44s/it]
100%|██████████| 673/673 [00:03<00:00, 195.49it/s]
100%|██████████| 673/673 [00:06<00:00, 96.53it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 26953, 'validation': 'split'}
[13]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [12:59<00:00, 19.50s/it]
100%|██████████| 673/673 [00:03<00:00, 190.23it/s]
100%|██████████| 673/673 [00:06<00:00, 98.10it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 27793, 'validation': 'split'}
[14]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [14:15<00:00, 21.40s/it]
100%|██████████| 673/673 [00:03<00:00, 181.78it/s]
100%|██████████| 673/673 [00:07<00:00, 91.98it/s]


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 28633, 'validation': 'split'}
[15]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:29<00:00, 15.75s/it]
100%|██████████| 673/673 [00:03<00:00, 193.75it/s]
100%|██████████| 673/673 [00:07<00:00, 95.81it/s] 


Score: tensor(0.6119)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.6119), 'epochs': 40, 'number of parameters': 28577, 'validation': 'split'}
[16]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:25<00:00, 20.14s/it]
100%|██████████| 673/673 [00:03<00:00, 186.15it/s]
100%|██████████| 673/673 [00:07<00:00, 94.07it/s] 


Score: tensor(0.6567)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.6567), 'epochs': 40, 'number of parameters': 33281, 'validation': 'split'}
[17]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [15:05<00:00, 22.65s/it]
100%|██████████| 673/673 [00:03<00:00, 180.93it/s]
100%|██████████| 673/673 [00:07<00:00, 91.87it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 37985, 'validation': 'split'}
[18]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:32<00:00, 15.81s/it]
100%|██████████| 673/673 [00:03<00:00, 198.44it/s]
100%|██████████| 673/673 [00:06<00:00, 96.79it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 26953, 'validation': 'split'}
[19]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [13:40<00:00, 20.52s/it]
100%|██████████| 673/673 [00:03<00:00, 189.60it/s]
100%|██████████| 673/673 [00:07<00:00, 95.23it/s] 


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 27793, 'validation': 'split'}
[20]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [15:14<00:00, 22.86s/it]
100%|██████████| 673/673 [00:03<00:00, 187.18it/s]
100%|██████████| 673/673 [00:06<00:00, 96.32it/s]


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 20, 'num_layers_lin': 6, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 28633, 'validation': 'split'}
[21]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [10:33<00:00, 15.84s/it]
100%|██████████| 673/673 [00:03<00:00, 204.39it/s]
100%|██████████| 673/673 [00:07<00:00, 94.98it/s]


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 2, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 28577, 'validation': 'split'}
[22]/[24] configs tested
<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [14:01<00:00, 21.04s/it]
100%|██████████| 673/673 [00:03<00:00, 192.08it/s]
100%|██████████| 673/673 [00:07<00:00, 95.72it/s]


Score: tensor(0.9728)
{'len_opcode_embedd': 12, 'hidden_dim_conv': 64, 'output_dim_conv': 32, 'num_layers_conv': 5, 'hidden_dim_lin': 48, 'num_layers_lin': 4, 'optimizer': 'Adam', 'lr': 0.01, 'dropout_conv': 0.3, 'dropout_lin': 0.3, 'loss_fn': 'MSE', 'score': tensor(0.9728), 'epochs': 40, 'number of parameters': 33281, 'validation': 'split'}
[23]/[24] configs tested
<class '__main__.Tile_GNN'>


 40%|████      | 16/40 [06:21<09:33, 23.88s/it]

In [None]:
results

In [None]:
from datetime import datetime
from pytz import timezone

cest = timezone('Europe/Berlin')
now = datetime.now(timezone('UTC'))
now_cest = now.astimezone(cest)
now_str = now_cest.strftime('%Y-%m-%d_%H:%M:%S')

results.to_csv('/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/tile_parameter_search/' + now_str, index=False)