
Ref:

*   [TPU Graphs](https://arxiv.org/pdf/2308.13490.pdf)
*   [GraphSAGE](https://arxiv.org/pdf/1706.02216.pdf)
*   [Ranked List Loss for Deep Metric Learning](https://arxiv.org/pdf/1903.03238.pdf)


TODO:

*   split dataset to make upload faster
*   test validation


Notes:

*   Training simple model with MSE loss:
    *   need hyperparamter search
    *   why does the loss spice at the beginning of each epoch (batches are randomized)
    *   oberservations: seems that the smaller models just learn some average absolute value, but not really a ranking
    * probably model would have to be huge to rank correctly

*   Training simple model with ranking loss:
    *   Ranked List Loss
    *   Extract the smallest k times

*   Abandoning simple model:
    *   Replicate TPU paper:
        *   SageGraphs
        *   ResGCN

## Dependencies


In [None]:
!pip install torch-geometric
!pip install einops

Collecting torch-geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/661.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.6/661.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910454 sha256=8258eda7ad520df7d7f371dcf8e5b309909c3f14dfbea057dc91c2140d80a933
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308

In [None]:
import os
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn

from torch_geometric import nn as gnn

from torch.nn import Linear, ReLU, Dropout
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from einops import reduce, repeat, rearrange

from torch.utils.data import Dataset, DataLoader

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Load Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import shutil
shutil.unpack_archive("/content/drive/MyDrive/google-tpu/predict-ai-model-runtime.zip", "/content/data")

In [None]:
splits = ["train", "valid", "test"]

nlp_default = '/content/data/npz_all/npz/layout/nlp/default'
nlp_random = '/content/data/npz_all/npz/layout/nlp/random'
xla_default = '/content/data/npz_all/npz/layout/xla/default'
xla_random = '/content/data/npz_all/npz/layout/xla/random'

xla_tile = '/content/data/npz_all/npz/tile/xla'

In [None]:
def load_data_to_df(directory, split):

    path = os.path.join(directory, split)
    files = [os.path.join(path, file) for file in os.listdir(path)]
    data_list = []
    for file in tqdm(files):
        data = dict(np.load(file))
        data_list.append(data)

    return pd.DataFrame(data_list)

### geometric dataloader

In [None]:
from torch_geometric.data import Dataset, Data
class RuntimeDataset(Dataset):

    def __init__(self, dataset):
            super().__init__()
            self.dataset = dataset

    def len(self):
        return len(self.dataset)

    def get(self, index):
        data_row = self.dataset.loc[index]
        normalized_runtime = torch.tensor(data_row['config_runtime'] / data_row['config_runtime_normalizers'], dtype=torch.float32)
        return Data(
          node_feat=torch.tensor(data_row['node_feat'], dtype=torch.float32),
          edge_index=torch.tensor(data_row['edge_index'], dtype=torch.long).t().contiguous(),
          node_opcode=torch.tensor(data_row['node_opcode'], dtype=torch.int32),
          config_feat=torch.tensor(data_row['config_feat'], dtype=torch.float32),
          y=normalized_runtime, # TODO: rename
          number_configs=torch.tensor([len(data_row['config_feat'])]) # needed to match config_feat to the corresponding graph in the batch
        )

In [None]:
from torch_geometric.loader import DataLoader

def runtime_data_loader(dataset: RuntimeDataset, batch_size=32, shuffle=True):
    '''
       dataset: RuntimeDataset, containing data
    '''

    data_loader = DataLoader(dataset, batch_size, shuffle=True)
    return data_loader

## Model

In [118]:
from torch_geometric.data import Batch
class GNN(nn.Module):
    def __init__(self, len_opcode_embedd, hidden_dim, output_dim, num_layers):
        super(GNN, self).__init__()

        self.len_opcode = len_opcode_embedd
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        # Embeddings
        self.embedding_layer = nn.Embedding(num_embeddings = 120, embedding_dim=len_opcode_embedd)

        #GNN
        input_dim = len_opcode_embedd + 140
        layers = nn.ModuleList()
        layers.append(GCNConv(input_dim, hidden_dim))
        for _ in range(num_layers - 2): # TODO list comp, ReLU
            layers.append(GCNConv(hidden_dim, hidden_dim))
        layers.append(GCNConv(hidden_dim, output_dim))

        self.conv = layers

        # Linear
        self.linear = nn.Sequential(
            nn.Linear(output_dim + 24, 48),
            nn.ReLU(),
            nn.Linear(48, 48),
            nn.ReLU(),
            nn.Linear(48, 1)
        )

    def forward(self, data):

        opcode_embedd = self.embedding_layer(data['node_opcode']) # (n,) -> (n,len_opcode_embedd)

        x = torch.cat((opcode_embedd, data['node_feat']), dim=1) # [(n, len_opcode_embedd), (n,140)] -> (n, len_opcode_embedd + 140)

        for layer in self.conv:
          x = layer(x, data['edge_index'])
          x = torch.relu(x)

        # differ two cases:
        # 1) batched data used for training
        # 2) single graph Data object used for inference

        if isinstance(data, Batch):

          '''
          The geometric data loader will take batch_size number of graphs. Then it will take all nodes in all these graphs and fuse them together into one graph.
          On this fuesed graph it will perform the convolution to calculate all the node embeddings at once.
          To apply the linear layer we have to seperate out all the graphs out of the batch again.
          '''

          # tensor used to store config predictions for each graph
          configs = torch.empty(0,1).to(device)

          # used to retrieve the config_feat tensors for each graph
          total = 0

          for graph_ind in range(data.num_graphs):

            # using a mask to gather all nodes that belong to the graph_ind-th graph
            node_indices = (data.batch == graph_ind).nonzero(as_tuple=True)[0]
            # first dimension is the number of nodes in the graph with index graph_ind, second dimensions is the feature dimension of the convolution (n, output_dim)
            graph_nodes = x[node_indices]

            # reduce node embeedings to get a graph embedding
            temp = reduce(graph_nodes, 'n f -> f', 'mean')

            # number of configurations for the graph_ind-th graph
            c = data.number_configs[graph_ind]

            # config_feat for the graph_ind-th graph
            graph_config_feat = data.config_feat[total:total+c]

            total += c

            # concatinating graph embedding with config_feat
            temp = repeat(temp, 'f -> r f', r=c)
            temp = torch.cat((temp, graph_config_feat), dim=1)


            # apply linear layer to tensor with shape (c, output_dim+24)
            temp = self.linear(temp)

            # add calculated runtimes to configs
            configs = torch.cat((configs, temp), dim=0)

          configs = rearrange(configs, 'f 1 -> f')

          return configs

        # only working with one graph object

        else:

          x = reduce(x, 'n f -> f', 'mean') # (n, output_dim) -> (output_dim, )

          x = repeat(x, 'f -> r f', r=len(data['config_feat'])) # (output_dim,) -> (c, output_dim)

          x = torch.cat((x, data['config_feat']), dim=1) # [(x, output_dim), (c,24)] -> (c, output_dim + 24)

          x = self.linear(x)

          x = rearrange(x, 'f 1 -> f')

        return x

In [None]:
class GNN_bugged(nn.Module):
    def __init__(self, len_opcode_embedd, hidden_dim, output_dim, num_layers):
        super(GNN_bugged, self).__init__()
        # Embeddings
        self.embedding_layer = nn.Embedding(num_embeddings = 120, embedding_dim=len_opcode_embedd)

        #GNN
        input_dim = len_opcode_embedd + 140
        layers = []
        layers.append(GCNConv(input_dim, hidden_dim))
        for _ in range(num_layers - 2):
            layers.append(GCNConv(hidden_dim, hidden_dim))
        layers.append(GCNConv(hidden_dim, output_dim))
        self.conv = nn.Sequential(*layers)

        # Linear
        self.linear = nn.Sequential(
            nn.Linear(output_dim + 24, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, data):
        opcode_embedd = self.embedding_layer(data['node_opcode']) # (n,) -> (n,len_opcode_embedd)

        x = torch.cat((opcode_embedd, data['node_feat']), dim=1) # [(n, len_opcode_embedd), (n,140)] -> (n, len_opcode_embedd + 140)

        for layer in self.conv:
          x = layer(x, data['edge_index'])
          x = torch.relu(x)

        x = reduce(x, 'n f -> f', 'mean') # (n, output_dim) -> (outputdim, )

        x = repeat(x, 'f -> r f', r=len(data['config_feat'])) # (output_dim,) -> (c, output_dim)

        x = torch.cat((x, data['config_feat']), dim=1) # [(x, output_dim), (c,24)] -> (c, output_dim + 24)

        x = self.linear(x)

        x = rearrange(x, 'b 1 -> b')

        return x

In [None]:
class dummy_model(nn.Module):
  def __init__(self):
    super(dummy_model, self).__init__()
    self.linear = nn.Sequential(
            nn.Linear(24, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
  def forward(self, x):
      x = self.linear(x)
      x = rearrange(x, 'f 1 -> f')
      return x

In [None]:
class testing(nn.Module):
  def __init__(self, prob: float):
    super(testing, self).__init__()
    self.prob = prob

  def forward(self, data):
    return data.y


## Train Loop

In [151]:
def MSE_training(model: GNN, dataloader: DataLoader, epochs: int, lr=0.01):
    '''
    Training model using MSE
    '''

    print(type(model))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    #optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)


    loss_fn = torch.nn.MSELoss()

    model.to(device)
    model.train()

    for epoch in tqdm(range(epochs)):
        for batch, data in enumerate(dataloader):
            data = data.to(device)
            optimizer.zero_grad()
            x_pred = model(data)
            #print(f"shapes: {x_pred.shape}, {data['y'].shape}")
            loss = loss_fn(x_pred, data['y'])/len(data['y'])
            loss.backward()
            optimizer.step()
            #if batch % 200 == 0:
            #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, batch+1, len(dataloader), loss.item()))

In [147]:
def no_batch_training(model, dataset, epochs, lr=0.01):
  '''
  Training model using MSE
  '''

  print(type(model))

  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
  #optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)


  loss_fn = torch.nn.MSELoss()

  model.to(device)
  model.train()
  for epoch in tqdm(range(epochs)):
      for num, graph in enumerate(dataset):
          graph = graph.to(device)
          optimizer.zero_grad()
          x_pred = model(graph)
          #print(f"shapes: {x_pred.shape}, {data['y'].shape}")
          loss = loss_fn(x_pred, graph['y'])/len(graph['y'])
          loss.backward()
          optimizer.step()
          #if num % 500 == 0:
          #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, num+1, len(dataset), loss.item()))

## Validation

In [143]:
from torch_geometric.data import Data

def validate_model(model, dataset):


    # Make predictions
    predictions = []
    model.to(device)
    model.eval()

    for tile in tqdm(dataset):
        tile.to(device)
        out = model(tile)
        predictions.append(torch.sort(out).indices)

    # Calculate score
    score = 0.0

    for i, tile in tqdm(enumerate(dataset), total=len(dataset)):
        best_prediction = min([dataset[i]["y"][pred_ind] for pred_ind in predictions[i][:5]])
        best_total = min(dataset[i]["y"])
        score += 2.0 - best_prediction / best_total

    avg_score = score / len(dataset)
    print("Score:", avg_score)
    return avg_score

## Testing

### data_loader


In [135]:
df_train = load_data_to_df(xla_tile, "train")
df_valid = load_data_to_df(xla_tile, "valid")

100%|██████████| 5709/5709 [00:13<00:00, 417.87it/s]
100%|██████████| 676/676 [00:01<00:00, 425.74it/s]


In [156]:
torch.manual_seed(42)
train_dataset = RuntimeDataset(df_train)
valid_dataset = RuntimeDataset(df_valid)
data_loader = runtime_data_loader(train_dataset, batch_size=64)
len(data_loader)

90

### gnn

In [157]:
torch.manual_seed(42)
model = GNN(len_opcode_embedd=12, hidden_dim=128, output_dim=64, num_layers=8)
MSE_training(model, data_loader, 20, lr=0.01)

<class '__main__.GNN'>


100%|██████████| 20/20 [06:40<00:00, 20.03s/it]


In [158]:
validate_model(model, train_dataset)

100%|██████████| 5709/5709 [00:42<00:00, 134.09it/s]
100%|██████████| 5709/5709 [00:59<00:00, 95.22it/s] 

Score: tensor(0.0900)





tensor(0.0900)

In [155]:
validate_model(model, valid_dataset)

100%|██████████| 676/676 [00:05<00:00, 130.62it/s]
100%|██████████| 676/676 [00:06<00:00, 106.80it/s]

Score: tensor(0.9728)





tensor(0.9728)

In [145]:
sum(p.numel() for p in model.parameters())

15089