In [None]:
!pip install torch-geometric

In [None]:
import os
import random
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torch_geometric as pyg
from torch_geometric import nn as gnn

from matplotlib import pyplot as plt
# Prefer the GPU when torch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Names of the per-split sub-directories found under every layout collection.
splits = ["train", "valid", "test"]

# One directory per (source, search strategy) pair, in the order nlp/default, nlp/random,
# xla/default, xla/random.
layout_nlp_default, layout_nlp_random, layout_xla_default, layout_xla_random = (
    f'/kaggle/input/predict-ai-model-runtime/npz_all/npz/layout/{source}/{search}'
    for source in ('nlp', 'xla')
    for search in ('default', 'random')
)

In [None]:
# Load each data sample from storage only when it is actually requested
class LayoutLazyDataset(Dataset):
    """Map-style dataset that reads one layout ``.npz`` file per item access.

    Only the file names are kept in memory; the arrays are read from disk on
    every ``__getitem__`` call.

    Args:
        path: Directory containing one sub-directory per split.
        split: Name of the sub-directory to read from.
    """

    def __init__(self, path, split):
        self.data_folder = os.path.join(path, split)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible between runs and machines.
        self.data_files = sorted(os.listdir(self.data_folder))

    def __len__(self):
        """Number of files found in the split directory."""
        return len(self.data_files)

    def __getitem__(self, index):
        """Read the ``index``-th file and return its arrays as torch tensors.

        Returns:
            dict with keys ``file`` (the file name), ``node_feat``,
            ``node_opcode`` (cast to int64), ``edge_index`` (the stored array
            with its two axes swapped), ``node_config_feat``,
            ``node_config_ids`` and ``y`` (``config_runtime`` cast to float32).
        """
        data = self.load_data(self.data_files[index])

        return {
            "file": data['file'],
            "node_feat": torch.from_numpy(data['node_feat']),
            "node_opcode": torch.from_numpy(data['node_opcode']).type(torch.int64),
            "edge_index": torch.from_numpy(data['edge_index']).permute(1, 0),
            "node_config_feat": torch.from_numpy(data['node_config_feat']),
            "node_config_ids": torch.from_numpy(data['node_config_ids']),
            "y": torch.from_numpy(data['config_runtime']).type(torch.float32)
        }

    def load_data(self, data_file):
        """Load every array stored in ``data_file`` into a plain dict.

        The archive is opened in a ``with`` block so its file handle is closed
        as soon as the arrays have been materialised, instead of staying open
        until the garbage collector reclaims the archive object.

        Returns:
            dict mapping each array name in the archive to its ndarray, plus a
            ``"file"`` entry holding ``data_file``.
        """
        with np.load(os.path.join(self.data_folder, data_file)) as npz_file:
            data = dict(npz_file)
        data["file"] = data_file
        return data


In [None]:
# Load every data sample into memory when the dataset is initialised
class LayoutDataset(Dataset):
    """Map-style dataset that eagerly loads all layout ``.npz`` files of a split.

    Args:
        path: Directory containing one sub-directory per split.
        split: Name of the sub-directory to read from.
        chunk_size: Maximum number of configurations kept per graph. Only the
            first ``chunk_size`` rows of ``node_config_feat`` and the matching
            first ``chunk_size`` entries of ``config_runtime`` are stored.
    """

    def __init__(self, path, split, chunk_size = 1000):
        self.layouts = self.load_data_to_df(path, split, chunk_size)

    # Declared static: the function takes no instance argument, yet __init__
    # calls it through ``self``. Without the decorator that call passes four
    # positional arguments to a three-parameter function and raises TypeError.
    @staticmethod
    def load_data_to_df(directory, split, chunk_size):
        """Read every file in ``directory/split`` into a DataFrame, one row per graph.

        Returns:
            pandas.DataFrame with columns ``file``, ``node_feat``,
            ``node_opcode``, ``edge_index``, ``node_config_feat``,
            ``node_config_ids`` and ``y``; all but ``file`` hold torch tensors.
        """
        path = os.path.join(directory, split)
        # Sorted so that row order does not depend on the file system's listing order.
        files = sorted(os.listdir(path))

        graphs = []
        for file in tqdm(files):
            file_path = os.path.join(path, file)
            with np.load(file_path) as np_file:
                graphs.append({
                    "file": file,
                    'node_feat': torch.tensor(np_file['node_feat']),
                    'node_opcode': torch.tensor(np_file['node_opcode']).type(torch.int64),
                    'edge_index': torch.tensor(np_file['edge_index']).permute(1, 0),
                    'node_config_feat': torch.tensor(np_file['node_config_feat'][:chunk_size]),
                    'node_config_ids': torch.tensor(np_file['node_config_ids']),
                    # Truncated exactly like node_config_feat so that runtime i
                    # still belongs to configuration i and both have equal length.
                    'y': torch.tensor(np_file['config_runtime'][:chunk_size]).type(torch.float32)
                })

        return pd.DataFrame(graphs)

    def __len__(self):
        """Number of graphs that were loaded."""
        return len(self.layouts)

    def __getitem__(self, idx):
        """Return the ``idx``-th graph as a dict keyed by column name."""
        return self.layouts.iloc[idx].to_dict()
    
    


In [None]:
class LayoutModel(nn.Module):
    """Predicts one runtime per layout configuration of a computation graph.

    Every node is represented by the concatenation of a learned opcode
    embedding, its static feature vector and a configuration slot. One graph
    convolution is applied, node outputs are mean-pooled per configuration and
    a small MLP maps the pooled vector to a scalar.

    Args:
        opcode_embedding_dim: Size of the learned opcode embedding.
        conv_hidden_channels: Currently unused (only referenced by the
            commented-out GraphSAGE variant); kept for interface stability.
        conv_num_layers: Currently unused, see ``conv_hidden_channels``.
        conv_out_dim: Output width of the graph convolution.
        num_opcodes: Number of rows in the opcode embedding table.
        node_feature_dim: Width of the static per-node feature vector.
        node_config_dim: Width of the per-node configuration feature vector.
    """

    def __init__(self, opcode_embedding_dim = 4, conv_hidden_channels = 120, conv_num_layers = 4, conv_out_dim = 48,
                 num_opcodes = 120, node_feature_dim = 140, node_config_dim = 18):
        super().__init__()

        # Remembered so forward() can size the per-node configuration slot.
        self.node_config_dim = node_config_dim

        self.opcode_embedding = nn.Embedding(num_opcodes, opcode_embedding_dim)

        node_rep_dim = opcode_embedding_dim + node_feature_dim + node_config_dim

        self.conv = gnn.GCNConv(node_rep_dim, conv_out_dim)

        # self.conv = gnn.GraphSAGE(node_rep_dim, conv_hidden_channels, conv_num_layers, conv_out_dim)

        hidden_dim = 48
        self.fwd = nn.Sequential(
            nn.Linear(conv_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, node_feat, node_opcode, edge_index, node_config_feat, node_config_ids):
        """
            Shapes:
                node_feat    - (n, node_feature_dim)
                node_opcode  - (n, )
                edge_index   - (2, m)
                node_config_feat  - (c, nc, node_config_dim)
                node_config_ids - (nc, )

            Approach:
                1. Opcode embeddings
                2. Concatenate embeddings and config to node feature-vector
                3. Convolutional layer for node embeddings
                4. Pooling for graph embedding
                5. Forward layer
                6. Flatten

            Returns:
                Tensor of shape (c, ) with one prediction per configuration.
        """
        n = node_feat.shape[0]
        c = node_config_feat.shape[0]
        k = self.node_config_dim

        node_opcode_embedding = self.opcode_embedding(node_opcode) # (n, opcode_embedding_dim)

        # Nodes without a configuration keep -1 in every config slot. The filler is
        # created on the inputs' own device and dtype so the module does not depend
        # on any notebook-level global.
        empty_configs = torch.full([n, k], -1.0, dtype=node_feat.dtype, device=node_feat.device)

        x = torch.cat([node_opcode_embedding, node_feat, empty_configs], dim=1) # (n, node_rep_dim)

        # One private copy of the node matrix per configuration.
        x = x.unsqueeze(0).repeat(c, 1, 1) # (c, n, node_rep_dim)

        # Write all configurations into the trailing k columns of the configurable
        # nodes with a single indexed assignment. Assumes node_config_ids holds no
        # duplicate ids (the write order for duplicates is not defined) - TODO confirm.
        x[:, node_config_ids.to(torch.int64), -k:] = node_config_feat.to(x.dtype)

        x = self.conv(x, edge_index) # (c, n, conv_out_dim)
        x = torch.mean(x, 1) # (c, conv_out_dim)
        x = self.fwd(x) # (c, 1)
        return torch.flatten(x) # (c, )
        
    
        
        

In [None]:
# Build a throwaway model and add up the element counts of all its parameter tensors.
print(
    "Total parameters in the model: "
    f"{sum(weight.numel() for weight in LayoutModel().parameters())}"
)

## Training

In [None]:
def node_config_selection(node_config_feat, config_runtime, n):
    """Pick up to ``n`` configurations at random, together with their runtimes.

    The candidate indices are shuffled with Python's ``random`` module, so the
    selection is controlled by ``random.seed``.

    Args:
        node_config_feat: Tensor indexed by configuration along its first axis
            (three axes are indexed here).
        config_runtime: Tensor whose first axis length is the number of
            available configurations.
        n: Upper bound on how many configurations to keep.

    Returns:
        Tuple ``(selected_node_config_feat, selected_config_runtime)`` taken at
        the same randomly chosen indices.
    """
    available = config_runtime.shape[0]

    shuffled = list(range(available))
    random.shuffle(shuffled)

    # Keep everything when fewer than n configurations exist.
    picked = shuffled[:min(available, n)]
    return node_config_feat[picked, :, :], config_runtime[picked]

In [None]:
def train_step(model, dataloader, criterion, opt, num_configs=100, log_grad_norms=True):
    """Run one training epoch and return the mean per-graph loss.

    Each item of ``dataloader`` is one graph. Element 0 along the first axis of
    every field is taken, i.e. the loader is expected to yield batches holding
    a single graph (the commented-out loader construction in this notebook uses
    ``batch_size = 1``).

    Args:
        model: Module called as ``model(node_feat, node_opcode, edge_index,
            node_config_feat, node_config_ids)``.
        dataloader: Iterable of dicts with keys ``node_feat``, ``node_opcode``,
            ``edge_index``, ``node_config_feat``, ``node_config_ids`` and ``y``.
        criterion: Loss called as ``criterion(pred, y)``.
        opt: Optimizer over the model's parameters.
        num_configs: Maximum number of randomly sampled configurations per
            graph used for one optimisation step.
        log_grad_norms: When true, print gradient-norm statistics after every
            backward pass (the historical behaviour of this function).

    Returns:
        Mean of the per-graph losses, or NaN when ``dataloader`` yields nothing.
    """
    model.train()

    input_keys = ("node_feat", "node_opcode", "edge_index", "node_config_feat", "node_config_ids")
    epoch_train_losses = []

    for layout in tqdm(dataloader):
        node_feat, node_opcode, edge_index, node_config_feat, node_config_ids = (
            layout[key][0].to(device) for key in input_keys
        )
        y = layout["y"][0].to(device)
        node_config_feat, y = node_config_selection(node_config_feat, y, num_configs)

        opt.zero_grad()
        pred = model(node_feat, node_opcode, edge_index, node_config_feat, node_config_ids)
        loss = criterion(pred, y)
        loss.backward()

        # max_grad_norm = 10000.0
        # nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        if log_grad_norms:
            _log_grad_norms(model.parameters())

        opt.step()
        epoch_train_losses.append(loss.item())

    if not epoch_train_losses:
        return float("nan")
    return np.mean(epoch_train_losses)


def _log_grad_norms(parameters):
    """Print mean, max, median and overall L2 norm of the per-parameter gradients."""
    grads = [p.grad.detach() for p in parameters if p.grad is not None and p.requires_grad]
    if not grads:
        # Nothing to summarise; np.max of an empty array would raise.
        print("No grads")
        return

    # Norms are computed once and stacked on one device, so only a single
    # transfer to the host is needed per step.
    target = grads[0].device
    per_param = torch.stack([torch.norm(g, 2).to(target) for g in grads])
    total_norm = torch.norm(per_param, 2.0).item()
    gradient_norms = per_param.cpu().numpy()

    print(f"Avg grad norm: {np.mean(gradient_norms)}")
    print(f"Max grad norm: {np.max(gradient_norms)}")
    print(f"Median grad norm: {np.median(gradient_norms)}")
    print(f"Total norm: {total_norm}")

def test(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without gradients and return the mean loss.

    For every graph a single randomly selected configuration is scored.
    Element 0 along the first axis of each field is taken, i.e. the loader is
    expected to yield batches that hold one graph.

    Args:
        model: Module called as ``model(node_feat, node_opcode, edge_index,
            node_config_feat, node_config_ids)``.
        dataloader: Iterable of dicts with keys ``node_feat``, ``node_opcode``,
            ``edge_index``, ``node_config_feat``, ``node_config_ids`` and ``y``.
        criterion: Loss called as ``criterion(pred, y)``.

    Returns:
        Mean of the per-graph losses.
    """
    model.eval()

    input_keys = ("node_feat", "node_opcode", "edge_index", "node_config_feat", "node_config_ids")
    losses = []

    with torch.no_grad():
        for layout in dataloader:
            node_feat, node_opcode, edge_index, node_config_feat, node_config_ids = (
                layout[key][0].to(device) for key in input_keys
            )
            y = layout["y"][0].to(device)

            # Score just one sampled configuration per graph.
            node_config_feat, y = node_config_selection(node_config_feat, y, 1)

            pred = model(node_feat, node_opcode, edge_index, node_config_feat, node_config_ids)
            losses.append(criterion(pred, y).item())

    return np.mean(losses)

def train(model, trainloader, validloader, lr = 0.01, epochs = 10):
    """Train ``model`` with Adam and an MSE loss, validating after every epoch.

    Args:
        model: Module to optimise; it is moved to ``device`` in place.
        trainloader: Loader handed to ``train_step``.
        validloader: Loader handed to ``test``.
        lr: Learning rate for Adam.
        epochs: Number of passes over ``trainloader``.

    Returns:
        Validation loss of the final epoch.
    """
    model.to(device)

    loss_fn = nn.MSELoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_history, valid_history = [], []

    for epoch in range(1, epochs + 1):
        epoch_train_loss = train_step(model, trainloader, loss_fn, optimizer)
        epoch_valid_loss = test(model, validloader, loss_fn)

        train_history.append(epoch_train_loss)
        valid_history.append(epoch_valid_loss)
        print(f"Epoch: {epoch}, Train Loss: {epoch_train_loss}, Valid Loss: {epoch_valid_loss}")

    # Indexing a float64 array keeps the historical return type.
    return np.asarray(valid_history, dtype=np.float64)[-1]

In [None]:
# Seed both random number generators this notebook relies on: Python's `random`
# (used by node_config_selection to shuffle configurations) and torch.
random.seed(42)
torch.manual_seed(42)

model = LayoutModel()
# trainloader = DataLoader(LayoutDataset(layout_nlp_default, 'train'), batch_size = 1, shuffle = True)
# validloader = DataLoader(LayoutDataset(layout_nlp_default, 'valid'), batch_size = 1, shuffle = False)

In [None]:
# train(model, trainloader, validloader, lr=0.01, epochs=1)

## Predictions

In [None]:
def predict(model, layout):
    """Rank the configurations of one graph by predicted runtime.

    The model is evaluated on one configuration at a time and the individual
    predictions are concatenated before sorting.

    Args:
        model: Module called as ``model(node_feat, node_opcode, edge_index,
            node_config_feat, node_config_ids)``; it is moved to ``device``.
        layout: Dict with tensor entries ``node_feat``, ``node_opcode``,
            ``edge_index``, ``node_config_feat`` and ``node_config_ids``
            (no leading batch axis is stripped here).

    Returns:
        Tensor of configuration indices, ordered from smallest to largest
        predicted value.
    """
    model = model.to(device)

    input_keys = ("node_feat", "node_opcode", "edge_index", "node_config_feat", "node_config_ids")
    node_feat, node_opcode, edge_index, node_config_feat, node_config_ids = (
        layout[key].to(device) for key in input_keys
    )

    # Slicing with idx:idx + 1 keeps the leading configuration axis the model expects.
    per_config = [
        model(node_feat, node_opcode, edge_index, node_config_feat[idx:idx + 1, :, :], node_config_ids)
        for idx in range(len(node_config_feat))
    ]

    _, ordering = torch.sort(torch.cat(per_config))
    return ordering
    

In [None]:
def get_predictions_per_file(model, dataset):
    """Run ``predict`` on every item of ``dataset`` and key the results by file name.

    The model is switched to evaluation mode and gradients are disabled while
    predicting.

    Args:
        model: Model handed to ``predict``.
        dataset: Iterable of layout dicts; each must provide a ``"file"`` entry.

    Returns:
        Dict mapping ``layout["file"]`` to the value returned by ``predict``.
    """
    model.eval()

    with torch.no_grad():
        return {layout["file"]: predict(model, layout) for layout in dataset}
    

In [None]:
from scipy.stats import kendalltau

def evaluate(model, dataset):
    """Report Kendall's tau between predicted and measured runtimes, per graph.

    ``predict`` returns configuration indices ordered by predicted runtime.
    That ordering is inverted into a rank per configuration, so that entry i
    of the predicted ranks and entry i of ``layout["y"]`` describe the same
    configuration. Correlating two argsort index arrays directly does not have
    that pairing and generally yields a different value.

    Graphs for which tau is undefined (NaN, e.g. a single configuration or
    constant runtimes) are left out of the summary and the plot.

    Args:
        model: Model handed to ``predict``.
        dataset: Iterable of layout dicts that provide a tensor ``"y"`` with
            the measured runtime of every configuration.
    """
    model.eval()

    correlations = []

    with torch.no_grad():
        for layout in tqdm(dataset):
            ordering = predict(model, layout).cpu()
            # Sorting a permutation yields its inverse: predicted_rank[i] is the
            # position of configuration i in the predicted ordering.
            predicted_rank = torch.sort(ordering).indices
            actual = layout["y"].cpu()

            correlation, _ = kendalltau(predicted_rank.numpy(), actual.numpy())
            correlations.append(correlation)

    correlations = np.array(correlations, dtype=float)
    correlations = correlations[~np.isnan(correlations)]

    if correlations.size == 0:
        # np.max of an empty array would raise; there is nothing to plot either.
        print("No valid Kendall tau values to report")
        return

    print(f"Average Tau: {np.mean(correlations)}")
    print(f"Max Tau: {np.max(correlations)}")

    plt.boxplot(correlations)
    plt.title("Kendall Tau")
    plt.ylabel("Tau Coefficient")
    plt.show()
    
            
    

In [None]:
# test_dataset = LayoutLazyDataset(layout_nlp_default, 'test')
# evaluate(model, test_dataset)
