In [None]:
!pip install torch-geometric

In [None]:
import os
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F

from torch_geometric import nn as gnn
from torch_geometric.data import Data, Dataset, Batch
from torch_geometric.loader import DataLoader

from matplotlib import pyplot as plt

In [None]:
# Names of the split sub-directories; the loader functions join one of these
# onto a collection path.
splits = ["train", "valid", "test"]

# Layout collections, one directory per source (nlp / xla) and search mode
# (default / random).
layout_nlp_default = '/kaggle/input/predict-ai-model-runtime/npz_all/npz/layout/nlp/default'
layout_nlp_random = '/kaggle/input/predict-ai-model-runtime/npz_all/npz/layout/nlp/random'
layout_xla_default = '/kaggle/input/predict-ai-model-runtime/npz_all/npz/layout/xla/default'
layout_xla_random = '/kaggle/input/predict-ai-model-runtime/npz_all/npz/layout/xla/random'

# Tile collection (xla).
tile_xla = '/kaggle/input/predict-ai-model-runtime/npz_all/npz/tile/xla'

In [None]:
def load_n_data_to_df(directory, split, n, pos=0):
    """Load up to `n` ``.npz`` files of one split into a DataFrame.

    Args:
        directory: Path of the collection that contains the split folders.
        split: Name of the split sub-directory (joined onto `directory`).
        n: Maximum number of files to read; capped at the number of entries
            in the split directory.
        pos: Offset of the first file to read within the sorted listing.

    Returns:
        A `pandas.DataFrame` with one row per file. Every array stored in an
        archive becomes a column, plus a ``file`` column with the file name.
    """
    path = os.path.join(directory, split)

    # os.listdir() returns entries in arbitrary order; sorting makes `pos`/`n`
    # select the same files, in the same row order, on every run.
    files = sorted(os.listdir(path))

    n = min(n, len(files))

    data_list = []
    for file in tqdm(files[pos:pos + n]):
        file_path = os.path.join(path, file)
        # Materialise all arrays and close the archive immediately; a bare
        # np.load() keeps the file handle open until garbage collection.
        with np.load(file_path) as archive:
            model_graph = dict(archive)
        model_graph["file"] = file
        data_list.append(model_graph)

    return pd.DataFrame(data_list)

In [None]:
def load_data_to_df(directory, split):
    """Load every file of `split` inside `directory` into one DataFrame.

    Counts the entries of the split folder and delegates the actual reading
    to `load_n_data_to_df` with that count.
    """
    split_dir = os.path.join(directory, split)
    file_count = len(os.listdir(split_dir))
    return load_n_data_to_df(directory, split, file_count)

## Data

In [None]:
class CustomData(Data):
    """`Data` variant whose ``config_feat`` attribute gets its own batch axis."""

    def __cat_dim__(self, key, value, *args, **kwargs):
        """Return the dimension along which `key` is joined when batching.

        ``config_feat`` returns ``None``; per the PyTorch Geometric batching
        documentation this makes the collater stack the per-sample vectors
        along a new leading dimension instead of concatenating them. Every
        other key uses the parent implementation.
        """
        if key == 'config_feat':
            return None
        # Forward the extra arguments unpacked. Passing `args, kwargs` as two
        # positional values would hand the parent a tuple and a dict instead
        # of the caller's original arguments.
        return super().__cat_dim__(key, value, *args, **kwargs)

In [None]:
class TileDataset(Dataset):
    """Dataset with one sample per (graph, configuration) pair.

    `graphs` is an indexable collection of per-graph dictionaries and `lookup`
    maps every sample index to a ``(graph_idx, config_idx)`` pair.
    """

    def __init__(self, graphs, lookup):
        super().__init__()
        self.lookup = lookup
        self.graphs = graphs

    def len(self):
        """Number of samples, i.e. number of entries in `lookup`."""
        return len(self.lookup)

    def get(self, idx):
        """Resolve sample `idx` through `lookup` and build its data object."""
        pair = self.lookup[idx]
        graph_idx, config_idx = pair
        return self.get_quad(graph_idx, config_idx)

    def get_quad(self, graph_idx, config_idx):
        """Build the `CustomData` for one configuration of one graph.

        Node features, opcodes, edges and the file name are taken from the
        graph as a whole; ``y`` and ``config_feat`` are the entries selected
        by `config_idx`.
        """
        entry = self.graphs[graph_idx]

        fields = {
            'x': entry['node_feat'],
            'edge_index': entry['edge_index'],
            'y': entry['y'][config_idx],
            'node_opcode': entry['node_opcode'],
            'config_feat': entry['config_feat'][config_idx, :],
            'file': entry['file'],
        }
        return CustomData(**fields)

In [None]:
def get_dataset(df):
    """Convert a DataFrame of raw tile graphs into a `TileDataset`.

    Args:
        df: DataFrame with one row per graph and the columns ``file``,
            ``node_feat``, ``node_opcode``, ``edge_index``, ``config_feat``,
            ``config_runtime`` and ``config_runtime_normalizers`` (NumPy
            arrays, apart from ``file``).

    Returns:
        A `TileDataset` over the converted graphs, with one lookup entry per
        (graph, configuration) pair. The target ``y`` of a configuration is
        its runtime divided by the matching normalizer.
    """
    graphs = []
    lookup = []

    for _, row in tqdm(df.iterrows(), total=len(df)):

        num_configs = row['config_feat'].shape[0]

        # Position of this graph inside `graphs`. The DataFrame index label
        # must not be used here: `lookup` is later used to index the list
        # positionally, and labels only coincide with positions for a
        # default RangeIndex.
        graph_idx = len(graphs)

        graphs.append({
            'file': row['file'],
            'node_feat': torch.from_numpy(row['node_feat']),
            'node_opcode': torch.from_numpy(row['node_opcode']).type(torch.int64),
            # Stored as (m, 2) on disk; transposed to the (2, m) layout.
            'edge_index': torch.from_numpy(row['edge_index']).t().contiguous(),
            'config_feat': torch.from_numpy(row['config_feat']),
            'y': torch.from_numpy(row['config_runtime']) / torch.from_numpy(row['config_runtime_normalizers'])
        })

        lookup.extend((graph_idx, conf_idx) for conf_idx in range(num_configs))

    print(f"Read {len(graphs)} graphs with a total of {len(lookup)} configurations")
    return TileDataset(graphs, lookup)

In [None]:
# Read every file of the tile collection, one DataFrame per split.
df_train, df_valid, df_test = (
    load_data_to_df(tile_xla, split_name)
    for split_name in ("train", "valid", "test")
)

In [None]:
# Turn each split's DataFrame into a dataset of (graph, configuration) samples.
train_dataset, valid_dataset, test_dataset = map(
    get_dataset, (df_train, df_valid, df_test)
)

In [None]:
# Shuffled mini-batches of 512 samples drawn from the training dataset.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=512,
)

## Model

In [None]:
class TileModel(nn.Module):
    """Runtime regressor for (graph, tile configuration) pairs.

    Node features are extended with a learned opcode embedding, passed
    through a GraphSAGE stack, mean-pooled per graph, concatenated with the
    configuration features and fed to a small MLP that emits one value per
    sample.
    """

    def __init__(self, opcode_embedding_dim = 4, conv_hidden_channels = 120, conv_num_lyers = 4, conv_out_dim = 48,
                 num_opcodes = 120, node_feature_dim = 140, config_dim = 24):
        """
        Args:
            opcode_embedding_dim: Size of the learned embedding per opcode.
            conv_hidden_channels: Hidden width of the GraphSAGE stack.
            conv_num_lyers: Number of GraphSAGE layers. The misspelt name is
                kept so existing keyword callers keep working.
            conv_out_dim: Size of the node embedding GraphSAGE produces.
            num_opcodes: Number of rows of the opcode embedding table, so
                opcode ids must lie in ``[0, num_opcodes)``.
            node_feature_dim: Width of the raw node feature vectors.
            config_dim: Width of one configuration feature vector.
        """
        super().__init__()

        self.opcode_embedding = nn.Embedding(num_opcodes, opcode_embedding_dim)

        self.conv = gnn.GraphSAGE(opcode_embedding_dim + node_feature_dim, conv_hidden_channels, conv_num_lyers, conv_out_dim)

        self.fwd = nn.Sequential(
            nn.Linear(conv_out_dim + config_dim, 48),
            nn.ReLU(),
            nn.Linear(48, 48),
            nn.ReLU(),
            nn.Linear(48, 1)
        )

    def forward(self, data):
        """
            Shapes (n nodes and m edges in the whole batch):
                x            - (n, node_feature_dim)
                node_opcode  - (n, )
                edge_index   - (2, m)
                config_feat  - (batch_size, config_dim)
                ptr          - (batch_size + 1, ) node-range boundaries per graph

            Approach:
                1. Opcode embeddings
                2. Concatenate embeddings to node feature-vector
                3. Convolutional layer for node embeddings
                4. Mean pooling per graph for the graph embedding
                5. Concatenate configuration feature-vector to graph embedding
                6. Forward layer
                7. Flatten

            Approach is in line with the paper by Phitchaya Mangpo Phothilimthana et al. (2023).

            Returns:
                Tensor of shape (batch_size, ) with one prediction per sample.
        """

        node_opcode_embedding = self.opcode_embedding(data["node_opcode"]) # (n, opcode_embedding_dim)

        x = torch.cat([data["x"], node_opcode_embedding], dim = 1) # (n, node_feature_dim + opcode_embedding_dim)

        x = self.conv(x, data["edge_index"]) # (n, conv_out_dim)

        # Consecutive `ptr` entries delimit the node rows of one graph;
        # average each slice to obtain one embedding per graph.
        x = torch.stack([
            torch.mean(x[start:end, :], 0)
            for start, end in zip(data.ptr[:-1], data.ptr[1:])
        ]) # (batch_size, conv_out_dim)

        x = torch.cat([x, data["config_feat"]], dim = 1) # (batch_size, conv_out_dim + config_dim)

        x = self.fwd(x) # (batch_size, 1)

        return x.flatten()
        

In [None]:
def get_num_params(model):
    """Print the total number of elements over all parameters of `model`."""
    param_count = 0
    for param in model.parameters():
        param_count += param.numel()
    print(f"Total parameters in the model: {param_count}")

In [None]:
# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

def train_step(model, criterion, opt):
    """Run one optimisation pass over the module-level `train_loader`.

    Args:
        model: Network called on every batch; switched to training mode here.
        criterion: Loss, called as ``criterion(pred, batch["y"])``.
        opt: Optimiser; its gradients are reset and it is stepped once per batch.

    Returns:
        Mean of the per-batch loss values of this pass.
    """
    model.train()

    # Plain list of floats: growing an array with np.append copies the whole
    # array on every batch, which is quadratic in the number of batches.
    batch_losses = []

    for tile_data in (pbar := tqdm(train_loader)):

        # Keep the object returned by .to() instead of relying on the move
        # happening in place.
        tile_data = tile_data.to(device)

        opt.zero_grad()
        pred = model(tile_data)
        loss = criterion(pred, tile_data["y"])

        loss.backward()
        opt.step()

        loss_value = loss.item()
        pbar.set_description(f"Loss: {loss_value:8.2f}")
        batch_losses.append(loss_value)
    return np.mean(batch_losses)

def test(model, criterion):
    """Compute the mean batch loss of `model` on the module-level `valid_dataset`.

    Args:
        model: Network called on every batch; switched to evaluation mode here.
        criterion: Loss, called as ``criterion(pred, batch["y"])``.

    Returns:
        Mean of the per-batch loss values, computed without gradient tracking.
    """
    model.eval()

    # Plain list of floats: growing an array with np.append copies the whole
    # array on every batch, which is quadratic in the number of batches.
    batch_losses = []

    with torch.no_grad():
        for tile in DataLoader(valid_dataset, batch_size = 512):

            # Keep the object returned by .to() instead of relying on the
            # move happening in place.
            tile = tile.to(device)

            pred = model(tile)
            loss = criterion(pred, tile["y"])

            batch_losses.append(loss.item())
    return np.mean(batch_losses)

def train(model, lr = 0.01, epochs = 10):
    """Train `model` with Adam on an MSE objective and report losses per epoch.

    Moves the model to the module-level `device`, then for each epoch runs
    one `train_step` followed by one `test` pass and prints both losses.

    Returns:
        The validation loss of the last epoch.
    """
    model.to(device)

    criterion = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=lr)

    train_losses, valid_losses = np.empty((0,)), np.empty((0,))

    for epoch in range(epochs):
        train_losses = np.append(train_losses, train_step(model, criterion, adam))
        valid_losses = np.append(valid_losses, test(model, criterion))
        print(f"Epoch: {epoch + 1}, Train Loss: {train_losses[-1]}, Valid Loss: {valid_losses[-1]}")

    return valid_losses[-1]


In [None]:
# Instantiate the tile model with its default hyper-parameters.
model = TileModel()

In [None]:
# train(model)

## Evaluation

In [None]:
def predict(batch, model):
    """Run `model` on `batch` on the module-level `device`; return the output on the CPU.

    The batch is moved with ``batch.to(device)`` and the return value of that
    call is not used, so the batch object itself is what the model receives.
    """
    on_device = model.to(device)
    batch.to(device)

    return on_device(batch).to("cpu")

In [None]:
def get_predictions_per_file(model, dataset):
    """Predict all samples of `dataset` and rank the configurations per graph.

    Predictions are produced batch by batch in dataset order, concatenated,
    and then cut back into one slice per graph using the number of entries in
    each graph's ``y``.

    Returns:
        Dict mapping each graph's ``file`` to the indices of its (up to) five
        configurations with the smallest predicted value, best first.
    """
    model.eval()

    with torch.no_grad():
        batch_outputs = [
            predict(batch, model)
            for batch in tqdm(DataLoader(dataset, batch_size = 512))
        ]

    flat_predictions = torch.cat(batch_outputs)

    top_configs = {}
    offset = 0
    for graph in tqdm(dataset.graphs):
        num_configs = graph['y'].shape[0]
        graph_scores = flat_predictions[offset : offset + num_configs]

        # Ascending sort: lowest predicted value first.
        _, ascending = torch.sort(graph_scores)
        top_configs[graph['file']] = ascending[:5]
        offset += num_configs

    return top_configs

In [None]:
def evaluate_model(model, predictions):
    """Score ranked predictions against the module-level `valid_dataset`.

    For each graph the score is ``2 - best_predicted / best_overall``, where
    `best_predicted` is the smallest ``y`` among the first five predicted
    configuration indices and `best_overall` is the smallest ``y`` of the
    graph.

    Args:
        model: Not used by this function; kept for interface compatibility.
        predictions: Dict whose values are per-graph index rankings. Values
            are matched to ``valid_dataset.graphs`` by position, so the dict
            must have been built from that dataset in the same order.

    Returns:
        Mean score over all graphs (also printed).
    """
    rankings = list(predictions.values())
    graphs = valid_dataset.graphs

    # Plain list of floats: growing an array with np.append copies the whole
    # array for every graph.
    scores = []

    for i, graph in tqdm(enumerate(graphs), total=len(graphs)):
        runtimes = graph["y"]
        # Index and reduce on the tensor instead of iterating its elements
        # in Python.
        best_prediction = runtimes[rankings[i][:5]].min()
        best_total = runtimes.min()
        scores.append(float(2.0 - best_prediction / best_total))

    avg_score = np.mean(scores)
    print("Score:", avg_score)
    return avg_score
    

In [None]:
# Make predictions
# predictions = get_predictions_per_file(model, valid_dataset)

In [None]:
# evaluate_model(model, predictions)

In [None]:
def create_submission(predictions):
    """Write ``submission.csv`` from per-file configuration rankings.

    Starts from the competition's sample submission and, for every entry of
    `predictions`, overwrites ``TopConfigs`` of the row whose ``ID`` equals
    ``tile:xla:`` plus the file name without its last four characters. The
    ranking is stored as its indices joined by ``;``.
    """
    submission = pd.read_csv("/kaggle/input/predict-ai-model-runtime/sample_submission.csv")

    for model_name, top_configs in predictions.items():
        row_mask = submission["ID"] == 'tile:xla:' + model_name[:-4]
        ranking = ";".join(map(str, top_configs.tolist()))
        submission.loc[row_mask, "TopConfigs"] = ranking

    submission.to_csv("submission.csv", index=False)
        
# create_submission(predictions)