In [None]:
# Install PyTorch Geometric; the import cell below loads it as `gnn`.
# NOTE(review): the version is not pinned, so a re-run may pull a different release.
!pip install torch-geometric

In [None]:
import os
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch_geometric import nn as gnn

from matplotlib import pyplot as plt

In [None]:
# Names of the dataset splits; the loader functions below join one of these
# onto a data directory to find the files of that split.
# NOTE(review): `splits` itself is not referenced elsewhere in the visible notebook.
splits = ["train", "valid", "test"]

In [None]:
# Kaggle input locations of the .npz collections. Every path below hangs off
# the same root; the loaders append a split name to the chosen directory.
_NPZ_ROOT = '/kaggle/input/predict-ai-model-runtime/npz_all/npz'

# Layout collections, grouped by source (nlp / xla) and search mode (default / random).
layout_nlp_default = _NPZ_ROOT + '/layout/nlp/default'
layout_nlp_random = _NPZ_ROOT + '/layout/nlp/random'
layout_xla_default = _NPZ_ROOT + '/layout/xla/default'
layout_xla_random = _NPZ_ROOT + '/layout/xla/random'

# Tile collection used by the model in this notebook.
tile_xla = _NPZ_ROOT + '/tile/xla'

In [None]:
def load_n_data_to_df(directory, split, n, pos=0):
    """Load up to ``n`` ``.npz`` files of one split into a DataFrame.

    Args:
        directory: Directory that contains one sub-directory per split.
        split: Name of the split sub-directory to read.
        n: Maximum number of files to load; capped at the number of files
            found in the split directory.
        pos: Index (in sorted file-name order) of the first file to load.

    Returns:
        A ``pandas.DataFrame`` with one row per loaded file. Every array
        stored in a file becomes a cell in the column named after its key,
        and the extra ``file`` column holds the file name.
    """
    path = os.path.join(directory, split)
    # os.listdir returns entries in arbitrary order; sorting makes `pos`/`n`
    # select the same files on every run.
    files = sorted(os.listdir(path))

    n = min(n, len(files))

    data_list = []
    for file in tqdm(files[pos:pos + n]):
        # Materialise all arrays, then close the archive right away instead of
        # leaving its file handle open until garbage collection.
        with np.load(os.path.join(path, file)) as archive:
            model_graph = dict(archive)
        model_graph["file"] = file
        data_list.append(model_graph)

    return pd.DataFrame(data_list)

In [None]:
def load_data_to_df(directory, split):
    """Load every file of one split into a DataFrame.

    Counts the entries of ``directory/split`` and delegates to
    ``load_n_data_to_df`` with that count, so the whole split is read.

    Args:
        directory: Directory that contains one sub-directory per split.
        split: Name of the split sub-directory to read.

    Returns:
        Whatever ``load_n_data_to_df`` returns for the full split.
    """
    split_dir = os.path.join(directory, split)
    file_count = len(os.listdir(split_dir))
    return load_n_data_to_df(directory, split, file_count)

In [None]:
# Load a small sample (at most 5 files) of the layout/nlp/random validation
# split to look at the structure of the data.
df_layout = load_n_data_to_df(layout_nlp_random, "valid", 5)

In [None]:
# Print the length of each of the first six fields of the sample at row 3.
# NOTE(review): column order follows the key order of the loaded .npz files;
# confirm which field each position corresponds to.
for i in range(6):
    print(len(df_layout.iloc[3, i]))

# Length of the first element of the field in column 3, then show the whole row.
print(len(df_layout.iloc[3, 3][0]))
df_layout.iloc[3]

In [None]:
# Drop the exploration sample; it is not used by the rest of the notebook.
del df_layout

In [None]:
class GAT(nn.Module):
    """Two-layer graph attention network built from ``gnn.GATConv`` layers.

    Dropout with p=0.6 is applied to the input of each layer (only while the
    module is in training mode) and the same rate is passed to both attention
    layers.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Per-head output size of the first layer.
        out_channels: Output size of the second layer.
        heads: Number of attention heads of the first layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        self.conv1 = gnn.GATConv(in_channels, hidden_channels, heads, dropout=0.6)
        # The second layer consumes the `heads` concatenated outputs of the
        # first one and uses a single, non-concatenated head.
        # On the Pubmed dataset, use `heads` output heads in `conv2`.
        self.conv2 = gnn.GATConv(
            hidden_channels * heads,
            out_channels,
            heads=1,
            concat=False,
            dropout=0.6,
        )

    def forward(self, x, edge_index):
        """Return the second layer's output for node features ``x`` and ``edge_index``."""
        hidden = F.dropout(x, p=0.6, training=self.training)
        hidden = self.conv1(hidden, edge_index)
        hidden = F.elu(hidden)
        hidden = F.dropout(hidden, p=0.6, training=self.training)
        return self.conv2(hidden, edge_index)


In [None]:
class TileModel(nn.Module):
    """Scores tile configurations of a single computation graph.

    The graph is encoded once (opcode embedding + node features -> GraphSAGE
    -> mean pooling) and the resulting graph embedding is combined with every
    configuration feature vector to produce one score per configuration.

    Args:
        opcode_embedding_dim: Size of the learned opcode embedding.
        conv_hidden_channels: Hidden width of the GraphSAGE network.
        conv_num_lyers: Number of GraphSAGE layers (the misspelt name is kept
            so existing keyword callers continue to work).
        conv_out_dim: Size of the node embedding produced by GraphSAGE.
        num_opcodes: Number of rows of the opcode embedding table.
        node_feature_dim: Size of each raw node feature vector.
        config_dim: Size of each configuration feature vector.
    """

    def __init__(self, opcode_embedding_dim = 8, conv_hidden_channels = 120, conv_num_lyers = 4, conv_out_dim = 48,
                 num_opcodes = 120, node_feature_dim = 140, config_dim = 24):
        super().__init__()

        self.opcode_embedding = nn.Embedding(num_opcodes, opcode_embedding_dim)

        # Node encoder; its input is the raw node features concatenated with
        # the opcode embedding.
        self.conv = gnn.GraphSAGE(opcode_embedding_dim + node_feature_dim, conv_hidden_channels, conv_num_lyers, conv_out_dim)

        # Regression head applied to [graph embedding, configuration features].
        self.fwd = nn.Sequential(
            nn.Linear(conv_out_dim + config_dim, 48),
            nn.ReLU(),
            nn.Linear(48, 48),
            nn.ReLU(),
            nn.Linear(48, 1)
        )

    def forward(self, node_feat, node_opcode, edge_index, config_feat):
        """
            Shapes (defaults in parentheses):
                node_feat    - (n, node_feature_dim)   (n, 140)
                node_opcode  - (n, ) integer opcode ids
                edge_index   - (2, m), i.e. already transposed the way
                               TileDataset returns it
                config_feat  - (c, config_dim)         (c, 24)

            Approach:
                1. Opcode embeddings
                2. Concatenate embeddings to node feature-vector
                3. Convolutional layer for node embeddings
                4. Pooling for graph embedding
                5. Concatenate configuration feature-vector to graph embedding
                6. Forward layer
                7. Flatten

            Returns:
                Tensor of shape (c, ) with one score per configuration.

            Approach is in line with the paper Phitchaya Mangpo Phothilimthana et al. (2023)
        """
        node_opcode_embedding = self.opcode_embedding(node_opcode) # (n, opcode_embedding_dim)

        x = torch.concat([node_feat, node_opcode_embedding], dim = 1) # (n, node_feature_dim + opcode_embedding_dim)

        x = self.conv(x, edge_index) # (n, conv_out_dim)

        x = torch.mean(x, 0) # (conv_out_dim, ) mean over all nodes

        # Every configuration is scored against the same graph embedding.
        x = x.repeat(len(config_feat), 1) # (c, conv_out_dim)

        x = torch.concat([x, config_feat], dim = 1) # (c, conv_out_dim + config_dim)

        x = self.fwd(x) # (c, 1)

        return torch.flatten(x) # (c, )
        

In [None]:
class TileDataset(Dataset):
    """Exposes the rows of a tile DataFrame as dictionaries of tensors.

    Args:
        tiles: DataFrame with the columns ``file``, ``node_feat``,
            ``node_opcode``, ``edge_index``, ``config_feat``,
            ``config_runtime`` and ``config_runtime_normalizers``; every
            column except ``file`` holds NumPy arrays.
    """

    def __init__(self, tiles):
        self.tiles = tiles

    def __len__(self):
        return len(self.tiles)

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict of tensors.

        ``node_opcode`` is cast to int64, ``edge_index`` has its two axes
        swapped, and ``y`` is ``config_runtime`` divided element-wise by
        ``config_runtime_normalizers``.
        """
        # Look the row up once instead of once per field.
        row = self.tiles.iloc[idx]

        config_runtime = torch.from_numpy(row['config_runtime'])
        config_runtime_normalizers = torch.from_numpy(row['config_runtime_normalizers'])

        return {
            "file": row['file'],
            "node_feat": torch.from_numpy(row['node_feat']),
            "node_opcode": torch.from_numpy(row['node_opcode']).type(torch.int64),
            "edge_index": torch.from_numpy(row['edge_index']).permute(1, 0),
            "config_feat": torch.from_numpy(row['config_feat']),
            "y": config_runtime / config_runtime_normalizers
        }

    def __iter__(self):
        # Hand out an independent iterator per loop: with a single cursor
        # shared on the dataset, a nested or interleaved loop over the same
        # dataset would cut the outer loop short.
        # The cursor is still reset so that legacy `next(dataset)` calls made
        # after `iter(dataset)` keep working.
        self.i = 0
        return (self[idx] for idx in range(len(self)))

    def __next__(self):
        # Legacy cursor-based protocol; `for` loops use the iterator returned
        # by __iter__ instead.
        if self.i < len(self.tiles):
            item = self[self.i]
            self.i += 1
            return item
        else:
            raise StopIteration

# Read all three tile/xla splits into memory, in the order train, valid, test.
df_train, df_valid, df_test = (
    load_data_to_df(tile_xla, split_name)
    for split_name in ("train", "valid", "test")
)

# Wrap each frame so its rows can be consumed as dictionaries of tensors.
train_dataset, valid_dataset, test_dataset = (
    TileDataset(frame) for frame in (df_train, df_valid, df_test)
)

In [None]:
# Smoke test: run an untrained model with default hyperparameters on the
# first training sample and display the resulting per-configuration scores.
model = TileModel()

data = train_dataset[0]

model(
    node_feat=data['node_feat'],
    node_opcode=data['node_opcode'],
    edge_index=data['edge_index'],
    config_feat=data['config_feat'],
)

In [None]:
def get_num_params(model):
    """Print the total number of elements across all parameters of ``model``.

    Args:
        model: Object whose ``parameters()`` yields tensors exposing ``numel()``.
    """
    param_count = 0
    for param in model.parameters():
        param_count += param.numel()
    print(f"Total parameters in the model: {param_count}")

## Training

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# The training, evaluation and prediction helpers below read this global.
device = "cuda" if torch.cuda.is_available() else "cpu"

def train_step(model, criterion, opt):
    """Run one optimisation pass over the module-level ``train_dataset``.

    Every sample (one graph with all of its configurations) is its own
    optimisation step: gradients are reset, the loss is back-propagated and
    the optimiser is stepped.

    Args:
        model: Module called with (node_feat, node_opcode, edge_index, config_feat).
        criterion: Loss callable taking (prediction, target).
        opt: Optimiser managing ``model``'s parameters.

    Returns:
        Mean of the per-sample losses of this pass.
    """
    model.train()

    input_keys = ("node_feat", "node_opcode", "edge_index", "config_feat")
    step_losses = []
    for tile in tqdm(train_dataset):
        inputs = [tile[key].to(device) for key in input_keys]

        opt.zero_grad()
        # Targets stay on the CPU, so predictions are moved back before the loss.
        pred = model(*inputs).to("cpu")
        loss = criterion(pred, tile["y"])

        loss.backward()
        opt.step()

        step_losses.append(loss.item())

    return np.mean(step_losses)

def test(model, criterion):
    """Compute the mean loss of ``model`` over the module-level ``valid_dataset``.

    The model is switched to evaluation mode and gradients are disabled.

    Args:
        model: Module called with (node_feat, node_opcode, edge_index, config_feat).
        criterion: Loss callable taking (prediction, target).

    Returns:
        Mean of the per-sample losses.
    """
    model.eval()

    input_keys = ("node_feat", "node_opcode", "edge_index", "config_feat")
    sample_losses = []

    with torch.no_grad():
        for tile in valid_dataset:
            inputs = [tile[key].to(device) for key in input_keys]

            # Targets stay on the CPU, so predictions are moved back before the loss.
            pred = model(*inputs).to("cpu")
            sample_losses.append(criterion(pred, tile["y"]).item())

    return np.mean(sample_losses)

def train(model, lr = 0.01, epochs = 10):
    """Train ``model`` with Adam and MSE loss, printing both losses per epoch.

    Args:
        model: Module to train; it is moved to the global ``device``.
        lr: Learning rate passed to Adam.
        epochs: Number of passes over the training data.

    Returns:
        The validation loss of the last epoch.
    """
    model.to(device)

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_history = []
    valid_history = []

    for epoch in range(1, epochs + 1):
        train_history.append(train_step(model, loss_fn, optimizer))
        valid_history.append(test(model, loss_fn))
        print(f"Epoch: {epoch}, Train Loss: {train_history[-1]}, Valid Loss: {valid_history[-1]}")

    return valid_history[-1]


In [None]:
def store_model(model, path="tile_model.pt"):
    """Save the model's ``state_dict`` to disk.

    Args:
        model: Module whose parameters are stored.
        path: Destination file. Defaults to ``"tile_model.pt"``, the file
            that ``create_submission`` loads.
    """
    torch.save(model.state_dict(), path)

In [None]:
def plot_losses(valid_losses, train_losses):
    """Plot validation and training loss per epoch on twin y-axes.

    Args:
        valid_losses: Validation loss per epoch (red, left axis).
        train_losses: Training loss per epoch (blue, right axis); expected to
            have the same length as ``valid_losses``.
    """
    # Epochs are numbered from 1. The previous `list(1, 1 + range(...))`
    # raised a TypeError before anything was drawn.
    all_epochs = list(range(1, len(valid_losses) + 1))

    fig, ax = plt.subplots()

    # Validation curve on the left axis.
    ax.plot(all_epochs,
        valid_losses,
        color="red",
        marker="o")
    ax.set_xlabel("Epoch", fontsize = 14)
    ax.set_ylabel("Valid",
              color="red",
              fontsize=14)

    # Training curve on a second y-axis sharing the same x-axis, so both
    # curves stay readable even when their scales differ.
    ax2 = ax.twinx()
    ax2.plot(all_epochs, train_losses, color="blue", marker="o")
    ax2.set_ylabel("Train", color="blue", fontsize=14)
    plt.show()


In [None]:
def predict(tile, model):
    """Return the indices of the five lowest-scored configurations of a tile.

    Args:
        tile: Dict with the tensors ``node_feat``, ``node_opcode``,
            ``edge_index`` and ``config_feat``.
        model: Module producing one score per configuration; it is moved to
            the global ``device``.

    Returns:
        Tensor with up to five configuration indices, ordered by ascending
        model output.
    """
    model = model.to(device)

    input_keys = ("node_feat", "node_opcode", "edge_index", "config_feat")
    inputs = [tile[key].to(device) for key in input_keys]

    scores = model(*inputs).to("cpu")
    _, order = torch.sort(scores)
    return order[:5]
    

In [None]:
def get_predictions_per_file(model, dataset):
    """Predict the top configurations for every sample of ``dataset``.

    Args:
        model: Either a module, or the path (``str``) of a saved state dict
            that is loaded into a ``TileModel`` built with default
            hyperparameters.
        dataset: Iterable of sample dicts as produced by ``TileDataset``.

    Returns:
        Dict mapping each sample's ``file`` name to the index tensor
        returned by ``predict``.
    """
    if isinstance(model, str):
        # map_location="cpu" lets a checkpoint written from a CUDA model load
        # on a CPU-only machine; predict() moves the model to `device` later.
        state_dict = torch.load(model, map_location="cpu")
        # NOTE(review): the checkpoint must have been trained with TileModel's
        # default hyperparameters, otherwise load_state_dict will not match.
        model = TileModel()
        model.load_state_dict(state_dict)

    model.eval()
    prediction_for_file = {}

    with torch.no_grad():
        for tile in dataset:
            prediction_for_file[tile["file"]] = predict(tile, model)

    return prediction_for_file

In [None]:
def evaluate_model(model):
    """Score ``model`` on the module-level ``valid_dataset``.

    For every sample the score is ``2 - best_predicted / best_overall``,
    where ``best_predicted`` is the smallest target among the (up to five)
    predicted configurations and ``best_overall`` the smallest target of the
    sample. The average over all samples is printed and returned.

    Args:
        model: Module, or path of a saved state dict, as accepted by
            ``get_predictions_per_file``.

    Returns:
        Mean score over the validation samples.
    """
    # Predictions are keyed by file name, so each sample is matched to its own
    # prediction regardless of iteration order.
    predictions = get_predictions_per_file(model, valid_dataset)

    scores = []
    for tile in tqdm(valid_dataset, total=len(valid_dataset)):
        # Use the sample already produced by the loop instead of rebuilding it
        # through valid_dataset[i] for every lookup.
        y = tile["y"]
        top_configs = predictions[tile["file"]][:5]

        best_prediction = min(y[pred_ind] for pred_ind in top_configs)
        best_total = min(y)
        scores.append(float(2.0 - best_prediction / best_total))

    avg_score = np.mean(scores)
    print("Score:", avg_score)
    return avg_score
    

In [None]:
import itertools
import math

def hyperparameter_search():
    """Grid-search ``TileModel`` hyperparameters and keep the best model.

    Every combination of the grids below is trained with ``train`` and scored
    with ``evaluate_model``. Whenever a configuration beats the best score so
    far, its model is written to disk through ``store_model``.

    Returns:
        Tuple ``(best_model, best_config, best_score)`` where ``best_config``
        is ``(opcode_embedding_dim, hidden_channels, num_layers, conv_out_dim,
        lr, epochs)``.
    """
    opcode_embedding = [4, 8, 16]
    hidden_channels = [64, 128, 256]
    num_layers = [5, 7]
    conv_out_dim = [32, 48, 64]
    lr = [0.001, 0.01]
    epochs = [15]

    hyperparameters = [opcode_embedding, hidden_channels, num_layers, conv_out_dim, lr, epochs]

    # Start below every reachable score: scores can be zero or negative, and
    # starting at 0.0 would then leave no model selected or stored.
    best_score = float("-inf")
    best_config = ()
    best_model = None

    configs = list(itertools.product(*hyperparameters))
    num_configs = len(configs)

    print(f"Testing a total of {num_configs} configurations")

    for i, config in enumerate(configs):

        print(f"Testing config {i + 1}/{num_configs}")
        print(config)

        embedding_dim, hidden_dim, layer_count, out_dim, learning_rate, epoch_count = config

        model = TileModel(embedding_dim, hidden_dim, layer_count, out_dim).to(device)

        train(model, learning_rate, epoch_count)

        score = evaluate_model(model)

        if score > best_score:
            best_score = score
            best_config = config
            best_model = model
            store_model(model)

    return best_model, best_config, best_score


In [None]:
# hyperparameter_search()
# Build one model by hand with opcode_embedding_dim=8, conv_hidden_channels=128,
# conv_num_lyers=7 and conv_out_dim=64, and move it to `device`.
# NOTE(review): create_submission() builds TileModel(16, 256, 7, 64); a checkpoint
# stored from this model would not match that architecture.
model = TileModel(8, 128, 7, 64).to(device)
# train(model, 0.001, 15)
# score = evaluate_model(model)
# print(score)

In [None]:
# hyperparameter_search()

In [None]:
def create_submission():
    """Write ``submission.csv`` with the predicted top tile configurations.

    Loads the weights stored in ``tile_model.pt`` into a
    ``TileModel(16, 256, 7, 64)``, predicts the top configurations for every
    sample of the module-level ``test_dataset`` and fills them into the
    ``TopConfigs`` column of the sample submission.
    """
    submission = pd.read_csv("/kaggle/input/predict-ai-model-runtime/sample_submission.csv")

    # map_location="cpu" lets a checkpoint written from a CUDA model load on a
    # CPU-only machine; the prediction helpers move the model to `device`.
    state_dict = torch.load("tile_model.pt", map_location="cpu")
    # NOTE(review): these hyperparameters must match the model that was stored.
    model = TileModel(16, 256, 7, 64)
    model.load_state_dict(state_dict)
    predictions = get_predictions_per_file(model, test_dataset)

    for model_name, top_configs in predictions.items():
        # Drop the 4-character file extension (".npz") to obtain the graph id.
        model_id = 'tile:xla:' + model_name[:-4]
        submission.loc[submission["ID"] == model_id, "TopConfigs"] = ";".join([str(pred) for pred in top_configs.tolist()])

    submission.to_csv("submission.csv", index=False)
        
# create_submission()