# Attention!!!

This is a very simple but bad quality notebook. 
 - I do not use any sort of ranking loss, which would be better.
 - My strategy instead is to min-max scale the times and apply L1-loss
 - My model is also not optimized. It is a relatively simple GNN that embeds the graph and only processes 1 datapoint at a time and is only trained on 1 epoch.
 - The public score would be much better if you paired this submission with a trained model for layout. Since this only contributes to half of the score.
 - Have fun playing around with it!
 
 
 # CHANGES
 - V5 - normalized train and infer targets, use MSE loss, changed evaluation metric to perform top5 mean instead of top5 max for robustness, 5-fold CV
 - V6 - use SAGEConv instead of GCN, add dropout layer, increase number of paramters, changed evaluation metric to perform top50 mean, 10->20 epochs.
 - V13 - fixed problem where model weights weren't being reset leading to heavy overfitting...oops

In [1]:
!pip install torch-geometric torch-scatter

Looking in indexes: http://mirrors.aliyun.com/pypi/simple


In [2]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm 

import sklearn,sklearn.model_selection
import torch
from torch import nn
from torch import Tensor
from torch_geometric.nn import GCNConv,SAGEConv
from torch_geometric.datasets import Planetoid
from torch.utils.data import DataLoader, Dataset
from timm.scheduler import CosineLRScheduler
import matplotlib.pyplot as plt
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def load_df(directory):
    splits = ["train", "valid", "test"]
    dfs = dict()
    
    for split in splits:
        path = os.path.join(directory, split)
        files = os.listdir(path)
        list_df = []
        
        for file in files:
            d = dict(np.load(os.path.join(path,file)))
            d['file'] = file
            list_df.append(d)
        dfs[split] = pd.DataFrame.from_dict(list_df)
    return dfs
tile_xla = load_df("/root/autodl-tmp/npz_all/npz/tile/xla/")

# Define Dataset and Model

In [4]:
class TileDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        config_feat = torch.tensor(row['config_feat'].astype(np.float32))
        node_feat = torch.tensor(row['node_feat'].astype(np.float32))
        node_opcode = torch.tensor(row['node_opcode'].astype(np.int64))
        edge_index = torch.tensor(np.swapaxes(row['edge_index'],0,1).astype(np.int64))
        target = (row['config_runtime']/(row['config_runtime_normalizers']+1e-5)).astype(np.float32) #/row['config_runtime_normalizers']
        # minmax scale the target, we only care about order
        target = (target-np.mean(target))/(np.std(target)+1e-5)

#         target = (target-np.mean(target))/(np.std(target))
        target = torch.tensor(target)
        return config_feat,node_feat,node_opcode,edge_index,target

In [5]:
class EnhancedModel(torch.nn.Module):
    def __init__(self, hidden_channels, graph_in, graph_out, hidden_dim, dropout=0.2):
        super().__init__()
        op_embedding_dim = 4
        self.embedding = torch.nn.Embedding(120, op_embedding_dim)
        
        self.linear = nn.Linear(op_embedding_dim + 140, graph_in)
        
        self.convs = torch.nn.ModuleList()
        self.norms = torch.nn.ModuleList()
        
        in_channels = graph_in
        for out_channels in hidden_channels:
            self.convs.append(SAGEConv(in_channels, out_channels))
            self.norms.append(nn.LayerNorm(out_channels))
            in_channels = out_channels
            
        self.convs.append(SAGEConv(in_channels, graph_out))
        
        self.dense = torch.nn.Sequential(
            nn.Linear(graph_out * 2 + 24, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x_cfg, x_feat, x_op, edge_index):
        x = torch.cat([x_feat, self.embedding(x_op)], dim=1)
        x = self.linear(x)
        
        for conv, norm in zip(self.convs[:-1], self.norms):
            x = F.relu(conv(x, edge_index))
            x = norm(x)
        
        x = self.convs[-1](x, edge_index)
        
        x_mean = x.mean(0)
        x_max = x.max(0).values
        
        x = torch.cat([x_cfg, x_max.repeat((len(x_cfg), 1)), x_mean.repeat((len(x_cfg), 1))], dim=1)
        x = torch.flatten(self.dense(x))
        x = (x - torch.mean(x)) / (torch.std(x) + 1e-5)
        
        return x


# Train One Epoch

In [6]:
df = pd.concat((tile_xla["train"],tile_xla["valid"]),axis=0).reset_index(drop=True)

In [7]:
def score_tile_mean(predictions, df):
    score = 0
    for i in range(len(df)):
        predbest = np.mean(df.iloc[i]['config_runtime'][predictions[i]])
        best = np.mean(np.sort(df.iloc[i]['config_runtime'])[:50])
        score += 2-predbest/best
    score /= len(df)
    return score

def score_tile_max(predictions, df):
    score = 0
    for i in range(len(df)):
        predbest = np.min(df.iloc[i]['config_runtime'][predictions[i][:5]])
        best = np.min(df.iloc[i]['config_runtime'])
        score += 2 - predbest/best
    score /= len(df)
    return score

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from tqdm import tqdm
import sklearn.model_selection

# Assuming you've defined SimpleModel and TileDataset classes above
# Also assuming you've defined CosineLRScheduler

# Data loading and preprocessing
df = pd.concat((tile_xla["train"], tile_xla["valid"]), axis=0).reset_index(drop=True)

# K-Fold Cross Validation
kfold = sklearn.model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
score_means = []
score_maxs = []

# Early stopping parameters
patience = 5
counter = 0

for fold, (tr_idx, va_idx) in enumerate(kfold.split(df)):
    model = EnhancedModel(hidden_channels=[32, 48, 64, 84], graph_in=64, graph_out=64, hidden_dim=128, dropout=0.2).to(device)
    train_dataset = TileDataset(df.iloc[tr_idx])
    val_dataset = TileDataset(df.iloc[va_idx])
    
    # Loss and optimizer
    criterion = torch.nn.MSELoss()
    steps = len(train_dataset) * 20
    warmup_steps = int(steps * 0.2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = CosineLRScheduler(optimizer, t_initial=steps, warmup_t=warmup_steps, warmup_lr_init=1e-6, lr_min=2e-8)
    
    best_score = 0
    best_score_max = 0
    
    for epoch in range(50):  # Set max epochs to 50
        # Training
        model.train()
        pbar = tqdm(range(len(train_dataset)), leave=False)
        loss_sum = 0
        n = 0
        for i in pbar:
            cfg_ft, nd_ft, nd_op, ind, target = train_dataset[i]
            cfg_ft, nd_ft, nd_op, ind, target = cfg_ft.to(device), nd_ft.to(device), nd_op.to(device), ind.to(device), target.to(device)
            
            out = model(cfg_ft, nd_ft, nd_op, ind)
            loss = criterion(out, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1e-2)
            scheduler.step(i + len(train_dataset) * epoch)
            optimizer.step()
            loss_sum += loss.item()
            n += 1
            pbar.set_description(f'running loss: {(loss_sum / n):.2f}, current loss: {(loss.item()):.2f}')
        
        # Validation
        model.eval()
        tile_xla_predictions = []
        pbar = tqdm(range(len(val_dataset)), leave=False)
        for i in pbar:
            cfg_ft, nd_ft, nd_op, ind, target = val_dataset[i]
            cfg_ft, nd_ft, nd_op, ind, target = cfg_ft.to(device), nd_ft.to(device), nd_op.to(device), ind.to(device), target.to(device)
            
            out = model(cfg_ft, nd_ft, nd_op, ind)
            tile_xla_predictions.append(np.argsort(out.detach().cpu().numpy())[:50])
        
        score_mean = score_tile_mean(tile_xla_predictions, val_dataset.df)
        score_max = score_tile_max(tile_xla_predictions, val_dataset.df)
        
        # Early stopping logic
        if score_mean > best_score:
            best_score = score_mean
            best_score_max = score_max
            torch.save(model.state_dict(), f'best_model_{fold}.pth')
            counter = 0  # Reset counter
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping on fold {fold} at epoch {epoch}")
                break  # Stop if counter reaches patience
    
    score_means.append(best_score)
    score_maxs.append(best_score_max)

# Final score
print(f'comp_score = {np.mean(score_maxs)}, mean_score = {np.mean(score_means)},')


                                                                                            

Early stopping on fold 0 at epoch 6


                                                                                            

Early stopping on fold 1 at epoch 8


                                                                                            

Early stopping on fold 2 at epoch 7


running loss: 0.38, current loss: 0.27:  96%|█████████▋| 4929/5108 [01:01<00:02, 76.07it/s] IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

                                                                                            

Early stopping on fold 3 at epoch 7


running loss: 0.31, current loss: 1.18:  74%|███████▎  | 3757/5108 [00:46<00:18, 72.32it/s] 

In [None]:
# kfold = sklearn.model_selection.KFold(n_splits=5,shuffle=True,random_state=0)
# score_means = []
# score_maxs = []
# for fold,(tr_idx,va_idx) in enumerate(kfold.split(df)):
#     model = EnhancedModel(hidden_channels = [32,48,64,84],graph_in = 64,graph_out = 64,hidden_dim=128,dropout = 0.2).to(device)
#     train_dataset = TileDataset(df.iloc[tr_idx])
#     val_dataset = TileDataset(df.iloc[va_idx])
#     criterion = torch.nn.MSELoss()
#     steps = len(train_dataset)*20
#     warmup_steps = int(steps*0.2)
#     optimizer = torch.optim.Adam(model.parameters(), lr=1e-4,weight_decay = 1e-4)
#     scheduler = CosineLRScheduler(optimizer,t_initial= steps,warmup_t=warmup_steps, warmup_lr_init=1e-6,lr_min=2e-8,)
    
#     def score_tile_mean(predictions, df):
#         score = 0
#         for i in range(len(df)):
#             predbest = np.mean(df.iloc[i]['config_runtime'][predictions[i]])
#             best = np.mean(np.sort(df.iloc[i]['config_runtime'])[:50])
#             score += 2-predbest/best
#         score /= len(df)
#         return score
#     def score_tile_max(predictions, df):
#         score = 0
#         for i in range(len(df)):
#             predbest = np.min(df.iloc[i]['config_runtime'][predictions[i][:5]])
#             best = np.min(df.iloc[i]['config_runtime'])
#     #         print(best,predbest)
#             score += 2 - predbest/best
#         score /= len(df)
#         return score

#     best_score = 0
#     best_score_max = 0
#     for epoch in range(10):
#         model.train()
#         pbar = tqdm(range(len(train_dataset)),leave=False)
#         loss_sum = 0
#         n = 0
#         for i in pbar:
#             cfg_ft,nd_ft,nd_op,ind,target = train_dataset[i]
#             cfg_ft,nd_ft,nd_op,ind,target = cfg_ft.to(device),nd_ft.to(device),nd_op.to(device),ind.to(device),target.to(device)

#             out = model(cfg_ft,nd_ft,nd_op,ind)
#             loss = criterion(out, target)
#             loss.backward()
#             torch.nn.utils.clip_grad_norm_(model.parameters(), 1e-2)
#             scheduler.step(i+len(train_dataset)*epoch)
#             optimizer.step()
#             loss_sum+=loss.item()
#             n+=1
#             pbar.set_description(f'running loss: {(loss_sum/n):.2f},current loss: {(loss.item()):.2f}')
#         pbar.close()
#         model.eval()

#         tile_xla_predictions = []
#         pbar = tqdm(range(len(val_dataset)),leave=False)
#         for i in pbar:
#             cfg_ft,nd_ft,nd_op,ind,target = val_dataset[i]
#             cfg_ft,nd_ft,nd_op,ind,target = cfg_ft.to(device),nd_ft.to(device),nd_op.to(device),ind.to(device),target.to(device)

#             out = model(cfg_ft,nd_ft,nd_op,ind)
#             # tile_xla_predictions.append(np.argsort(out.detach().numpy())[:50])
#             tile_xla_predictions.append(np.argsort(out.detach().cpu().numpy())[:50])
#         pbar.close()
#         score_mean = score_tile_mean(tile_xla_predictions, val_dataset.df)
#         score_max = score_tile_max(tile_xla_predictions, val_dataset.df)
#         print(f'fold {fold} epoch {epoch}, comp_score = {score_max:.3f}, mean_score = {score_mean:.3f},')
#         if score_mean>best_score:
#             best_score = score_mean
#             best_score_max = score_max
#             torch.save(model.state_dict(), f'best_model_{fold}.pth')
#     score_means.append(best_score)
#     score_maxs.append(best_score_max)
# print(f'comp_score = {np.mean(score_maxs)}, mean_score = {np.mean(score_means)},')

# Evaluate on Validation Dataset

**0.31 is not bad considering that this model only trained on 1 epoch and is not on a ranking loss!**

# Predict and Submit (only tile:xla predictions)

In [None]:
dataset = TileDataset(tile_xla["test"])
tile_xla_predictions = [[] for i in range(len(dataset))]
for fold in range(5):
    model.load_state_dict(torch.load(f'/root/autodl-tmp/best_model_{fold}.pth'))
    model.eval()
    pbar = tqdm(range(len(dataset)))
    for i in pbar:
        cfg_ft,nd_ft,nd_op,ind,target = dataset[i]
        cfg_ft,nd_ft,nd_op,ind,target = cfg_ft.to(device),nd_ft.to(device),nd_op.to(device),ind.to(device),target.to(device)

        out = model(cfg_ft,nd_ft,nd_op,ind)
        tile_xla_predictions[i].append(out.detach().cpu().numpy())
tile_xla_predictions = [np.argsort(np.mean(pred,axis=0))[:5] for pred in tile_xla_predictions]

In [None]:
sub = pd.read_csv('/root/autodl-tmp/sample_submission.csv')
for i,filename in enumerate(tile_xla["test"]['file'].values):
    id = 'tile:xla:' +filename[:-4]
    sub.loc[sub.ID == id,'TopConfigs'] = ';'.join(tile_xla_predictions[i].astype(str))
sub.to_csv('submission.csv',index=False)
sub