In [1]:
!pip install numpy
!pip install pandas
!pip install tqdm
!pip install scikit-learn
!pip install torch
!pip install torch-geometric
!pip install timm
!pip install matplotlib



In [2]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import time

import sklearn,sklearn.model_selection
import torch
from torch import nn
from torch import Tensor

from torch_geometric.nn import GCNConv,SAGEConv,SGConv,TAGConv,ARMAConv,\
ChebConv,GENConv,LEConv,GATConv,MFConv,FeaStConv,GATv2Conv,\
GraphConv,ResGatedGraphConv,ClusterGCNConv

from torch_geometric.datasets import Planetoid
from torch.utils.data import DataLoader, Dataset
from timm.scheduler import CosineLRScheduler
import matplotlib.pyplot as plt
device = 'cuda' if torch.cuda.is_available() else 'cpu'

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# involving dataset
def load_df(directory):
    splits = ["train", "valid", "test"]
    dfs = dict()

    for split in splits:
        path = os.path.join(directory, split)
        files = os.listdir(path)
        list_df = []

        for file in files:
            d = dict(np.load(os.path.join(path,file)))
            d['file'] = file
            list_df.append(d)
        dfs[split] = pd.DataFrame.from_dict(list_df)
    return dfs
tile_xla = load_df("/home/guoming.li/Project/AI701_Project/npz_all/npz/tile/xla/")

# Define Dataset and Model

In [4]:
# class of dataset (for generation)
class TileDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        config_feat = torch.tensor(row['config_feat'].astype(np.float32))
        node_feat = torch.tensor(row['node_feat'].astype(np.float32))
        node_opcode = torch.tensor(row['node_opcode'].astype(np.int64))
        edge_index = torch.tensor(np.swapaxes(row['edge_index'],0,1).astype(np.int64))
        target = (row['config_runtime']/(row['config_runtime_normalizers']+1e-5)).astype(np.float32) #/row['config_runtime_normalizers']
        # minmax scale the target, we only care about order
        target = (target - np.mean(target)) / (np.std(target) + 1e-5)

        target = torch.tensor(target)
        return config_feat, node_feat, node_opcode, edge_index, target

In [5]:
class Model(nn.Module):
    def __init__(self, conv_layer:str='sageconv'):
        super().__init__()

        conv_substitute = ['sageconv','gcnconv','sgconv'\
                  ,'tagconv','chebconv','armaconv','gatv2conv'\
                  ,'genconv','leconv','gatconv','clustergcnconv'\
                  ,'graphconv','resgatedgraphconv','mfconv','feastconv']
        assert conv_layer.lower() in conv_substitute, 'choose convolution layer in:'+str(conv_substitute)

        if conv_layer.lower() == 'sageconv':
          conv = SAGEConv
        elif conv_layer.lower() == 'gcnconv':
          conv = GCNConv
        elif conv_layer.lower() == 'sgconv':
          conv = SGConv
        elif conv_layer.lower() == 'tagconv':
          conv = TAGConv
        elif conv_layer.lower() == 'chebconv':
          conv = ChebConv
        elif conv_layer.lower() == 'genconv':
          conv = GENConv
        elif conv_layer.lower() == 'leconv':
          conv = LEConv
        elif conv_layer.lower() == 'gatconv':
          conv = GATConv
        elif conv_layer.lower() == 'gatv2conv':
          conv = GATv2Conv
        elif conv_layer.lower() == 'graphconv':
          conv = GraphConv
        elif conv_layer.lower() == 'resgatedgraphconv':
          conv = ResGatedGraphConv
        elif conv_layer.lower() == 'clustergcnconv':
          conv = ClusterGCNConv
        elif conv_layer.lower() == 'armaconv':
          conv = ARMAConv
        elif conv_layer.lower() == 'mfconv':
          conv = MFConv
        elif conv_layer.lower() == 'feastconv':
          conv = FeaStConv

        op_embedding_dim = 12
        hidden_dim = 256
        dropout = 0.4

        self.embedding = nn.Embedding(120, op_embedding_dim)
        self.ffn = nn.Sequential(
            nn.Linear(op_embedding_dim + 140, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

        in_channels = hidden_dim
        hidden_channels = [128, 256, 512, 256, 128]
        graph_out = 86

        self.convs = nn.ModuleList()

        if conv_layer.lower() in ['chebconv','armaconv','feastconv']:
          self.convs.append(conv(in_channels, hidden_channels[0], 3))
          for i in range(1, len(hidden_channels)):
              self.convs.append(conv(hidden_channels[i-1], hidden_channels[i], 3))
          self.convs.append(conv(hidden_channels[-1], graph_out, 3))
        else:
          self.convs.append(conv(in_channels, hidden_channels[0]))
          for i in range(1, len(hidden_channels)):
              self.convs.append(conv(hidden_channels[i-1], hidden_channels[i]))
          self.convs.append(conv(hidden_channels[-1], graph_out))

        self.dense = nn.Sequential(
            nn.Linear(graph_out*2 + 24, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout/2),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x_cfg, x_feat, x_op, edge_index):
        x = torch.cat([x_feat, self.embedding(x_op)], dim=1)
        x = self.ffn(x)

        for conv in self.convs:
            x = conv(x, edge_index).relu()

        x_mean = x.mean(dim=0)
        x_max = x.max(dim=0).values

        x = torch.cat([x_cfg, x_max.repeat(len(x_cfg), 1), x_mean.repeat(len(x_cfg), 1)], dim=1)
        x = torch.flatten(self.dense(x))
        x = (x - torch.mean(x)) / (torch.std(x) + 1e-5)
        return x

In [6]:
## we follow the similar setting provided by the competition, combine the train-valid to be training set.
train_df = tile_xla["train"]
test_df = pd.concat((tile_xla["valid"], tile_xla["test"]), axis=0).reset_index(drop=True)

In [11]:
# dataset split: unnecessary when follow the competition
# from sklearn.model_selection import train_test_split
# train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

conv_layer = 'chebconv'

## model
model = Model(conv_layer=conv_layer).to(device)

## retrain?
retrain_mark = True

if os.path.exists(f'/home/guoming.li/Project/AI701_Project/save_model/tile/{conv_layer}.pth') and (retrain_mark==False):
    ## load model;
    model.load_state_dict(torch.load(f'/home/guoming.li/Project/AI701_Project/save_model/tile/{conv_layer}.pth'))
else:
    ## retrain;
    train_dataset = TileDataset(train_df)
    criterion = nn.MSELoss()
    steps = len(train_dataset) * 20
    warmup_steps = int(steps * 0.2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = CosineLRScheduler(optimizer, t_initial=steps, warmup_t=warmup_steps, warmup_lr_init=1e-6, lr_min=2e-8)

    ## loss per epoch;
    epoch_training_loss = []

    ## runtime collections:1
    start = time.time()

    ## training
    for epoch in range(20):
        ##
        model.train()
        pbar = tqdm(range(len(train_dataset)), leave=False)
        loss_sum = 0
        n = 0
        for i in range(len(train_dataset)):
            cfg_ft, nd_ft, nd_op, ind, target = [x.to(device) for x in train_dataset[i]]
            out = model(cfg_ft, nd_ft, nd_op, ind)
            loss = criterion(out, target)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1e-2)
            scheduler.step(i + len(train_dataset) * epoch)
            optimizer.step()
            loss_sum += loss.item()
            n += 1

            if n % 10 == 0:
                pbar.set_description(f'running loss: {(loss_sum/n):.4f}, current loss: {(loss.item()):.4f}')

        epoch_training_loss.append(loss_sum/n)

        pbar.close()

        print(f'epoch: {epoch}, average training loss: {epoch_training_loss[-1]:.4f}')

    ## runtime collections:2
    end = time.time()
    runtime = (end - start) / 60
    print(f'total training time = {runtime:.4f} minutes.')

# testing
test_dataset = TileDataset(test_df)
model.eval()
tile_xla_predictions = []
testing_loss = 0
n = 0
for i in range(len(test_dataset)):
    cfg_ft, nd_ft, nd_op, ind, target = [x.to(device) for x in test_dataset[i]]
    out = model(cfg_ft, nd_ft, nd_op, ind)
    loss = criterion(out, target)
    testing_loss += loss.item()
    n += 1

testing_loss = testing_loss/n

## output the overall loss:
if os.path.exists(f'/home/guoming.li/Project/AI701_Project/save_model/tile/{conv_layer}.pth') and (retrain_mark==False):
    pass
else:
    ## trained model saved;
    torch.save(model.state_dict(), f'/home/guoming.li/Project/AI701_Project/save_model/tile/{conv_layer}.pth')

    print("epoch: ", list(range(20)))
    print("epoch-based average training loss: ", epoch_training_loss)

print(f'average testing loss: {testing_loss:.4f}')

  0%|          | 0/5709 [00:00<?, ?it/s]

In [10]:
# evaluation function
def score_tile_mean(predictions, df):
    score = 0
    for i in range(len(df)):
        predbest = np.mean(df.iloc[i]['config_runtime'][predictions[i]])
        best = np.mean(np.sort(df.iloc[i]['config_runtime'])[:50])

        ## zero avoider;
        if best==0:
            best +=0.01
        if predbest==0:
            predbest +=0.01
            
        score += 2 - predbest / best
    score /= len(df)
    return score
def score_tile_max(predictions, df):
    score = 0
    for i in range(len(df)):
        predbest = np.min(df.iloc[i]['config_runtime'][predictions[i][:5]])
        best = np.min(df.iloc[i]['config_runtime'])

        ## zero avoider;
        if best==0:
            best +=0.01
        if predbest==0:
            predbest +=0.01
            
        score += 2 - predbest/best
    score /= len(df)
    return score

tile_xla_predictions = [[] for i in range(len(test_dataset))]

model.eval()
pbar = tqdm(range(len(test_dataset)))

for i in pbar:
    cfg_ft, nd_ft, nd_op, ind, target = (x.to(device) for x in test_dataset[i])
    out = model(cfg_ft, nd_ft, nd_op, ind)
    tile_xla_predictions[i].append(out.cpu().detach().numpy())

tile_xla_predictions = [np.argsort(np.mean(pred,axis=0))[:5] for pred in tile_xla_predictions]

print(f'score max: {score_tile_max(tile_xla_predictions,test_df)}, score mean: {score_tile_mean(tile_xla_predictions,test_df)}')


100%|██████████| 1520/1520 [01:11<00:00, 21.32it/s]


score max: 0.9905805653093911, score mean: 0.2964594736017436


In [40]:
# sub = pd.read_csv('/kaggle/input/predict-ai-model-runtime/sample_submission.csv')
# for i,filename in enumerate(tile_xla["test"]['file'].values):
#     id = 'tile:xla:' + filename[:-4]
#     sub.loc[sub.ID == id,'TopConfigs'] = ';'.join(tile_xla_predictions[i].astype(str))
# sub.to_csv('submission.csv',index=False)
# sub