<a href="https://colab.research.google.com/github/max-seeli/ai-model-runtime-prediction/blob/main/google_tpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


Ref:

*   [TPU Graphs](https://arxiv.org/pdf/2308.13490.pdf)
*   [GraphSAGE](https://arxiv.org/pdf/1706.02216.pdf)
*   [Ranked List Loss for Deep Metric Learning](https://arxiv.org/pdf/1903.03238.pdf)


TODO:

*   split dataset to make upload faster
*   test validation
*   in layout the config data should be a torch.int32, but overflows if not torch.long

Notes:

*   Training simple model with MSE loss:
    *   need hyperparamter search
    *   why does the loss spice at the beginning of each epoch (batches are randomized)
    *   oberservations: seems that the smaller models just learn some average absolute value, but not really a ranking
    * probably model would have to be huge to rank correctly

*   Training simple model with ranking loss:
    *   Ranked List Loss
    *   Extract the smallest k times

*   Abandoning simple model:
    *   Replicate TPU paper:
        *   SageGraphs
        *   ResGCN

## Dependencies


In [5]:
!pip install torch-geometric
!pip install einops



In [6]:
import os
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn

from torch_geometric import nn as gnn

from torch.nn import Linear, ReLU, Dropout
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from einops import reduce, repeat, rearrange

from torch.utils.data import Dataset, DataLoader

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Load Data

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### zipped data

In [None]:
import shutil
shutil.unpack_archive("/content/drive/MyDrive/google-tpu/predict-ai-model-runtime.zip", "/content/data")

In [None]:
splits = ["train", "valid", "test"]

layout_nlp_default = '/content/data/npz_all/npz/layout/nlp/default'
layout_nlp_random = '/content/data/npz_all/npz/layout/nlp/random'
layout_xla_default = '/content/data/npz_all/npz/layout/xla/default'
layout_xla_random = '/content/data/npz_all/npz/layout/xla/random'

tile_xla = '/content/data/npz_all/npz/tile/xla'

In [None]:
def load_data_to_df(directory, split):

    path = os.path.join(directory, split)
    files = [os.path.join(path, file) for file in os.listdir(path)]
    data_list = []
    for file in tqdm(files):
        data = dict(np.load(file))
        data_list.append(data)

    return pd.DataFrame(data_list)

### unzipped data in drive


In [11]:
# unzipped archive in drive
splits = ["train", "valid", "test"]

layout_nlp_default = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/nlp/default'
layout_nlp_random = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/nlp/random'
layout_xla_default = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/xla/default'
layout_xla_random = '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/layout/xla/random'

tile_xla= '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/npz_all/npz/tile/xla'

In [12]:
def load_data_to_df(directory, split):

    path = os.path.join(directory, split)
    files = [os.path.join(path, file) for file in os.listdir(path)]
    data_list = []
    for file in tqdm(files):
        data = dict(np.load(file))
        data_list.append(data)

    return pd.DataFrame(data_list)

### geometric dataloader

In [13]:
from torch_geometric.data import Dataset, Data
from typing import Literal

class RuntimeDataset(Dataset):

    def __init__(self, data, mode: Literal["tile", "layout", "layout_all"]):
            super().__init__()

            self.mode = mode

            if self.mode == "layout_all":

              '''
              Approach:
              For every graph add c number of graphs to the dataset where every one of these graphs has node features that are the concat node features.
              (nodes that don't have additional features are padded with zeros)
              This has implications for training: we are now predicting a scalar value for each graph configuration (can not really train with ranking loss anymore)
              '''

              self.dataset = pd.DataFrame(columns=['graph_id', 'node_feat', 'node_opcode', 'edge_index', 'config_runtime'])

              for index, row in data.iterrows():

                # tensors to large, not memory efficient enough

                c = row['config_runtime'].shape[0]

                node_feat = torch.tensor(row['node_feat'], dtype=torch.float32) # (n, 140)
                node_config_feat = torch.tensor(row['node_config_feat'], dtype=torch.float32) # (c, nc, 18)

                node_feat = repeat(node_feat, 'n f -> r n f', r = c) # repeats every node c times (c, n, 140)
                node = torch.zeros((c, node_feat.shape[1], 18)) # (c, n, 18)

                node = torch.cat((node_feat, node), dim=2)

                for j in range(node_config_feat.shape[1]):
                  node[:,j,18:] = node_config_feat[:,j,:]

                for i in range(c):

                  graph = {'graph_id': index,
                           'node_feat': node[i],
                           'node_opcode': row['node_opcode'],
                           'edge_index': row['edge_index'],
                           'config_runtime': row['config_runtime'][i]
                           }
                  self.dataset.append(graph, ignore_index=True)

            else:
              self.dataset = data
              self.mode = mode


    def len(self):
        return len(self.dataset)

    def get(self, index):
        data_row = self.dataset.loc[index]
        if self.mode == "tile":
          normalized_runtime = torch.tensor(data_row['config_runtime'] / data_row['config_runtime_normalizers'], dtype=torch.float32)
          return Data(
            node_feat=torch.tensor(data_row['node_feat'], dtype=torch.float32),
            edge_index=torch.tensor(data_row['edge_index'], dtype=torch.long).t().contiguous(),
            node_opcode=torch.tensor(data_row['node_opcode'], dtype=torch.int32),
            config_feat=torch.tensor(data_row['config_feat'], dtype=torch.float32),
            y=normalized_runtime, # TODO: rename
            number_configs=torch.tensor([len(data_row['config_runtime'])]) # needed to match config_feat to the corresponding graph in the batch
          )


        else:

          data_row = self.dataset.loc[index]

          return Data(
            graph_id = torch.tensor(data_row['graph_id'], dtype=torch.long),
            node_feat=data_row['node_feat'],
            edge_index=torch.tensor(data_row['edge_index'], dtype=torch.long).t().contiguous(),
            node_opcode=torch.tensor(data_row['node_opcode'], dtype=torch.int32),
            config_runtime=torch.tensor(data_row['config_runtime'], dtype=torch.float32),
            node_config_ids = torch.tensor(data_row['node_config_ids'], dtype=torch.long)
          )

In [14]:
from torch_geometric.loader import DataLoader

def runtime_data_loader(dataset: RuntimeDataset, batch_size=32, shuffle=True):
    '''
       dataset: RuntimeDataset, containing data
    '''

    data_loader = DataLoader(dataset, batch_size, shuffle=True)
    return data_loader

In [15]:
import random

def layout_loader(data, indicies, sample_size, batch_size=32):

  df = pd.DataFrame(columns=['graph_id', 'node_feat', 'node_opcode', 'edge_index', 'config_runtime'])

  for index, row in data.iterrows():

    c = row['config_runtime'].shape[0]



    node_feat = torch.tensor(row['node_feat'], dtype=torch.float32) # (n, 140)
    node_config_feat = torch.tensor(row['node_config_feat'], dtype=torch.float32) # (c, nc, 18)

    node = torch.zeros(node_feat.shape[0], 18)
    node = torch.cat((node_feat, node), dim=1)


    for _ in range(min(sample_size, c)):
      rand_config = random.randint(0, c-1)
      indicies[index].append(rand_config)

      for i in range(node_config_feat.shape[1]):
        node[row['node_config_ids'][i],:18] = node_config_feat[rand_config, i, :]

      # debugging
      # print(f"graph: {index}, config: {rand_config}, value: {row['config_runtime'][rand_config]}")

      next_row = {'graph_id': index,
                  'node_feat': node,
                  'node_opcode': row['node_opcode'],
                  'edge_index': row['edge_index'],
                  'config_runtime': row['config_runtime'][rand_config],
                  'node_config_ids': row['node_config_ids']
                  }
      df = pd.concat([df, pd.DataFrame([next_row])], ignore_index=True)

  dataset = RuntimeDataset(df, "layout")

  return indicies, runtime_data_loader(dataset, batch_size=batch_size)


## Models

In [16]:
from torch_geometric.data import Batch
class Tile_GNN(nn.Module):

    def __init__(self, len_opcode_embedd, hidden_dim, output_dim, num_layers):
        super(Tile_GNN, self).__init__()

        self.len_opcode = len_opcode_embedd
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        # Embeddings
        self.embedding_layer = nn.Embedding(num_embeddings = 120, embedding_dim=len_opcode_embedd)

        #GNN
        input_dim = len_opcode_embedd + 140
        layers = nn.ModuleList()
        layers.append(GCNConv(input_dim, hidden_dim))
        for _ in range(num_layers - 2): # TODO list comp, ReLU
            layers.append(GCNConv(hidden_dim, hidden_dim))
        layers.append(GCNConv(hidden_dim, output_dim))

        self.conv = layers

        # Linear
        self.linear = nn.Sequential(
            nn.Linear(output_dim + 24, 48),
            nn.ReLU(),
            nn.Linear(48, 48),
            nn.ReLU(),
            nn.Linear(48, 1)
        )

    def forward(self, data):

        opcode_embedd = self.embedding_layer(data['node_opcode']) # (n,) -> (n,len_opcode_embedd)

        x = torch.cat((opcode_embedd, data['node_feat']), dim=1) # [(n, len_opcode_embedd), (n,140)] -> (n, len_opcode_embedd + 140 + 18)

        for layer in self.conv:
          x = layer(x, data['edge_index'])
          x = torch.relu(x)

        # differ two cases:
        # 1) batched data used for training
        # 2) single graph Data object used for inference

        if isinstance(data, Batch):

          '''
          The geometric data loader will take batch_size number of graphs. Then it will take all nodes in all these graphs and fuse them together into one graph.
          On this fuesed graph it will perform the convolution to calculate all the node embeddings at once.
          To apply the linear layer we have to seperate out all the graphs out of the batch again.
          '''

          # tensor used to store config predictions for each graph
          configs = torch.empty(0,1).to(device)

          # used to retrieve the config_feat tensors for each graph
          total = 0

          for graph_ind in range(data.num_graphs):

            # using a mask to gather all nodes that belong to the graph_ind-th graph
            node_indices = (data.batch == graph_ind).nonzero(as_tuple=True)[0]
            # first dimension is the number of nodes in the graph with index graph_ind, second dimensions is the feature dimension of the convolution (n, output_dim)
            graph_nodes = x[node_indices]

            # reduce node embeedings to get a graph embedding
            temp = reduce(graph_nodes, 'n f -> f', 'mean')

            # number of configurations for the graph_ind-th graph
            c = data.number_configs[graph_ind]

            # config_feat for the graph_ind-th graph
            graph_config_feat = data.config_feat[total:total+c]

            total += c

            # concatinating graph embedding with config_feat
            temp = repeat(temp, 'f -> r f', r=c)
            temp = torch.cat((temp, graph_config_feat), dim=1)


            # apply linear layer to tensor with shape (c, output_dim+24)
            temp = self.linear(temp)

            # add calculated runtimes to configs
            configs = torch.cat((configs, temp), dim=0)

          configs = rearrange(configs, 'f 1 -> f')

          return configs

        # only working with one graph object

        else:

          x = reduce(x, 'n f -> f', 'mean') # (n, output_dim) -> (output_dim, )

          x = repeat(x, 'f -> r f', r=len(data['config_feat'])) # (output_dim,) -> (c, output_dim)

          x = torch.cat((x, data['config_feat']), dim=1) # [(x, output_dim), (c,24)] -> (c, output_dim + 24)

          x = self.linear(x)

          x = rearrange(x, 'f 1 -> f')

        return x

In [17]:
from torch_geometric.data import Batch
class Layout_GNN(nn.Module):

    def __init__(self, len_opcode_embedd, hidden_dim, output_dim, num_layers):
        super(Layout_GNN, self).__init__()

        self.len_opcode = len_opcode_embedd
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        # Embeddings
        self.embedding_layer = nn.Embedding(num_embeddings = 120, embedding_dim=len_opcode_embedd)

        #GNN
        input_dim = len_opcode_embedd + 140 + 18
        layers = nn.ModuleList()
        layers.append(GCNConv(input_dim, hidden_dim))
        for _ in range(num_layers - 2): # TODO list comp, ReLU
            layers.append(GCNConv(hidden_dim, hidden_dim))
        layers.append(GCNConv(hidden_dim, output_dim))

        self.conv = layers

        # Linear
        self.linear = nn.Sequential(
            nn.Linear(output_dim, 48),
            nn.ReLU(),
            nn.Linear(48, 48),
            nn.ReLU(),
            nn.Linear(48, 1)
        )

    def forward(self, data):

        opcode_embedd = self.embedding_layer(data['node_opcode']) # (n,) -> (n,len_opcode_embedd)

        x = torch.cat((opcode_embedd, data['node_feat']), dim=1) # [(n, len_opcode_embedd), (n,140 + 18)] -> (n, len_opcode_embedd + 140 + 18)

        for layer in self.conv:
          x = layer(x, data['edge_index'])
          x = torch.relu(x)

        # differ two cases:
        # 1) batched data used for training
        # 2) single graph Data object used for inference

        if isinstance(data, Batch):

          '''
          The geometric data loader will take batch_size number of graphs. Then it will take all nodes in all these graphs and fuse them together into one graph.
          On this fuesed graph it will perform the convolution to calculate all the node embeddings at once.
          To apply the linear layer we have to seperate out all the graphs out of the batch again.
          '''

          # tensor used to store config predictions for each graph
          configs = torch.empty(0).to(device)

          for graph_ind in range(data.num_graphs):

            # using a mask to gather all nodes that belong to the graph_ind-th graph
            node_indices = (data.batch == graph_ind).nonzero(as_tuple=True)[0]
            # first dimension is the number of nodes in the graph with index graph_ind, second dimensions is the feature dimension of the convolution (n, output_dim)
            graph_nodes = x[node_indices]

            # reduce node embeedings to get a graph embedding
            temp = reduce(graph_nodes, 'n f -> f', 'mean')
            # apply linear layer to tensor with shape (c, output_dim+24)
            temp = self.linear(temp)


            # add calculated runtimes to configs
            configs = torch.cat((configs, temp), dim=0)

          return configs

        # only working with one graph object

        else:

          x = reduce(x, 'n f -> f', 'mean') # (n, output_dim) -> (output_dim, )

          x = self.linear(x)

        return x

## Train Loop

### Tiles

In [18]:
def MSE_training_tile(model: Tile_GNN, dataloader: DataLoader, epochs: int, lr=0.01):
    '''
    Training model using MSE
    '''

    print(type(model))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    #optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)


    loss_fn = torch.nn.MSELoss()

    model.to(device)
    model.train()

    for epoch in tqdm(range(epochs)):
        for batch, data in enumerate(dataloader):
            data = data.to(device)
            optimizer.zero_grad()
            x_pred = model(data)
            #print(f"shapes: {x_pred.shape}, {data['y'].shape}")
            loss = loss_fn(x_pred, data['y'])/len(data['y'])
            loss.backward()
            optimizer.step()
            #if batch % 200 == 0:
            #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, batch+1, len(dataloader), loss.item()))

In [19]:
def no_batch_training_tile(model, dataset, epochs, lr=0.01):
  '''
  Training model using MSE
  '''

  print(type(model))
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
  #optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)


  loss_fn = torch.nn.MSELoss()

  model.to(device)
  model.train()
  for epoch in tqdm(range(epochs)):
      for num, graph in enumerate(dataset):
          graph = graph.to(device)
          optimizer.zero_grad()
          x_pred = model(graph)
          #print(f"shapes: {x_pred.shape}, {data['y'].shape}")
          loss = loss_fn(x_pred, graph['y'])/len(graph['y'])
          loss.backward()
          optimizer.step()
          #if num % 500 == 0:
          #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, num+1, len(dataset), loss.item()))

### Layout

In [20]:
'''
Problem: Too many graphs in the layout dataset make it hard to train model.
More precisely the problem is not the number of graphs but more the number of configs for each graph.
One possible way to mittigate this problem is to train only on a selection of graph configurations.
Want to experiment with different selection procedures find select graphs and their configuratoins that bring the most value.
'''

def MSE_training_layout(model: Layout_GNN, dataset, epochs: int, sample_size=50, batch_size=32, lr=0.01):
    '''
    Training model using MSE
    '''

    print(type(model))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    #optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)


    loss_fn = torch.nn.MSELoss()

    model.to(device)
    model.train()
    indicies = [[] for _ in range(len(dataset))]

    for epoch in tqdm(range(epochs)):

        indicies, dataloader = layout_loader(dataset, indicies, sample_size=sample_size, batch_size=batch_size)

        for batch, data in enumerate(dataloader):
            data = data.to(device)
            optimizer.zero_grad()
            x_pred = model(data)
            loss = loss_fn(x_pred, data['config_runtime'])
            loss.backward()
            optimizer.step()
            #if batch % 200 == 0:
            #    print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, batch+1, len(dataloader), loss.item()))

## Validation
TODO:
calculate relative weight of layout and tile data
validate sample submission and see if it matches their score

In [21]:
from torch_geometric.data import Data

def validate_model_tiles(model, dataset):


    # Make predictions
    predictions = []
    model.to(device)
    model.eval()

    for tile in tqdm(dataset):
        tile.to(device)
        out = model(tile)
        predictions.append(torch.sort(out).indices)

    # Calculate score
    score = 0.0

    for i, tile in tqdm(enumerate(dataset), total=len(dataset)):
        best_prediction = min([dataset[i]["y"][pred_ind] for pred_ind in predictions[i][:5]])
        best_total = min(dataset[i]["y"])
        score += 2.0 - best_prediction / best_total

    avg_score = score / len(dataset)
    print("Score:", avg_score)
    return avg_score

In [22]:
from torch_geometric.data import Data

def validate_model_layout(model, dataset):


    # Make predictions
    predictions = []
    model.to(device)
    model.eval()

    for graph_ind in range(len(dataset)):
      # c = graph['config_runtime'].shape[0]
      c = dataset.iloc[graph_ind]['config_runtime'].shape[0]
      graph_pred = []
      node_config_ids = dataset.iloc[graph_ind]['node_config_ids']
      node_config_feat = torch.tensor(dataset.iloc[graph_ind]['node_config_feat'], dtype=torch.float32)
      node_feat = torch.tensor(dataset.iloc[graph_ind]['node_feat'], dtype=torch.float32)
      node = torch.zeros((node_feat.shape[0], 18))
      node = torch.cat((node_feat, node), dim=1)

      for config in tqdm(range(c)):
        node_config = node.clone()

        for i in range(node_config_feat.shape[1]):
          node_config[node_config_ids[i], :18] = node_config_feat[config,i,:]
        x_pred = model({
            'node_feat': node_config,
            'edge_index': torch.tensor(dataset.iloc[graph_ind]['edge_index'], dtype=torch.long).t().contiguous(),
            'node_opcode': torch.tensor(dataset.iloc[graph_ind]['node_opcode'], dtype=torch.long),
            'node_config_ids': torch.tensor(dataset.iloc[graph_ind]['node_config_ids'], dtype=torch.float32)
        })

        graph_pred.append(x_pred)

      predictions.append(graph_pred)

      del c, graph_pred, node_config_ids, node_config_feat, node_feat, node

    # Calculate score
    score = 0.0

    for i, tile in tqdm(enumerate(dataset), total=len(dataset)):
        best_prediction = min([dataset[i]["y"][pred_ind] for pred_ind in predictions[i][:5]])
        best_total = min(dataset[i]["y"])
        score += 2.0 - best_prediction / best_total

    avg_score = score / len(dataset)
    print("Score:", avg_score)
    return avg_score

## Create Models

### Tile

In [23]:
df_train = load_data_to_df(tile_xla, "train")
df_valid = load_data_to_df(tile_xla, "valid")

100%|██████████| 5709/5709 [02:29<00:00, 38.16it/s] 
100%|██████████| 673/673 [00:13<00:00, 49.12it/s] 


In [24]:
train_dataset = RuntimeDataset(df_train, mode="tile")
valid_dataset = RuntimeDataset(df_valid, mode="tile")
data_loader = runtime_data_loader(train_dataset, batch_size=64)
len(data_loader)

90

In [71]:
model = Tile_GNN(len_opcode_embedd=8, hidden_dim=60, output_dim=30, num_layers=5)

In [74]:
MSE_training_tile(model, data_loader, 40, lr=0.01)

<class '__main__.Tile_GNN'>


100%|██████████| 40/40 [11:36<00:00, 17.41s/it]


In [79]:
MSE_training_tile(model, data_loader, 10, lr=0.01)

<class '__main__.Tile_GNN'>


100%|██████████| 10/10 [03:03<00:00, 18.31s/it]


In [75]:
validate_model_tiles(model, train_dataset)

100%|██████████| 5709/5709 [00:33<00:00, 172.36it/s]
100%|██████████| 5709/5709 [01:04<00:00, 87.97it/s]

Score: tensor(0.7938)





tensor(0.7938)

In [80]:
validate_model_tiles(model, valid_dataset)

100%|██████████| 673/673 [00:04<00:00, 156.37it/s]
100%|██████████| 673/673 [00:07<00:00, 90.37it/s]

Score: tensor(0.8032)





tensor(0.8032)

In [77]:
sum(p.numel() for p in model.parameters())

47561

In [78]:
from datetime import datetime
from pytz import timezone

cest = timezone('Europe/Berlin')
now = datetime.now(timezone('UTC'))
now_cest = now.astimezone(cest)
now_str = now_cest.strftime('%Y-%m-%d_%H:%M:%S')

torch.save(model, '/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/submissions/models/' + now_str)

In [None]:
Tile_model = torch.load('/content/drive/MyDrive/google-tpu/predict-ai-model-runtime/submissions/models/2023-09-22_23:01:26')

## Testing

### Tiles

In [None]:
import sys
gb = sys.getsizeof(df_train)/1024**3
print(f"{gb} GB")
print(type(df_train))
print(len(df_train))

1.2677960405126214 GB
<class 'pandas.core.frame.DataFrame'>
5709


In [None]:
import sys
gb = sys.getsizeof(df_valid)/1024**3
print(f"{gb} GB")
print(type(df_valid))
print(len(df_valid))

0.12631069403141737 GB
<class 'pandas.core.frame.DataFrame'>
676


In [None]:
del df_valid
del df_train

### Layout

In [None]:
print("tile")
print("xla")
! cd data/npz_all/npz/tile/xla && du -sh test && du -sh train && du -sh valid
print("layout")
print("nlp")
print("default")
! cd data/npz_all/npz/layout/nlp/default && du -sh test && du -sh train && du -sh valid
print("random")
! cd data/npz_all/npz/layout/nlp/random && du -sh test && du -sh train && du -sh valid
print("xla")
print("default")
! cd data/npz_all/npz/layout/xla/default && du -sh test && du -sh train && du -sh valid
print("random")
! cd data/npz_all/npz/layout/xla/random && du -sh test && du -sh train && du -sh valid

tile
xla
17M	test
159M	train
17M	valid
layout
nlp
default
4.4M	test
2.2G	train
249M	valid
random
4.6M	test
2.3G	train
251M	valid
xla
default
9.7M	test
375M	train
45M	valid
random
11M	test
358M	train
44M	valid


In [None]:
df_xla_default_valid = load_data_to_df(layout_xla_default, "valid")

100%|██████████| 7/7 [00:09<00:00,  1.35s/it]


In [None]:
import sys
gb = sys.getsizeof(df_xla_default_train)/1024**3
print(f"{gb} GB")
print(type(df_xla_default_train))
print(len(df_xla_default_train))

3.9731383491307497 GB
<class 'pandas.core.frame.DataFrame'>
7


In [None]:
model = Layout_GNN(8,8,8,2)
MSE_training_layout(model, df_xla_default_valid, epochs=3, sample_size=50, batch_size=32, lr=0.01)

<class '__main__.Layout_GNN'>


100%|██████████| 3/3 [00:40<00:00, 13.60s/it]


In [None]:
validate_model_layout(model, df_xla_default_valid)

<class 'pandas.core.frame.DataFrame'>


  0%|          | 0/7 [00:00<?, ?it/s]

In [None]:
del df_xla_default_valid