# Model Exporting Notebook

This notebook takes a model and exports:

1. Test predictions
2. MC-dropout test predictions (if applicable)
3. Train/valid predictions (for downstream stacking)

In [1]:
# Identifier of the W&B run to export; also names the local checkpoint folder (models/<RUN_ID>).
RUN_ID = "1k1gvxm9"

import os
# Switch to the repository root before the project imports below; the relative
# data/ and models/ paths used later are resolved against this directory.
# NOTE(review): the `ml` package presumably imports from here too - keep this line first.
os.chdir('/root/kaggle-fast-or-slow')

from ml.layout_v1.model import GraphMLP
from ml.layout_v1.dataset import LayoutDataset
from ml.layout_v1.job.spec import PreprocessorSpec, PostprocessorSpec, JobSpec
from ml.layout_v1.job.builder import build_processors, fit_node_processor
from ml.layout_v1.job.constants import GLOBAL_POOLINGS
from ml.layout_v1.preprocessors import GlobalFeatureGenerator
import torch_geometric
import torch
from copy import deepcopy

import wandb
# "high" lets PyTorch use faster, reduced-precision float32 matmul kernels where available.
torch.set_float32_matmul_precision('high')

In [2]:
# Layout collection family to export. The train directories are only used to
# fit the node processors; the test directories hold the files to predict.
DTYPE = "xla"
TRAIN_DATA_DIRS = [
    f"data/layout/{DTYPE}/default/train",
    f"data/layout/{DTYPE}/random/train",
]
TEST_DATA_DIRS = [
    f"data/layout/{DTYPE}/default/test",
    f"data/layout/{DTYPE}/random/test",
]

# Pull the configuration that was logged for this run on Weights & Biases.
WANDB_RUN_ID = f"kaggle-fast-or-slow/{RUN_ID}"
api = wandb.Api()
run = api.run(WANDB_RUN_ID)
config = run.config

In [4]:
# Rebuild the job and processor specs from the config stored with the W&B run.
job_spec = JobSpec(**config)
preprocessor_spec = PreprocessorSpec(**config["preprocessors"])
postprocessor_spec = PostprocessorSpec(**config["postprocessors"])

preprocessors = build_processors(preprocessor_spec)
postprocessors = build_processors(postprocessor_spec)

# A node transform that exposes a `fit` attribute is fitted on the training
# directories; pre-processors first, then post-processors.
for _processors in (preprocessors, postprocessors):
    _node_transform = _processors.node_transform
    if _node_transform and hasattr(_node_transform, "fit"):
        _processors.node_transform = fit_node_processor(
            TRAIN_DATA_DIRS, _node_transform
        )

# The global feature generator is not built from the spec, so it is attached
# by hand: one generator per collection ("random" / "default"), each placed on
# its own deep copy of the shared pre-processors.
global_random_preprocessor = GlobalFeatureGenerator(DTYPE, "random", True)
global_default_preprocessor = GlobalFeatureGenerator(DTYPE, "default", True)

random_preprocessors = deepcopy(preprocessors)
random_preprocessors.global_transform = global_random_preprocessor

default_preprocessors = deepcopy(preprocessors)
default_preprocessors.global_transform = global_default_preprocessor

Fitting node processor


In [26]:
# Test Data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Arguments common to both test datasets; only the source directory and the
# collection-specific pre-transforms differ between them.
_shared_dataset_kwargs = dict(
    processed_dir="data/processed_test",
    posttransforms=postprocessors,
    multiprocess=False,
    force_reload=False,
)

default_dataset = LayoutDataset(
    directories=[f"data/layout/{DTYPE}/default/test"],
    pretransforms=default_preprocessors,
    **_shared_dataset_kwargs,
)

random_dataset = LayoutDataset(
    directories=[f"data/layout/{DTYPE}/random/test"],
    pretransforms=random_preprocessors,
    **_shared_dataset_kwargs,
)

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

In [33]:
# Read the input widths off a single processed sample (fetched once) rather
# than hard-coding them.
sample = default_dataset.get(0)
num_features = sample.x.shape[1]
num_global_features = sample.global_features.shape[1]

pooling = GLOBAL_POOLINGS[job_spec.pooling]

# Every architecture hyper-parameter comes from the job spec of the exported run.
model = GraphMLP(
    graph_input_dim=num_features,
    global_features_dim=num_global_features,
    graph_channels=job_spec.graph_channels,
    graph_layers=job_spec.graph_layers,
    linear_channels=job_spec.linear_channels,
    linear_layers=job_spec.linear_layers,
    dropout=job_spec.dropout,
    pooling_fn=pooling,
    pooling_feature_multiplier=job_spec.pooling_feature_multiplier,
    graph_conv=job_spec.graph_convolution_type,
    graph_conv_kwargs=job_spec.graph_convolution_kwargs,
    graph_norm=job_spec.graph_norm,
    linear_norm=job_spec.linear_norm,
    use_multi_edge=job_spec.use_multi_edge,
    main_block=job_spec.main_block,
    alt_block=job_spec.alt_block,
)

# Move to the device picked in the test-data cell (CUDA when available, else
# CPU) instead of assuming a GPU is present.
model = model.to(device)
model = torch_geometric.compile(model)

In [34]:
# Get the most recent checkpoint
import os
from pathlib import Path

checkpoint_dir = Path(f"models/{RUN_ID}")
paths = sorted(checkpoint_dir.iterdir(), key=os.path.getmtime)
if not paths:
    raise FileNotFoundError(f"No checkpoints found in {checkpoint_dir}")

# `paths` is ordered oldest -> newest by modification time, so the most
# recent checkpoint is the LAST entry (index 0 would be the oldest one).
most_recent = str(paths[-1].absolute())

# map_location lets a checkpoint saved on GPU load on whichever device was
# selected in the test-data cell.
state_dict = torch.load(most_recent, map_location=device)
model.load_state_dict(state_dict["model_state_dict"])
model = model.to(device)

In [8]:
def make_id_from_file(filepath: str) -> str:
    """Turn a data file path into a colon-separated identifier.

    A leading ``data/`` and a trailing ``.npz`` are stripped, every ``/test``
    substring is removed, and the remaining ``/`` separators become ``:``.

    Note: ``/test`` is removed wherever it occurs, so a path component that
    merely *starts* with ``test`` loses that prefix as well.
    """
    trimmed = filepath.removeprefix("data/").removesuffix(".npz")
    # Splitting on "/" and re-joining with ":" swaps every remaining separator.
    return ":".join(trimmed.replace("/test", "").split("/"))

In [42]:
def enable_dropout(model, verbose: bool = False):
    """Switch every dropout layer of ``model`` to training mode.

    A module counts as a dropout layer when its class name starts with
    ``Dropout``. All other modules keep their current mode, so calling this
    after ``model.eval()`` leaves only the dropout layers active.

    Args:
        model: Object exposing ``modules()`` (e.g. a ``torch.nn.Module``).
        verbose: When true, print the class name of each layer that is
            switched. Off by default so repeated calls do not flood the
            notebook output.
    """
    for module in model.modules():
        class_name = module.__class__.__name__
        if class_name.startswith('Dropout'):
            if verbose:
                print(class_name)
            module.train()

def disable_dropout(model):
    """Put every module whose class name starts with ``Dropout`` into eval mode.

    Modules with any other class name are left untouched.
    """
    dropout_layers = (
        layer for layer in model.modules()
        if type(layer).__name__.startswith('Dropout')
    )
    for layer in dropout_layers:
        layer.eval()

In [43]:
from collections import defaultdict
from tqdm.auto import tqdm
from torch_geometric.data import Batch

# results[file_id][config_idx] -> model output for that file / configuration.
results = defaultdict(dict)
# Created alongside `results`; this cell does not write to it.
mc_dropout_results = defaultdict(dict)

BATCH_SIZE = 16

# (data, file_id, config_idx) triples waiting to be run through the model.
next_batch = []

DATASETS = [random_dataset, default_dataset]

model.eval()
for dataset in DATASETS:
    last_index = len(dataset) - 1
    for i in tqdm(range(len(dataset))):
        file_path, config_idx = dataset.idx_to_source_file_and_config[i]
        file_id = make_id_from_file(file_path)
        data = dataset.get(i)
        next_batch.append((data, file_id, config_idx))

        # Keep accumulating until the batch is full or the dataset is
        # exhausted, so a trailing partial batch is still predicted.
        if len(next_batch) < BATCH_SIZE and i != last_index:
            continue

        batch_data, file_ids, config_ids = zip(*next_batch)

        with torch.no_grad():
            # Use the device selected in the test-data cell rather than a
            # hard-coded 'cuda', so the loop also runs on CPU-only hosts.
            batch = Batch.from_data_list(list(batch_data)).to(device)
            output = model(batch).flatten()

        for o, f, c in zip(output.tolist(), file_ids, config_ids):
            results[f][c] = o

        next_batch = []


  0%|          | 0/8001 [00:00<?, ?it/s]

Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout
Dropout


TorchRuntimeError: Failed running call_function <built-in function linear>(*(FakeTensor(..., device='cuda:0', size=(s0, s1)), Parameter(FakeTensor(..., device='cuda:0', size=(64, 275), requires_grad=True)), None), **{}):
a and b must have same reduction dim, but got [s0, s1] X [275, 64].

from user code:
   File "/tmp/root_pyg/tmprarypj7g.py", line 362, in forward
    x_src = x_dst = self.lin_src(x).view(-1, H, C)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/dense/linear.py", line 130, in forward
    return F.linear(x, self.weight, self.bias)

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True


In [37]:
import os
import pandas

# Name the file after the collection family actually exported (DTYPE) instead
# of a hard-coded "xla", so changing DTYPE cannot write a mislabelled file.
SAVE_ID = f"submissions/{RUN_ID}_{DTYPE}.csv"
# to_csv does not create missing parent directories.
os.makedirs(os.path.dirname(SAVE_ID), exist_ok=True)

# `results` maps file_id -> {config_idx: output}; after the transpose each row
# is a file id and each column a configuration index.
df = pandas.DataFrame(results).transpose()
df.to_csv(SAVE_ID)