NOTE: This notebook assumes that you have downloaded the competition test data and saved it in the `./data/speed-and-structure-test-data` directory.


# Speed and Structure Competition

## Part 4: Prediction on Test Data

---

In [None]:
import os
from glob import glob

import torch
import torch.nn as nn
import numpy as np
from tqdm.auto import tqdm
from scipy.interpolate import interp1d

from config import cfg
from model import Net, EnsembleModel

Make sure the paths below are correct. `TEST_DATASET` must end with a `*` wildcard (e.g. `"./data/speed-and-structure-test-data/*"`), because `glob` is used to find the individual sample directories.

In [None]:
# Glob pattern that matches every sample directory of the test set.
TEST_DATASET = "./data/speed-and-structure-test-data/*"
# Directory the submission archive is written to.
OUTPUT_DIR = "./"
# Directory containing the checkpoints of the ensemble members.
MODEL_DIR = "./model_checkpoints"
# Checkpoint naming convention:
# best_model_{seed}_{individual MAPE score on the leaderboard}.pt
MODEL_PATHS = [
    f"{MODEL_DIR}/{checkpoint}"
    for checkpoint in (
        "best_model_43_023536.pt",
        "best_model_44_023772.pt",
        "best_model_45_023476.pt",
    )
]

In [None]:
def _load_checkpoint(checkpoint_path):
    """Build a ``Net`` from ``cfg`` and load the weights stored at ``checkpoint_path``.

    The network is created without pretrained backbone weights, moved to
    ``cfg.device``, populated from the checkpoint's state dict, and switched
    to evaluation mode before being returned.
    """
    net = Net(
        backbone=cfg.backbone,
        pretrained=False,
        fuse_ch=cfg.fuse_ch,
        one_channel=cfg.one_channel,
        norm_layer=cfg.norm_layer,
        dropout=cfg.dropout,
        y_min_max_norm=cfg.y_min_max_norm,
        horizontal_tta=cfg.horizontal_tta,
    ).to(cfg.device)
    # Map the stored tensors straight onto the inference device.
    state_dict = torch.load(checkpoint_path, map_location=cfg.device)
    net.load_state_dict(state_dict)
    net.eval()
    return net


# One network per checkpoint, in the order given by MODEL_PATHS.
models = [_load_checkpoint(checkpoint_path) for checkpoint_path in MODEL_PATHS]

# Mean is used for the ensemble: median would theoretically be better,
# but mean had a better leaderboard score.
model = EnsembleModel(models, ensemble_method="mean")

In [None]:
# Collect the sample directories matched by the TEST_DATASET glob pattern.
# - Non-directory matches (hidden files, archives, ...) are dropped, because
#   every entry is later used as a directory containing receiver_data_src_*.npy.
# - glob returns matches in arbitrary order; sorting makes runs reproducible.
test_paths = sorted(path for path in glob(TEST_DATASET) if os.path.isdir(path))
print("Number of test samples:", len(test_paths))

In [None]:
# Source indices; each sample directory is expected to contain one
# receiver_data_src_{i}.npy file per entry.
source_coordinates = [1, 75, 150, 225, 300]
# Maps sample id (name of the sample directory) -> clipped float64 prediction.
submission = {}
for test_path in tqdm(test_paths):

    # Derive the id with os.path instead of splitting on "/": this also works
    # with Windows separators and with a trailing path separator, both of
    # which would otherwise yield a wrong (or empty, hence colliding) key.
    sample_id = os.path.basename(os.path.normpath(test_path))

    # Load input data
    rec_data = []
    for i in source_coordinates:
        rec = np.load(os.path.join(test_path, f"receiver_data_src_{i}.npy"))

        if cfg.target_len is not None:
            original_rows = rec.shape[0]
            target_rows = cfg.target_len
            x_original = np.linspace(0, 1, original_rows)
            x_new = np.linspace(0, 1, target_rows)

            # Interpolate along axis 0 (rows), keeping columns unchanged
            interpolator = interp1d(x_original, rec, axis=0, kind='linear')
            rec = interpolator(x_new).astype(np.float32)
            # Zero-pad the columns on the right up to a width of 32.
            # NOTE(review): padding (and the float32 cast) only happen when
            # resampling is enabled -- confirm that this is intended.
            rec = np.pad(rec, ((0, 0), (0, 32 - rec.shape[1])), mode='constant', constant_values=0)

        rec_data.append(rec)

    # (B, 1, T, 32*5) or (B, 5, T, 32) once the batch dimension is added below
    if cfg.one_channel:
        # Place the sources side by side along the column axis, single channel.
        x = np.expand_dims(np.concatenate(rec_data, axis=1), 0)
    else:
        # One channel per source.
        x = np.stack(rec_data, axis=0)

    if cfg.x_norm:
        x = (x - cfg.x_mean) / cfg.x_std

    # Inference
    x = torch.from_numpy(x).to(cfg.device)
    x = x.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        y_pred = model(x)

    # Undo the target normalisation(s) enabled in the config.
    if cfg.y_norm:
        y_pred = y_pred * cfg.y_std + cfg.y_median

    if cfg.y_min_max_norm:
        y_pred = y_pred * (cfg.y_max - cfg.y_min) + cfg.y_min

    # Take the first (only) element of the batch and convert to float64.
    prediction = y_pred[0].cpu().numpy().astype(np.float64)

    # clip prediction to the [cfg.y_min, cfg.y_max] range
    prediction = np.clip(prediction, cfg.y_min, cfg.y_max)

    submission[sample_id] = prediction

In [None]:
# Write all predictions into a single .npz archive, one array per sample id.
# The output directory is created first so a fresh checkout does not fail, and
# os.path.join avoids the doubled separator that plain string formatting
# produces when OUTPUT_DIR already ends with "/".
if OUTPUT_DIR:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
np.savez(os.path.join(OUTPUT_DIR, "speed-and-structure-submission.npz"), **submission)