In [2]:
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as T
import wandb
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from torch.utils.data import DataLoader
from tqdm import tqdm

from nexar.data import NexarDataModule, NexarDataset, pad_to_square
from nexar.model import NexarClassifier


### Train

In [3]:
# Set random seed for reproducibility
random_seed = np.random.randint(0, 1e6)
seed_everything(random_seed, workers=True)

# Initialize trainin data module
train_df = pd.read_parquet("../data/processed/train.parquet")
datamodule = NexarDataModule(
    train_df=train_df,
    batch_size=32,
    val_size=0.1,
    transform=T.Compose([
        T.Lambda(pad_to_square),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        T.RandomHorizontalFlip(),
        T.RandomAffine(degrees=5, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=5),
    ]),
    test_transform=T.Compose([
        T.Lambda(pad_to_square),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

# Initialize model
model = NexarClassifier(
    lr=1e-3,
    hidden_layers=[],
    dropout=None,
)

# Initialize trainer
trainer = Trainer(
    max_epochs=20,
    logger=WandbLogger(project="nexar-collision-prediction-private", save_dir="../logs"),
    callbacks=[
        ModelCheckpoint(monitor="val_acc", mode="max", save_top_k=1),
        EarlyStopping(monitor="val_acc", mode="max", patience=5),
    ],
    deterministic=True,
)

# Log seed
trainer.logger.experiment.config.update({"seed": random_seed})

# Train the model
trainer.fit(model, datamodule=datamodule)
wandb.finish()


Seed set to 980000
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
[34m[1mwandb[0m: Currently logged in as: [33mmaxzw[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



  | Name               | Type              | Params | Mode 
-----------------------------------------------------------------
0 | image_backbone     | ResNet            | 11.2 M | train
1 | mask_flow_backbone | ResNet            | 11.2 M | train
2 | classifier         | Sequential        | 1.0 K  | train
3 | loss_fn            | BCEWithLogitsLoss | 0      | train
4 | train_accuracy     | BinaryAccuracy    | 0      | train
5 | val_accuracy       | BinaryAccuracy    | 0      | train
-----------------------------------------------------------------
11.2 M    Trainable params
11.2 M    Non-trainable params
22.4 M    Total params
89.416    Total estimated model params size (MB)
141       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/miniconda3/envs/nexar/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/opt/miniconda3/envs/nexar/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/opt/miniconda3/envs/nexar/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (43) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 43/43 [01:48<00:00,  0.40it/s, v_num=7ygb, train_loss_step=0.575, train_acc_step=0.833, val_loss=0.506, val_acc=0.767, train_loss_epoch=0.639, train_acc_epoch=0.641]

Metric val_acc improved. New best score: 0.767


Epoch 3: 100%|██████████| 43/43 [01:34<00:00,  0.46it/s, v_num=7ygb, train_loss_step=0.276, train_acc_step=1.000, val_loss=0.539, val_acc=0.773, train_loss_epoch=0.563, train_acc_epoch=0.724]

Metric val_acc improved by 0.007 >= min_delta = 0.0. New best score: 0.773


Epoch 4: 100%|██████████| 43/43 [01:30<00:00,  0.48it/s, v_num=7ygb, train_loss_step=0.515, train_acc_step=0.667, val_loss=0.473, val_acc=0.780, train_loss_epoch=0.575, train_acc_epoch=0.704]

Metric val_acc improved by 0.007 >= min_delta = 0.0. New best score: 0.780


Epoch 9: 100%|██████████| 43/43 [01:35<00:00,  0.45it/s, v_num=7ygb, train_loss_step=0.519, train_acc_step=0.667, val_loss=0.608, val_acc=0.733, train_loss_epoch=0.540, train_acc_epoch=0.738]

Monitored metric val_acc did not improve in the last 5 records. Best score: 0.780. Signaling Trainer to stop.


Epoch 9: 100%|██████████| 43/43 [01:35<00:00,  0.45it/s, v_num=7ygb, train_loss_step=0.519, train_acc_step=0.667, val_loss=0.608, val_acc=0.733, train_loss_epoch=0.540, train_acc_epoch=0.738]


0,1
epoch,▁▁▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇▇███
train_acc_epoch,▁▄▅▆▅██▇▆▇
train_acc_step,▁▅▆▃▅▆▅█
train_loss_epoch,█▅▅▄▄▂▁▂▂▂
train_loss_step,█▇▄▂▅▃▃▁
trainer/global_step,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇██
val_acc,▇▆▁██▂▂▆▇▅
val_loss,▂▄▇▃▁█▆▂▁▆

0,1
epoch,9.0
train_acc_epoch,0.73778
train_acc_step,0.78125
train_loss_epoch,0.54001
train_loss_step,0.47471
trainer/global_step,429.0
val_acc,0.73333
val_loss,0.6079


### Predict

In [6]:
# Load the best model
best_model_path = trainer.checkpoint_callback.best_model_path
best_model = NexarClassifier.load_from_checkpoint(best_model_path)
best_model.eval()
best_model_id = best_model_path.split("/")[3]

print(f"Best model path: {best_model_path}")
print(f"Best model id: {best_model_id}")


Best model path: ../logs/nexar-collision-prediction-private/gn6e7ygb/checkpoints/epoch=04-val_acc=0.78.ckpt
Best model id: gn6e7ygb


In [5]:
test_df = pd.read_parquet("../data/processed/test.parquet")

predictions = {}
indices = [0, 1, 2]
weights = [0.2, 0.3, 0.5]

# Get predictions for each frame
for frame_idx in indices:
    test_dataset = NexarDataset(
        test_df, 
        frame_idx=frame_idx, 
        return_label=False, 
        transform=datamodule.test_transform,
    )
    test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=False)
    
    preds = []
    for batch in tqdm(test_dataloader):
        with torch.no_grad():
            pred = model(batch)
        pred = torch.sigmoid(pred).squeeze().detach().tolist()
        preds.extend(pred)
    
    predictions[frame_idx] = preds

# Take weighted average of predictions
final_predictions = np.zeros(len(test_df))
for i, frame_idx in enumerate(indices):
    final_predictions += np.array(predictions[frame_idx]) * weights[i]
final_predictions = final_predictions / sum(weights)

# Save predictions
submission_df = pd.DataFrame({"id": test_df["id"].apply(lambda x: str(x).zfill(5)), "target": final_predictions})
submission_df.to_csv(f"../data/processed/submission_{best_model_id}.csv", index=False)
submission_df.head()


100%|██████████| 21/21 [03:36<00:00, 10.33s/it]
100%|██████████| 21/21 [03:44<00:00, 10.70s/it]
100%|██████████| 21/21 [03:43<00:00, 10.65s/it]


Unnamed: 0,id,target
0,204,0.078478
1,30,0.426809
2,146,0.323937
3,20,0.1838
4,511,0.786208
