In [1]:
# %load_ext autoreload
# %autoreload 2

In [2]:
import sys

# Make the project's `src` directory importable (path is relative to the
# notebook's working directory). The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if '../../../src' not in sys.path:
    sys.path.append('../../../src')

In [16]:
import torch

from torch.utils.data import DataLoader

from data.scannet.utils_scannet_fast import ScanNetDataset
from DEPO.depo_ablations import A5
from training.train_depo_pose_and_flow_weighted import train, validate
from training.loss_depo import LossMixedDetermininsticWeighted
from utils.model import load_checkpoint
import numpy as np

from transformers import get_scheduler

#### Data

In [4]:
# Training split: built with calculate_flow=True.
train_data = ScanNetDataset(
    npz_path='/home/project/code/data/scannet_splits/train_subset_ablations.npz',
    intrinsics_path='/home/project/ScanNet/scannet_indices/intrinsics.npz',
    root_dir='/home/project/data/scans/',
    calculate_flow=True,
)

# Shuffled training batches; the last incomplete batch is dropped.
train_loader = DataLoader(
    train_data,
    batch_size=32,
    num_workers=4,
    pin_memory=True,
    shuffle=True,
    drop_last=True,
)

# Validation split: same roots and intrinsics, but built with calculate_flow=False.
val_data = ScanNetDataset(
    npz_path='/home/project/code/data/scannet_splits/val_subset_ablations.npz',
    intrinsics_path='/home/project/ScanNet/scannet_indices/intrinsics.npz',
    root_dir='/home/project/data/scans/',
    calculate_flow=False,
)

# Validation batches keep their order and include the final partial batch.
val_loader = DataLoader(
    val_data,
    batch_size=32,
    num_workers=4,
    pin_memory=True,
    shuffle=False,
    drop_last=False,
)

#### Config

In [5]:
# Enable TF32 for CUDA matmuls and cuDNN operations.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Run configuration. The dict is handed to `train` both as a whole and
# unpacked as keyword arguments.
config = dict(
    experiment_name='A10:A5+weights',
    device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    n_epochs=10,
    n_accum_steps=1,
    batch_size=train_loader.batch_size,
    # len(train_loader) is the number of batches the loader really yields per
    # epoch, so it stays correct whatever the loader's drop_last setting is.
    n_steps_per_epoch=len(train_loader),
    swa=False,
    n_epochs_swa=None,
    n_steps_between_swa_updates=None,
    repeat_val_epoch=1,
    # NOTE(review): repeat_save_epoch (20) is larger than n_epochs (10); if
    # `train` saves every `repeat_save_epoch` epochs, no checkpoint may be
    # written during this run -- confirm against the training loop.
    repeat_save_epoch=20,
    # NOTE(review): this path uses '../../src' while sys.path is extended with
    # '../../../src' -- verify the directory depth is intended.
    model_save_path='../../src/weights/A10'
)

# Optimizer steps per epoch: batches yielded by the loader, grouped by the
# gradient-accumulation factor. Derived from len(train_loader) rather than
# ceil(len(dataset) / batch_size), which over-counts by one step per epoch when
# the loader drops its last partial batch. Stored as a plain int.
config['n_effective_steps_per_epoch'] = int(np.ceil(len(train_loader) / config['n_accum_steps']))
# Warm up over half of the first epoch.
config['n_warmup_steps'] = int(config['n_effective_steps_per_epoch'] * 0.5)
# Total scheduler horizon across all epochs.
config['n_training_steps'] = int(config['n_effective_steps_per_epoch'] * config['n_epochs'])

#### Model

In [6]:
# Instantiate the A5 model and move it to the configured device.
model = A5().to(config['device'])

# Freeze every parameter whose name contains 'self_encoder'; all other
# parameters are (re)enabled for training.
for name, p in model.named_parameters():
    p.requires_grad = 'self_encoder' not in name

#### Loss & Optimizer & Scheduler

In [10]:
# Validation loss: 'val' mode, no weights supplied.
val_loss = LossMixedDetermininsticWeighted(
    mode='val',
    weights=None,
)

# Training loss: 'train' mode with three initial weight values and no L2 term.
# NOTE(review): the meaning/scale of the weight values is defined inside
# LossMixedDetermininsticWeighted -- confirm there before changing them.
train_loss = LossMixedDetermininsticWeighted(
    mode='train',
    weights=[-2., -3., 0.],
    add_l2=False,
)

In [13]:
# AdamW over the model's parameters.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=3e-4,
    weight_decay=1e-6,
)

# Separate plain-SGD optimizer that updates only the training loss's `weights`.
weights_optimizer = torch.optim.SGD(
    [train_loss.weights],
    lr=1e-4,
)

In [14]:
# Cosine learning-rate schedule with warmup for the model optimizer; the
# warmup length and total horizon come from the config computed earlier.
scheduler = get_scheduler(
    'cosine',
    optimizer=optimizer,
    num_training_steps=config['n_training_steps'],
    num_warmup_steps=config['n_warmup_steps'],
)

#### Train & val

In [18]:
# Launch training. `config` is passed both positionally and unpacked as
# keyword arguments.
# NOTE(review): the recorded run of this cell stopped on the first step with
# "CUDA out of memory" (batch_size=32 on a 23.70 GiB GPU); consider a smaller
# batch size combined with a larger n_accum_steps.
train(
    model,
    optimizer,
    weights_optimizer,
    scheduler,
    train_loss,
    val_loss,
    train_loader,
    val_loader,
    config,
    **config,
)

  0%|                                                                                                                               | 0/312 [00:06<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 5.49 GiB (GPU 0; 23.70 GiB total capacity; 17.52 GiB already allocated; 2.29 GiB free; 18.93 GiB reserved in total by PyTorch)