In [1]:
# %load_ext autoreload
# %autoreload 2

In [2]:
import sys
sys.path.append('../../src')

In [3]:
import torch

from torch.utils.data import DataLoader
from torch.optim.swa_utils import SWALR
from data.scannet.utils_scannet_fast import ScanNetDataset
from DEPO.depo import depo_v3 as best_model
from training.train_depo_pose_and_flow import train, validate
from training.loss_depo import LossMixedDetermininstic
from utils.model import load_checkpoint
import numpy as np

from transformers import get_scheduler

  from .autonotebook import tqdm as notebook_tqdm


#### Data

In [4]:
train_data = ScanNetDataset(
    root_dir='/home/project/data/scans/',
    npz_path='/home/project/code/data/scannet_splits/smart_sample_train.npz',
    intrinsics_path='/home/project/ScanNet/scannet_indices/intrinsics.npz',
    calculate_flow=True
)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, drop_last=True, pin_memory=True, num_workers=4)

val_data = ScanNetDataset(
    root_dir='/home/project/data/scans/',
    npz_path='/home/project/code/data/scannet_splits/smart_sample_val.npz',
    intrinsics_path='/home/project/ScanNet/scannet_indices/intrinsics.npz',
    calculate_flow=False
)

val_loader = DataLoader(val_data, batch_size=32, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)

#### Config

In [5]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

config = dict(
    experiment_name='flow_and_pose_best',
    device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    n_epochs=5,
    n_accum_steps=4,
    batch_size=train_loader.batch_size,
    n_steps_per_epoch=len(train_loader.dataset) // train_loader.batch_size,
    swa=True,
    swa_lr=1e-4,
    n_epochs_swa=1,
    repeat_val_epoch=1,
    repeat_save_epoch=1,
    model_save_path='../../src/weights/flow_and_pose_best'
)

config['n_effective_steps_per_epoch'] = np.ceil(len(train_loader.dataset) / (train_loader.batch_size * config['n_accum_steps'])) 
config['n_warmup_steps'] = int(config['n_effective_steps_per_epoch'] * 0.2)
config['n_training_steps'] = int(config['n_effective_steps_per_epoch'] * config['n_epochs'])
config['swa_anneal_steps'] = int(config['n_effective_steps_per_epoch'] * 0.1)
config['n_steps_between_swa_updates'] = (config['n_effective_steps_per_epoch'] - config['swa_anneal_steps']) // 20


#### Model

In [6]:
model = best_model().to(config['device'])

#### Loss & Optimizer & Scheduler

In [7]:
val_loss = LossMixedDetermininstic(mode='val')
train_loss = LossMixedDetermininstic(mode='train')

In [None]:
#### opt_parameters = []
for name, module in model.named_parameters():
    if 'self_encoder' in name:
        lr = 5e-5
    else:
        lr = 1e-3
    opt_parameters.append({
        'params': module,
        'weight_decay': 0.0 if ('bias' in name) else 1e-6,
        'lr': lr
    })
    
optimizer = torch.optim.AdamW(opt_parameters)

In [None]:
class CustomSheduler:
    def __init__(self, base_scheduler, swa_scheduler, steps_per_epoch, n_epochs, swa, n_epochs_swa, **args):
        self.num_epochs = num_epochs 
        self.steps_per_epoch = steps_per_epoch
        self.base_scheduler = base_scheduler
        self.swa_scheduler = swa_scheduler
        self.swa = swa
        self.num_epochs_swa = num_epochs_swa
        self.epoch = 0
        self.step_ = 0
    
    def get_scheduler(self):
        if self.swa and (self.epoch >= self.num_epochs - self.num_epochs_swa):
            self.scheduler = self.swa_scheduler
        else:
            self.scheduler = self.base_scheduler
            
    def step(self):
        self.get_scheduler()
        self.scheduler.step()
        self.step_ += 1
        self.epoch = self.step_ // self.steps_per_epoch
        
    def get_last_lr(self):
        return self.scheduler.get_last_lr()

In [25]:
# for name,p in model.named_parameters():
#     print(p)

In [8]:
scheduler = get_scheduler(
    "cosine",    
    optimizer=optimizer,
    num_warmup_steps=config['n_warmup_steps'],
    num_training_steps=config['n_training_steps'])


# swa_scheduler = SWALR(
#     optimizer,
#     swa_lr=config['swa_lr'],
#     anneal_epochs=config['swa_anneal_steps'])

# #define scheduler
# scheduler = CustomSheduler(
#     base_scheduler,
#     swa_scheduler,
#     steps_per_epoch=steps_per_epoch, 
#     num_epochs=['n_epochs'],
#     swa=config['general']['swa'],
#     num_epochs_swa=config['general']['n_epochs_swa'] 
# )

In [17]:
for _ in range(config['n_steps_per_epoch']):
    scheduler.step()



In [26]:
scheduler.__dict__.keys()
scheduler.last_epoch

39419

In [18]:
scheduler._step_count

39420

#### Train & val

In [None]:
train(model, optimizer, scheduler, train_loss, val_loss, train_loader, val_loader, config, **config)

[34m[1mwandb[0m: Currently logged in as: [33mkovanic[0m. Use [1m`wandb login --relogin`[0m to force relogin


 11%|████████████▋                                                                                                     | 697/6250 [20:16<2:46:28,  1.80s/it]