In [14]:
import os
import wandb
from utils import AverageMeter, accuracy
from torch.utils.tensorboard import SummaryWriter
import os
from tqdm import tqdm
import logging
from utils import accuracy, save_checkpoint

In [15]:
from generate_dataset import TouchFolderLabel, CalandraLabel
from model import MultiModalMoCo

In [16]:
import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import random_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset

In [20]:
config={
    "epochs": 10,
    "log_every_n_epochs": 1,
    "batch_size": 256,
    "num_workers": 16,
    "data_folder": "/home/fotis/PycharmProjects/calandra_dataset/objects_split_object_wise/",
    "num_channels": 6,
    "momentum": 0.99,
    "temperature": 0.07,
    "lr": 0.0001,
    "weight_decay": 1e-6,
    "nn_model": 'resnet18',
    "intra_dim": 128,
    "inter_dim": 128,
    "weight_inter_tv": 1,
    "weight_inter_vt": 1,
    "weight_intra_vision": 1,
    "weight_intra_tactile": 1,
    "pretrained_encoder": False,
    "use_wandb": False
}


In [21]:
from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset

dataset = ContrastiveLearningDataset(root_folder='calandra_objects_split_object_wise')
train_dataset = dataset.get_dataset('calandra_label', 2)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True,
                                           num_workers=config['num_workers'], drop_last=False, pin_memory=True)

In [23]:
# load model
model = MultiModalMoCo(n_channels=config['num_channels'], m=config['momentum'], T=config['temperature'],
                       intra_dim=config['intra_dim'], inter_dim=config['inter_dim'], nn_model=config['nn_model'],
                       weight_inter_tv=config['weight_inter_tv'], weight_inter_vt=config['weight_inter_vt'],
                       weight_intra_vision=config['weight_intra_vision'], weight_intra_tactile=config['weight_intra_tactile'],
                       pretrained_encoder=config['pretrained_encoder'])



In [25]:
# define loss function (criterion) and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training with gpu: {device}.")
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                       last_epoch=-1)
writer = SummaryWriter()
logging.basicConfig(filename=os.path.join(writer.log_dir, 'training.log'), level=logging.DEBUG)
criterion = torch.nn.CrossEntropyLoss().to(device)
# Set number of training epochs
logging.info(f"Start MViTaC training for {config['epochs']} epochs.")
logging.info(f"Training with gpu: {device}.")
best_acc = 0
if config['use_wandb']:
    wandb.init(project="mvitac_pretraining", config=config)
    # name the model
    wandb.run.name = f"{config['nn_model']}_lr_{config['lr']}_batch_{config['batch_size']}_epochs_{config['epochs']}"
    
for epoch in range(config['epochs']):
    loss_epoch = 0
    pbar = tqdm(train_loader)  # Wrap train_loader with tqdm
    for idx, values in enumerate(pbar):  # Use enumerate to get idx
        x_vision_q, x_vision_k, x_tactile_q, x_tactile_k, label = values
        model.train()
        
        # send to device
        x_vision_q = x_vision_q.to(device)
        x_vision_k = x_vision_k.to(device)

        x_tactile_q = x_tactile_q.to(device)
        x_tactile_k = x_tactile_k.to(device)

        # Forward pass to get the loss
        loss, logits, labels = model(x_vision_q, x_vision_k, x_tactile_q, x_tactile_k, epoch, idx, len(train_loader))
        loss_epoch += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    avg_loss = loss_epoch / len(train_loader)
    if epoch % config['log_every_n_epochs'] == 0:
        top1, top5 = accuracy(logits, labels, topk=(1, 5))
        writer.add_scalar('loss', avg_loss, global_step=epoch)
        writer.add_scalar('acc/top1', top1[0], global_step=epoch)
        writer.add_scalar('acc/top5', top5[0], global_step=epoch)
        writer.add_scalar('learning_rate', scheduler.get_last_lr()[0], global_step=epoch)
        if top1[0] > best_acc:
            best_acc = top1[0]
            save_checkpoint({
                'epoch': epoch,
                'arch': 'resnet18',
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=True, filename=os.path.join(writer.log_dir, f'checkpoint_best.pth.tar'))
        # torch.save(state, f'models/calandra/model_{args.task}_{epoch}_{args.batch_size}_best_object_wise_05_t05.pth')
        wandb.save('models/calandra/model_{}_best_object_wise.pth'.format(epoch))
        
    # warmup for the first 10 epochs
    if epoch >= 10:
        scheduler.step()
    logging.debug(f"Epoch: {epoch}\tLoss: {loss}\tTop1 accuracy: {top1[0]}")

    logging.info("Training has finished.")
    # save model checkpoints
    checkpoint_name = 'checkpoint_{:04d}.pth.tar'.format(config['epochs'])
    save_checkpoint({
        'epoch': config['epochs'],
        'arch': config['nn_model'],
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, is_best=False, filename=os.path.join(writer.log_dir, checkpoint_name))
    logging.info(f"Model checkpoint and metadata has been saved at {writer.log_dir}.")

Training with gpu: cuda.


  0%|          | 0/4 [00:20<?, ?it/s]


TypeError: MultiModalMoCo.forward() takes 5 positional arguments but 8 were given