In [1]:
import time
import os
import sys
import random
import copy
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import hydra
from omegaconf import DictConfig, OmegaConf
sys.path.append(r'C:\Users\34609\VisualStudio\TFG')  
from tasks.EpicKitchens.dataset.dataset import Dataset
from models.model_v1 import Model

In [2]:
# @hydra.main(config_path=r'C:\Users\34609\VisualStudio\TFG\configs', config_name='model_v1')
# def cfg_setup(cfg: DictConfig):
#     return cfg

In [16]:
def train_model(model, dataloaders, criterion, optimizer, device, num_epochs=25, print_batch=50):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase. After
    each validation phase the weights are snapshotted if the validation
    accuracy improved; the best snapshot is restored before returning.

    Args:
        model: Module called as ``model(inputs)``. Its output is treated as
            per-class scores of shape ``(batch, num_classes)``.
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries, each an
            iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``; must return
            a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device every batch is moved to before the forward pass. The
            model itself is NOT moved here; the caller must place it.
        num_epochs: Number of full train + val passes.
        print_batch: Print running training metrics every ``print_batch``
            batches. ``0`` or ``None`` disables the progress lines.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs-1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain int, so an empty loader cannot break the maths below
            total_videos = 0

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    # Predicted class = arg-max over the class axis (dim 1).
                    # Softmax is monotonic, so the raw scores give the same
                    # arg-max; the previous Softmax(dim=0) normalised across
                    # the batch axis and could flip predictions.
                    preds = outputs.argmax(dim=1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is a batch mean, so re-weight by batch size)
                running_loss += loss.item() * batch_size
                running_corrects += int((preds == labels).sum().item())
                total_videos += batch_size

                if is_train and print_batch and i % print_batch == 0:
                    l = running_loss / total_videos
                    acc = running_corrects / total_videos
                    print(' - Batch Number {} -> Loss: {:.3f} Accuracy: {:.3f}'.format(i, l, acc))

            # Average over the samples actually seen; this stays correct with
            # drop_last / subset samplers and avoids dividing by zero.
            seen = max(total_videos, 1)
            epoch_loss = running_loss / seen
            epoch_acc = running_corrects / seen

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [4]:
# Device passed to train_model, which moves every input/label batch onto it.
DEVICE = torch.device('cpu')

In [5]:
# Configuration object handed to Model(cfg). The key meanings are defined by
# models.model_v1, which is not visible here -- TODO confirm against that module.
cfg = OmegaConf.create(dict(
    ATTENTION='vanilla_attention',
    NUM_CLASSES=97,
    FRAME_SIZE=224,
    PATCH_SIZE=16,
    IN_CHANNELS=3,
    NUM_FRAMES=5,
    BATCH_SIZE=2,
    DEPTH=2,
    HEADS=4,
))

In [6]:
# Build the model and place it on DEVICE. train_model moves every batch to
# the device it is given but never moves the model, so they must match here.
model = Model(cfg).to(DEVICE)

In [7]:
# Gather the parameter tensors that will receive gradient updates.
params_to_update = [tensor for tensor in model.parameters() if tensor.requires_grad]
# Note: this counts parameter *tensors*, not individual scalar weights.
params = len(params_to_update)
print(params)

44


In [8]:
# for a in model.named_parameters():
#     print(a[0])

In [9]:
# Synthetic batch for a smoke test. Sizes are read from cfg so the fake data
# cannot drift out of sync with the configuration the model was built from.
data_tensor = torch.rand(
    (cfg.BATCH_SIZE, cfg.IN_CHANNELS, cfg.NUM_FRAMES, cfg.FRAME_SIZE, cfg.FRAME_SIZE)
)  # b, c, t, w, h
# One random class index in [0, NUM_CLASSES) per sample.
data_tensor_labels = torch.randint(cfg.NUM_CLASSES, (cfg.BATCH_SIZE,))

In [10]:
# Pair each synthetic clip with its label so a DataLoader can yield (inputs, labels).
train_dataset = torch.utils.data.TensorDataset(data_tensor, data_tensor_labels)

In [11]:
# Draw samples in random order each epoch.
train_sampler = torch.utils.data.sampler.RandomSampler(train_dataset)
# Batch size comes from cfg rather than a duplicated literal.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    sampler=train_sampler,
    batch_size=cfg.BATCH_SIZE,
    num_workers=1,
)


In [12]:
# Classification loss applied to the model outputs and integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam with its default hyper-parameters, over the trainable tensors only.
optimizer = optim.Adam(params=params_to_update)

In [13]:
# train_model expects one loader per phase. Both phases share train_loader
# here, so the reported "val" metrics are computed on the training data.
dataloaders = dict(train=train_loader, val=train_loader)

In [17]:
# Single-epoch run, printing running training metrics after every batch.
trained_model = train_model(
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    device=DEVICE,
    num_epochs=1,
    print_batch=1,
)

Epoch 0/0
----------
softmax output: tensor([[0.5513, 0.4831, 0.5332, 0.4948, 0.5367, 0.5355, 0.4745, 0.4822, 0.4677,
         0.4739, 0.5189, 0.4966, 0.5761, 0.5206, 0.5575, 0.5547, 0.5292, 0.5678,
         0.5095, 0.5301, 0.5057, 0.5134, 0.4865, 0.5450, 0.4766, 0.5240, 0.4816,
         0.4631, 0.5397, 0.4268, 0.6214, 0.4176, 0.5589, 0.4799, 0.5218, 0.4821,
         0.5143, 0.5708, 0.4867, 0.4796, 0.5806, 0.5516, 0.4600, 0.5426, 0.4503,
         0.4592, 0.4645, 0.4980, 0.5635, 0.5261, 0.5096, 0.5240, 0.4882, 0.4909,
         0.4342, 0.5973, 0.4810, 0.4758, 0.5281, 0.4999, 0.4866, 0.4528, 0.5053,
         0.4495, 0.4893, 0.5158, 0.5107, 0.5170, 0.4976, 0.4833, 0.5333, 0.5411,
         0.4451, 0.4787, 0.5608, 0.4873, 0.4449, 0.5070, 0.8827, 0.4631, 0.4621,
         0.5179, 0.4534, 0.4876, 0.4340, 0.5714, 0.4256, 0.4471, 0.4777, 0.5082,
         0.5343, 0.4836, 0.4767, 0.5453, 0.4936, 0.5225, 0.1487],
        [0.4487, 0.5169, 0.4668, 0.5052, 0.4633, 0.4645, 0.5255, 0.5178, 0.5323,
      