In [1]:
import time
import os
import sys
import random
import copy
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import hydra
from omegaconf import DictConfig, OmegaConf
sys.path.append(r'C:\Users\34609\VisualStudio\TFG')  
# from tasks.EpicKitchens.dataset.dataset import Dataset
from dataset.dataset import Dataset
from models.model_v1 import Model

In [2]:
# @hydra.main(config_path=r'C:\Users\34609\VisualStudio\TFG\configs', config_name='model_v1')
# def cfg_setup(cfg: DictConfig):
#     return cfg

In [24]:
def train_model(model, dataloaders, criterion, optimizer, device, num_epochs=25, print_batch=50):
    since = time.time()
    
    softmax = torch.nn.Softmax(dim=0)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs-1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            total_videos = 0

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(softmax(outputs), 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                total_videos += len(outputs)
                
                if i % print_batch == 0 and phase == 'train':
                    l = running_loss/total_videos
                    acc = running_corrects.cpu().numpy()/total_videos
                    print(' - Batch Number {} -> Loss: {:.3f} Accuracy: {:.3f}'.format(i, l, acc))
                    
                    print(f'outputs: {softmax(outputs)}')
                    print(f'labels: {labels}')

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [20]:
DEVICE = torch.device('cpu')

In [7]:
cfg = OmegaConf.create({
    'model': {
        'ATTENTION': 'vanilla_attention',
        'NUM_CLASSES': 97,
        'PATCH_SIZE': 16,
        'DEPTH': 2,
        'HEADS': 4
    },
    'dataset': {
        'FRAME_SIZE': 224,
        'IN_CHANNELS': 3,
        'NUM_FRAMES': 5
    },
    'training': {
        'BATCH_SIZE': 2
    }
    
})

In [8]:
model = Model(cfg)

In [9]:
params_to_update = []
params = 0
for param in model.parameters():
    if param.requires_grad:
        params_to_update.append(param)
        params += 1
print(params)

43


In [10]:
# for a in model.named_parameters():
#     print(a[0])

In [11]:
data_tensor = torch.rand((2, 3, 5, 224, 224)) # b, c, t, w, h
data_tensor_labels = torch.randint(97, (2,))

In [12]:
train_dataset = torch.utils.data.TensorDataset(data_tensor, data_tensor_labels)

In [13]:
train_sampler = torch.utils.data.sampler.RandomSampler(train_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset, sampler=train_sampler, batch_size=2,
                                                num_workers=1)


In [14]:
optimizer = optim.Adam(params_to_update)
criterion = nn.CrossEntropyLoss()

In [15]:
dataloaders = {
    'train': train_loader, 
    'val': train_loader
}

In [25]:
trained_model = train_model(model, dataloaders, criterion, optimizer, DEVICE, num_epochs=1, print_batch=1)

Epoch 0/0
----------
 - Batch Number 0 -> Loss: 43.998 Accuracy: 1.000
outputs: tensor([[0.5296, 0.7090, 0.6427, 0.3647, 0.4870, 0.7573, 0.4479, 0.4296, 0.4389,
         0.4710, 0.3585, 0.7390, 0.4002, 0.5934, 0.5322, 0.5462, 0.6322, 0.3997,
         0.6090, 0.5700, 0.6154, 0.4825, 0.4290, 0.4957, 0.6765, 0.5614, 0.3607,
         0.6110, 0.4891, 0.6411, 0.5251, 0.6508, 0.4663, 0.6603, 0.0045, 0.3508,
         0.5474, 0.4620, 0.6616, 0.4146, 0.9568, 0.5788, 0.4958, 0.4838, 0.4910,
         0.5098, 0.5090, 0.4901, 0.3314, 0.3461, 0.7003, 0.4398, 0.5305, 0.3885,
         0.6426, 0.5825, 0.6065, 0.6298, 0.5849, 0.5245, 0.4766, 0.3639, 0.5429,
         0.4413, 0.5435, 0.4659, 0.4773, 0.5984, 0.4734, 0.5491, 0.4731, 0.6286,
         0.5726, 0.6966, 0.3935, 0.3834, 0.4651, 0.5973, 0.6502, 0.6750, 0.4954,
         0.5689, 0.4962, 0.3939, 0.6080, 0.3047, 0.5025, 0.5090, 0.5553, 0.4936,
         0.5747, 0.6177, 0.5503, 0.4980, 0.5225, 0.5276, 0.6237],
        [0.4704, 0.2910, 0.3573, 0.6353, 0.5