In [1]:
import time
import os
import sys
import random
import copy
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import hydra
from omegaconf import DictConfig, OmegaConf
# NOTE(review): machine-specific absolute path. The project imports that follow
# (tasks.*, models.*) presumably resolve through this entry -- confirm, and
# consider making the project root configurable so the notebook runs elsewhere.
sys.path.append(r'C:\Users\34609\VisualStudio\TFG')  
from tasks.EpicKitchens.dataset.dataset import Dataset
from models.model_v1 import Model

In [4]:
# @hydra.main(config_path=r'C:\Users\34609\VisualStudio\TFG\configs', config_name='model_v1')
# def cfg_setup(cfg: DictConfig):
#     return cfg

In [34]:
def train_model(model, dataloaders, criterion, optimizer, device, num_epochs=25, print_batch=50):
    """Run a train/validation loop and restore the best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. After every
    'val' phase the epoch accuracy is recorded, and whenever it strictly
    exceeds the best accuracy seen so far the model's ``state_dict`` is
    snapshotted. When all epochs are done the best snapshot is loaded back
    into ``model``.

    Args:
        model: Module called as ``model(inputs)``; its output is reduced with
            ``torch.max(outputs, 1)`` to obtain predicted class indices.
        dataloaders: Mapping with the keys ``'train'`` and ``'val'``; each
            value is an iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The running loss multiplies it by the batch size, so
            it is assumed to be a per-batch mean -- TODO confirm for custom
            criteria.
        optimizer: Optimizer stepped once per training batch.
        device: Device every input and label batch is moved to. The model is
            NOT moved here; the caller must place it on a matching device.
        num_epochs: Number of epochs to run.
        print_batch: Print running training statistics every ``print_batch``
            batches. A falsy value (``0`` / ``None``) disables the progress
            lines.

    Returns:
        Tuple ``(model, val_acc_history)`` where ``model`` carries the best
        validation weights and ``val_acc_history`` holds one accuracy per
        epoch (0-dim double tensors).
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs-1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Starts as an int and becomes a tensor on the first `+=` below.
            running_corrects = 0
            total_videos = 0

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; autograd history is only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is re-weighted by batch size so that a
                # smaller final batch does not skew the average)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels)
                total_videos += batch_size

                # `print_batch and ...` avoids a modulo-by-zero when progress
                # printing is disabled with 0/None.
                if is_train and print_batch and i % print_batch == 0:
                    l = running_loss / total_videos
                    acc = running_corrects.item() / total_videos
                    print(' - Batch Number {} -> Loss: {:.3f} Accuracy: {:.3f}'.format(i, l, acc))

            # Normalise by the samples actually iterated, not len(dataset):
            # a sampler or drop_last may cover only part of the dataset.
            # max(..., 1) keeps an empty phase from dividing by zero.
            seen = max(total_videos, 1)
            epoch_loss = running_loss / seen
            # as_tensor covers the empty-phase case where the counter is still an int.
            epoch_acc = torch.as_tensor(running_corrects).double() / seen

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model when validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [4]:
# Device that train_model moves every input/label batch to.
# NOTE(review): the model is never moved with .to(DEVICE) in this notebook, so
# switching away from 'cpu' would presumably also require moving the model.
DEVICE = torch.device('cpu')

In [5]:
# Hyper-parameters handed to Model(cfg), built in-notebook as an OmegaConf
# container (the hydra-based loader above is commented out).
cfg = OmegaConf.create(dict(
    ATTENTION='vanilla_attention',
    NUM_CLASSES=97,
    FRAME_SIZE=240,
    PATCH_SIZE=16,
    IN_CHANNELS=3,
    DEPTH=2,
    HEADS=4,
))

In [6]:
# Build the network from the hyper-parameters in `cfg`.
# NOTE(review): Model comes from models.model_v1; how it consumes cfg is not
# visible in this notebook.
model = Model(cfg)

In [29]:
# Gather the parameter tensors the optimizer should update, i.e. those that
# still require gradients.
params_to_update = []
for param in model.parameters():
    if not param.requires_grad:
        continue
    params_to_update.append(param)

# `params` counts parameter tensors, not individual scalar weights.
params = len(params_to_update)
print(params)

44


In [13]:
# Print the qualified name of every parameter registered on the model.
for name, _tensor in model.named_parameters():
    print(name)

<class 'generator'>
cls_token
pos_embed
patch_embed.proj.weight
patch_embed.proj.bias
blocks.0.norm1.weight
blocks.0.norm1.bias
blocks.0.attn.attention.q_proj.weight
blocks.0.attn.attention.q_proj.bias
blocks.0.attn.attention.k_proj.weight
blocks.0.attn.attention.k_proj.bias
blocks.0.attn.attention.v_proj.weight
blocks.0.attn.attention.v_proj.bias
blocks.0.attn.qkv.weight
blocks.0.attn.qkv.bias
blocks.0.attn.proj.weight
blocks.0.attn.proj.bias
blocks.0.norm2.weight
blocks.0.norm2.bias
blocks.0.mlp.fc1.weight
blocks.0.mlp.fc1.bias
blocks.0.mlp.fc2.weight
blocks.0.mlp.fc2.bias
blocks.1.norm1.weight
blocks.1.norm1.bias
blocks.1.attn.attention.q_proj.weight
blocks.1.attn.attention.q_proj.bias
blocks.1.attn.attention.k_proj.weight
blocks.1.attn.attention.k_proj.bias
blocks.1.attn.attention.v_proj.weight
blocks.1.attn.attention.v_proj.bias
blocks.1.attn.qkv.weight
blocks.1.attn.qkv.bias
blocks.1.attn.proj.weight
blocks.1.attn.proj.bias
blocks.1.norm2.weight
blocks.1.norm2.bias
blocks.1.mlp.f

In [21]:
# Synthetic stand-in data: uniformly random clips with random integer labels.
# Channel count, frame size and number of classes are read from `cfg` so the
# fake data cannot drift from the configuration the model was built with.
NUM_CLIPS = 10   # number of synthetic samples
NUM_FRAMES = 30  # temporal length of each clip
data_tensor = torch.rand(
    (NUM_CLIPS, cfg.IN_CHANNELS, NUM_FRAMES, cfg.FRAME_SIZE, cfg.FRAME_SIZE)
)  # b, c, t, w, h
data_tensor_labels = torch.randint(cfg.NUM_CLASSES, (NUM_CLIPS,))

In [22]:
# Pair each random clip with its label so that indexing the dataset yields an
# (input, label) tuple, which is what train_model unpacks from each batch.
train_dataset = torch.utils.data.TensorDataset(data_tensor, data_tensor_labels)

In [23]:
# Draw samples in random order, one clip per batch, loaded by a single worker process.
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=1,
    sampler=train_sampler,
    num_workers=1,
)


In [30]:
# Classification loss plus an Adam optimizer (library-default hyper-parameters)
# over the trainable tensors collected in `params_to_update`.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=params_to_update)

In [31]:
# Phase name -> loader mapping consumed by train_model.
# NOTE: 'val' reuses the training loader, so the reported validation metrics
# are measured on the training data.
dataloaders = dict(train=train_loader, val=train_loader)

In [35]:
# Smoke-test run: one epoch with progress printed after every batch.
# The validation-accuracy history is discarded.
trained_model, _ = train_model(
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    device=DEVICE,
    num_epochs=1,
    print_batch=1,
)

Epoch 0/0
----------
 - Batch Number 0 -> Loss: 9.248 Accuracy: 0.000
 - Batch Number 1 -> Loss: 9.712 Accuracy: 0.000
 - Batch Number 2 -> Loss: 9.621 Accuracy: 0.000
 - Batch Number 3 -> Loss: 8.780 Accuracy: 0.000
 - Batch Number 4 -> Loss: 8.523 Accuracy: 0.000
 - Batch Number 5 -> Loss: 9.338 Accuracy: 0.000
 - Batch Number 6 -> Loss: 8.486 Accuracy: 0.000
 - Batch Number 7 -> Loss: 9.636 Accuracy: 0.000
 - Batch Number 8 -> Loss: 9.744 Accuracy: 0.000
 - Batch Number 9 -> Loss: 9.692 Accuracy: 0.000
train Loss: 9.6917 Acc: 0.0000
val Loss: 8.6512 Acc: 0.1000

Training complete in 1m 46s
Best val Acc: 0.100000
