In [1]:
from torchvision import transforms
from torch.utils import data
from tqdm import tqdm
import os
from vid_dataset import *
from tvn1 import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
vid_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((200, 200)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [3]:
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [4]:
gpu = torch.device('cuda:0')

In [5]:
torch.cuda.empty_cache()

In [6]:
train_dataset = VideoDataset('/DATA/ichuviliaeva/videos/UCF50/', 'train_index.txt', vid_transforms)

val_dataset = VideoDataset('/DATA/ichuviliaeva/videos/UCF50/', 'test_index.txt', vid_transforms)

In [7]:
train_dataloader = data.DataLoader(train_dataset, batch_size = 4, shuffle = True)
val_dataloader = data.DataLoader(val_dataset, batch_size = 4)

In [8]:
model = TVN1(50).to(gpu)

In [9]:
optimizer = torch.optim.Adam(model.parameters())

In [10]:
criterion = nn.CrossEntropyLoss()

In [11]:
def train(epoch = 1, verbose = 2, model = model, optimazer = optimizer, criterion = criterion, 
          train_dataloader = train_dataloader, val_dataloader = val_dataloader):
    for t in range(epoch):
        loss_list = []
        for x, vid_lens, labels in tqdm(train_dataloader):
            x = torch.stack([x[b, v] for b in range(x.shape[0]) for v in range(x.shape[1])])
            x = x.to(gpu)
            vid_lens = vid_lens.to(gpu)
            labels = labels.to(gpu)
            optimizer.zero_grad()
            res = model((x, vid_lens))
            loss = criterion(res, labels)
            loss_list.append(loss.detach())
            loss.backward()
            optimizer.step()
        print('epoch ', t, ':')
        print('mean loss = ', torch.mean(torch.tensor(loss_list)))
            
        if t % verbose == 0 or t == epoch - 1:
            mi = 0
            acc = 0
            with torch.no_grad():
                loss_val_list = []
                for x, vid_lens, labels in tqdm(val_dataloader):
                    x = torch.stack([x[b, v] for b in range(x.shape[0]) for v in range(x.shape[1])])
                    x = x.to(gpu)
                    vid_lens = vid_lens.to(gpu)
                    labels = labels.to(gpu)
                    predicts = model((x, vid_lens))
                    loss = criterion(predicts, labels)
                    loss_val_list.append(loss.detach())
                    acc += torch.sum(torch.eq(torch.argmax(predicts, dim=-1), labels).to(dtype=torch.float64)).item()
                    mi += x.shape[0]
                print('mean val loss = ', torch.mean(torch.tensor(loss_val_list)))
                print('accuracy = ', acc / (mi + (mi == 0)))
                torch.save(model.state_dict(), 'tvn1-epoch-' + str(t) + '.pth')

In [None]:
train(epoch = 20)

100%|███████████████████████████████████████| 1420/1420 [49:26<00:00,  2.09s/it]


epoch  0 :
mean loss =  tensor(3.9325)


100%|█████████████████████████████████████████| 251/251 [06:47<00:00,  1.63s/it]


mean val loss =  tensor(3.9014)
accuracy =  0.0009346959122632103


 76%|█████████████████████████████▌         | 1076/1420 [36:50<11:47,  2.06s/it]