### Import packages

In [62]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import cv2
import os
from os import path
import json
import time
import copy

import pandas as pd
from PIL import Image
import random
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset

#if gpu, use it
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


### Extract frames from videos and place them inside video name folder

In [64]:
# def extract_frames(file_path, target_dir):
#     if not os.path.exists(target_dir):
#         os.makedirs(target_dir)

#     vidcap = cv2.VideoCapture(file_path)
#     success, image = vidcap.read()
#     count = 0
 
#     while success:
#       frame_path = os.path.join(target_dir, f'{count}.jpg')
#       cv2.imwrite(frame_path, image)     # save frame as JPEG file
#       count += 1
#       success, image = vidcap.read()
#       success, image = vidcap.read()
#       success, image = vidcap.read()


# root = '/content/drive/MyDrive/mini_dataset'
# frame_root = '/content/drive/MyDrive/teste/hmdb51_frames'
# labels_path = '/content/drive/MyDrive/teste/labels.csv'
# class_name_to_label_path = '/content/drive/MyDrive/teste/class_name_to_label.json'

# # read files
# files = []

# for class_name in os.listdir(root):
#     for video_name in os.listdir(os.path.join(root, class_name)):
#         files.append([os.path.join(class_name, video_name), class_name])

# # normalize labels
# class_name_to_label = {}
# current_label = -1

# for vid in files:
#     label = class_name_to_label.get(vid[1], -1)

#     if label == -1:
#         current_label += 1
#         class_name_to_label[vid[1]] = current_label
#         label = current_label

#     vid[1] = label


# # save file paths
# if not os.path.exists(os.path.split(labels_path)[0]):
#     os.makedirs(os.path.split(labels_path)[0])

# f = open(labels_path, 'w')

# f.write('path,label\n')

# for vid in files:
#     f.write(f'{vid[0]},{vid[1]}\n')

# f.close()

# # save label normalization
# if not os.path.exists(os.path.split(class_name_to_label_path)[0]):
#     os.makedirs(os.path.split(class_name_to_label_path)[0])

# with open(class_name_to_label_path, 'w') as json_file:
#     json.dump(class_name_to_label, json_file, indent=4)

# # extract frames
# for i, vid in enumerate(files):
#     file_path = os.path.join(root, vid[0])
#     target_dir = os.path.join(frame_root, vid[0])

#     extract_frames(file_path, target_dir)

#     print(f'{i+1}/{len(files)}')


### Split labels csv file into train and validate - 80/20

In [65]:
# import pandas as pd

# def split(label):
#     path = '/content/drive/MyDrive/teste'
#     train = label.sample(frac=0.8, random_state=201)
#     val = label.drop(train.index)
#     train.to_csv(path + '/train.csv', mode='a', header=False)
#     val.to_csv(path + '/val.csv', mode='a', header=False)


# root = '/content/drive/MyDrive/teste'
# frame_root = '/content/drive/MyDrive/teste/hmdb51_frames'
# labels_path = '/content/drive/MyDrive/teste/labels.csv'
# class_name_to_label_path = '/content/drive/MyDrive/teste/class_name_to_label.json'

# labels_path = '/content/drive/MyDrive/teste/labels.csv'
# labels_data = pd.read_csv(labels_path)
# last_label = labels_data.tail(1)['label']
# last_label = last_label.iloc[0]
# max_labels = labels_data.nunique()
# max_labels= max_labels['label']
# index = 0

# for i in range(max_labels):
#     label = labels_data[labels_data['label'] == index]
#     split(label)
#     index +=1 


### Custom Dataset

In [66]:
train_csv_path = '/content/drive/MyDrive/teste/train.csv'
validate_csv_path = '/content/drive/MyDrive/teste/val.csv'
root_path = '/content/drive/MyDrive/teste/hmdb51_frames'

class MyDataset(Dataset):
  def __init__(self, frames_csv_file, root_dir):
    self.frames_csv = pd.read_csv(frames_csv_file)
    self.root_dir = root_dir
    self.slice_size = 10

  def __len__(self):
      return len(self.frames_csv)

  def __getitem__(self, idx):
      if torch.is_tensor(idx):
          idx = idx.tolist()
      
      #need to take 10 frames and stack them on 1 tensor                                                    
      #()-> second index has to be 1 because id column (3 columns) 
      #()-> why have to be NOT??? is wrong!! but works..
      all_frames = [f for f in os.listdir(os.path.join(self.root_dir, str(self.frames_csv.iloc[idx, 1]))) if not os.path.isfile(f)]
      #random get 10 consecutive frames from all video frames in folder
      if len(all_frames) > (self.slice_size):
        start = random.randrange(len(all_frames) - self.slice_size)
        frames = all_frames[start: start + self.slice_size]
      else:
        frames = all_frames
      
      images = []
      #load the images
      for frame in frames:
        path = os.path.join(self.root_dir, str(self.frames_csv.iloc[idx, 1])) + '/' + frame
        images.append(Image.open(path).convert('RGB'))
      
      #apply tansforms and condense all the images in 1 tensor
      final_image = self.transform(images)
      tag = int(self.frames_csv.iloc[idx, 2])
      return final_image, tag

  def transform(self, images):
    #random crop
    #random horizontal flip
    #transform to tensor and stack
    frames_tensors = []
    for image in images:
      #rezise image
      newsize = (240, 320)
      image = image.resize(newsize)
      image_tensor = TF.to_tensor(image)
      frames_tensors.append(image_tensor)
    final_tensor = torch.stack(frames_tensors, dim=1)
    #normalize

    return final_tensor

train_dataset = MyDataset(train_csv_path, root_path)
validate_dataset = MyDataset(validate_csv_path, root_path)


## Dataloaders

In [67]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
validate_dataloader = DataLoader(validate_dataset, batch_size=1, shuffle=True)


## Training Loop

In [68]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    
    stats = {}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            if phase == 'train':
              for data, tag in train_dataloader:
                  data = data.to(device)
                  tag = tag.to(device)

              # zero the parameter gradients
              optimizer.zero_grad()

              # forward
              # track history if only in train
              with torch.set_grad_enabled(phase == 'train'):
                  outputs = model(data)
                  _, preds = torch.max(outputs, 1)
                  loss = criterion(outputs, tag)

                  # backward + optimize only if in training phase
                  loss.backward()
                  optimizer.step()

              # statistics
              running_loss += loss.item() * data.size(0)
              running_corrects += torch.sum(preds == tag)
            
              scheduler.step()
              epoch_loss = running_loss / len(train_dataloader)
              epoch_acc = running_corrects.double() / len(train_dataloader)
            else:
              for data, tag in validate_dataloader:
                data = data.to(device)
                tag = tag.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(data)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, tag)

                # statistics
                running_loss += loss.item() * data.size(0)
                running_corrects += torch.sum(preds == tag)
          
              epoch_loss = running_loss / len(validate_dataloader)
              epoch_acc = running_corrects.double() / len(validate_dataloader)
              stats[phase + '_loss'].append(epoch_loss)
              stats[phase + '_acc'].append(epoch_acc)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## Prepare the model

In [69]:

f = open('/content/drive/MyDrive/teste/class_name_to_label.json')
classes = json.load(f)
f.close()

model_ft = torchvision.models.video.r3d_18(pretrained=False, progress=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = nn.Linear(num_ftrs, len(classes))

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

## Train and Evaluate

In [70]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=3)

Epoch 0/2
----------
train Loss: 0.0010 Acc: 0.0030


KeyError: ignored