In [None]:
# Fetch the Kinetics-600 archive from the deepmind-media Google Cloud Storage bucket
# into the current working directory.
!wget https://storage.googleapis.com/deepmind-media/Datasets/kinetics600.tar.gz
# Unpack it. The absolute path assumes the working directory is /content
# (presumably the Colab default - confirm when running elsewhere).
# A later cell reads /content/kinetics600/train.csv from the extracted tree.
!tar -xvf /content/kinetics600.tar.gz

In [None]:
!pip install pytube

In [None]:
# Mount Google Drive at /content/drive; later cells read datasets and
# read/write model checkpoints under /content/drive/MyDrive.
# The google.colab package is provided by the Colab runtime, so this cell is
# expected to work only there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import torch.optim as optim
import numpy as np
import cv2
from imutils import paths
import albumentations as album
import glob
import random
import pandas as pd

In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(device)

cuda:0


In [None]:
# Load the Kinetics-600 training listing extracted by the download cell.
# A later cell iterates its 'youtube_id' column to fetch the videos.
train_set = pd.read_csv('/content/kinetics600/train.csv')

In [None]:
from pytube import YouTube

youtube_video_url = 'https://www.youtube.com/watch?v='

# Download every video listed in train_set['youtube_id'] into /content/random_videos.
# NOTE(review): nothing here filters train_set by class, yet the evaluation cells
# treat /content/random_videos as the "not jumping jack" set - confirm that
# jumping-jack ids are excluded before relying on those numbers.
for t in train_set['youtube_id']:
    video_url = '{}{}'.format(youtube_video_url, t)

    try:
        yt_obj = YouTube(video_url)

        # Progressive streams carry audio and video in a single mp4 file.
        filters = yt_obj.streams.filter(progressive=True, file_extension='mp4')

        # get_highest_resolution() returns None when no stream matched; handle
        # that explicitly instead of failing on `None.download`.
        stream = filters.get_highest_resolution()
        if stream is None:
            print('{}: no progressive mp4 stream available, skipping'.format(video_url))
            continue

        # download the highest quality video
        stream.download(output_path='/content/random_videos')
    except Exception as e:
        # Best effort: private/removed/region-locked videos are expected, so log
        # which URL failed and keep going rather than aborting the whole run.
        print('{}: {}'.format(video_url, e))

In [None]:
# Archive the downloaded videos into random_videos.zip.
# The relative paths assume the working directory is /content, where the
# download cell wrote the random_videos folder.
!zip -r random_videos.zip random_videos

In [None]:
# Build torchvision's 18-layer 3D ResNet. The pretrained weights only serve as
# an initial state: the checkpoint loaded below overwrites every parameter.
model = models.video.r3d_18(pretrained=True)

num_classes = 2 # is jumping jack or not
# Index 0 / 1 match the integer labels assigned in the training and test loops.
class_names = ['Jumping Jack', 'Other']

# Freeze the backbone so that only the replacement head below is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head. It is created after the freeze loop, so its
# parameters keep requires_grad=True. The input width is read from the existing
# head instead of hard-coding 512.
model.fc = nn.Linear(model.fc.in_features, num_classes)

criterion = nn.CrossEntropyLoss()
# Give the optimizer only the trainable parameters (the new head); frozen
# parameters never receive gradients, so passing them would be a no-op.
optimizer = optim.SGD([p for p in model.parameters() if p.requires_grad], lr=0.01)
# Resume from a previously saved fine-tuning checkpoint. map_location remaps the
# stored tensors onto the selected device so a checkpoint saved on a GPU still
# loads when the notebook falls back to the CPU.
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab_Notebooks/classfier-3d-4.pt', map_location=device))
model.to(device)

In [None]:
# Per-frame preprocessing used by the training and evaluation loops: resize each
# frame to 320x320. `p=1.0` applies the resize unconditionally and replaces
# `always_apply=True`, which newer albumentations releases deprecate.
# NOTE: this rebinds `transforms`, shadowing the torchvision module imported at
# the top of the notebook; the later loops call this Compose object by that name.
transforms = album.Compose([album.Resize(320, 320, p=1.0)])

# Root folder of the training videos on the mounted Drive.
VIDEO_PATH = '/content/drive/MyDrive/UCF12/'
# Collect videos one and two directory levels below VIDEO_PATH. The variable
# names do not decide the class: the training loop derives each label from the
# directory name at index 5 of the '/'-split path.
jumping_jacks = glob.glob(os.path.join(VIDEO_PATH, '*/*.avi')) # for ucf11 dataset
random_videos = glob.glob(os.path.join(VIDEO_PATH, '*/*/*.mpg')) # for ucf101 jumping jacks
video_files = jumping_jacks + random_videos

# A single in-place shuffle already gives a uniformly random order; repeating
# it adds nothing.
random.shuffle(video_files) # randomize order for training

In [None]:
# Sanity check: list every training video next to the class tensor derived from
# its path (0 when path component 5 is 'jumping_jack', 1 for anything else).
for i, video in enumerate(video_files):
    class_dir = video.split('/')[5]
    labels = torch.Tensor([0 if class_dir == 'jumping_jack' else 1])

    print('{} {}'.format(video, labels))

In [None]:
# Fine-tune on fixed-length clips: every video is read frame by frame, each run
# of `clip_length` consecutive frames becomes one training sample (batch size 1)
# and triggers one optimizer step. Trailing frames that do not fill a clip are
# discarded.
video_frames = []  # preprocessed frames collected for the clip being assembled
clip_length = 16   # number of frames per clip fed to the model

running_loss = 0   # sum of per-clip losses over the whole pass
model.train()

for video in video_files:
    # The class is the directory name at index 5 of the '/'-split path (the
    # folder directly below VIDEO_PATH). It is constant for the whole video, so
    # build the target once: 0 = jumping jack, 1 = anything else.
    class_index = 0 if video.split('/')[5] == 'jumping_jack' else 1
    labels = torch.tensor([class_index], dtype=torch.long, device=device)

    cap = cv2.VideoCapture(video)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or unreadable frame: move on to the next video.
                break

            # OpenCV decodes to BGR; convert to RGB before resizing.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = transforms(image=frame)['image']
            video_frames.append(frame)

            if len(video_frames) < clip_length:
                continue

            # Stack to [clip_length, H, W, 3], add a batch axis, then move the
            # channels forward to get [1, 3, clip_length, H, W].
            inputs = np.array(video_frames)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
            video_frames.clear()

            optimizer.zero_grad()
            # Call the module itself rather than .forward() so hooks still run.
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
    finally:
        # Always free the decoder handle and drop any partial clip so frames
        # never leak into the next video.
        cap.release()
        video_frames.clear()

In [None]:
# Persist the fine-tuned weights (state_dict only) to Drive. This writes a new
# file name, so the checkpoint loaded in the model-setup cell is left untouched.
torch.save(model.state_dict(), '/content/drive/MyDrive/Colab_Notebooks/classfier-3d-7.pt')

In [None]:
!cp /content/drive/MyDrive/EE381K/*.zip .
!unzip jumping_jack_videos.zip
!unzip random_videos.zip

In [None]:
# Build the evaluation file list. The evaluation loop labels a video by its
# parent folder name ('jumping_jack_videos' vs anything else).
jumping_jacks = glob.glob(os.path.join('/content/jumping_jack_videos', '*.mp4')) # from kinetics600 jumping jacks videos
random_videos = glob.glob(os.path.join('/content/random_videos', '*.mp4')) # from kinetics600 non jumping jacks videos
video_files = jumping_jacks + random_videos

# One shuffle is enough for a uniformly random order.
random.shuffle(video_files) # randomize order for testing

# Show the number of videos and a short preview instead of dumping every path.
len(video_files), video_files[:5]

In [None]:
# Evaluate clip by clip: each run of `clip_length` consecutive frames of a video
# is classified independently and counted towards its class's totals. Trailing
# frames that do not fill a clip are discarded.
video_frames = []  # preprocessed frames collected for the clip being assembled
clip_length = 16   # number of frames per clip fed to the model

# Per-class clip counters, read by the reporting cell below.
jj_correct = 0
jj_total = 0
notjj_correct = 0
notjj_total = 0
model.eval()

with torch.no_grad():
    for video in video_files:
        # Ground truth comes from the parent folder (index 2 of the '/'-split
        # path, e.g. /content/jumping_jack_videos/x.mp4) and is constant for the
        # whole video: 0 = jumping jack, 1 = anything else.
        true_class = 0 if video.split('/')[2] == 'jumping_jack_videos' else 1

        cap = cv2.VideoCapture(video)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream or unreadable frame: go to the next video.
                    break

                # OpenCV decodes to BGR; convert to RGB before resizing.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = transforms(image=frame)['image']
                video_frames.append(frame)

                if len(video_frames) < clip_length:
                    continue

                # Stack to [clip_length, H, W, 3], add a batch axis, then move
                # the channels forward to get [1, 3, clip_length, H, W].
                inputs = np.array(video_frames)
                inputs = np.expand_dims(inputs, axis=0)
                inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
                video_frames.clear()

                # forward pass to get the predictions
                outputs = model(inputs)
                _, prediction = torch.max(outputs, 1)
                is_correct = prediction.item() == true_class

                if true_class == 0:
                    jj_total += 1
                    jj_correct += int(is_correct)
                else:
                    notjj_total += 1
                    notjj_correct += int(is_correct)
        finally:
            # Always free the decoder handle and drop any partial clip so
            # frames never leak into the next video.
            cap.release()
            video_frames.clear()

In [None]:
def _format_percent(correct, total):
    """Format correct/total as a percentage string.

    Returns a placeholder instead of raising ZeroDivisionError when `total` is
    0, which happens if no clip of that class was evaluated (no videos found,
    or every video shorter than one clip).
    """
    if total == 0:
        return 'n/a (no clips evaluated)'
    return '{}%'.format(correct / total * 100)


# Per-class and overall clip-level accuracy from the evaluation loop's counters.
print('Jumping jacks percent correct: {}'.format(_format_percent(jj_correct, jj_total)))
print('Non-jumping jacks percent correct: {}'.format(_format_percent(notjj_correct, notjj_total)))
print('Total correct: {}'.format(_format_percent(jj_correct + notjj_correct, jj_total + notjj_total)))