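### This notebook extracts key frames from videos: every frame is embedded with a pretrained Inception v3, the embeddings are reduced with PCA and clustered with k-means, and one frame per cluster is saved as a JPEG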

In [1]:
import os
import cv2
import torch
import numpy as np
import torch.nn as nn
import torchvision.models as models

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

%matplotlib inline

In [2]:
def create_folder(folder):
    """Create the directory ``folder`` (and any missing parents) if needed.

    Calling this on a directory that already exists is a no-op.

    Args:
        folder: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same path between check and create.
    os.makedirs(folder, exist_ok=True)

In [3]:
# Load the pretrained Inception v3 and swap its final fully connected layer
# for an identity, so a forward pass returns the features that would have fed
# the classifier instead of class scores.
model = models.inception_v3(pretrained=True)
model.fc = nn.Identity()

# NOTE: despite its name, this flag holds torch.cuda.is_available(), i.e.
# whether a CUDA GPU can be used. The name is kept because later cells read it.
is_cpu_available = torch.cuda.is_available()
model = model.cuda() if is_cpu_available else model

In [4]:
# Input / output roots: videos are read from `dataset`, and the selected key
# frames are written under `outputs`.
dataset = 'dataset_ucf_video'
outputs = 'dataset_ucf_video_keyframe'

# Clustering settings: number of k-means clusters per video, and the PCA
# configuration (50 components) used to reduce the frame features first.
num_cluster = 16
pca = PCA(n_components=50)

In [5]:
# Make sure the root output directory exists before any key frames are written.
create_folder(outputs)

In [6]:
# Key-frame extraction.
#
# Walks dataset/<dtype>/<category>/<video>, embeds every frame of each video
# with `model`, reduces the embeddings with PCA, clusters them with k-means and
# writes the first frame of every cluster to
# outputs/<dtype>/<category>/'<video number> - <frame index>.jpg'.

# IDs of the k-means clusters (0 .. num_cluster - 1).
cluster_id = np.linspace(0, num_cluster - 1, num_cluster, dtype=np.int16)

# Per-channel ImageNet statistics (RGB order). torchvision's pretrained
# weights expect float input scaled to [0, 1] and normalized with these.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


def _preprocess_frame(img):
    """Turn one BGR uint8 frame from OpenCV into a normalized 1x3x299x299 float tensor."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, (299, 299), interpolation=cv2.INTER_CUBIC)
    rgb = rgb.astype(np.float32) / 255.0
    rgb = (rgb - IMAGENET_MEAN) / IMAGENET_STD
    # PyTorch expects [channels, height, width]; OpenCV delivers [height, width, channels].
    chw = np.ascontiguousarray(np.transpose(rgb, (2, 0, 1)))
    return torch.from_numpy(chw).unsqueeze(0)


model.eval()

with torch.no_grad():
    for dtype in os.listdir(dataset):
        dtype_path = os.path.join(dataset, dtype)
        create_folder(os.path.join(outputs, dtype))

        for category in os.listdir(dtype_path):
            category_path = os.path.join(dtype_path, category)

            # Mirror the category folder on the output side.
            category_out = os.path.join(outputs, dtype, category)
            create_folder(category_out)

            # video_count is the position of the video in the directory
            # listing; it is used as the prefix of the output file names.
            for video_count, video in enumerate(os.listdir(category_path)):
                video_path = os.path.join(category_path, video)

                frames = []
                features = []
                vidcap = cv2.VideoCapture(video_path)
                try:
                    success, img = vidcap.read()
                    while success:
                        # Keep the untouched frame so the saved key frame has
                        # the original resolution and colours.
                        frames.append(img)

                        batch = _preprocess_frame(img)
                        if is_cpu_available:  # misnomer: True when CUDA is available
                            batch = batch.cuda()
                        features.append(model(batch).cpu().numpy()[0])

                        success, img = vidcap.read()
                finally:
                    # Release the decoder/file handle even if inference fails.
                    vidcap.release()

                if not frames:
                    print('Skipping {}: no frames could be read'.format(video_path))
                    continue

                if len(frames) <= num_cluster:
                    # Too few frames to cluster: every frame is a key frame.
                    indexes = list(range(len(frames)))
                else:
                    feature_matrix = np.stack(features)
                    n_samples, n_dims = feature_matrix.shape

                    # PCA cannot produce more components than samples or
                    # feature dimensions, so clamp the configured target and
                    # fit a fresh PCA for this video.
                    n_components = min(pca.n_components, n_samples, n_dims)
                    reduced = PCA(n_components=n_components).fit_transform(feature_matrix)

                    kmeans = KMeans(n_clusters=num_cluster, random_state=0).fit(reduced)
                    labels = kmeans.labels_

                    # Key frame of a cluster = its earliest frame.
                    indexes = []
                    for cid in cluster_id:
                        members = np.flatnonzero(labels == cid)
                        # k-means may return fewer distinct clusters than
                        # requested (e.g. duplicate frames); skip missing IDs.
                        if members.size:
                            indexes.append(int(members[0]))
                    indexes.sort()

                for index in indexes:
                    out_name = '{} - {}.jpg'.format(video_count, index)
                    cv2.imwrite(os.path.join(category_out, out_name), frames[index])

                # Return cached GPU memory before the next video is processed.
                torch.cuda.empty_cache()