In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Install the extra packages this notebook needs: pafy and moviepy are imported
# in the next cell. NOTE(review): youtube-dl is not imported directly in the
# visible cells - presumably it is required by pafy; confirm before removing.
!pip install pafy youtube-dl moviepy

In [4]:
import os
import cv2
import math
import pafy

import random
import numpy as np
import datetime as dt
import tensorflow as tf
from moviepy.editor import *
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

In [5]:
# Seed the NumPy, Python `random` and TensorFlow random number generators with
# the same constant. `seed_constant` is also reused later as the `random_state`
# of the train/test split.
seed_constant = 23
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

### Step 1: Download and Extract the Dataset
 
Let us start by downloading the dataset.

The Dataset we are using is the UCF50 – Action Recognition Dataset.

UCF50 is an action recognition dataset which contains:

  *  50 Action Categories consisting of realistic YouTube videos
  *  25 Groups of Videos per Action Category
  *  133 Average Videos per Action Category
  *  199 Average Number of Frames per Video
  *  320 Average Frames Width per Video
  *  240 Average Frames Height per Video
  *  26 Average Frames Per Seconds per Video

In [6]:
#!wget -nc --no-check-certificate https://www.crcv.ucf.edu/data/UCF50.rar

## Step 2: Visualize the Data with its Labels

Let us pick a random video from each of 20 randomly chosen classes and display its first frame; this will give us a good overview of what the dataset looks like.

In [7]:
# Create a Matplotlib figure
plt.figure(figsize = (30, 30))

# Get the names of all classes in UCF50.
# os.listdir() returns entries in arbitrary order, so the listing is sorted:
# the preview below samples classes by index with a seeded RNG, and a stable
# order is what makes that sampling repeatable from one machine to the next.
all_classes_names = sorted(os.listdir('../input/ucf50/UCF50'))

> mylist = ["apple", "banana", "cherry"]
> 
> print(random.sample(mylist, k=2)) 
> 
> > *['banana', 'apple']*


**random_range**
* random_range =random.sample(range(len(all_classes_names)), 20)

Returns 20 distinct, randomly selected indices into all_classes_names (integers from 0 to len(all_classes_names) - 1).


**enumerate(iterable, start=0)**

Parameters:
* Iterable: any object that supports iteration 
* Start: the index value from which the counter is to be started, by default it is 0

**Random.choice(sequence)**

Parameters:
* sequence 	Required. A sequence like a list, a tuple, a range of numbers etc.

The choice() method returns a randomly selected element from the specified sequence.
The sequence can be a string, a range, a list, a tuple or any other kind of sequence.


These are the main functions in OpenCV video I/O that we are going to discuss in this blog post:

**cv2.VideoCapture**

Creates a video capture object, which would help stream or display the video.
    
**cv2.VideoWriter** 

Saves the output video to a directory.
   
In addition, we also discuss other needed functions such as **cv2.imshow()**, **cv2.waitKey()** and the **get()** method which is used to read the video metadata such as frame height, width, fps etc.

The **vid_capture.read()** method returns a tuple, where the first element is a boolean and the next element is the actual video frame. When the first element is True, it indicates the video stream contains a frame to read. 

If there is a frame to read, you can then use **imshow()** to display the current frame in a window, otherwise exit the loop. 

**video_reader.release()**

When you call video_reader.release(), then:
* release software resource
* release hardware resource

**cv2.putText(image, text, org, font, fontScale, color[,  thickness [, lineType [, bottomLeftOrigin ] ] ] )**

Parameters:
* image : It is the image on which text is to be drawn.
* text : Text string to be drawn.
* org : It is the coordinates of the bottom-left corner of the text string in the image. The coordinates are represented as tuples of two values i.e. (X coordinate value, Y coordinate value).
* font : It denotes the font type. Some of the font types are FONT_HERSHEY_SIMPLEX, FONT_HERSHEY_PLAIN, etc.
* fontScale : Font scale factor that is multiplied by the font-specific base size.
* color : It is the color of text string to be drawn. For BGR, we pass a tuple. eg: (255, 0, 0) for blue color.
* thickness : It is the thickness of the line in px.
* lineType : This is an optional parameter.It gives the type of the line to be used.
* bottomLeftOrigin : This is an optional parameter. When it is true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner.

In [8]:
# Create the figure in the same cell that draws into it: with the inline
# backend a figure created in an earlier cell is closed when that cell
# finishes, so the figure size has to be set here to take effect.
plt.figure(figsize = (30, 30))

# Generate a random sample of class indices each time the cell runs.
# The grid below has 4 x 5 = 20 slots; random.sample raises ValueError when
# asked for more items than exist, so cap the request at the class count.
sample_size = min(20, len(all_classes_names))
random_range = random.sample(range(len(all_classes_names)), sample_size)

# Iterating through all the random samples (counter starts at 1 because
# Matplotlib subplot positions are 1-based)
for counter, random_index in enumerate(random_range, 1):

    # Getting Class Name using Random Index
    selected_class_name = all_classes_names[random_index]
    class_directory = f'../input/ucf50/UCF50/{selected_class_name}'

    # Getting a sorted list of all the video files present in the Class Directory
    # (sorted so that the seeded random choice below is repeatable)
    video_files_names_list = sorted(os.listdir(class_directory))

    # random.choice raises IndexError on an empty sequence
    if not video_files_names_list:
        print(f'No video files found for class {selected_class_name}, skipping.')
        continue

    # Randomly selecting a video file
    selected_video_file_name = random.choice(video_files_names_list)

    # Reading the Video File Using the Video Capture
    video_reader = cv2.VideoCapture(f'{class_directory}/{selected_video_file_name}')

    # Reading The First Frame of the Video File
    success, bgr_frame = video_reader.read()

    # Closing the VideoCapture object and releasing all resources.
    video_reader.release()

    # read() reports failure through its first return value; in that case there
    # is no usable frame and cvtColor would raise, so leave the slot empty.
    if not success:
        print(f'Could not read a frame from {selected_video_file_name}, skipping.')
        continue

    # Converting the BGR Frame to RGB Frame
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

    # Adding The Class Name Text on top of the Video Frame.
    cv2.putText(rgb_frame, selected_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Assigning the Frame to a specific position of a subplot
    plt.subplot(4, 5, counter)
    plt.imshow(rgb_frame)
    plt.axis('off')
    
        

### **Step 3: Read and Preprocess the Dataset**
Since we are going to use a classification architecture to train on a video classification dataset, we are going to need to preprocess the dataset first.

Now we will declare some constants:

* image_height and image_width: This is the size we will resize all frames of the video to; we are doing this to avoid unnecessary computation.
* max_images_per_classes: Maximum number of training images allowed for each class.
* dataset_directory: The path of the directory containing the extracted dataset.
* classes_list: These are the list of classes we are going to be training on, we are training on following 4 classes, you can feel free to change it.
    1. tai chi
    2. Swinging
    3. Horse Racing
    4. Walking with a Dog 

**Note**: The **image_height**, **image_width** and **max_images_per_classes** constants may be increased for better results, but be warned that this will become computationally expensive.

In [9]:
# Size (in pixels) that every extracted frame is resized to
image_height = 64
image_width = 64

# Number of frames kept for each class when the dataset is built
max_images_per_classes = 8000

# Root folder of the extracted dataset; it holds one sub-folder per class
dataset_directory = '../input/ucf50/UCF50'

# Classes to train on; a label is the position of the class in this list
classes_list = [
    "Swing",
    "HorseRace",
    "WalkingWithDog",
    "TaiChi",
]

# One output unit per class
model_output_size = len(classes_list)
 

### **Extract, Resize and Normalize Frames** 

Now we will create a function that will extract frames from each video while performing other preprocessing operation like resizing and normalizing images.

This method takes a video file path as input. It then reads the video file frame by frame, resizes each frame, normalizes the resized frame, appends the normalized frame into a list, and then finally returns that list.

In [10]:
def frames_extraction(video_path):
    """Read every frame of a video, resize it and divide its pixels by 255.

    Args:
        video_path: Path of the video file, handed to cv2.VideoCapture.

    Returns:
        A list with one array per frame that could be read, each resized to
        image_height rows by image_width columns and divided by 255. The list
        is empty when no frame could be read.
    """

    # Empty list declared to store video frames
    frame_list = []

    # Reading the video file using the VideoCapture
    video_reader = cv2.VideoCapture(video_path)

    try:
        # Iterating through the video frames
        while True:

            # Reading a frame from a video file
            success, frame = video_reader.read()

            # If the video frame is not successfully read then break the loop
            if not success:
                break

            # Resize the frame to the fixed dimension. cv2.resize takes the
            # target size as (width, height), so the width goes first; the
            # resulting array then has image_height rows and image_width columns.
            resized_frame = cv2.resize(frame, (image_width, image_height))

            # Normalize the resized frame by dividing it by 255 so that each
            # pixel value then lies between 0 and 1
            normalized_frame = resized_frame / 255

            # Appending the normalized frame into the frame list
            frame_list.append(normalized_frame)
    finally:
        # Release the capture object even if reading or resizing raised
        video_reader.release()

    # Returning the frames list
    return frame_list

## **Dataset Creation**
 
Now we will create another function called **create_dataset()**; this function uses the **frames_extraction()** function above and creates our final preprocessed dataset.

Here’s how this function works:

   1.  Iterate through all the classes mentioned in the **classes_list**
   2.  Now for each class iterate through all the video files present in it.
   3.  Call the **frames_extraction()** function on each video file.
   4.  Add the returned frames to a list called **temp_feature**.
   5.  After all videos of a class are processed, randomly select video frames (**equal to max_images_per_classes**) and add them to the list called **features**.
   6. Add the labels of the selected frames to the **`labels`** list.
   7. After all videos of all classes are processed, return the features and labels as NumPy arrays.

So when you call this function, it returns two NumPy arrays:

   * **An array of features (the selected frames)**
   * **An array of their associated labels.**

In [11]:
def create_dataset():
    """Build the frame-level dataset for every class in classes_list.

    For each class, all videos in its folder under dataset_directory are
    decoded with frames_extraction(), and a random sample of at most
    max_images_per_classes frames is kept. Each kept frame is labelled with
    the index of its class in classes_list.

    Returns:
        A tuple (features, labels) of NumPy arrays: the sampled frames and
        one integer class index per sampled frame.
    """

    # Declaring Empty lists to store the Feature and Labels values.
    features = []
    labels = []

    # Iterating through all the classes present in the classes list
    for class_index, class_name in enumerate(classes_list):
        print(f'Extracting data from the Class : {class_name}')

        class_directory = os.path.join(dataset_directory, class_name)

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # seeded random sampling below repeatable across machines.
        files_list = sorted(os.listdir(class_directory))

        # Collect the frames of every video of this class. A fresh list is
        # used for each class so frames never leak from one class to the next.
        temp_feature = []
        for file_name in files_list:

            # Create the video file path
            video_file_path = os.path.join(class_directory, file_name)

            # Calling the frame extraction method for every video file path
            temp_feature.extend(frames_extraction(video_file_path))

        # random.sample raises ValueError when asked for more items than the
        # population holds, so never request more frames than were extracted.
        sample_size = min(max_images_per_classes, len(temp_feature))
        if sample_size < max_images_per_classes:
            print(f'Only {len(temp_feature)} frames available for class {class_name}; using all of them.')

        # Adding randomly selected frames to the feature list
        features.extend(random.sample(temp_feature, sample_size))

        # One label per selected frame, so features and labels stay aligned
        labels.extend([class_index] * sample_size)

    # Converting the features and labels lists to numpy arrays
    features = np.asarray(features)
    labels = np.array(labels)

    return features, labels
    

Calling the **create_dataset** method which returns features and labels.

In [12]:
# Build the dataset: create_dataset() prints the class it is processing and
# returns the frames and their integer class labels as NumPy arrays.
features, labels = create_dataset()

Now we will convert class labels to one hot encoded vectors.

In [13]:
# Using the Keras to_categorical method to convert labels into one-hot-encoded vectors.
# The vector width is fixed to the number of classes instead of being inferred
# from the largest label present, so it always matches the size of the model's
# output layer (model_output_size).
one_hot_encoded_labels = to_categorical(labels, num_classes = model_output_size)

**Step 4: Split the Data into Train and Test Sets**

Now we have two NumPy arrays: one containing all images and one containing all class labels in one-hot encoded format. Let us split our data to create a training set and a testing set. The data must be shuffled before the split; `train_test_split` does this for us because we pass `shuffle = True`.

In [14]:
# Hold out 20% of the samples for testing. The split shuffles the data and is
# seeded with seed_constant.
feature_train, feature_test, label_train, label_test = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size = 0.2,
    shuffle = True,
    random_state = seed_constant,
)

### **Step 5: Construct the Model**

Now it is time to create our CNN model, for this post, we are creating a simple CNN Classification model with two CNN layers.

In [15]:
# Factory function for the frame classifier
def create_model():
    """Assemble the CNN, print its summary and return it.

    The network takes image_height x image_width frames with 3 channels and
    ends in a softmax layer with model_output_size units. The returned model
    is not compiled.
    """

    # The whole architecture as an ordered stack of layers
    layer_stack = [
        Conv2D(
            filters = 64,
            kernel_size = (3, 3),
            activation = 'relu',
            input_shape = (image_height, image_width, 3),
        ),
        Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2, 2)),
        GlobalAveragePooling2D(),
        Dense(256, activation = 'relu'),
        BatchNormalization(),
        Dense(model_output_size, activation = 'softmax'),
    ]

    # A sequential model applies the layers one after another
    network = Sequential(layer_stack)

    # Print the model summary
    network.summary()

    return network

# Build the model used by the cells that follow
model = create_model()

print('Model Created Successfully!')
    

**Check Model’s Structure :**

Using the **plot_model** function, we can check the structure of the final model. This is really helpful when we are creating a complex network, and you want to make sure we have constructed the network correctly.

In [17]:
# Draw the layer graph, including layer names and tensor shapes, and save it as a PNG
plot_model(
    model,
    to_file = 'model_structure_plot.png',
    show_shapes = True,
    show_layer_names = True,
)


**Step 6: Compile and Train the Model**


Now let us start the training. Before we do that, we also need to compile the model.

In [19]:
# Early stopping: watch the validation loss, give up after 15 epochs without
# improvement and restore the best weights seen
early_stopping_callback = EarlyStopping(
    monitor = 'val_loss',
    patience = 15,
    mode = 'min',
    restore_best_weights = True,
)

# Configure the loss, the optimizer and the reported metric
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'Adam',
    metrics = ["accuracy"],
)

# Train; 20% of the training data is set aside for validation
model_training_history = model.fit(
    x = feature_train,
    y = label_train,
    epochs = 50,
    batch_size = 4,
    shuffle = True,
    validation_split = 0.2,
    callbacks = [early_stopping_callback],
)

**Evaluating Your Trained Model**

Evaluate your trained model on the test features and test labels.

In [21]:
# Score the trained model on the held-out test split
model_evaluation_history = model.evaluate(x = feature_test, y = label_test)

**Save Your Model**

You should now save your model for future runs.

In [None]:
# Creating a useful name for our model, incase you're saving multiple models (OPTIONAL)