In [1]:
import os
import cv2
import pafy
import math
import random
import numpy as np
import datetime as dt
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt

from moviepy.editor import *
%matplotlib inline


from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [11]:
IMAGE_HEIGHT , IMAGE_WIDTH = 60,60

SEQUENCE_LENGTH = 50

DATASET_DIR = r"D:\BaiduNetdiskDownload\score_training\sequence"

In [12]:
def frames_extraction(video_path):
    '''
    This function will extract the required frames from a video after resizing and normalizing them.
    Args:
        video_path: The path of the video in the disk, whose frames are to be extracted.
    Returns:
        frames_list: A list containing the resized and normalized frames of the video.
    '''

    # Declare a list to store video frames.
    frames_list = []
    
    # Read the Video File using the VideoCapture object.
    video_reader = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video.
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the the interval after which frames will be added to the list.
    skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)

    # Iterate through the Video Frames.
    for frame_counter in range(SEQUENCE_LENGTH):

        # Set the current frame position of the video.
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

        # Reading the frame from the video. 
        success, frame = video_reader.read() 

        # Check if Video frame is not successfully read then break the loop
        if not success:
            break

        # Resize the Frame to fixed height and width.
        resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
        
        # Normalize the resized frame by dividing it with 255 so that each pixel value then lies between 0 and 1
        normalized_frame = resized_frame / 255
        
        # Append the normalized frame into the frames list
        frames_list.append(normalized_frame)
    
    # Release the VideoCapture object. 
    video_reader.release()

    # Return the frames list.
    return frames_list

In [13]:
def plot_metric(model_training_history, metric_name_1, metric_name_2, plot_name):
    '''
    This function will plot the metrics passed to it in a graph.
    Args:
        model_training_history: A history object containing a record of training and validation 
                                loss values and metrics values at successive epochs
        metric_name_1:          The name of the first metric that needs to be plotted in the graph.
        metric_name_2:          The name of the second metric that needs to be plotted in the graph.
        plot_name:              The title of the graph.
    '''
    
    # Get metric values using metric names as identifiers.
    metric_value_1 = model_training_history.history[metric_name_1]
    metric_value_2 = model_training_history.history[metric_name_2]
    
    # Construct a range object which will be used as x-axis (horizontal plane) of the graph.
    epochs = range(len(metric_value_1))

    # Plot the Graph.
    plt.plot(epochs, metric_value_1, 'blue', label = metric_name_1)
    plt.plot(epochs, metric_value_2, 'red', label = metric_name_2)

    # Add title to the plot.
    plt.title(str(plot_name))

    # Add legend to the plot.
    plt.legend()

In [14]:
def create_LRCN_feature_extractor():
    '''
    This function will construct the LRCN model for feature extraction.
    Returns:
        model: The constructed LRCN model for feature extraction.
    '''
    
    # We will use a Sequential model for model construction.
    model = Sequential()
    
    # Define the Model Architecture.
    # Assume SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, and CLASSES_LIST are defined earlier.
    model.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'),
                              input_shape=(SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
    model.add(TimeDistributed(MaxPooling2D((4, 4))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((4, 4))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same', activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same', activation='relu')))
    model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2), padding="same")))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Flatten()))
    
    model.add(LSTM(32, return_sequences=True))  # Keep return_sequences=True to output features for all frames
    
    # Optional: Add a Dense layer to further process the features if needed.
    # model.add(Dense(some_number_of_units, activation='relu'))
    
    # The model is now set up to extract features. You can remove or comment out the next line if you don't want to display the summary.
    model.summary()
    
    # Return the LRCN model for feature extraction.
    return model

# Example usage:
# feature_extractor_model = create_LRCN_feature_extractor()
# Then you can use `feature_extractor_model.predict(your_video_data)` to extract features.

In [15]:
LRCN_feature_extractor = create_LRCN_feature_extractor()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_13 (TimeD  (None, 50, 60, 60, 16)    448       
 istributed)                                                     
                                                                 
 time_distributed_14 (TimeD  (None, 50, 15, 15, 16)    0         
 istributed)                                                     
                                                                 
 time_distributed_15 (TimeD  (None, 50, 15, 15, 16)    0         
 istributed)                                                     
                                                                 
 time_distributed_16 (TimeD  (None, 50, 15, 15, 32)    4640      
 istributed)                                                     
                                                                 
 time_distributed_17 (TimeD  (None, 50, 3, 3, 32)     

In [26]:
def create_dataset():
    '''
    This function will extract the data of the selected classes and create the required dataset.
    Returns:
        features:          A list containing the extracted frames of the videos.
        labels:            A list containing the indexes of the classes associated with the videos.
        video_files_paths: A list containing the paths of the videos in the disk.
    '''

    # Declared Empty Lists to store the features, labels and video file path values.
    features = []
    labels = []
    video_files_paths = []
    

        
    # Display the name of the class whose data is being extracted.
    print(f'Extracting Data of Class: {DATASET_DIR}')
        
    # Get the list of video files present in the specific class name directory.
    files_list = os.listdir(os.path.join(DATASET_DIR))
        
    # Iterate through all the files present in the files list.
    for file_name in files_list:
            
        # Get the complete video path.
        video_file_path = os.path.join(DATASET_DIR, file_name)

        # Extract the frames of the video file.
        frames = frames_extraction(video_file_path)
        # Check if the extracted frames are equal to the SEQUENCE_LENGTH specified above.
        # So ignore the vides having frames less than the SEQUENCE_LENGTH.
        if len(frames) == SEQUENCE_LENGTH:
            text = file_name.split("_\.")
            score = text[1]
            # Append the data to their repective lists.
            features.append(frames)
            labels.append(score)
            video_files_paths.append(video_file_path)

    # Converting the list to numpy arrays
    features = np.asarray(features)
    extracted_features = LRCN_feature_extractor.predict(features)
    labels = np.array(labels)  
    
    # Return the frames, class index, and video file path.
    return extracted_features, labels, video_files_paths

In [27]:
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: D:\BaiduNetdiskDownload\score_training\sequence


IndexError: list index out of range

In [24]:
one_hot_encoded_labels = to_categorical(labels)

ValueError: invalid literal for int() with base 10: '-0.04.mp4'

In [28]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels,
                                                                            test_size = 0.25, shuffle = True,
                                                                            random_state = seed_constant)

In [31]:
print(features_train.shape, type(features_train))
print(labels_train.shape, type(labels_train))
frame_differences = np.diff(features_video, axis=0)

(714, 50, 32) <class 'numpy.ndarray'>
(714,) <class 'numpy.ndarray'>


In [30]:
# Example: Average the features to get a single feature vector for the video

#video_feature_vector = np.mean(features_in video, axis=0)

# At this point, you can use the video_feature_vector with an SVR model for scoring or any other downstream task.

# Train an SVR model using labeled data (if you have it)
# from sklearn.svm import SVR
svr_model = SVR()
svr_model.fit(features_train,labels_train)
train_score = svr_model.score(features_train,labels_train)
test_score = svr_model.score(features_test,labels_test)

# svr_model = load_model('path_to_pretrained_svr_model.h5')

# Use the SVR model to score the video based on its feature vector
# Assume you have a function called score_video_with_svr that takes the SVR model and the video feature vector
def score_video_with_svr(svr_model, video_feature_vector):
    # Use the SVR model to predict a score for the given video feature vector
    score = svr_model.predict(video_feature_vector.reshape(1, -1))
    return score 

# Example usage of the scoring function
score = score_video_with_svr(svr_model, video_feature_vector)
print(f"The video scored: {score}")

ValueError: Found array with dim 3. SVR expected <= 2.