In [None]:
import kagglehub

# Download the dataset through kagglehub; `path` holds whatever location
# dataset_download returns for the fetched files.
path = kagglehub.dataset_download("matthewjansen/ucf101-action-recognition")

# Print the location so it can be checked against the paths used further down.
print("Path to dataset files:", path)

## Data preparation + preprocessing

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# For now only a single action class (JumpingJack) is used.
data_path = '/kaggle/input/ucf101-action-recognition/train/JumpingJack'
if not os.path.isdir(data_path):
    # The fixed /kaggle/input mount only exists inside Kaggle. Elsewhere, fall
    # back to the directory kagglehub reported in the download cell.
    # Assumes the same train/<class> layout under the dataset root — TODO confirm.
    data_path = os.path.join(path, 'train', 'JumpingJack')

#extract frames from videos
def extract_frames(video_path, frame_size=(64, 64)):
    """Decode a video into a stack of resized grayscale frames.

    Parameters:
    - video_path: Path of the video file to open with OpenCV.
    - frame_size: Target (width, height) handed to cv2.resize.

    Returns:
    - Array of shape (num_frames, height, width). A video that cannot be
      opened, or that yields no frames, gives an empty array of shape
      (0, height, width) so the result can still be stacked with others.
    """
    width, height = frame_size
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Resize frame
            frame = cv2.resize(frame, frame_size)
            # Convert to grayscale
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frames.append(frame)
    finally:
        # Release the capture handle even if resizing/conversion raised.
        cap.release()

    if not frames:
        # np.array([]) would be 1-D with shape (0,), which cannot be vstack-ed
        # with (n, height, width) stacks from other videos.
        return np.empty((0, height, width), dtype=np.uint8)
    return np.array(frames)

#load all videos and extract frames
all_frames = []
# Sort the listing: os.listdir returns entries in arbitrary order, and the
# order of the stacked frames determines which windows end up in each split.
for file in sorted(os.listdir(data_path)):
    if file.endswith('.avi'):
        video_frames = extract_frames(os.path.join(data_path, file))
        # Skip videos that produced no frames; an empty result would either
        # break the stacking below or contribute nothing.
        if len(video_frames) > 0:
            all_frames.append(video_frames)

if not all_frames:
    raise ValueError(f"No decodable .avi videos found in {data_path}")

#normalize to [0, 1]; float32 halves the memory of the default float64 result
all_frames = [frames.astype(np.float32) / 255.0 for frames in all_frames]

#stack all frames from all videos
# NOTE(review): the videos are joined end-to-end here, so sliding windows built
# over `dataset` can straddle the boundary between two different videos.
dataset = np.vstack(all_frames)
dataset = np.expand_dims(dataset, axis=-1)  # Add channel dimension

#for training
def create_shifted_frames(data, sequence_length=10):
    """Build next-frame training pairs from a stack of frames.

    Each run of `sequence_length` consecutive entries of `data` becomes one
    input sample; its target is the same run moved forward by one entry.

    Returns:
    - (inputs, targets) as two arrays with one row per window.
    """
    window_count = len(data) - sequence_length
    starts = range(window_count)
    inputs = [data[start : start + sequence_length] for start in starts]
    targets = [data[start + 1 : start + sequence_length + 1] for start in starts]
    return np.array(inputs), np.array(targets)

# Build (input window, one-step-shifted target window) pairs from the frame stack.
x, y = create_shifted_frames(dataset)

#split into training and validation sets
# Neighbouring windows share all but one frame, so a shuffled split would place
# near-duplicates of training windows in the validation set and make val_loss
# look better than it is. shuffle=False holds out the last 10% of windows.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, shuffle=False)

print("Training Dataset Shapes: ", x_train.shape, y_train.shape)
print("Validation Dataset Shapes: ", x_val.shape, y_val.shape)

## Model development

In [None]:
from keras import layers, models

# Input: sequences of 64x64 single-channel frames; the time dimension is left
# as None so sequences of any length are accepted.
input_layer = layers.Input(shape=(None, 64, 64, 1))     #input layer for sequential data

#ConvLSTM layers
# Three stacked ConvLSTM2D layers with shrinking kernels (5x5 -> 3x3 -> 1x1).
# return_sequences=True keeps one output per time step, as needed for
# frame-by-frame prediction.
# NOTE: `x` below rebinds the name that held the input windows in the
# data-preparation cell; only x_train / x_val remain available afterwards.
x = layers.ConvLSTM2D(filters=64, kernel_size=(5, 5), padding="same", return_sequences=True, activation="relu")(input_layer)
x = layers.BatchNormalization()(x)
x = layers.ConvLSTM2D(filters=64, kernel_size=(3, 3), padding="same", return_sequences=True, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.ConvLSTM2D(filters=64, kernel_size=(1, 1), padding="same", return_sequences=True, activation="relu")(x)

# Collapse the 64 feature maps to one channel per frame; sigmoid keeps pixel
# values in [0, 1] to match the normalised targets.
output_layer = layers.Conv3D(filters=1, kernel_size=(3, 3, 3), activation="sigmoid", padding="same")(x)

#build the model
model = models.Model(input_layer, output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy')

# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

## model training

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

#define callbacks
# Stop once val_loss has not improved for 10 epochs and roll back to the best
# weights seen; without restore_best_weights the model would keep the weights
# from up to 10 epochs after its best validation score.
early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
# Lower the learning rate after 5 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", patience=5)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=8,
    epochs=20,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
#save the trained model to a file in the current working directory
model.save('my_trained_model.h5')
print("Model saved to 'my_trained_model.h5'")

## to load model

In [None]:
from keras.models import load_model

# Reload the model from the file written by the save cell. This rebinds
# `model`, so every later cell works with the reloaded copy.
model = load_model('my_trained_model.h5')
print("Model loaded successfully!")

## visualization

- Randomly select a sequence from `x_val`.
- Use the model to predict frames for that input sequence.
- The predicted frames come directly from `model.predict(input_seq)`.

In [None]:
def visualize_predictions(model, x_val, y_val):
    """Plot one random validation sequence beside its target and prediction.

    Parameters:
    - model: Trained model; model.predict is called on the selected sequence.
    - x_val: Input validation sequences, indexed as [sample, frame, row, col, channel].
    - y_val: Target validation sequences with the same indexing.

    Shows a 3-row figure (input, true, predicted), one column per frame.
    """
    index = np.random.randint(0, len(x_val))
    # Slice rather than index so the batch dimension is kept for predict().
    input_seq = x_val[index:index + 1]
    true_seq = y_val[index:index + 1]

    predicted_seq = model.predict(input_seq)

    # Plot as many frames as all three sequences share, instead of assuming
    # exactly 10 (which fails or truncates for other sequence lengths).
    n_frames = min(input_seq.shape[1], true_seq.shape[1], predicted_seq.shape[1])
    rows = (("Input", input_seq), ("True", true_seq), ("Predicted", predicted_seq))

    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(len(rows), n_frames, figsize=(2 * n_frames, 6), squeeze=False)
    for row, (label, seq) in enumerate(rows):
        for i in range(n_frames):
            ax = axes[row, i]
            ax.imshow(seq[0, i, :, :, 0], cmap='gray')
            ax.set_title(f"{label} {i+1}")
            ax.axis('off')

    plt.show()

visualize_predictions(model, x_val, y_val)

## Video Generation

In [None]:
# `predicted_frames` is not assigned by any earlier cell, so on a fresh kernel
# the bare print raised NameError. Predict one validation sequence here so the
# shape check runs in order.
predicted_frames = model.predict(x_val[:1])
print(predicted_frames.shape)


In [None]:
pip install opencv-python moviepy numpy

In [None]:
def visualize_predictions(model, x_val, y_val):
    """Plot one random validation sequence and return the arrays that were shown.

    Parameters:
    - model: Trained model; model.predict is called on the selected sequence.
    - x_val: Input validation sequences, indexed as [sample, frame, row, col, channel].
    - y_val: Target validation sequences with the same indexing.

    Returns:
    - (input_seq, true_seq, predicted_seq), each with a leading batch
      dimension of 1.
    """
    index = np.random.randint(0, len(x_val))
    # Slice rather than index so the batch dimension is kept for predict().
    input_seq = x_val[index:index + 1]
    true_seq = y_val[index:index + 1]
    predicted_seq = model.predict(input_seq)

    # Plot as many frames as all three sequences share, instead of assuming
    # exactly 10 (which fails or truncates for other sequence lengths).
    n_frames = min(input_seq.shape[1], true_seq.shape[1], predicted_seq.shape[1])
    rows = (("Input", input_seq), ("True", true_seq), ("Predicted", predicted_seq))

    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(len(rows), n_frames, figsize=(2 * n_frames, 6), squeeze=False)
    for row, (label, seq) in enumerate(rows):
        for i in range(n_frames):
            ax = axes[row, i]
            ax.imshow(seq[0, i, :, :, 0], cmap='gray')
            ax.set_title(f"{label} {i+1}")
            ax.axis('off')
    plt.show()

    return input_seq, true_seq, predicted_seq

# Plot one random validation sequence and keep the three arrays it returns.
input_frames, true_frames, predicted_frames = visualize_predictions(model, x_val, y_val)

# NOTE(review): frames_to_video_opencv and combine_input_true_predicted_video
# are not defined or imported in any cell shown before this one. Confirm they
# exist before this cell runs; otherwise these calls raise NameError.
frames_to_video_opencv(predicted_frames, 'predicted_video2.avi')
combine_input_true_predicted_video(
    input_frames=input_frames, 
    true_frames=true_frames, 
    predicted_frames=predicted_frames, 
    output_path='combined_video2.avi'
)

## Smooth Transition

In [None]:
pip install opencv-python moviepy numpy matplotlib

In [None]:
import numpy as np
import cv2

def create_transition_video(model, x_val, y_val, output_path='prediction_video121.avi',
                            num_transition_frames=5, fps=10):
    """
    Generate a video that shows input frames, a transition screen, and predicted frames.

    Parameters:
    - model: The trained prediction model
    - x_val: Input validation sequences, indexed as [sample, frame, row, col, channel]
    - y_val: True validation sequences (accepted for interface compatibility;
      not used when building the video)
    - output_path: Path to save the output video
    - num_transition_frames: How many times the white caption frame is repeated
    - fps: Frame rate of the written video

    Returns:
    - Path to the generated video file

    Raises:
    - IOError: if OpenCV cannot open a writer for output_path
    """

    def to_bgr(frame):
        # Clip before scaling so out-of-range values saturate instead of
        # wrapping around in uint8, then expand to the 3 channels the writer gets.
        gray = (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)
        return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    index = np.random.randint(0, len(x_val))
    # Slice rather than index so the batch dimension is kept for predict().
    input_seq = x_val[index:index + 1]

    predicted_seq = model.predict(input_seq)

    # Use the real sequence lengths rather than assuming 10 frames.
    frames = [to_bgr(input_seq[0, i, :, :, 0]) for i in range(input_seq.shape[1])]

    height, width = input_seq.shape[2], input_seq.shape[3]
    transition_frame = np.full((height, width, 3), 255, dtype=np.uint8)
    text = 'PREDICTION STARTS'
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 2
    # The caption used to be drawn at x=50 with scale 1, which falls almost
    # entirely outside a small frame. Shrink it to fit the width and centre it.
    (text_w, text_h), _ = cv2.getTextSize(text, font, 1.0, thickness)
    font_scale = min(1.0, 0.9 * width / text_w)
    if font_scale < 1.0:
        thickness = 1  # thick strokes smear at small scales
    (text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, thickness)
    origin = (max((width - text_w) // 2, 0), (height + text_h) // 2)
    cv2.putText(transition_frame, text, origin,
                font, font_scale, (0, 0, 0), thickness, cv2.LINE_AA)

    frames.extend([transition_frame] * num_transition_frames)
    frames.extend(to_bgr(predicted_seq[0, i, :, :, 0]) for i in range(predicted_seq.shape[1]))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # Otherwise write() silently does nothing and "Video saved" is a lie.
        out.release()
        raise IOError(f"Could not open video writer for {output_path}")

    try:
        for frame in frames:
            out.write(frame)
    finally:
        # Always finalise the file, even if a write failed part-way.
        out.release()

    print(f"Video saved to {output_path}")
    return output_path

In [None]:
# Render a video for one randomly chosen validation sequence using the default
# output path, and keep the path that the function returns.
video_path = create_transition_video(model, x_val, y_val)

## without transition frame
No white frame between input and predicted frames

In [None]:
import numpy as np
import cv2

def create_transition_video(model, x_val, y_val, output_path='prediction_video131.avi', fps=10):
    """
    Generate a video that shows input frames and predicted frames.

    Parameters:
    - model: The trained prediction model
    - x_val: Input validation sequences, indexed as [sample, frame, row, col, channel]
    - y_val: True validation sequences (accepted for interface compatibility;
      not used when building the video)
    - output_path: Path to save the output video
    - fps: Frame rate of the written video

    Returns:
    - Path to the generated video file

    Raises:
    - IOError: if OpenCV cannot open a writer for output_path
    """

    def to_bgr(frame):
        # Clip before scaling so out-of-range values saturate instead of
        # wrapping around in uint8, then expand to the 3 channels the writer gets.
        gray = (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)
        return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    #randomly select a sequence
    index = np.random.randint(0, len(x_val))
    # Slice rather than index so the batch dimension is kept for predict().
    input_seq = x_val[index:index + 1]

    predicted_seq = model.predict(input_seq)

    # Input frames followed directly by the predicted frames; use the real
    # sequence lengths rather than assuming 10 frames each.
    frames = [to_bgr(input_seq[0, i, :, :, 0]) for i in range(input_seq.shape[1])]
    frames.extend(to_bgr(predicted_seq[0, i, :, :, 0]) for i in range(predicted_seq.shape[1]))

    height, width = input_seq.shape[2], input_seq.shape[3]

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # Otherwise write() silently does nothing and "Video saved" is a lie.
        out.release()
        raise IOError(f"Could not open video writer for {output_path}")

    try:
        for frame in frames:
            out.write(frame)
    finally:
        # Always finalise the file, even if a write failed part-way.
        out.release()

    print(f"Video saved to {output_path}")
    return output_path

In [None]:
# Render a video for one randomly chosen validation sequence using the default
# output path, and keep the path that the function returns.
video_path = create_transition_video(model, x_val, y_val)