In [1]:
# Cell 1
import torch
from torchvision import transforms
from PIL import Image
import numpy as np
from model import DSN
import torch.nn as nn
import cv2
import time
import os


In [2]:
# # Function to extract frames at a specified frame rate and append paths to a list
# def extract_frames(video_path, output_folder, frame_rate=2):
#     cap = cv2.VideoCapture(video_path)

#     if not cap.isOpened():
#         print("Error: Could not open video file.")
#         return

#     frame_width = int(cap.get(3))  # Get the width of the frames
#     frame_height = int(cap.get(4))  # Get the height of the frames

#     # Define the codec and create a VideoWriter object
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can change the codec as needed
#     output_path = os.path.join(output_folder, "output_video.mp4")
#     out = cv2.VideoWriter(output_path, fourcc, frame_rate, (frame_width, frame_height))

#     start_time = time.time()
#     frame_count = 0
#     frames = []  # List to store frame paths

#     while cap.isOpened():
#         ret, frame = cap.read()

#         if not ret:
#             break

#         elapsed_time = time.time() - start_time
#         if elapsed_time >= 1.0 / frame_rate:
#             out.write(frame)
#             frame_count += 1
#             start_time = time.time()

#             # Save the frame as an image file
#             frame_filename = f"frame_{frame_count:04d}.png"
#             frame_path = os.path.join(output_folder, frame_filename)
#             cv2.imwrite(frame_path, frame)
#             frames.append(frame_path)

#     cap.release()
#     out.release()
#     cv2.destroyAllWindows()

#     print(f"Frames extracted: {frame_count}")
#     print(f"Frames per second: {frame_rate}")
#     print(f"Output video saved to: {output_path}")

#     return frames


# # Example usage
# video_path = "./video/IronMan.mp4"
# output_folder = "./frames"
# frames = extract_frames(video_path, output_folder, frame_rate=2)

# # Now 'frames' contains a list of file paths for the extracted frames
# print("Extracted frame paths:", frames)


KeyboardInterrupt: 

In [2]:
import cv2
import os

def extract_frames(input_video, output_path, start_time, end_time, frame_rate):
    """Sample frames from a time window of a video and save them as PNG files.

    Args:
        input_video: Path of the video file to read.
        output_path: Directory that receives the ``frame_<n>.png`` files. It is
            created when it does not exist yet.
        start_time: Start of the window, in seconds.
        end_time: End of the window (exclusive), in seconds.
        frame_rate: Desired number of saved frames per second of video. When it
            exceeds the video's own FPS, every frame in the window is kept.

    Returns:
        List of the saved frame paths in playback order. The list is empty when
        the video cannot be opened or the window contains no frames.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate!r}")

    frames = []
    cap = cv2.VideoCapture(input_video)
    try:
        if not cap.isOpened():
            return frames

        fps = cap.get(cv2.CAP_PROP_FPS)
        start_frame = int(start_time * fps)
        end_frame = int(end_time * fps)

        # Keep one frame out of every `step`. Clamp to 1 so that asking for
        # more frames per second than the video has cannot produce a modulo
        # by zero.
        step = max(1, int(fps / frame_rate))

        # cv2.imwrite does not create missing directories; make sure it exists.
        if output_path:
            os.makedirs(output_path, exist_ok=True)

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        for current_frame in range(start_frame, end_frame):
            ret, frame = cap.read()
            if not ret:
                break

            # Sampling is aligned to the absolute frame index, not to the
            # start of the window.
            if current_frame % step == 0:
                frame_path = os.path.join(output_path, f"frame_{len(frames)}.png")
                cv2.imwrite(frame_path, frame)
                frames.append(frame_path)
    finally:
        # Always free the capture handle, even if reading or writing failed.
        cap.release()

    return frames

In [3]:
# Cell 2
def _get_features(frames, gpu=True, batch_size=1):
    # Load pre-trained GoogLeNet model
    googlenet = torch.hub.load('pytorch/vision:v0.10.0', 'googlenet', weights='GoogLeNet_Weights.DEFAULT')

    # Remove the classification layer (last layer) to obtain features
    googlenet = torch.nn.Sequential(*(list(googlenet.children())[:-1]))

    # Set the model to evaluation mode
    googlenet.eval()

    # Initialize a list to store the features
    features = []

    # Image preprocessing pipeline
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Iterate through frames
    for frame_path in frames:
        # Load and preprocess the frame
        input_image = Image.open(frame_path)
        input_tensor = preprocess(input_image)
        input_batch = input_tensor.unsqueeze(0)  # Add batch dimension

        # Move the input and model to GPU if available
        if gpu:
            input_batch = input_batch.to('cuda')
            googlenet.to('cuda')

        # Perform feature extraction
        with torch.no_grad():
            output = googlenet(input_batch)

        # Append the features to the list
        features.append(output.squeeze().cpu().numpy())

    # Convert the list of features to a NumPy array
    features = np.array(features)

    return features.astype(np.float32)



In [4]:
# Cell 3
def _get_probs(features, gpu=True, mode=0):
    model_cache_key = "keyframes_rl_model_cache_" + str(mode)

    if mode == 1:
        model_path = "pretrained_model/model_1.pth.tar"
    else:
        model_path = "pretrained_model/model_0.pth.tar"
    model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm")
    if gpu:
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint)
    if gpu:
        model = nn.DataParallel(model).cuda()
    model.eval()

    seq = torch.from_numpy(features).unsqueeze(0)
    if gpu: seq = seq.cuda()
    probs = model(seq)
    probs = probs.data.cpu().squeeze().numpy()
    return probs




In [15]:
# # print(_get_features(frames))

# features = _get_features(frames)
# highlight_scores = _get_probs(features)
# # print(features.shape)
# # print(features[0].shape)
# # print(_get_probs(features).shape)

Using cache found in C:\Users\princ/.cache\torch\hub\pytorch_vision_v0.10.0


In [None]:
import srt
from extract_frames import extract_frames

# Start from an empty string; it is overwritten with the file contents below.
data=""
# Read the whole subtitle file "test1.srt" into memory as a single string.
with open("test1.srt") as f:
    data = f.read()

In [7]:
# For every subtitle segment: sample frames from its time span, extract
# features, score them, and print the frame indices ordered by score.
subs = srt.parse(data)
torch.cuda.empty_cache()

for sub in subs:
    # One output directory per subtitle; exist_ok avoids the check-then-create race.
    frame_dir = os.path.join("frames", f"sub{sub.index}")
    os.makedirs(frame_dir, exist_ok=True)

    frames = extract_frames("video/steve.mp4", frame_dir, sub.start.total_seconds(), sub.end.total_seconds(), 2)
    if not frames:
        # Very short subtitles may contain no sampled frame; the models cannot
        # run on an empty sequence, so skip them instead of crashing.
        print(f"No frames extracted for subtitle {sub.index}; skipping.")
        continue

    features = _get_features(frames)
    # atleast_1d keeps a single-frame (0-d) result iterable.
    highlight_scores = list(np.atleast_1d(_get_probs(features)))

    # Stable sort of the frame indices by ascending score.
    sorted_indices = sorted(range(len(highlight_scores)), key=highlight_scores.__getitem__)
    print(f"The indices of the list in the increasing order of value are {sorted_indices}.")

ModuleNotFoundError: No module named 'srt'

In [16]:
# Rank the frame indices of the most recent scoring run by ascending score.
highlight_scores = list(highlight_scores)
# sorted() is stable, so equal scores keep their original index order.
sorted_indices = sorted(range(len(highlight_scores)), key=highlight_scores.__getitem__)
print(f"The indices of the list in the increasing order of value are {sorted_indices}.")

The indices of the list in the increasing order of value are [83, 0, 1, 82, 2, 60, 79, 63, 3, 6, 81, 77, 80, 5, 18, 31, 67, 78, 9, 66, 64, 61, 59, 7, 4, 27, 29, 62, 16, 10, 28, 8, 26, 32, 40, 14, 65, 25, 39, 35, 19, 33, 38, 22, 13, 20, 55, 30, 12, 11, 41, 24, 56, 21, 34, 58, 23, 15, 43, 48, 49, 76, 68, 36, 17, 72, 37, 50, 73, 57, 44, 74, 42, 46, 70, 45, 75, 51, 69, 54, 52, 53, 71, 47].
