In [1]:
video_folder = "AlgonautsVideos268_All_30fpsmax"

In [11]:
import os
import gc
import cv2
import numpy as np
import sys
import pickle
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import contextlib

In [5]:
base_model = ResNet50(weights='imagenet', include_top=False)

# Select layers for feature extraction at the end of each stage
layers_to_extract = {

    'stage_2_conv': base_model.get_layer('conv2_block3_3_conv').output,
    'stage_3_conv': base_model.get_layer('conv3_block4_3_conv').output,
    'stage_4_conv': base_model.get_layer('conv4_block6_3_conv').output,  # Corrected layer name
    'stage_5_conv': base_model.get_layer('conv5_block3_3_conv').output,  # Corrected layer name
}
# Create models for each stage
models = {stage: Model(inputs=base_model.input, outputs=output) for stage, output in layers_to_extract.items()}


In [12]:
@contextlib.contextmanager
def suppress_stdout():
    """A context manager for suppressing stdout temporarily."""
    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout


In [13]:
def preprocess_video(video_path, num_frames=30):
    # Capture video
    vidcap = cv2.VideoCapture(video_path)

    # Determine the total number of frames in the video
    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the interval for frame sampling
    frame_interval = max(1, total_frames // num_frames)

    success, frame = vidcap.read()
    extracted_count = 0
    features_per_stage = {stage: [] for stage in layers_to_extract}

    while success and extracted_count < num_frames:
        # Check if the current frame is one of the frames to be extracted
        if total_frames > num_frames and (vidcap.get(cv2.CAP_PROP_POS_FRAMES) - 1) % frame_interval == 0:
            # Resize frame to match the input size expected by ResNet50
            resized_frame = cv2.resize(frame, (224, 224))  # Resize to 224x224

            # Preprocess frame
            img = image.img_to_array(resized_frame)
            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)

            # Extract and store features for each stage
            for stage, model in models.items():
                with suppress_stdout():
                    stage_features = model.predict(img)
                features_per_stage[stage].append(stage_features)

            extracted_count += 1

        success, frame = vidcap.read()

    # Stack features for 3D input
    stacked_features = {stage: np.stack(features, axis=0) for stage, features in features_per_stage.items()}

    return stacked_features


In [14]:
# Example usage
video_tensors = preprocess_video('AlgonautsVideos268_All_30fpsmax/0002_0-0-4-3146384004.mp4')

In [15]:
type(video_tensors)

dict

In [16]:
def process_batch(batch, input_folder, stage_folders):
    for filename in batch:
        video_id = filename[:4]

        # Check if the file for this video ID already exists in any stage folder
        if file_exists(video_id, stage_folders):
            print(f"Files for video ID {video_id} already exist. Skipping.")
            continue

        video_path = os.path.join(input_folder, filename)
        try:
            processed_data = preprocess_video(video_path)
            
            if not isinstance(processed_data, dict):
                raise ValueError(f"Unexpected data format for {filename}: {type(processed_data)}")

            video_id = filename[:4]
            for stage, features in processed_data.items():
                stage_folder = os.path.join(output_folder, stage)
                if not os.path.exists(stage_folder):
                    os.makedirs(stage_folder)
                    print(f"Created folder: {stage_folder}")

                output_file = os.path.join(stage_folder, f"{video_id}_{stage}.pkl")
                with open(output_file, 'wb') as file:
                    pickle.dump(features, file)
                    print(f"Saved file: {output_file}")

        except Exception as e:
            print(f"Error processing {filename}: {e}, data type: {type(processed_data)}")
            
        finally:
            # Explicitly delete large variables
            del processed_data
            # Call garbage collector
            gc.collect()


def file_exists(video_id, stage_folders):
    """Check if a file for the given video ID already exists in any of the stage folders."""
    for stage in stage_folders:
        filepath = os.path.join(stage_folders[stage], f"{video_id}_{stage}.pkl")
        if os.path.exists(filepath):
            return True
    return False

def process_and_save_videos(input_folder, output_folder, batch_size=90, id_range=None):
    # Ensure output folder exists and create a dictionary for stage folders
    stage_folders = {}
    stages = ['stage_1', 'stage_2', 'stage_3', 'stage_4', 'stage_5', 'final']
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for stage in stages:
        stage_folder = os.path.join(output_folder, stage)
        stage_folders[stage] = stage_folder
        if not os.path.exists(stage_folder):
            os.makedirs(stage_folder)

    # Get all MP4 files
    all_files = [f for f in os.listdir(input_folder) if f.endswith(".mp4")]

    # Filter files based on ID range if provided
    if id_range is not None:
        start_id, end_id = id_range
        all_files = [f for f in all_files if start_id <= f[:4] <= end_id]

    # Process in batches
    for i in range(0, len(all_files), batch_size):
        batch = all_files[i:i + batch_size]
        process_batch(batch, input_folder, stage_folders)  # Pass 'stage_folders' instead of 'output_folder'
        print(f"Processed batch {i // batch_size + 1}/{(len(all_files) - 1) // batch_size + 1}")



In [None]:
input_folder = video_folder  # Update with your input folder path
output_folder = 'preprocessed_videos_30frames_conv'  # Update with your output folder path
batch_size = 250   # Adjust based on your system's capability
id_range = ('0001', '1000')  # Process videos with IDs from 1000 to 2000
process_and_save_videos(input_folder, output_folder, batch_size, id_range)

Created folder: preprocessed_videos_30frames_conv/stage_2_conv
Saved file: preprocessed_videos_30frames_conv/stage_2_conv/0059_stage_2_conv.pkl
Created folder: preprocessed_videos_30frames_conv/stage_3_conv
Saved file: preprocessed_videos_30frames_conv/stage_3_conv/0059_stage_3_conv.pkl
Created folder: preprocessed_videos_30frames_conv/stage_4_conv
Saved file: preprocessed_videos_30frames_conv/stage_4_conv/0059_stage_4_conv.pkl
Created folder: preprocessed_videos_30frames_conv/stage_5_conv
Saved file: preprocessed_videos_30frames_conv/stage_5_conv/0059_stage_5_conv.pkl
Saved file: preprocessed_videos_30frames_conv/stage_2_conv/0438_stage_2_conv.pkl
Saved file: preprocessed_videos_30frames_conv/stage_3_conv/0438_stage_3_conv.pkl
Saved file: preprocessed_videos_30frames_conv/stage_4_conv/0438_stage_4_conv.pkl
Saved file: preprocessed_videos_30frames_conv/stage_5_conv/0438_stage_5_conv.pkl
Saved file: preprocessed_videos_30frames_conv/stage_2_conv/0108_stage_2_conv.pkl
Saved file: preproc

In [None]:
###--------------------------------------------------------------------------------------------------------###

In [11]:
def count_files_in_subfolders(parent_folder):
    subfolders = ['stage_1', 'stage_2', 'stage_3', 'stage_4', 'stage_5', 'final']
    file_counts = {}

    for subfolder in subfolders:
        path = os.path.join(parent_folder, subfolder)
        if os.path.exists(path):
            # Count only files, excluding subdirectories
            file_counts[subfolder] = sum([os.path.isfile(os.path.join(path, name)) for name in os.listdir(path)])
        else:
            print(f"Subfolder {subfolder} does not exist in {parent_folder}")
            file_counts[subfolder] = 0

    return file_counts

# Usage
parent_folder = 'preprocessed_videos'  
file_counts = count_files_in_subfolders(parent_folder)
for subfolder, count in file_counts.items():
    print(f"{subfolder}: {count} files")

Subfolder stage_1 does not exist in preprocessed_videos
Subfolder stage_2 does not exist in preprocessed_videos
Subfolder stage_3 does not exist in preprocessed_videos
Subfolder stage_4 does not exist in preprocessed_videos
Subfolder stage_5 does not exist in preprocessed_videos
Subfolder final does not exist in preprocessed_videos
stage_1: 0 files
stage_2: 0 files
stage_3: 0 files
stage_4: 0 files
stage_5: 0 files
final: 0 files


In [30]:
# import os
# import pickle
# import numpy as np
# import random
# from sklearn.decomposition import PCA
# 
# def sample_and_apply_pca(stage_folders, repo_root, variance=0.999):
#     results = {}
#     for stage in stage_folders:
#         stage_folder = os.path.join(repo_root, stage)
#         if not os.path.exists(stage_folder):
#             print(f"Folder not found: {stage_folder}")
#             continue
# 
#         # Randomly sample three files
#         files = os.listdir(stage_folder)
#         sampled_files = random.sample(files, min(len(files), 3))
# 
#         for file in sampled_files:
#             file_path = os.path.join(stage_folder, file)
#             with open(file_path, 'rb') as f:
#                 tensor = pickle.load(f)
#                 original_shape = tensor.shape
#                 pca = PCA(n_components=variance)
#                 pca_tensor = pca.fit_transform(tensor.reshape(tensor.shape[0], -1))
#                 pca_shape = pca_tensor.shape
#                 results[file] = {'original_shape': original_shape, 'pca_shape': pca_shape, 'explained_variance': variance}
# 
#     return results
# 
# # Example usage
# stage_folders = ['stage_1', 'stage_2', 'stage_3', 'stage_4', 'stage_5', 'final']
# repo_root = 'preprocessed_videos'  # Replace with your repo root path
# 
# samples = sample_and_apply_pca(stage_folders, repo_root)
# for file, info in samples.items():
#     print(f"File: {file}, Original Shape: {info['original_shape']}, PCA Shape: {info['pca_shape']}, Variance: {info['explained_variance']}")


File: 0854_stage_1.pkl, Original Shape: (46, 1, 56, 56, 64), PCA Shape: (46, 45), Variance: 0.999
File: 0432_stage_1.pkl, Original Shape: (45, 1, 56, 56, 64), PCA Shape: (45, 44), Variance: 0.999
File: 0366_stage_1.pkl, Original Shape: (45, 1, 56, 56, 64), PCA Shape: (45, 40), Variance: 0.999
File: 0703_stage_2.pkl, Original Shape: (45, 1, 56, 56, 256), PCA Shape: (45, 44), Variance: 0.999
File: 0219_stage_2.pkl, Original Shape: (38, 1, 56, 56, 256), PCA Shape: (38, 37), Variance: 0.999
File: 0317_stage_2.pkl, Original Shape: (45, 1, 56, 56, 256), PCA Shape: (45, 44), Variance: 0.999
File: 0307_stage_3.pkl, Original Shape: (45, 1, 28, 28, 512), PCA Shape: (45, 44), Variance: 0.999
File: 0256_stage_3.pkl, Original Shape: (45, 1, 28, 28, 512), PCA Shape: (45, 44), Variance: 0.999
File: 0574_stage_3.pkl, Original Shape: (45, 1, 28, 28, 512), PCA Shape: (45, 44), Variance: 0.999
File: 0709_stage_4.pkl, Original Shape: (36, 1, 14, 14, 1024), PCA Shape: (36, 35), Variance: 0.999
File: 0207_s

In [None]:
# import cv2
# from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
# from keras.preprocessing import image
# import numpy as np
# 
# # Load the ResNet50 model pre-trained on ImageNet data
# model = ResNet50(weights='imagenet')
# 
# def classify_mid_frame(video_path):
#     # Capture the video
#     vidcap = cv2.VideoCapture(video_path)
# 
#     # Get the frame in the middle of the video
#     total_frames = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
#     mid_frame_index = int(total_frames / 2)
#     vidcap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame_index)
#     success, mid_frame = vidcap.read()
#     
#     if not success:
#         return "Could not read the video file"
# 
#     # Convert the frame to a format suitable for ResNet50
#     img = cv2.cvtColor(mid_frame, cv2.COLOR_BGR2RGB)
#     img = cv2.resize(img, (224, 224))
#     x = image.img_to_array(img)
#     x = np.expand_dims(x, axis=0)
#     x = preprocess_input(x)
# 
#     # Classify the image
#     preds = model.predict(x)
#     return decode_predictions(preds, top=3)[0]


In [None]:
# # Example usage
# video_path = 'AlgonautsVideos268_All_30fpsmax/0006_0-1-9-9-8-6-5-9-4701998659.mp4'
# predictions = classify_mid_frame(video_patxh)
# print(predictions)