In [10]:
import os
import cv2
import torch
import pyiqa
import numpy as np
import pandas as pd
import datetime
from collections import deque
from tqdm import tqdm
from scenedetect import open_video, SceneManager
from scenedetect.detectors import AdaptiveDetector
from scenedetect.video_splitter import split_video_ffmpeg

# ==============================================================================
# 1. --- CONFIGURATION ---
# ==============================================================================

# --- Input/Output Folders & Files ---
# Directory that is listed for movie files (.mp4/.mkv/.avi/.mov) by the main script.
MOVIE_INPUT_FOLDER = "movie"
# Root directory for all outputs; one sub-folder per movie is created beneath it.
OUTPUT_BASE_FOLDER = "movie_analysis_results"

# --- Log Filenames ---
# Tracks fully completed movies. Location: <OUTPUT_BASE_FOLDER>/
MOVIE_STATUS_LOG = "movie_processing_status.csv"
# Tracks processed scenes for a single movie. Location: <OUTPUT_BASE_FOLDER>/<MOVIE_NAME>/
SCENE_LOG_FILENAME = "scene_processing_log.csv"
# The scores CSV saved inside each scene's frame folder.
SCENE_SCORES_FILENAME = "quality_scores.csv"

# --- Analysis Parameters ---
# Number of consecutive passing frames required for a scene to count as a success.
SEQUENCE_LENGTH = 15
# A frame passes only if its MUSIQ score is >= this value ...
MUSIQ_THRESHOLD = 35.0
# ... and its NIQE score is <= this value.
NIQE_THRESHOLD = 6.0
# Passed to AdaptiveDetector as `adaptive_threshold`.
SCENE_DETECTOR_THRESHOLD = 3.0

# --- Debugging ---
# Set to True to save the final selected frames as images for verification.
# (The per-scene scores CSV is written regardless of this flag.)
DEBUG_SAVE_FRAMES = True
# Set to True to save each detected scene as a separate video clip.
# This is useful for verifying scene detection but can use a lot of disk space.
# NOTE(review): the original comment stated the default is False, yet the flag
# is enabled here -- confirm which value is intended before committing.
DEBUG_SAVE_SCENE_CLIPS = True

# ==============================================================================
# 2. --- CORE FUNCTIONS ---
# ==============================================================================

def detect_scenes_and_get_frame_ranges(video_path):
    """
    Detect scenes in a video and return them in two parallel forms.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A tuple ``(scene_list_raw, frame_ranges)``:
          * ``scene_list_raw`` -- the (start, end) pairs exactly as returned by
            ``SceneManager.get_scene_list``; this is what the clip splitter
            is given.
          * ``frame_ranges`` -- the same scenes as ``(start_frame, end_frame)``
            integer pairs obtained through ``get_frames()``.
        Both lists are empty if detection fails for any reason; the error is
        printed rather than raised so one bad file does not stop the batch.
    """
    print(f"\n🔍 Detecting all scenes in '{os.path.basename(video_path)}'...")
    try:
        video = open_video(video_path)
        scene_manager = SceneManager()
        scene_manager.add_detector(AdaptiveDetector(adaptive_threshold=SCENE_DETECTOR_THRESHOLD))
        scene_manager.detect_scenes(video=video, show_progress=True)
        # start_in_scene=True: when the detector finds no cut at all, report
        # the whole video as a single scene instead of an empty list, so
        # single-shot clips are still analysed rather than silently skipped.
        scene_list_raw = scene_manager.get_scene_list(start_in_scene=True)
        frame_ranges = [(s.get_frames(), e.get_frames()) for s, e in scene_list_raw]
        print(f"✅ Found {len(frame_ranges)} total scenes.")
        return scene_list_raw, frame_ranges
    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller skip the movie.
        print(f"⚠️ Error during scene detection for {video_path}: {e}")
        return [], []

def find_sequence_in_scene(
    cap, video_name, scene_num, start_frame, end_frame,
    musiq_metric, niqe_metric, device, movie_output_folder
):
    """
    Scan one scene for the first run of SEQUENCE_LENGTH consecutive good frames.

    The capture is positioned at ``start_frame`` and frames are read one by one
    up to (not including) ``end_frame``. A frame counts as "good" when its
    MUSIQ score is at least MUSIQ_THRESHOLD and its NIQE score is at most
    NIQE_THRESHOLD. Any failing frame, or a frame whose pixel standard
    deviation is below 10.0, resets the run.

    On the first complete run, a ``scene_<NNN>_frames`` folder is created under
    ``movie_output_folder``; the scores CSV is always written there and the
    frames themselves are written as PNGs when DEBUG_SAVE_FRAMES is set.
    Scanning stops after that first run.

    ``video_name`` is accepted for interface compatibility but not used.
    Returns None; the outcome is reported through console output and files.
    """
    print(f"\n🎬 Processing Scene {scene_num} (Frames: {start_frame}-{end_frame})")

    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    window = deque(maxlen=SEQUENCE_LENGTH)
    found = False
    frames_in_scene = end_frame - start_frame

    with tqdm(total=frames_in_scene, desc=f"Scanning Scene {scene_num}", unit="frame") as progress:
        for frame_idx in range(start_frame, end_frame):
            ok, bgr = cap.read()
            if not ok:
                break
            progress.update(1)

            # Low-variance frames break the run without being scored.
            if np.std(bgr) < 10.0:
                window.clear()
                continue

            # Presumably HxWx3 BGR input -> 1x3xHxW RGB tensor scaled by 1/255.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            batch = torch.tensor(rgb).permute(2, 0, 1).unsqueeze(0) / 255.0
            batch = batch.to(device)

            with torch.no_grad():
                musiq = musiq_metric(batch).item()
                niqe = niqe_metric(batch).item()

            passes = (musiq >= MUSIQ_THRESHOLD) and (niqe <= NIQE_THRESHOLD)
            if passes:
                window.append({
                    'frame_num': frame_idx, 'musiq_score': musiq,
                    'niqe_score': niqe, 'frame': bgr,
                })
            else:
                window.clear()

            # Keep scanning until the window holds a full run.
            if len(window) != SEQUENCE_LENGTH:
                continue

            print(f"  -> 🎉 Success! Found high-quality sequence in Scene {scene_num}.")
            found = True

            scene_dir = os.path.join(movie_output_folder, f"scene_{scene_num:03d}_frames")
            os.makedirs(scene_dir, exist_ok=True)

            if DEBUG_SAVE_FRAMES:
                for entry in window:
                    png_path = os.path.join(scene_dir, f"frame_{entry['frame_num']:06d}.png")
                    cv2.imwrite(png_path, entry['frame'])

            rows = [
                {
                    'frame_number': entry['frame_num'],
                    'musiq_score': round(entry['musiq_score'], 2),
                    'niqe_score': round(entry['niqe_score'], 2)
                }
                for entry in window
            ]

            csv_path = os.path.join(scene_dir, SCENE_SCORES_FILENAME)
            pd.DataFrame(rows).to_csv(csv_path, index=False)
            print(f"  -> 💾 Scores and frames saved in: {scene_dir}")
            break

    if not found:
        print(f"  -> ❌ No valid sequence found in Scene {scene_num}.")

# ==============================================================================
# 3. --- MAIN EXECUTION (HIERARCHICAL LOGGING) ---
# ==============================================================================

if __name__ == "__main__":
    # Script entry point. For every movie in MOVIE_INPUT_FOLDER: detect scenes,
    # scan each not-yet-logged scene for a high-quality frame sequence, and
    # record progress in two CSV logs (per-movie scene log + global movie log)
    # so that an interrupted run can resume where it stopped.
    if not os.path.isdir(MOVIE_INPUT_FOLDER):
        print(f"FATAL: Input directory not found at '{MOVIE_INPUT_FOLDER}'")
        # SystemExit instead of the site-provided exit() helper, with a
        # non-zero status so callers can detect the failure.
        raise SystemExit(1)

    # --- Setup IQA Models ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"🚀 Initializing IQA models on device: {device}")
    musiq_metric = pyiqa.create_metric('musiq', device=device)
    niqe_metric = pyiqa.create_metric('niqe', device=device)

    # --- Tier 1: Load Status of Fully Completed Movies from main CSV log ---
    os.makedirs(OUTPUT_BASE_FOLDER, exist_ok=True)
    movie_log_path = os.path.join(OUTPUT_BASE_FOLDER, MOVIE_STATUS_LOG)

    completed_movies = set()
    if os.path.exists(movie_log_path):
        try:
            # dtype=str keeps purely numeric titles (e.g. "2012", "007") as
            # strings; otherwise pandas parses them as numbers and they never
            # match the names derived from filenames below.
            df_movie_log = pd.read_csv(movie_log_path, dtype=str)
            if 'movie_name' in df_movie_log.columns:
                completed_movies = set(df_movie_log['movie_name'].dropna())
            print(f"✅ Found main movie log. Will skip {len(completed_movies)} fully completed movies.")
        except Exception as e:
            print(f"⚠️ Could not read main movie log file '{movie_log_path}': {e}")

    # --- Process each movie in the input folder ---
    # Extension match is case-insensitive so files such as "CLIP.MP4" are found.
    video_extensions = (".mp4", ".mkv", ".avi", ".mov")
    movie_files = sorted(
        f for f in os.listdir(MOVIE_INPUT_FOLDER)
        if f.lower().endswith(video_extensions)
    )
    print(f"\nFound {len(movie_files)} total movies. Starting pipeline...")

    for movie_file in movie_files:
        video_path = os.path.join(MOVIE_INPUT_FOLDER, movie_file)

        # Dot-files (e.g. ".mp4", "._Movie.mp4") have no usable base name.
        if movie_file.startswith('.'):
            print(f"\n⚠️  Could not determine a valid name for file '{movie_file}'. Skipping.")
            continue

        # splitext drops only the final extension, so "Film.Part1.mp4" and
        # "Film.Part2.mp4" keep distinct names instead of both becoming "Film".
        # Movies already logged under a truncated name will not match and
        # will be processed again.
        video_name = os.path.splitext(movie_file)[0]

        if video_name in completed_movies:
            print(f"\n⏭️  Skipping '{video_name}' (marked as fully complete in main log).")
            continue

        print(f"\n\n{'='*60}\n▶️  Processing movie: {video_name}\n{'='*60}")

        movie_specific_folder = os.path.join(OUTPUT_BASE_FOLDER, video_name)
        os.makedirs(movie_specific_folder, exist_ok=True)
        scene_log_path = os.path.join(movie_specific_folder, SCENE_LOG_FILENAME)

        # --- Tier 2: scenes of this movie that were handled in earlier runs ---
        processed_scene_indices = set()
        if os.path.exists(scene_log_path):
            try:
                df_scene_log = pd.read_csv(scene_log_path)
                if not df_scene_log.empty:
                    processed_scene_indices = set(df_scene_log['scene_index'].unique())
                    print(f"✅ Found scene log for '{video_name}'. Will skip {len(processed_scene_indices)} processed scenes.")
            except Exception as e:
                print(f"⚠️ Could not read scene log for '{video_name}': {e}")

        # Raw scene list is used for clip splitting, frame ranges for analysis.
        scene_list_raw, scene_ranges = detect_scenes_and_get_frame_ranges(video_path)
        if not scene_ranges:
            print(f"No scenes detected for '{video_name}'. Skipping.")
            continue

        # Optional debug output: one video clip per detected scene.
        if DEBUG_SAVE_SCENE_CLIPS:
            clips_output_folder = os.path.join(movie_specific_folder, "scene_clips")
            os.makedirs(clips_output_folder, exist_ok=True)
            print(f"  -> 🐛 Debug mode: Saving scene clips to '{clips_output_folder}'...")
            try:
                split_video_ffmpeg(video_path, scene_list_raw, output_dir=clips_output_folder, show_progress=True)
            except Exception as e:
                print(f"     -> ⚠️ Could not save scene clips: {e}")
                print("        (Please ensure ffmpeg is installed and in your system's PATH.)")

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            print(f"Error: Could not open {video_path}. Skipping.")
            continue

        # Stays True only if every scene had already been logged before this run.
        nothing_left_to_process = True
        try:
            for scene_num, (start_frame, end_frame) in enumerate(scene_ranges, start=1):
                if scene_num in processed_scene_indices:
                    continue

                nothing_left_to_process = False
                find_sequence_in_scene(
                    cap, video_name, scene_num, start_frame, end_frame,
                    musiq_metric, niqe_metric, device, movie_specific_folder
                )

                # Log the scene right away so an interrupted run can resume here.
                log_entry = {'scene_index': scene_num, 'frame_range': f"({start_frame}, {end_frame})"}
                df_log = pd.DataFrame([log_entry])
                header = not os.path.exists(scene_log_path) or os.path.getsize(scene_log_path) == 0
                df_log.to_csv(scene_log_path, mode='a', header=header, index=False)
                print(f"  -> 📝 Logged Scene {scene_num} in '{video_name}'s specific log.")
        finally:
            # Release the capture even if scene processing raised.
            cap.release()

        if nothing_left_to_process and len(scene_ranges) > 0:
            print(f"\nAll scenes for '{video_name}' were already processed. Updating main log.")

        # Reaching this point means every scene is logged: mark the movie done.
        log_entry = {
            'movie_name': video_name,
            'completion_timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        df_new_log = pd.DataFrame([log_entry])
        header = not os.path.exists(movie_log_path) or os.path.getsize(movie_log_path) == 0
        df_new_log.to_csv(movie_log_path, mode='a', header=header, index=False)
        print(f"✅ Marked '{video_name}' as fully processed in main log.")

    print(f"\n\n✨✨✨ All processing complete! ✨✨✨")

🚀 Initializing IQA models on device: cuda
Loading pretrained model MUSIQ from C:\Users\Welcome\.cache\torch\hub\pyiqa\musiq_koniq_ckpt-e95806b9.pth
✅ Found main movie log. Will skip 2 fully completed movies.

Found 2 total movies. Starting pipeline...

⏭️  Skipping 'Beast (1)' (marked as fully complete in main log).

⏭️  Skipping 'Beast (2)' (marked as fully complete in main log).


✨✨✨ All processing complete! ✨✨✨
