# In [None]:
import os
import cv2
from mtcnn import MTCNN
import keras
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

# ========== TESTING ON UNSEEN VIDEOS ==========
# Section banner: blank line, rule, title, rule.
print('\n' + '=' * 80, 'TESTING ON UNSEEN VIDEOS', '=' * 80, sep='\n')

# On Kaggle the dataset and working directories are fixed absolute paths;
# anywhere else the folders are resolved against the current working directory.
IS_KAGGLE = os.path.exists('/kaggle/input')
if not IS_KAGGLE:
    VIDEO_TEST_PATH, MODEL_TEST_PATH, DEBUG_DIR = (
        os.path.abspath(folder)
        for folder in ('video-test-unseen', 'models', 'debug_images')
    )
else:
    VIDEO_TEST_PATH = '/kaggle/input/video-test-unseen'
    MODEL_TEST_PATH = '/kaggle/working/models'
    DEBUG_DIR = '/kaggle/working/debug_images'

# Make sure the debug output folder exists before anything writes into it.
os.makedirs(DEBUG_DIR, exist_ok=True)
print(f'Debug preprocessed images will be saved to: {DEBUG_DIR}')

# Everything nested under this check (helper definition, video discovery,
# model loading, inference and reporting) only runs when the unseen-video
# directory exists; the matching `else` branch just reports that it is missing.
if os.path.exists(VIDEO_TEST_PATH):
    print(f'Unseen video test path found: {VIDEO_TEST_PATH}')
    
    # Initialize face detector once; process_unseen_video reads this
    # `face_detector` name from the enclosing scope for every frame.
    face_detector = MTCNN()
    
    def _select_largest_face(detections, min_confidence):
        """Return the ``[x, y, width, height]`` box of the largest confident face.

        Detections without a ``'box'`` entry, or whose ``'confidence'`` is below
        ``min_confidence``, are ignored. Returns ``None`` if nothing qualifies.
        """
        best_box = None
        best_area = -1
        for detection in detections:
            box = detection.get('box', None)
            confidence = detection.get('confidence', 0.0)
            if box is None or confidence < min_confidence:
                continue
            _, _, width, height = box
            area = width * height
            # Strict '>' keeps the first of several equally large faces.
            if area > best_area:
                best_area = area
                best_box = box
        return best_box

    def _crop_face_with_margin(image, box, margin):
        """Crop ``box`` from ``image``, grown by ``margin`` pixels per side.

        The left/top edges are clamped at 0 and the right/bottom edges at the
        image width/height, so the margin never reaches outside the frame.
        """
        x, y, width, height = box
        left = max(0, int(round(float(x))) - margin)
        top = max(0, int(round(float(y))) - margin)
        right = min(image.shape[1], int(round(float(x + width))) + margin)
        bottom = min(image.shape[0], int(round(float(y + height))) + margin)
        return image[top:bottom, left:right]

    def process_unseen_video(video_path, model, img_size, num_frames=10, save_debug=False, video_idx=0, model_name=''):
        """
        Classify one video by averaging per-frame face predictions.

        Frames are sampled evenly across the video, downscaled to at most 640 px
        wide, and passed through the shared MTCNN ``face_detector``. The largest
        confident face is cropped with a fixed 15 px margin, resized to
        ``img_size`` x ``img_size``, scaled to [0, 1] and fed to ``model``.
        The preprocessing is intended to mirror
        extract_frames_from_video_to_files from import-image-celeb-df.ipynb.

        Args:
            video_path: Path to video file
            model: Loaded Keras model; ``model.predict(batch)[0][0]`` is read
                as the per-frame probability
            img_size: Target (square) image size for model
            num_frames: Number of evenly spaced frames to sample; repeated
                indices (video shorter than ``num_frames``) are sampled once
            save_debug: Whether to save preprocessed face crops to DEBUG_DIR
                (only for the first 5 sample positions)
            video_idx: Index of video for debug naming
            model_name: Model name for debug file naming

        Returns:
            dict with ``video_name``, ``frame_predictions``, ``avg_probability``,
            ``prediction`` (1 if the average is > 0.5, else 0),
            ``prediction_label`` ('Fake' / 'Real') and ``num_frames_processed``;
            or ``None`` if the video has no readable frames or no frame
            produced a prediction.
        """
        # CRITICAL: Match training preprocessing exactly
        FACE_MARGIN = 15  # Same as training
        RESIZE_MAX_WIDTH = 640  # Same as training
        MIN_CONFIDENCE = 0.8  # Same as training

        video_name = os.path.basename(video_path)
        # splitext (rather than slicing off 4 characters) handles any extension length
        video_stem = os.path.splitext(video_name)[0]
        frame_predictions = []

        cap = cv2.VideoCapture(video_path)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Treat an unopened capture or a non-positive frame count as unreadable.
            if not cap.isOpened() or total_frames <= 0:
                return None

            # Evenly spaced sample positions; np.unique drops the repeats that
            # linspace produces when the video has fewer frames than num_frames,
            # so no frame is detected, predicted and averaged more than once.
            frame_indices = np.unique(
                np.linspace(0, total_frames - 1, num_frames, dtype=int)
            )

            for frame_num, frame_idx in enumerate(frame_indices):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
                ret, frame = cap.read()
                if not ret:
                    continue

                # Step 1: Resize frame if too large (aspect ratio preserved)
                h, w = frame.shape[:2]
                if w > RESIZE_MAX_WIDTH:
                    scale = RESIZE_MAX_WIDTH / float(w)
                    frame = cv2.resize(frame, (RESIZE_MAX_WIDTH, int(h * scale)),
                                       interpolation=cv2.INTER_AREA)

                # OpenCV decodes to BGR; the detector and model are fed RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Best effort per frame: a failure in detection or inference is
                # reported and the frame skipped, so one bad frame does not
                # discard the whole video.
                try:
                    # Step 2: MTCNN face detection
                    detections = face_detector.detect_faces(frame_rgb)
                    if not detections:
                        continue

                    # Step 3: Select largest face by area (SAME AS TRAINING)
                    box = _select_largest_face(detections, MIN_CONFIDENCE)
                    if box is None:
                        continue

                    # Step 4: Apply FIXED margin (15 pixels, SAME AS TRAINING)
                    face = _crop_face_with_margin(frame_rgb, box, FACE_MARGIN)
                    if face.size == 0:
                        continue

                    # Step 5: Resize to target size with INTER_AREA (SAME AS TRAINING)
                    face_resized = cv2.resize(face, (img_size, img_size),
                                              interpolation=cv2.INTER_AREA)

                    # Save preprocessed image BEFORE normalization
                    if save_debug and frame_num < 5:  # First 5 sample positions per video
                        debug_path = os.path.join(
                            DEBUG_DIR,
                            f'{model_name}_vid{video_idx:02d}_{video_stem}_frame{frame_num:02d}.jpg')
                        # cv2.imwrite expects BGR channel order
                        face_bgr = cv2.cvtColor(face_resized, cv2.COLOR_RGB2BGR)
                        cv2.imwrite(debug_path, face_bgr)

                        if frame_num == 0:  # Print details for first frame only
                            print(f"    Saved: {os.path.basename(debug_path)}")
                            print(f"    Shape: {face_resized.shape}, Range: [{face_resized.min()}, {face_resized.max()}]")

                    # Step 6: Normalize to [0,1] (SAME AS TRAINING)
                    face_normalized = face_resized.astype(np.float32) / 255.0

                    # Add batch dimension
                    face_batch = np.expand_dims(face_normalized, axis=0)

                    # Predict
                    pred_prob = model.predict(face_batch, verbose=0)[0][0]
                    frame_predictions.append(float(pred_prob))

                    # Debug: Print first prediction details
                    if len(frame_predictions) == 1:
                        print(f"    Normalized range: [{face_normalized.min():.3f}, {face_normalized.max():.3f}]")
                        print(f"    Prediction: {pred_prob:.6f}")

                except Exception as e:
                    print(f"    Frame {frame_idx} error: {e}")
                    continue
        finally:
            # Always free the capture handle, including on early return or error
            cap.release()

        if not frame_predictions:
            return None

        # Aggregate predictions: mean frame probability, thresholded at 0.5
        avg_prob = float(np.mean(frame_predictions))
        video_pred = 1 if avg_prob > 0.5 else 0

        return {
            'video_name': video_name,
            'frame_predictions': frame_predictions,
            'avg_probability': avg_prob,
            'prediction': video_pred,
            'prediction_label': 'Fake' if video_pred == 1 else 'Real',
            'num_frames_processed': len(frame_predictions)
        }
    
    
    # Find all video files (recursively, case-insensitive extension match)
    video_extensions = ['.mp4', '.avi', '.mov', '.mkv']
    video_suffixes = tuple(video_extensions)  # str.endswith accepts a tuple of options
    unseen_videos = []
    
    for root, _dirs, files in os.walk(VIDEO_TEST_PATH):
        unseen_videos.extend(
            os.path.join(root, file)
            for file in files
            if file.lower().endswith(video_suffixes)
        )
    
    # os.walk yields entries in filesystem order, which is not stable across
    # machines or runs; sort so that any later "first N videos" slice is reproducible.
    unseen_videos.sort()
    
    print(f'\nFound {len(unseen_videos)} unseen videos')
    
    if unseen_videos:
        # Load best models
        print('\n--- Loading Best Models ---')
        print('Loading ResNet50 final model...')
        model_resnet_best = keras.models.load_model(
            os.path.join(MODEL_TEST_PATH, 'ResNet50_final.keras')
        )

        # Process videos
        print('\n--- Processing Unseen Videos ---')
        unseen_results = []
        videos_to_process = unseen_videos[:10]  # Process first 10 videos
        
        for idx, video_path in enumerate(videos_to_process):
            # Show the real total, which may be fewer than 10
            print(f'\n[{idx+1}/{len(videos_to_process)}] {os.path.basename(video_path)}')
            
            # ResNet50 prediction. Debug crop saving is switched off here;
            # pass save_debug=True to dump the preprocessed faces to DEBUG_DIR.
            print('  ResNet50 (224x224):')
            result_resnet = process_unseen_video(
                video_path, model_resnet_best, 
                224, num_frames=10,
                save_debug=False, video_idx=idx, model_name='resnet50'
            )
            
            # None means the video was unreadable or yielded no usable face
            if result_resnet:
                unseen_results.append({
                    'video_name': result_resnet['video_name'],
                    'resnet50_prob': result_resnet['avg_probability'],
                    'resnet50_pred': result_resnet['prediction_label'],
                    'frames_processed': result_resnet['num_frames_processed']
                })
                
                print(f"  → ResNet50: {result_resnet['prediction_label']} ({result_resnet['avg_probability']:.4f})")
        
        # Clean up models
        del model_resnet_best
        tf.keras.backend.clear_session()
        
        # Display results
        if unseen_results:
            print('\n' + '='*80)
            print('UNSEEN VIDEO PREDICTIONS SUMMARY')
            print('='*80)
            unseen_df = pd.DataFrame(unseen_results)
            
            # The ensemble probability is the mean of every per-model '*_prob'
            # column. The original read 'ensemble_prob' without ever creating
            # it (KeyError). With only ResNet50 present it equals resnet50_prob.
            prob_columns = [col for col in unseen_df.columns if col.endswith('_prob')]
            unseen_df['ensemble_prob'] = unseen_df[prob_columns].mean(axis=1)
            unseen_df['ensemble_pred'] = unseen_df['ensemble_prob'].apply(
                lambda prob: 'Fake' if prob > 0.5 else 'Real'
            )
            display(unseen_df)
            
            # PLOTS_DIR is not defined in this cell. Use it when an earlier cell
            # provided it; otherwise fall back to a 'plots' folder next to
            # DEBUG_DIR so the results are not lost to a NameError.
            try:
                plots_dir = PLOTS_DIR
            except NameError:
                plots_dir = os.path.join(os.path.dirname(DEBUG_DIR), 'plots')
            os.makedirs(plots_dir, exist_ok=True)
            
            # Save results
            unseen_results_path = os.path.join(plots_dir, 'unseen_video_predictions.csv')
            unseen_df.to_csv(unseen_results_path, index=False)
            print(f'\nPredictions saved to: {unseen_results_path}')
            
            # Plot predictions
            fig, ax = plt.subplots(figsize=(12, 6))
            positions = np.arange(len(unseen_df))
            bar_width = 0.25
            
            # Single series: centre each bar on its tick instead of offsetting it
            ax.bar(positions, unseen_df['resnet50_prob'], bar_width, label='ResNet50', alpha=0.8)
            
            ax.axhline(y=0.5, color='r', linestyle='--', label='Decision Threshold')
            ax.set_xlabel('Video')
            ax.set_ylabel('Fake Probability')
            ax.set_title('Unseen Video Predictions')
            ax.set_xticks(positions)
            # Truncate long file names so the rotated labels stay readable
            ax.set_xticklabels([v[:20] + '...' if len(v) > 20 else v for v in unseen_df['video_name']], 
                              rotation=45, ha='right')
            ax.legend()
            ax.grid(axis='y', alpha=0.3)
            
            plt.tight_layout()
            plot_path = os.path.join(plots_dir, 'unseen_video_predictions.png')
            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
            print(f'Predictions plot saved to: {plot_path}')
            plt.show()
        else:
            print('No videos were successfully processed.')
    else:
        print('No video files found in the unseen test directory.')
else:
    print(f'Unseen video test path not found: {VIDEO_TEST_PATH}')
    print('Skipping unseen video testing.')


# Closing banner.
# NOTE(review): this is printed unconditionally, even when the test directory
# was missing or frames were processed with save_debug=False, so it states
# where debug crops are written rather than proving that any were created.
print('\n' + '='*80)
print('DEBUGGING COMPLETE!')
print('='*80)
print(f'✅ Preprocessed images saved to: {DEBUG_DIR}')
print('\n📂 Debug files created:')
print('   - resnet50_vid##_<videoname>_frame##.jpg   (224x224 preprocessed faces)')
