# GPU-Accelerated Face Extraction Pipeline

## Cross-Attention CNN Personality Trait Prediction Project

This notebook extracts faces from existing frame data using GPU-accelerated MTCNN face detection, then computes optical flow sequences for the Cross-Attention CNN model.

### Pipeline Overview:
1. **Verify GPU Support** - Ensure TensorFlow has GPU access
2. **Extract Faces** - Use MTCNN on existing frames (82,620 frames from 960 videos)
3. **Compute Optical Flow** - Generate flow sequences between consecutive face frames
4. **Progress Tracking** - Monitor extraction progress and save results

### Data Structure:
- **Input**: `data/processed/frames/` (existing frame extractions)
- **Output**: `data/processed/faces/` and `data/processed/optical_flow/`

In [1]:
# Import Required Libraries
import tensorflow as tf
import cv2
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import time
from datetime import datetime
import json
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

# Import MTCNN for face detection
from mtcnn import MTCNN

print(f"TensorFlow version: {tf.__version__}")
print(f"OpenCV version: {cv2.__version__}")
print(f"NumPy version: {np.__version__}")

TensorFlow version: 2.10.0
OpenCV version: 4.10.0
NumPy version: 1.24.0


In [2]:
# Check GPU Availability and Configuration
print("🔍 GPU Configuration Check")
print("=" * 50)

# List physical GPU devices
physical_gpus = tf.config.experimental.list_physical_devices('GPU')
logical_gpus = tf.config.experimental.list_logical_devices('GPU')

print(f"Physical GPUs: {len(physical_gpus)}")
print(f"Logical GPUs: {len(logical_gpus)}")

if physical_gpus:
    print("✅ GPU Support Available!")
    for i, gpu in enumerate(physical_gpus):
        print(f"   GPU {i}: {gpu}")
    
    # Enable memory growth to prevent TensorFlow from allocating all GPU memory
    try:
        for gpu in physical_gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✅ GPU memory growth enabled")
    except RuntimeError as e:
        print(f"⚠️ Memory growth setup error: {e}")
else:
    print("❌ No GPU detected - using CPU")

# Test GPU with a simple operation
with tf.device('/GPU:0' if physical_gpus else '/CPU:0'):
    test_tensor = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    result = tf.matmul(test_tensor, test_tensor)
    print(f"\n🧪 Test operation result: {result.numpy()}")
    print(f"Device used: {result.device}")

🔍 GPU Configuration Check
Physical GPUs: 1
Logical GPUs: 1
✅ GPU Support Available!
   GPU 0: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
⚠️ Memory growth setup error: Physical devices cannot be modified after being initialized

🧪 Test operation result: [[ 7. 10.]
 [15. 22.]]
Device used: /job:localhost/replica:0/task:0/device:GPU:0


In [4]:
# Initialize MTCNN Face Detector
print("🤖 Initializing MTCNN Face Detector")
print("=" * 50)

# Initialize MTCNN with default settings (it will auto-optimize)
detector = MTCNN()

print("✅ MTCNN detector initialized")

# Configuration
target_face_size = (224, 224)
frames_base_dir = 'data/processed/frames'
faces_base_dir = 'data/processed/faces'
flow_base_dir = 'data/processed/optical_flow'

# Create output directories
Path(faces_base_dir).mkdir(parents=True, exist_ok=True)
Path(flow_base_dir).mkdir(parents=True, exist_ok=True)
Path('results').mkdir(exist_ok=True)

print(f"📁 Output directories created:")
print(f"   Faces: {faces_base_dir}")
print(f"   Optical Flow: {flow_base_dir}")

🤖 Initializing MTCNN Face Detector
✅ MTCNN detector initialized
📁 Output directories created:
   Faces: data/processed/faces
   Optical Flow: data/processed/optical_flow


In [5]:
# Analyze Current Data Structure
print("📊 Data Structure Analysis")
print("=" * 50)

# Check frames directory structure
if os.path.exists(frames_base_dir):
    training_dirs = sorted([d for d in os.listdir(frames_base_dir) 
                           if os.path.isdir(os.path.join(frames_base_dir, d))])
    
    total_videos = 0
    total_frames = 0
    frame_stats = {}
    
    print(f"Found {len(training_dirs)} training directories:")
    
    for training_dir in training_dirs:
        training_path = os.path.join(frames_base_dir, training_dir)
        video_dirs = [d for d in os.listdir(training_path) 
                     if os.path.isdir(os.path.join(training_path, d))]
        
        dir_frames = 0
        for video_dir in video_dirs:
            video_path = os.path.join(training_path, video_dir)
            frame_files = [f for f in os.listdir(video_path) if f.endswith('.jpg')]
            dir_frames += len(frame_files)
        
        total_videos += len(video_dirs)
        total_frames += dir_frames
        frame_stats[training_dir] = {
            'videos': len(video_dirs),
            'frames': dir_frames
        }
        
        print(f"   {training_dir}: {len(video_dirs)} videos, {dir_frames} frames")
    
    print(f"\n📈 Summary:")
    print(f"   Total training directories: {len(training_dirs)}")
    print(f"   Total videos: {total_videos}")
    print(f"   Total frames: {total_frames:,}")
    
else:
    print("❌ Frames directory not found!")
    frame_stats = {}

📊 Data Structure Analysis
Found 12 training directories:
   training80_01: 80 videos, 6971 frames
   training80_02: 80 videos, 6874 frames
   training80_03: 80 videos, 6666 frames
   training80_04: 80 videos, 6858 frames
   training80_05: 80 videos, 6971 frames
   training80_06: 80 videos, 6861 frames
   training80_07: 80 videos, 6897 frames
   training80_08: 80 videos, 7060 frames
   training80_09: 80 videos, 6920 frames
   training80_10: 80 videos, 6738 frames
   training80_11: 80 videos, 6959 frames
   training80_12: 80 videos, 6845 frames

📈 Summary:
   Total training directories: 12
   Total videos: 960
   Total frames: 82,620


In [6]:
# Face Extraction Functions
def extract_face_from_frame(frame_path, detector, target_size=(224, 224)):
    """
    Extract face from a single frame using MTCNN
    
    Args:
        frame_path: Path to the frame image
        detector: MTCNN detector instance
        target_size: Target size for face image
    
    Returns:
        face_img: Processed face image or None if no face found
    """
    try:
        # Read image
        image = cv2.imread(frame_path)
        if image is None:
            return None
            
        # Convert BGR to RGB for MTCNN
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Detect faces
        result = detector.detect_faces(rgb_image)
        
        if result:
            # Use the first (most confident) face
            face = result[0]
            x, y, width, height = face['box']
            
            # Add padding and ensure bounds
            padding = 20
            x = max(0, x - padding)
            y = max(0, y - padding)
            width = min(rgb_image.shape[1] - x, width + 2*padding)
            height = min(rgb_image.shape[0] - y, height + 2*padding)
            
            # Extract face region
            face_img = rgb_image[y:y+height, x:x+width]
            
            # Resize to target size
            face_resized = cv2.resize(face_img, target_size)
            
            # Convert back to BGR for saving
            face_bgr = cv2.cvtColor(face_resized, cv2.COLOR_RGB2BGR)
            
            return face_bgr
            
    except Exception as e:
        print(f"Error processing {frame_path}: {e}")
        
    return None

def extract_faces_from_video_directory(video_frames_dir, video_faces_dir, detector):
    """
    Extract faces from all frames in a single video directory
    """
    Path(video_faces_dir).mkdir(parents=True, exist_ok=True)
    
    frame_files = sorted([f for f in os.listdir(video_frames_dir) if f.endswith('.jpg')])
    extracted_count = 0
    
    for frame_file in frame_files:
        frame_path = os.path.join(video_frames_dir, frame_file)
        face_img = extract_face_from_frame(frame_path, detector, target_face_size)
        
        if face_img is not None:
            face_filename = f"face_{os.path.splitext(frame_file)[0]}.jpg"
            face_path = os.path.join(video_faces_dir, face_filename)
            cv2.imwrite(face_path, face_img)
            extracted_count += 1
            
    return extracted_count

print("✅ Face extraction functions defined")

✅ Face extraction functions defined


In [None]:
# Main Face Extraction Pipeline
print("🚀 Starting GPU-Accelerated Face Extraction Pipeline")
print("=" * 60)

start_time = time.time()
extraction_stats = {
    'start_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'gpu_available': len(tf.config.experimental.list_logical_devices('GPU')) > 0,
    'training_directories': {},
    'total_faces': 0,
    'total_videos_processed': 0,
    'failed_videos': []
}

if os.path.exists(frames_base_dir):
    training_dirs = sorted([d for d in os.listdir(frames_base_dir) 
                           if os.path.isdir(os.path.join(frames_base_dir, d))])
    
    print(f"Processing {len(training_dirs)} training directories...")
    
    # Process each training directory
    for training_dir in tqdm(training_dirs, desc="Training Directories"):
        training_frames_path = os.path.join(frames_base_dir, training_dir)
        training_faces_path = os.path.join(faces_base_dir, training_dir)
        
        # Get all video directories
        video_dirs = sorted([d for d in os.listdir(training_frames_path) 
                           if os.path.isdir(os.path.join(training_frames_path, d))])
        
        training_faces = 0
        training_failed = 0
        
        # Process each video directory
        for video_dir in tqdm(video_dirs, desc=f"{training_dir} videos", leave=False):
            video_frames_path = os.path.join(training_frames_path, video_dir)
            video_faces_path = os.path.join(training_faces_path, video_dir)
            
            try:
                extracted = extract_faces_from_video_directory(
                    video_frames_path, video_faces_path, detector
                )
                
                if extracted > 0:
                    training_faces += extracted
                    extraction_stats['total_videos_processed'] += 1
                else:
                    training_failed += 1
                    extraction_stats['failed_videos'].append(f"{training_dir}/{video_dir}")
                    
            except Exception as e:
                print(f"Error processing {training_dir}/{video_dir}: {e}")
                training_failed += 1
                extraction_stats['failed_videos'].append(f"{training_dir}/{video_dir}")
        
        # Store training directory stats
        extraction_stats['training_directories'][training_dir] = {
            'videos_processed': len(video_dirs) - training_failed,
            'videos_failed': training_failed,
            'faces_extracted': training_faces
        }
        
        extraction_stats['total_faces'] += training_faces
        
        print(f"✅ {training_dir}: {training_faces} faces from {len(video_dirs)} videos")
    
    # Calculate processing time
    end_time = time.time()
    processing_time = end_time - start_time
    extraction_stats['end_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    extraction_stats['processing_time_seconds'] = processing_time
    extraction_stats['processing_time_formatted'] = str(time.strftime('%H:%M:%S', time.gmtime(processing_time)))
    
    print("\n" + "=" * 60)
    print("🎉 FACE EXTRACTION COMPLETED!")
    print("=" * 60)
    print(f"⏱️  Processing time: {extraction_stats['processing_time_formatted']}")
    print(f"😊 Total faces extracted: {extraction_stats['total_faces']:,}")
    print(f"📁 Videos processed: {extraction_stats['total_videos_processed']}")
    print(f"❌ Failed videos: {len(extraction_stats['failed_videos'])}")
    print(f"🎯 GPU acceleration: {'✅ Enabled' if extraction_stats['gpu_available'] else '❌ Disabled'}")
    
else:
    print("❌ Frames directory not found!")
    extraction_stats['error'] = 'Frames directory not found'

🚀 Starting GPU-Accelerated Face Extraction Pipeline
Processing 12 training directories...


Training Directories:   0%|          | 0/12 [00:00<?, ?it/s]

training80_01 videos:   0%|          | 0/80 [00:00<?, ?it/s]

✅ training80_01: 6966 faces from 80 videos


training80_02 videos:   0%|          | 0/80 [00:00<?, ?it/s]

In [None]:
# Save Extraction Results
results_file = 'results/face_extraction_results.json'

with open(results_file, 'w') as f:
    json.dump(extraction_stats, f, indent=2)

print(f"📊 Results saved to: {results_file}")

# Display detailed statistics
print("\n📈 Detailed Statistics:")
print("-" * 40)

if 'training_directories' in extraction_stats:
    for training_dir, stats in extraction_stats['training_directories'].items():
        success_rate = (stats['videos_processed'] / (stats['videos_processed'] + stats['videos_failed'])) * 100 if (stats['videos_processed'] + stats['videos_failed']) > 0 else 0
        print(f"{training_dir}:")
        print(f"  Faces: {stats['faces_extracted']:,}")
        print(f"  Videos: {stats['videos_processed']}/{stats['videos_processed'] + stats['videos_failed']} ({success_rate:.1f}% success)")
        print()

In [None]:
# Optical Flow Computation Functions
def compute_optical_flow_for_video(faces_dir, flow_dir):
    """
    Compute optical flow for a sequence of face images in one video
    
    Args:
        faces_dir: Directory containing face images
        flow_dir: Directory to save optical flow files
    
    Returns:
        Number of optical flow computations performed
    """
    try:
        Path(flow_dir).mkdir(parents=True, exist_ok=True)
        
        # Get all face images
        face_files = sorted([f for f in os.listdir(faces_dir) if f.endswith('.jpg')])
        
        if len(face_files) < 2:
            return 0
        
        flow_count = 0
        
        for i in range(len(face_files) - 1):
            # Read consecutive frames
            frame1_path = os.path.join(faces_dir, face_files[i])
            frame2_path = os.path.join(faces_dir, face_files[i + 1])
            
            frame1 = cv2.imread(frame1_path, cv2.IMREAD_GRAYSCALE)
            frame2 = cv2.imread(frame2_path, cv2.IMREAD_GRAYSCALE)
            
            if frame1 is not None and frame2 is not None:
                # Compute optical flow using Farneback method
                flow = cv2.calcOpticalFlowFarneback(
                    frame1, frame2, None, 
                    pyr_scale=0.5, levels=3, winsize=15, 
                    iterations=3, poly_n=5, poly_sigma=1.2, flags=0
                )
                
                # Save optical flow as numpy array
                flow_filename = f"flow_{i:04d}_{i+1:04d}.npy"
                flow_path = os.path.join(flow_dir, flow_filename)
                np.save(flow_path, flow)
                flow_count += 1
        
        return flow_count
        
    except Exception as e:
        print(f"Error computing optical flow for {faces_dir}: {e}")
        return 0

print("✅ Optical flow functions defined")

In [None]:
# Optical Flow Computation Pipeline
print("🌊 Starting Optical Flow Computation")
print("=" * 50)

flow_start_time = time.time()
flow_stats = {
    'start_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'training_directories': {},
    'total_flows': 0,
    'total_videos_processed': 0,
    'failed_videos': []
}

if os.path.exists(faces_base_dir):
    training_dirs = sorted([d for d in os.listdir(faces_base_dir) 
                           if os.path.isdir(os.path.join(faces_base_dir, d))])
    
    print(f"Computing optical flow for {len(training_dirs)} training directories...")
    
    # Process each training directory
    for training_dir in tqdm(training_dirs, desc="Computing Optical Flow"):
        training_faces_path = os.path.join(faces_base_dir, training_dir)
        training_flow_path = os.path.join(flow_base_dir, training_dir)
        
        # Get all video directories
        video_dirs = sorted([d for d in os.listdir(training_faces_path) 
                           if os.path.isdir(os.path.join(training_faces_path, d))])
        
        training_flows = 0
        training_failed = 0
        
        # Process each video directory
        for video_dir in tqdm(video_dirs, desc=f"{training_dir} optical flow", leave=False):
            video_faces_path = os.path.join(training_faces_path, video_dir)
            video_flow_path = os.path.join(training_flow_path, video_dir)
            
            try:
                flow_count = compute_optical_flow_for_video(video_faces_path, video_flow_path)
                
                if flow_count > 0:
                    training_flows += flow_count
                    flow_stats['total_videos_processed'] += 1
                else:
                    training_failed += 1
                    flow_stats['failed_videos'].append(f"{training_dir}/{video_dir}")
                    
            except Exception as e:
                print(f"Error computing optical flow for {training_dir}/{video_dir}: {e}")
                training_failed += 1
                flow_stats['failed_videos'].append(f"{training_dir}/{video_dir}")
        
        # Store training directory stats
        flow_stats['training_directories'][training_dir] = {
            'videos_processed': len(video_dirs) - training_failed,
            'videos_failed': training_failed,
            'flows_computed': training_flows
        }
        
        flow_stats['total_flows'] += training_flows
        
        print(f"✅ {training_dir}: {training_flows} optical flows from {len(video_dirs)} videos")
    
    # Calculate processing time
    flow_end_time = time.time()
    flow_processing_time = flow_end_time - flow_start_time
    flow_stats['end_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    flow_stats['processing_time_seconds'] = flow_processing_time
    flow_stats['processing_time_formatted'] = str(time.strftime('%H:%M:%S', time.gmtime(flow_processing_time)))
    
    print("\n" + "=" * 50)
    print("🎉 OPTICAL FLOW COMPUTATION COMPLETED!")
    print("=" * 50)
    print(f"⏱️  Processing time: {flow_stats['processing_time_formatted']}")
    print(f"🌊 Total optical flows: {flow_stats['total_flows']:,}")
    print(f"📁 Videos processed: {flow_stats['total_videos_processed']}")
    print(f"❌ Failed videos: {len(flow_stats['failed_videos'])}")
    
else:
    print("❌ Faces directory not found! Please run face extraction first.")
    flow_stats['error'] = 'Faces directory not found'

In [None]:
# Save Optical Flow Results
flow_results_file = 'results/optical_flow_results.json'

with open(flow_results_file, 'w') as f:
    json.dump(flow_stats, f, indent=2)

print(f"📊 Optical flow results saved to: {flow_results_file}")

# Create Combined Pipeline Summary
combined_stats = {
    'pipeline_completion_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'gpu_acceleration': len(tf.config.experimental.list_logical_devices('GPU')) > 0,
    'face_extraction': extraction_stats if 'extraction_stats' in locals() else {},
    'optical_flow': flow_stats if 'flow_stats' in locals() else {}
}

combined_results_file = 'results/complete_preprocessing_results.json'
with open(combined_results_file, 'w') as f:
    json.dump(combined_stats, f, indent=2)

print(f"\n📋 Complete pipeline results saved to: {combined_results_file}")

# Display Final Summary
print("\n" + "=" * 70)
print("🏁 COMPLETE PREPROCESSING PIPELINE SUMMARY")
print("=" * 70)

if 'extraction_stats' in locals() and 'flow_stats' in locals():
    total_pipeline_time = (extraction_stats.get('processing_time_seconds', 0) + 
                          flow_stats.get('processing_time_seconds', 0))
    
    print(f"📊 Final Statistics:")
    print(f"   • Total faces extracted: {extraction_stats.get('total_faces', 0):,}")
    print(f"   • Total optical flows: {flow_stats.get('total_flows', 0):,}")
    print(f"   • Videos processed: {extraction_stats.get('total_videos_processed', 0)}")
    print(f"   • GPU acceleration: {'✅ Enabled' if combined_stats['gpu_acceleration'] else '❌ Disabled'}")
    print(f"   • Total pipeline time: {time.strftime('%H:%M:%S', time.gmtime(total_pipeline_time))}")
    
    print(f"\n📁 Output Directories:")
    print(f"   • Faces: {faces_base_dir}")
    print(f"   • Optical Flow: {flow_base_dir}")
    print(f"   • Results: results/")
    
    print(f"\n✅ Preprocessing pipeline completed successfully!")
    print(f"   Ready for feature extraction and model training.")

else:
    print("⚠️ Pipeline incomplete - check error messages above")

In [None]:
# Data Verification and Next Steps
print("\n🔍 Data Verification")
print("=" * 30)

# Verify output structure
verification_results = {
    'faces_directory_exists': os.path.exists(faces_base_dir),
    'optical_flow_directory_exists': os.path.exists(flow_base_dir),
    'face_count': 0,
    'flow_count': 0
}

if verification_results['faces_directory_exists']:
    # Count total faces
    for root, dirs, files in os.walk(faces_base_dir):
        verification_results['face_count'] += len([f for f in files if f.endswith('.jpg')])

if verification_results['optical_flow_directory_exists']:
    # Count total optical flow files
    for root, dirs, files in os.walk(flow_base_dir):
        verification_results['flow_count'] += len([f for f in files if f.endswith('.npy')])

print(f"✅ Verification Results:")
print(f"   • Faces directory: {'✅' if verification_results['faces_directory_exists'] else '❌'}")
print(f"   • Optical flow directory: {'✅' if verification_results['optical_flow_directory_exists'] else '❌'}")
print(f"   • Total face images: {verification_results['face_count']:,}")
print(f"   • Total optical flow files: {verification_results['flow_count']:,}")

# Next Steps
print(f"\n🎯 Next Steps:")
print(f"   1. Extract static features (ResNet-50 2D CNN) → 512-dim features")
print(f"   2. Extract dynamic features (I3D 3D CNN) → 256-dim features")
print(f"   3. Validate data alignment (960 samples)")
print(f"   4. Begin Cross-Attention CNN model training")
print(f"\n📖 Ready to proceed with feature extraction and training!")