In [None]:
import os
import cv2
import shutil
import numpy as np
import matplotlib.pyplot as plt
from decord import VideoReader, cpu
from moviepy.editor import AudioFileClip
from scipy.io import wavfile
from scipy.fftpack import dct
from PIL import Image
import subprocess
import tensorflow as tf
import re
import time
import pandas as pd

# CONFIGURATION
# Side length, in pixels, of the square frames produced by read_video and
# written by create_video_with_aus.
input_size = 224
# NOTE(review): num_frame and sampling_rate are not referenced by any code
# visible in this file -- presumably clip-sampling settings; confirm before
# relying on or removing them.
num_frame = 8
sampling_rate = 6
fps = 25  # frames per second for output video

# Install directory of the OpenFace build. It is also prepended to PATH when
# FeatureExtraction.exe is launched (see extract_openface_features).
OPENFACE_DIR = r"D:\OpenFace_2.2.0_win_x64\OpenFace_2.2.0_win_x64"
# Full path of the OpenFace feature-extraction executable.
OPENFACE_PATH = os.path.join(OPENFACE_DIR, "FeatureExtraction.exe")

def sanitize_filename(filename):
    """Drop the characters ``< > : " / \\ | ? *`` and keep at most the first 50 characters."""
    forbidden = '<>:"/\\|?*'
    # A translation table that maps every forbidden character to "deleted".
    removal_table = str.maketrans('', '', forbidden)
    cleaned = filename.translate(removal_table)
    return cleaned[:50]

def read_video(file_path):
    """Decode every frame of ``file_path`` and return them resized to ``input_size`` x ``input_size``."""
    reader = VideoReader(file_path, ctx=cpu(0))
    every_index = range(len(reader))
    # The whole video is fetched in one batch and converted to a NumPy array.
    raw_frames = reader.get_batch(every_index).asnumpy()
    target_size = (input_size, input_size)
    return format_frames(raw_frames, target_size)

def format_frames(frame, output_size):
    """Convert frames to ``tf.uint8`` image dtype, then resize them to ``output_size``.

    ``output_size`` is passed straight to ``tf.image.resize``.
    """
    as_uint8 = tf.image.convert_image_dtype(frame, tf.uint8)
    return tf.image.resize(as_uint8, output_size)

def uniform_temporal_subsample(x, num_samples, clip_idx, total_clips, frame_rate=1, temporal_dim=0):
    """Pick ``num_samples`` evenly spaced entries of ``x`` along ``temporal_dim``.

    The starting offset of the sampled window is ``clip_idx`` times the
    per-clip step, where the step is the spare length
    (``length - num_samples * frame_rate``) floor-divided by ``total_clips``.

    Raises:
        ValueError: If ``x`` has fewer than ``num_samples * frame_rate``
            entries along ``temporal_dim``.
    """
    length = tf.shape(x)[temporal_dim]
    slack = length - num_samples * frame_rate
    if slack < 0:
        raise ValueError("Video too short for requested clip sampling.")

    start = clip_idx * (slack // total_clips)
    stop = start + (num_samples - 1) * frame_rate
    positions = tf.linspace(
        tf.cast(start, tf.float32),
        tf.cast(stop, tf.float32),
        num_samples,
    )

    # Keep every position inside the valid index range before rounding to ints.
    last_valid = tf.cast(length - 1, tf.float32)
    positions = tf.clip_by_value(positions, 0, last_valid)
    gather_at = tf.cast(tf.round(positions), tf.int32)
    return tf.gather(x, gather_at, axis=temporal_dim)

def clip_generator(image, num_frames=8, frame_rate=1, num_clips=1, crop_size=224):
    """Sample ``num_clips`` clips from ``image`` and return them as one flat frame batch.

    Each clip is taken with ``uniform_temporal_subsample`` along axis 0; the
    stacked clips are reshaped to
    ``[num_clips * num_frames, crop_size, crop_size, 3]``.
    """
    clips = [
        uniform_temporal_subsample(
            image, num_frames, clip_no, num_clips, frame_rate=frame_rate, temporal_dim=0
        )
        for clip_no in range(num_clips)
    ]
    stacked = tf.stack(clips)
    flat_shape = [num_clips * num_frames, crop_size, crop_size, 3]
    return tf.reshape(stacked, flat_shape)

def normalize_audio(signal):
    """Scale an audio signal so that its largest absolute sample becomes 1.

    Args:
        signal: Array-like of numeric samples, any shape. The peak is taken
            over the whole array.

    Returns:
        A new float64 ``numpy.ndarray`` with the same shape as ``signal``.
        An empty or all-zero (silent) signal is returned as-is (converted to
        float64) instead of raising or producing NaNs.
    """
    # Convert before taking abs(): on integer PCM, np.abs wraps around for the
    # most negative value (e.g. int16 -32768), which corrupts the peak.
    samples = np.array(signal, dtype=np.float64)
    if samples.size == 0:
        return samples

    peak = np.max(np.abs(samples))
    if peak == 0:
        # Silence: dividing by the peak would yield NaNs.
        return samples
    return samples / peak

def extract_audio(video_path):
    """Extract the audio track of a video as a normalized sample array.

    The audio is written to a temporary WAV file, read back with
    ``scipy.io.wavfile`` and passed through ``normalize_audio``.

    Args:
        video_path: Path of the video (or audio) file to read.

    Returns:
        Tuple ``(signal, sample_rate)``: the normalized samples and the
        sample rate reported by the WAV file.
    """
    import tempfile  # local import: only this function needs it

    # A unique temp path avoids collisions on a shared "temp_audio.wav" in the
    # working directory (parallel runs, or a stale file from a crashed run).
    handle, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(handle)  # release our handle so the writer can reopen the path
    try:
        with AudioFileClip(video_path) as audio_clip:
            audio_clip.write_audiofile(audio_path, verbose=False, logger=None)
        sample_rate, signal = wavfile.read(audio_path)
    finally:
        # Remove the temp file even when writing or reading fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return normalize_audio(signal), sample_rate

def MFCC(signal, samplerate, num_ceps=13, nfilt=26, NFFT=512):
    """Compute Mel-frequency cepstral coefficients of an audio signal.

    Pipeline: pre-emphasis (0.97) -> 25 ms frames with a 10 ms stride ->
    Hamming window -> power spectrum -> triangular mel filterbank ->
    ``20 * log10`` -> DCT-II, keeping the first ``num_ceps`` coefficients.

    Args:
        signal: 1-D array of samples, or a ``(samples, channels)`` array, which
            is averaged down to mono first (previously it was flattened,
            interleaving the channels).
        samplerate: Sampling rate of ``signal`` in Hz.
        num_ceps: Number of cepstral coefficients kept per frame.
        nfilt: Number of triangular mel filters.
        NFFT: FFT size applied to each frame.

    Returns:
        ``numpy.ndarray`` with one row per frame and ``num_ceps`` columns
        (at most ``nfilt``).

    Raises:
        ValueError: If ``signal`` is empty.
    """
    samples = np.atleast_1d(np.asarray(signal, dtype=np.float64))
    if samples.size == 0:
        raise ValueError("Cannot compute MFCC of an empty signal.")
    if samples.ndim > 1:
        # Down-mix multi-channel audio: average all channels per sample.
        samples = samples.reshape(samples.shape[0], -1).mean(axis=1)

    # Pre-emphasis
    pre_emphasis = 0.97
    emphasized_signal = np.append(samples[0], samples[1:] - pre_emphasis * samples[:-1])

    # Framing
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * samplerate))
    frame_step = int(round(frame_stride * samplerate))

    signal_length = len(emphasized_signal)
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))

    # Zero-pad so that every frame has exactly frame_length samples.
    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = np.append(emphasized_signal, np.zeros(pad_signal_length - signal_length))

    # Row i holds the sample indices of frame i.
    frame_starts = np.arange(num_frames) * frame_step
    indices = frame_starts[:, None] + np.arange(frame_length)[None, :]
    frames = pad_signal[indices]

    # Windowing. np.hamming is the symmetric Hamming window; the former
    # scipy.signal.hamming alias no longer exists in current SciPy releases.
    frames *= np.hamming(frame_length)

    # Fourier Transform and Power Spectrum
    # NOTE(review): when frame_length > NFFT (e.g. 44.1 kHz audio with the
    # default NFFT=512) rfft crops each frame to its first NFFT samples.
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

    # Mel Filterbank
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (samplerate / 2) / 700)
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)
    bin_edges = np.floor((NFFT + 1) * hz_points / samplerate).astype(int)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left, center, right = bin_edges[m - 1], bin_edges[m], bin_edges[m + 1]
        # Rising then falling slope of the m-th triangle; an empty range
        # (coinciding edges) simply contributes nothing.
        for k in range(left, center):
            fbank[m - 1, k] = (k - left) / (center - left)
        for k in range(center, right):
            fbank[m - 1, k] = (right - k) / (right - center)

    filter_banks = np.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)

    # MFCC
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, :num_ceps]
    return mfcc

def save_mfcc_as_image(mfcc, output_path, video_name):
    """Render MFCC features as a heat-map PNG.

    Args:
        mfcc: 2-D array of MFCC features; it is transposed before plotting.
        output_path: Directory the image is written into.
        video_name: Source video file name; its extension is dropped and
            ``_mfcc.png`` is appended to form the image file name.

    Returns:
        Path of the written PNG file.
    """
    stem = os.path.splitext(video_name)[0]
    output_file = os.path.join(output_path, f"{stem}_mfcc.png")

    fig = plt.figure(figsize=(10, 4))
    try:
        plt.imshow(mfcc.T, aspect='auto', origin='lower', cmap='viridis')
        plt.colorbar()
        plt.title('MFCC Features')
        plt.tight_layout()
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Close this figure even when plotting or saving fails; otherwise each
        # failed video in a batch leaves a figure open in pyplot.
        plt.close(fig)
    return output_file

def extract_openface_features(video_path, output_dir):
    """Run the OpenFace feature extractor on a video and return the CSV it writes.

    Args:
        video_path: Path of the video to analyse.
        output_dir: Directory for the OpenFace output; created if missing.

    Returns:
        Path of ``<video stem>.csv`` inside ``output_dir``, or ``None`` when
        the executable cannot be started, exits with a non-zero status, or
        the expected CSV is not found afterwards.
    """
    os.makedirs(output_dir, exist_ok=True)

    video_name = os.path.basename(video_path)
    stem = os.path.splitext(video_name)[0]
    csv_path = os.path.join(output_dir, f"{stem}.csv")

    command = [
        OPENFACE_PATH,
        "-f", video_path,
        "-out_dir", output_dir,
        "-of", stem,
        "-aus", "-2Dfp", "-pose", "-gaze", "-verbose"
    ]

    # Put the OpenFace directory first on PATH for the child process.
    env = os.environ.copy()
    env["PATH"] = OPENFACE_DIR + os.pathsep + env.get("PATH", "")

    try:
        subprocess.run(command, check=True, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # errors='replace': tool output is not guaranteed to be valid UTF-8,
        # and a decode failure here would mask the real error.
        detail = e.stderr.decode('utf-8', errors='replace') if e.stderr else 'Unknown error'
        print(f"OpenFace failed on {video_name}: {detail}")
        return None
    except OSError as e:
        # Executable missing or not runnable: report it like any other failure
        # instead of letting the exception escape.
        print(f"OpenFace could not be started ({OPENFACE_PATH}): {e}")
        return None

    if not os.path.exists(csv_path):
        print(f"OpenFace output not found: {csv_path}")
        return None

    return csv_path

def _active_au_labels(au_row):
    """Build one display string per AU whose presence column (``AUxx_c``) is positive in ``au_row``."""
    labels = []
    for col, value in au_row.items():
        if not (isinstance(col, str) and col.startswith('AU') and col.endswith('_c')):
            continue
        if not value > 0:
            continue
        au_name = col.split('_')[0]
        intensity_col = f"{au_name}_r"
        if intensity_col in au_row.index:
            labels.append(f"{au_name}: {au_row[intensity_col]:.2f}")
        else:
            # Presence-only AU (no matching intensity column): previously a KeyError.
            labels.append(f"{au_name}: present")
    return labels


def create_video_with_aus(video_path, au_data, output_path, num_frames=8):
    """Write a short video of evenly spaced frames with active AUs drawn on them.

    Args:
        video_path: Path of the source video.
        au_data: DataFrame read from the OpenFace CSV. It needs a 1-based
            ``frame`` column plus ``AUxx_c`` / ``AUxx_r`` columns. It is not
            modified.
        output_path: Path of the video file to write.
        num_frames: Number of evenly spaced frames to sample.

    Returns:
        ``output_path``.
    """
    # Read the original video and select evenly spaced frames
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frames = len(vr)
    frame_indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
    frames = vr.get_batch(frame_indices).asnumpy()

    # Work on a renamed copy so the caller's DataFrame stays untouched.
    # Stripping header whitespace is defensive: some OpenFace outputs are
    # reported to pad column names with a leading space -- TODO confirm for
    # the build in use.
    au_table = au_data.rename(columns=lambda c: c.strip() if isinstance(c, str) else c)
    zero_based_frame = au_table['frame'] - 1  # OpenFace frames are 1-based

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (input_size, input_size))
    try:
        for frame, frame_idx in zip(frames, frame_indices):
            frame = cv2.resize(frame, (input_size, input_size))

            # Rows of the AU table that belong to this frame
            frame_au = au_table[zero_based_frame == frame_idx]

            if not frame_au.empty:
                au_values = _active_au_labels(frame_au.iloc[0])

                y_offset = 30
                cv2.putText(frame, f"Frame: {frame_idx+1}/{total_frames}", (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

                for au_text in au_values:
                    y_offset += 25
                    cv2.putText(frame, au_text, (10, y_offset),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            # The frame is converted from RGB to BGR before writing.
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Always release the writer so the output file handle is not leaked
        # when a frame fails to process.
        out.release()
    return output_path

def process_video(video_path, output_dir):
    """Run the full pipeline on one video: MFCC image plus AU-annotated video.

    Intermediate OpenFace output goes to ``<output_dir>/temp``, which is
    removed afterwards (up to five attempts).

    Returns:
        ``True`` when every step succeeded, ``False`` when any step raised.
    """
    video_name = os.path.basename(video_path)
    print(f"Processing {video_name}...")
    start_time = time.time()

    # Scratch directory for the OpenFace CSV
    temp_dir = os.path.join(output_dir, "temp")
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Step 1: audio -> MFCC -> image
        samples, rate = extract_audio(video_path)
        features = MFCC(samples, rate)
        save_mfcc_as_image(features, output_dir, video_name)

        # Step 2: facial features via OpenFace
        csv_file = extract_openface_features(video_path, temp_dir)
        if csv_file is None:
            raise Exception("OpenFace feature extraction failed")
        au_table = pd.read_csv(csv_file)

        # Step 3: annotated video
        stem = os.path.splitext(video_name)[0]
        annotated_path = os.path.join(output_dir, f"{stem}_aus.mp4")
        create_video_with_aus(video_path, au_table, annotated_path, num_frames=8)

        elapsed = time.time() - start_time
        print(f"Successfully processed {video_name} in {elapsed:.2f} seconds.")
        return True

    except Exception as e:
        print(f"Error processing {video_name}: {e!s}")
        return False

    finally:
        # Remove the scratch directory, retrying while files are still locked.
        if os.path.exists(temp_dir):
            removed = False
            attempts_left = 5
            while attempts_left and not removed:
                attempts_left -= 1
                try:
                    shutil.rmtree(temp_dir)
                    removed = True
                except PermissionError:
                    time.sleep(1)
            if not removed:
                print(f"Warning: Could not delete temporary directory {temp_dir}")

def process_videos(input_path, output_path):
    """Run ``process_video`` on every ``.avi`` / ``.mp4`` file directly inside ``input_path``.

    ``output_path`` is created when it does not exist yet.

    Returns:
        Number of videos for which ``process_video`` reported success.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    video_suffixes = ('.avi', '.mp4')
    video_files = [
        entry for entry in os.listdir(input_path)
        if entry.lower().endswith(video_suffixes)
    ]
    total_videos = len(video_files)

    print(f"Starting processing of {total_videos} videos...")

    processed_count = 0
    position = 0
    for video_file in video_files:
        position += 1
        succeeded = process_video(os.path.join(input_path, video_file), output_path)
        if succeeded:
            processed_count += 1

        print(f"Progress: {position}/{total_videos} videos processed")

    print(f"\nFinished processing. Successfully processed {processed_count} of {total_videos} videos.")
    return processed_count

if __name__ == "__main__":
    # Example batch run: uncomment and adjust the paths to process a directory.
    #input_path = r"D:\eNTERFACE\Enterface_videos"
    #output_path = r"D:\eNTERFACE\Enterface_videoss"
    
    #processed_count = process_videos(input_path, output_path)
    #print(f"\nProcessing complete. {processed_count} videos were successfully processed.")

    # An `if` body consisting only of comments is a syntax error, so keep an
    # explicit no-op while the example above is disabled.
    pass