In [35]:
import os
import zipfile
import glob
import numpy as np
import pandas as pd
import torch
from scipy.interpolate import interp1d
from skimage.feature import local_binary_pattern
import cv2
from tqdm import tqdm
from dataset import EmotionDataset 
import mediapipe as mp

In [24]:
def check_sample_availability(sample_dir):
    """Report whether a sample folder holds a video and all annotation archives.

    Args:
        sample_dir: Path of the sample directory to inspect.

    Returns:
        True when the directory lists at least one entry ending in ``.mp4``,
        ``.avi`` or ``-Video.zip`` and, for each of Valence, Arousal and
        Liking, at least one entry whose name contains ``-<type>.zip``.
        False otherwise.
    """
    entries = os.listdir(sample_dir)

    # str.endswith accepts a tuple, so one call covers every video suffix.
    has_video = any(name.endswith(('.mp4', '.avi', '-Video.zip')) for name in entries)
    if not has_video:
        return False

    # Each annotation dimension must be backed by at least one archive.
    return all(
        any(f'-{atype}.zip' in name for name in entries)
        for atype in ('Valence', 'Arousal', 'Liking')
    )


In [25]:
def process_landmarks(sample_dir):
    """Extract per-frame 2-D face-mesh landmarks from the sample's video.

    The first entry of ``sample_dir`` ending in ``.mp4``, ``.avi`` or
    ``-Video.zip`` is used. A ``-Video.zip`` archive is unpacked into
    ``<sample_dir>/Videos`` (only when that folder does not exist yet) and
    the first ``.mp4``/``.avi`` found there is processed.

    Args:
        sample_dir: Path of the sample directory.

    Returns:
        ``(landmarks_seq, frame_indices)``. ``landmarks_seq`` has one row per
        frame in which a face was detected, holding the interleaved ``x, y``
        values of every landmark of the first detected face.
        ``frame_indices`` holds the zero-based index of each of those frames.
        ``(None, None)`` is returned when no video is found, the video cannot
        be opened, or no face is detected in any frame.
    """
    video_suffixes = ('.mp4', '.avi')

    # find video
    video_files = [f for f in os.listdir(sample_dir)
                   if f.endswith(video_suffixes + ('-Video.zip',))]
    if not video_files:
        return None, None

    video_file = os.path.join(sample_dir, video_files[0])

    if video_file.endswith('-Video.zip'):
        video_extract_dir = os.path.join(sample_dir, 'Videos')
        # An existing folder is treated as an earlier extraction and reused.
        if not os.path.exists(video_extract_dir):
            os.makedirs(video_extract_dir, exist_ok=True)
            with zipfile.ZipFile(video_file, 'r') as zip_ref:
                zip_ref.extractall(video_extract_dir)
        video_files = [f for f in os.listdir(video_extract_dir)
                       if f.endswith(video_suffixes)]
        if not video_files:
            return None, None
        video_file = os.path.join(video_extract_dir, video_files[0])

    landmarks_seq = []
    frame_indices = []

    cap = cv2.VideoCapture(video_file)
    try:
        # Bail out before building the face-mesh graph if nothing can be read.
        if not cap.isOpened():
            return None, None

        # use Mediapipe to extract Landmarks
        mp_face_mesh = mp.solutions.face_mesh
        face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                          max_num_faces=1,
                                          refine_landmarks=True,
                                          min_detection_confidence=0.5,
                                          min_tracking_confidence=0.5)
        try:
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; the frame is converted to RGB
                # before being handed to the face mesh.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(rgb_frame)

                if results.multi_face_landmarks:
                    face_landmarks = results.multi_face_landmarks[0]
                    landmarks = []
                    for lm in face_landmarks.landmark:
                        landmarks.append(lm.x)
                        landmarks.append(lm.y)
                    landmarks_seq.append(landmarks)
                    frame_indices.append(frame_idx)

                # Counted for every decoded frame so indices refer to video frames,
                # not to positions in landmarks_seq.
                frame_idx += 1
        finally:
            # Released even if a frame raises inside the loop.
            face_mesh.close()
    finally:
        cap.release()

    if not landmarks_seq:
        return None, None

    return np.array(landmarks_seq), np.array(frame_indices)

In [26]:
def extract_lbp_features(image, num_points=8, radius=1, grid_x=7, grid_y=7):
    """Describe an image by per-cell histograms of uniform LBP codes.

    The image is converted from BGR to grayscale, its local-binary-pattern
    map is computed, and the map is cut into ``grid_y`` rows by ``grid_x``
    columns of equally sized cells. Rows/columns left over by the integer
    division are not visited. Every cell contributes a normalised histogram
    with ``num_points + 2`` bins.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.
        num_points: Number of neighbours passed to ``local_binary_pattern``.
        radius: Radius passed to ``local_binary_pattern``.
        grid_x: Number of cells along the width.
        grid_y: Number of cells along the height.

    Returns:
        1-D array with the cell histograms concatenated in row-major order.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp_map = local_binary_pattern(grayscale, num_points, radius, method='uniform')

    rows, cols = lbp_map.shape
    cell_h = rows // grid_y
    cell_w = cols // grid_x
    bin_edges = np.arange(0, num_points + 3)  # num_points + 3 edges -> num_points + 2 bins

    features = []
    for row in range(grid_y):
        top = row * cell_h
        for col in range(grid_x):
            left = col * cell_w
            cell = lbp_map[top:top + cell_h, left:left + cell_w]
            counts, _ = np.histogram(cell.ravel(), bins=bin_edges, range=(0, num_points + 2))
            counts = counts.astype('float')
            # The small epsilon keeps an empty cell from dividing by zero.
            features.extend(counts / (counts.sum() + 1e-7))
    return np.array(features)

In [27]:
def process_lbp_sequence(video_path, frame_indices):
    """Compute LBP features for selected frames of a video.

    Frames are decoded sequentially from the start of the video and
    ``extract_lbp_features`` is applied to each frame whose zero-based index
    appears in ``frame_indices``. Decoding stops once the largest requested
    index has been handled, or earlier if the video ends.

    Args:
        video_path: Path of the video to read.
        frame_indices: Iterable of zero-based frame indices to keep.

    Returns:
        Array with one feature row per requested frame that could be read, in
        increasing frame order. An empty array when ``frame_indices`` is
        empty or no requested frame could be read.
    """
    wanted = {int(idx) for idx in frame_indices}
    if not wanted:
        # Nothing to extract, so the video is not opened at all.
        return np.array([])
    last_wanted = max(wanted)

    lbp_sequence = []
    cap = cv2.VideoCapture(video_path)
    try:
        current_frame_idx = 0
        # Frames after the last requested index cannot contribute.
        while current_frame_idx <= last_wanted:
            ret, frame = cap.read()
            if not ret:
                break
            if current_frame_idx in wanted:
                lbp_sequence.append(extract_lbp_features(frame))
            current_frame_idx += 1
    finally:
        # Release the capture even if feature extraction raises.
        cap.release()

    return np.array(lbp_sequence)  # one row per requested frame that was read

In [28]:
def process_annotations(sample_dir, annotation_type, sampling_rate=66):
    """Load one annotation dimension and average it across annotators.

    The first entry of ``sample_dir`` whose name contains
    ``-<annotation_type>.zip`` is unpacked into
    ``<sample_dir>/<annotation_type>`` (only when that folder does not exist
    yet). Every ``*_<annotation_type>_V*.csv`` file found there is read
    without a header row; column 0 is taken as time and column 1 as the
    annotation value. All annotators are linearly interpolated onto a common,
    evenly spaced time axis spanning the interval they all cover, and the
    interpolated tracks are averaged.

    Args:
        sample_dir: Path of the sample directory.
        annotation_type: Annotation name used in the archive and CSV file
            names, e.g. ``'Valence'``.
        sampling_rate: Number of points per unit of time on the common axis.
            Defaults to 66, the rate the original code assumed.

    Returns:
        ``(common_times, annotations_seq)`` as two 1-D arrays of equal
        length, or ``None`` when the archive or its CSV files are missing,
        the annotators share no time interval, or the shared interval is too
        short to yield at least two points.
    """
    # Find the annotation zip file
    annotations_zip_files = [f for f in os.listdir(sample_dir) if f'-{annotation_type}.zip' in f]
    if not annotations_zip_files:
        return None
    annotations_zip = os.path.join(sample_dir, annotations_zip_files[0])

    # Extract annotations; an existing folder is treated as already extracted.
    annotations_extract_dir = os.path.join(sample_dir, f'{annotation_type}')
    if not os.path.exists(annotations_extract_dir):
        os.makedirs(annotations_extract_dir, exist_ok=True)
        with zipfile.ZipFile(annotations_zip, 'r') as zip_ref:
            zip_ref.extractall(annotations_extract_dir)

    # Read annotations from CSV files (video annotations only). Sorting makes
    # the annotator order, and therefore the floating-point sum, reproducible.
    csv_files = sorted(glob.glob(os.path.join(annotations_extract_dir, f'*_{annotation_type}_V*.csv')))
    annotations_list = []
    for csv_file in csv_files:
        df = pd.read_csv(csv_file, header=None)
        annotations_list.append((df.iloc[:, 0].values, df.iloc[:, 1].values))
    if not annotations_list:
        return None

    # The common axis covers only the interval every annotator covers.
    # min()/max() are used instead of the first/last row so the result does
    # not depend on the rows being sorted by time.
    min_time = max(times.min() for times, _ in annotations_list)
    max_time = min(times.max() for times, _ in annotations_list)
    if max_time <= min_time:
        return None  # Invalid time range

    num_samples = int(round((max_time - min_time) * sampling_rate))
    if num_samples < 2:
        # Fewer than two points cannot describe a track for later interpolation.
        return None
    common_times = np.linspace(min_time, max_time, num=num_samples)

    # Interpolate every annotator onto the common axis, then average.
    interpolated_values = [
        interp1d(times, values, kind='linear', fill_value='extrapolate')(common_times)
        for times, values in annotations_list
    ]
    annotations_seq = np.mean(interpolated_values, axis=0)  # Shape: (seq_len,)

    return common_times, annotations_seq

In [29]:
def align_sequences(landmarks_seq, lbp_seq, frame_indices, frame_timestamps, annotations_times, annotations_seq):
    """Resample annotations at the frame timestamps and equalise lengths.

    Args:
        landmarks_seq: Per-frame landmark features.
        lbp_seq: Per-frame LBP features.
        frame_indices: Indices of the frames the features belong to. Accepted
            for interface compatibility; not read here.
        frame_timestamps: Timestamp of each kept frame.
        annotations_times: Timestamps of the annotation track.
        annotations_seq: Annotation values at ``annotations_times``.

    Returns:
        ``(landmarks_seq, lbp_seq, labels_seq)``, each cut to the length of
        the shortest of the three. Labels come from linear interpolation of
        the annotation track; timestamps outside the annotated range are
        linearly extrapolated.
    """
    label_at = interp1d(annotations_times, annotations_seq, kind='linear', fill_value='extrapolate')
    labels_seq = label_at(frame_timestamps)

    # Trim everything to the shortest sequence so the rows line up one-to-one.
    shortest = min(map(len, (landmarks_seq, lbp_seq, labels_seq)))
    return landmarks_seq[:shortest], lbp_seq[:shortest], labels_seq[:shortest]

In [30]:
def save_processed_data(sample_dir, landmarks_seq, lbp_seq, labels_seq, seq_len, sample_id):
    """Write one processed sample to ``processed_data/sample_<sample_id>``.

    The output folder is relative to the current working directory and is
    created if needed. Four files are written: ``landmarks.npy``,
    ``lbp.npy``, ``labels.npy`` and ``seq_len.npy`` (a one-element array).

    Args:
        sample_dir: Source directory of the sample. Accepted for interface
            compatibility; not read here.
        landmarks_seq: Landmark features to store.
        lbp_seq: LBP features to store.
        labels_seq: Labels to store.
        seq_len: Sequence length to store.
        sample_id: Identifier used in the output folder name.
    """
    out_dir = os.path.join('processed_data', f'sample_{sample_id}')
    os.makedirs(out_dir, exist_ok=True)

    payload = (
        ('landmarks.npy', landmarks_seq),
        ('lbp.npy', lbp_seq),
        ('labels.npy', labels_seq),
        ('seq_len.npy', np.array([seq_len])),
    )
    for filename, array in payload:
        np.save(os.path.join(out_dir, filename), array)

In [31]:
def _resolve_video_file(sample_dir):
    """Locate the sample's video, unpacking a ``-Video.zip`` archive if needed.

    Returns ``(video_path, None)`` on success, or ``(None, reason)`` where
    ``reason`` is the text to report when the sample is skipped.
    """
    video_files = [f for f in os.listdir(sample_dir)
                   if f.endswith('.mp4') or f.endswith('.avi') or f.endswith('-Video.zip')]
    if not video_files:
        return None, "No video file found."

    video_file = os.path.join(sample_dir, video_files[0])
    if not video_file.endswith('-Video.zip'):
        return video_file, None

    video_extract_dir = os.path.join(sample_dir, 'Videos')
    # An existing folder is treated as an earlier extraction and reused.
    if not os.path.exists(video_extract_dir):
        os.makedirs(video_extract_dir, exist_ok=True)
        with zipfile.ZipFile(video_file, 'r') as zip_ref:
            zip_ref.extractall(video_extract_dir)
    video_files = [f for f in os.listdir(video_extract_dir)
                   if f.endswith('.mp4') or f.endswith('.avi')]
    if not video_files:
        return None, "No video file found after extraction."
    return os.path.join(video_extract_dir, video_files[0]), None


def process_all_samples(dataset_dir, desired_seq_len=None, seed=None):
    """Run the full preprocessing pipeline over every sample folder.

    For each sub-directory of ``dataset_dir`` (visited in sorted order) the
    function checks that the required files exist, extracts landmarks and
    LBP features, loads the Valence, Arousal and Liking annotations, aligns
    the labels to the frame timestamps (``frame index / FPS``), optionally
    crops a random window, and writes the result via ``save_processed_data``.
    A sample that fails any step is reported on stdout and skipped; an
    exception raised while processing a sample is reported and does not stop
    the run.

    Args:
        dataset_dir: Directory whose sub-directories are the samples.
        desired_seq_len: When given, samples with fewer aligned frames are
            skipped and longer ones are cropped to a random contiguous window
            of exactly this length. When ``None`` the full sequence is saved.
        seed: Optional seed for the random crop. When ``None`` NumPy's global
            random state is used (the original behaviour); otherwise a
            private ``np.random.RandomState(seed)`` drives the crops.
    """
    # Sorted so the visiting order, and with it a seeded crop, is reproducible.
    sample_dirs = sorted(
        os.path.join(dataset_dir, d) for d in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, d))
    )
    # np.random and RandomState both expose randint with the same signature.
    rng = np.random if seed is None else np.random.RandomState(seed)
    annotation_types = ('Valence', 'Arousal', 'Liking')

    processed_count = 0
    for sample_dir in tqdm(sample_dirs, desc='Processing samples'):
        sample_id = os.path.basename(sample_dir)
        # check the video and annotation archives
        if not check_sample_availability(sample_dir):
            print(f"Skipping {sample_id}: Missing required files.")
            continue
        try:
            # process Landmarks
            landmarks_seq, frame_indices = process_landmarks(sample_dir)
            if landmarks_seq is None or len(landmarks_seq) == 0:
                print(f"Skipping {sample_id}: No valid landmarks.")
                continue

            video_file, reason = _resolve_video_file(sample_dir)
            if video_file is None:
                print(f"Skipping {sample_id}: {reason}")
                continue

            cap = cv2.VideoCapture(video_file)
            try:
                fps = cap.get(cv2.CAP_PROP_FPS)
            finally:
                cap.release()
            # "not fps > 0" also rejects NaN, which "fps <= 0" would let through.
            if not fps > 0:
                print(f"Skipping {sample_id}: Invalid FPS ({fps}).")
                continue

            frame_timestamps = frame_indices / fps

            # process LBP
            lbp_seq = process_lbp_sequence(video_file, frame_indices)
            if lbp_seq is None or len(lbp_seq) == 0:
                print(f"Skipping {sample_id}: No valid LBP features.")
                continue

            # process annotation
            annotations = {}
            for atype in annotation_types:
                annotations_data = process_annotations(sample_dir, atype)
                if annotations_data is None:
                    print(f"Skipping {sample_id}: Missing or invalid {atype} annotations.")
                    break
                annotations[atype] = annotations_data
            if len(annotations) < len(annotation_types):
                continue

            # align sequence: the first call trims the features, later calls
            # receive the trimmed features so every label track has the same length.
            landmarks_seq_aligned, lbp_seq_aligned = landmarks_seq, lbp_seq
            label_tracks = []
            for atype in annotation_types:
                annotation_times, annotation_values = annotations[atype]
                landmarks_seq_aligned, lbp_seq_aligned, track = align_sequences(
                    landmarks_seq_aligned, lbp_seq_aligned, frame_indices, frame_timestamps,
                    annotation_times, annotation_values
                )
                label_tracks.append(track)
            # Merge tags
            labels_seq = np.stack(label_tracks, axis=1)  # shape: (seq_len, 3)

            # Process sequence length
            seq_len = len(labels_seq)
            if desired_seq_len is not None:
                if seq_len < desired_seq_len:
                    # Skip samples with insufficient sequence length
                    print(f"Skipping {sample_id}: Sequence too short ({seq_len} frames).")
                    continue
                # Randomly crop a contiguous subsequence (upper bound is exclusive).
                start_idx = rng.randint(0, seq_len - desired_seq_len + 1)
                end_idx = start_idx + desired_seq_len
                landmarks_seq_aligned = landmarks_seq_aligned[start_idx:end_idx]
                lbp_seq_aligned = lbp_seq_aligned[start_idx:end_idx]
                labels_seq = labels_seq[start_idx:end_idx]
                seq_len = desired_seq_len

            # save processed data
            save_processed_data(sample_dir, landmarks_seq_aligned, lbp_seq_aligned, labels_seq, seq_len, sample_id)
            processed_count += 1
        except Exception as e:
            # Best effort: one broken sample must not abort the whole run.
            print(f"Error processing {sample_id}: {e}")
            continue
    print(f"Processed {processed_count} samples.")

In [34]:
# Run the preprocessing pipeline over the whole dataset.
dataset_dir ='SEWA_DB'  # folder whose sub-directories are the individual samples
desired_seq_len = 30  # frames kept per saved sample; shorter samples are skipped

# Results are written under ./processed_data/sample_<id>/ by save_processed_data.
process_all_samples(dataset_dir, desired_seq_len)

Processing samples:  30%|▎| 790/2630 [00:00<

Skipping SSD_C3_S092_P183_VC1_003651_004631: No valid landmarks.
Skipping SVH_C3_S085_P170_VC1_000901_001561: Missing required files.
Skipping C1_S020_P039_AD2: Missing required files.
Skipping SVL_C2_S062_P123_VC1_000489_001586: Missing required files.
Skipping SSD_C1_S001_P001_VC1_004201_005201: No valid landmarks.
Skipping SVL_C2_S048_P096_VC1_000837_001711: Missing required files.
Skipping C2_S197_P393_AD4: No valid landmarks.
Skipping C5_S134_P268_AD1: Missing required files.
Skipping C1_S008_P016_AD2: Missing required files.
Skipping C1_S028_P056_AD3: Missing required files.
Skipping C1_S030_P059_AD4: No valid landmarks.
Skipping C6_S186_P372_AD1: Missing required files.
Skipping C3_S081_P162_AD1: Missing required files.
Skipping C3_S083_P165_AD4: No valid landmarks.
Skipping SSD_C4_S111_P222_VC1_000901_001550: No valid landmarks.
Skipping C2_S050_P099_VC1: Missing required files.
Skipping C3_S091_P181_AD2: Missing required files.
Skipping SSD_C5_S143_P285_VC1_002401_002651: No v

Processing samples:  71%|▋| 1859/2630 [00:00

Skipping SSD_C4_S096_P191_VC1_001501_002000: No valid landmarks.
Skipping SSD_C4_S112_P224_VC1_002001_003300: No valid landmarks.
Skipping C2_S041_P082_AD3: Missing required files.
Skipping SVH_C3_S088_P175_VC1_000401_000981: Missing required files.
Skipping C4_S095_P189_AD1: Missing required files.
Skipping SAH_C1_S008_P016_VC1_005601_006984: Missing required files.
Skipping SAH_C2_S044_P087_VC1_000002_000683: Missing required files.
Skipping SAH_C5_S147_P293_VC1_006401_006701: Missing required files.
Skipping C1_S020_P040_AD2: Missing required files.
Skipping SSL_C5_S153_P305_VC1_006651_007001: No valid landmarks.
Skipping C2_S044_P088_AD4: No valid landmarks.
Skipping C3_S092_P183_AD2: Missing required files.
Skipping C1_S021_P042_AD2: Missing required files.
Skipping C1_S001_P002_AD3: Missing required files.
Skipping C5_S128_P256_AD4: No valid landmarks.
Skipping C5_S144_P288_AD1: Missing required files.
Skipping C5_S130_P259_AD3: Missing required files.
Skipping SVL_C1_S026_P052_V

Processing samples:  94%|▉| 2476/2630 [00:17

Skipping SSD_C6_S183_P365_VC1_000796_002910: No valid landmarks.
Skipping C4_S113_P225_AD2: Missing required files.
Skipping C1_S029_P058_AD3: Missing required files.
Skipping C1_S009_P018_AD2: Missing required files.
Skipping C1_S193_P386_AD4: No valid landmarks.
Skipping C2_S199_P397_AD2: Missing required files.


Processing samples:  94%|▉| 2480/2630 [00:18

Skipping C1_S006_P012_AD4: No valid landmarks.
Skipping SVH_C1_S194_P387_VC1_003501_003901: Missing required files.
Skipping C3_S078_P156_AD1: Missing required files.
Skipping C2_S198_P395_AD2: Missing required files.
Skipping C1_S192_P384_AD4: No valid landmarks.
Skipping SSL_C5_S153_P306_VC1_006701_007101: No valid landmarks.
Skipping SAH_C4_S098_P196_VC1_001501_002350: Missing required files.
Skipping SAH_C3_S069_P138_VC1_003001_004571: Missing required files.
Skipping TSD_C1_VC1: Missing required files.
Skipping SVL_C4_S096_P191_VC1_008201_008700: Missing required files.
Skipping C5_S135_P270_AD1: Missing required files.
Skipping C1_S026_P052_VC1: Missing required files.
Skipping C2_S046_P092_AD4: No valid landmarks.
Skipping SVH_C1_S009_P017_VC1_008201_008765: Missing required files.
Skipping C4_S121_P242_VC1: Missing required files.
Skipping SVH_C3_S073_P145_VC1_003241_004101: Missing required files.
Skipping SSD_C2_S037_P073_VC1_000788_001622: No valid landmarks.
Skipping C4_S09

Processing samples: 100%|█| 2630/2630 [00:22

Skipping C1_S006_P011_AD4: No valid landmarks.
Skipping C3_S079_P157_AD1: Missing required files.
Skipping C1_S193_P385_AD4: No valid landmarks.
Processed 0 samples.





In [None]:
from torch.utils.data import DataLoader

# Same folder name that save_processed_data writes into.
processed_data_dir = 'processed_data'  
# EmotionDataset comes from the project's dataset module; presumably it reads
# the per-sample .npy files under processed_data_dir - TODO confirm.
dataset = EmotionDataset(processed_data_dir)


batch_size = 8
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# test DataLoader: fetch a single batch and print the shape of each part.
# The loop body does not run at all if the loader yields no batches.
for batch in dataloader:
    # Each batch is expected to unpack into exactly three array-like items.
    landmarks_seq, lbp_seq, labels_seq = batch
    print("Landmarks shape:", landmarks_seq.shape)
    print("LBP shape:", lbp_seq.shape)
    print("Labels shape:", labels_seq.shape)
    break  # one batch is enough for this check
