# Hybrid Feature Analyses SIM 2 - Detecting Pigs and the Swedish Chef

This notebook combines the audio and visual features extracted in previous analyses to predict the presence of pigs and the Swedish chef. Due to computational resource constraints and the large number of features, we exclusively employ the XGBoost Gradient Boosting algorithm for classification.

In [1]:
import pandas as pd

# Load Features & Ground Truth

Video Features

In [2]:
video_features_path = "../model_vars/sim2_video/feature_df.csv"
video_feature_df = pd.read_csv(video_features_path)
video_feature_df.shape

(115885, 127)

Audio Features

In [3]:
audio_features_path = "../model_vars/sim2_audio/sim2_audio_features.csv"
audio_feature_df = pd.read_csv(audio_features_path)
audio_feature_df.shape

(115885, 64)

Merge feature dataframes

In [27]:
import pandas as pd

# Create an `idx` column by combining `video_idx` and `frame_idx`
audio_feature_df['idx'] = audio_feature_df['video_idx'].astype(str) + "@" + audio_feature_df['frame_idx'].astype(str)
video_feature_df['idx'] = video_feature_df['video_idx'].astype(str) + "@" + video_feature_df['frame_idx'].astype(str)

# Extract the `idx` column as sets for comparison
audio_idx = set(audio_feature_df['idx'])
video_idx = set(video_feature_df['idx'])

In [28]:
# Find `audio_idx` not in `video_idx`
missing_in_video = audio_idx - video_idx
print(f"Missing in video_feature_df: {missing_in_video}")

# Find `video_idx` not in `audio_idx`
missing_in_audio = video_idx - audio_idx
print(f"Missing in audio_feature_df: {missing_in_audio}")


Missing in video_feature_df: set()
Missing in audio_feature_df: set()


In [4]:
from scripts.load_data import check_and_load


data_path = "../ground_truth_data"
frames_output_dir = "../ground_truth_data/frames"
audio_output_dir = "../ground_truth_data/audio"
annotations_path = "../ground_truth_data"

muppet_files = {
    "Muppets-02-01-01.avi": "GroundTruth_Muppets-02-01-01.csv",
    "Muppets-02-04-04.avi": "GroundTruth_Muppets-02-04-04.csv",
    "Muppets-03-04-03.avi": "GroundTruth_Muppets-03-04-03.csv",
}

annotations, audio_data, frames = check_and_load(data_path, frames_output_dir, audio_output_dir, annotations_path, muppet_files)

Frames and audio are already extracted.
Loading audio segments...
Loaded 3 audio files.
Loaded audio segments for 3 videos.
Loaded frames for 3 videos.
Number of videos with frames: 3
Video 0 has 38681 frames.
Video 1 has 38706 frames.
Video 2 has 38498 frames.


In [5]:
# Create a mapping from filenames to video indices
video_idx_map = {filename: idx for idx, filename in enumerate(muppet_files.keys())}

# Prepare ground truth data with corrected video_idx
ground_truth_data = []
for video_filename, annotation_df in annotations.items():
    video_idx = video_idx_map[video_filename]  # Map video filename to its index
    for _, row in annotation_df.iterrows():
        ground_truth_data.append({
            'video_idx': video_idx,  # Use mapped video index
            'frame_idx': row['Frame_number'],  # Assuming Frame_number exists
            #'Pigs': row['Pigs'],  # Assuming Pigs is a column in the annotation
            #'Cook': row['Cook']  # Assuming this column exists
        })

# Create a DataFrame for ground truth
ground_truth_df = pd.DataFrame(ground_truth_data)
ground_truth_df.shape

(115885, 2)

In [8]:
# Create an `idx` column by combining `video_idx` and `frame_idx`
ground_truth_df['idx'] = ground_truth_df['video_idx'].astype(str) + "@" + ground_truth_df['frame_idx'].astype(str)
audio_feature_df['idx'] = audio_feature_df['video_idx'].astype(str) + "@" + audio_feature_df['frame_idx'].astype(str)

# Extract the `idx` column as sets for comparison
audio_idx = set(ground_truth_df['idx'])
video_idx = set(audio_feature_df['idx'])

In [9]:
# Find `audio_idx` not in `video_idx`
missing_in_video = audio_idx - video_idx
print(f"Missing in video_feature_df: {missing_in_video}")

# Find `video_idx` not in `audio_idx`
missing_in_audio = video_idx - audio_idx
print(f"Missing in audio_feature_df: {missing_in_audio}")

Missing in video_feature_df: {'1@38556', '1@38574', '1@38532', '1@38620', '1@38601', '1@38550', '1@38547', '1@38635', '1@38576', '1@38639', '1@38626', '1@38596', '1@38554', '1@38701', '1@38510', '1@38572', '1@38694', '1@38578', '1@38517', '1@38649', '1@38642', '1@38604', '1@38602', '1@38651', '1@38533', '1@38638', '1@38666', '1@38552', '1@38619', '1@38673', '1@38544', '1@38523', '1@38704', '1@38631', '1@38605', '1@38592', '1@38647', '1@38688', '1@38702', '1@38636', '1@38571', '1@38585', '1@38512', '1@38500', '1@38539', '1@38559', '1@38566', '1@38698', '1@38579', '1@38530', '1@38676', '1@38699', '1@38670', '1@38563', '1@38690', '1@38665', '1@38588', '1@38667', '1@38617', '1@38705', '1@38671', '1@38627', '1@38565', '1@38567', '1@38531', '1@38502', '1@38558', '1@38570', '1@38703', '1@38695', '1@38691', '1@38540', '1@38656', '1@38632', '1@38681', '1@38696', '1@38591', '1@38637', '1@38663', '1@38499', '1@38669', '1@38675', '1@38679', '1@38603', '1@38513', '1@38515', '1@38536', '1@38568', '1

In [26]:
feature_df = pd.merge(video_feature_df, audio_feature_df, on=['video_idx', 'frame_idx'], how='inner')
feature_df.shape

(115885, 159)