## Results

In [41]:
import pandas as pd
import numpy as np
import sqlite3

TOTAL_FRAMES = 875_887
FPS = 30
SAMPLE_RATE = 10  # every 10th frame
WINDOW_SIZE = 30  # window size in seconds
WINDOW_DECISION_PERCENTAGE = 0.6  # at least 60% of frames in the window (proportion, not count)
GAP = 5 * FPS  # if segments are less than 5 seconds apart, merge them

# Calculated values
WINDOW_SIZE_FRAMES = WINDOW_SIZE * FPS  # e.g., 30s * 30fps = 900 frames
WINDOW_SIZE_SAMPLED = WINDOW_SIZE_FRAMES // SAMPLE_RATE  # e.g., 900 // 10 = 90 frames

print(f"Analysis parameters:")
print(f"- Window size: {WINDOW_SIZE} seconds = {WINDOW_SIZE_SAMPLED} sampled frames")
print(f"- Threshold: {WINDOW_DECISION_PERCENTAGE*100}% of window must be same category")
print(f"- Gap merging: {GAP/FPS} seconds")

results_df = pd.read_csv("/home/nele_pauline_suffo/projects/naturalistic-social-analysis/src/results/rq_01/frame_level_social_interactions.csv")

Analysis parameters:
- Window size: 30 seconds = 90 sampled frames
- Threshold: 60.0% of window must be same category
- Gap merging: 5.0 seconds


### Face Detection Information

In [42]:

# Calculate mutually exclusive counts
only_child_face = (results_df['face_frame_category'] == 'only_child').sum()
only_adult_face = (results_df['face_frame_category'] == 'only_adult').sum()
both_faces = (results_df['face_frame_category'] == 'both_faces').sum()
no_faces = (results_df['face_frame_category'] == 'no_faces').sum()
analysis_check_face = only_child_face + only_adult_face + both_faces + no_faces - TOTAL_FRAMES

print(f"Frames with ONLY child faces: {only_child_face:,} ({only_child_face / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with ONLY adult faces: {only_adult_face:,} ({only_adult_face / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with BOTH face types: {both_faces:,} ({both_faces / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with NO faces: {no_faces:,} ({no_faces / TOTAL_FRAMES * 100:.2f}%)")
print(f"Analysis check : {analysis_check_face} frames deviation (should be 0)")

Frames with ONLY child faces: 52,395 (5.98%)
Frames with ONLY adult faces: 63,510 (7.25%)
Frames with BOTH face types: 17,967 (2.05%)
Frames with NO faces: 742,015 (84.72%)
Analysis check : 0 frames deviation (should be 0)


### Person Classification

In [43]:
# Calculate the numbers
only_child_person = (results_df['person_frame_category'] == 'only_child').sum()
only_adult_person = (results_df['person_frame_category'] == 'only_adult').sum()
both_persons = (results_df['person_frame_category'] == 'both_persons').sum()
no_persons = (results_df['person_frame_category'] == 'no_persons').sum()
analysis_check_person = only_child_person + only_adult_person + both_persons + no_persons - TOTAL_FRAMES

print(f"Frames with ONLY child persons: {only_child_person:,} ({only_child_person / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with ONLY adult persons: {only_adult_person:,} ({only_adult_person / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with BOTH person types: {both_persons:,} ({both_persons / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with NO persons: {no_persons:,} ({no_persons / TOTAL_FRAMES * 100:.2f}%)")
print(f"Analysis check : {analysis_check_person} frames deviation (should be 0)")

Frames with ONLY child persons: 2,279 (0.26%)
Frames with ONLY adult persons: 9,590 (1.09%)
Frames with BOTH person types: 1,031 (0.12%)
Frames with NO persons: 862,987 (98.53%)
Analysis check : 0 frames deviation (should be 0)


### Combined Face and Person Presence Analysis

In [None]:
# Calculate combined presence patterns using the correct logic
only_child_present = ((results_df['child_present'] == 1) & (results_df['adult_present'] == 0)).sum()
only_adult_present = ((results_df['child_present'] == 0) & (results_df['adult_present'] == 1)).sum()
both_present = ((results_df['child_present'] == 1) & (results_df['adult_present'] == 1)).sum()
no_one_present = ((results_df['child_present'] == 0) & (results_df['adult_present'] == 0)).sum()
analysis_check_combined = only_child_present + only_adult_present + both_present + no_one_present - TOTAL_FRAMES

print(f"Frames with ONLY child present: {only_child_present:,} ({only_child_present / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with ONLY adult present: {only_adult_present:,} ({only_adult_present / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with BOTH present: {both_present:,} ({both_present / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with NO ONE present: {no_one_present:,} ({no_one_present / TOTAL_FRAMES * 100:.2f}%)")
print(f"Analysis check: {analysis_check_combined} frames deviation (should be 0)")

# Additional insights
any_presence = only_child_present + only_adult_present + both_present
print(f"\nSummary Insights:")
print(f"Frames with ANY human presence: {any_presence:,} ({any_presence / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with child presence: {only_child_present + both_present:,} ({(only_child_present + both_present) / TOTAL_FRAMES * 100:.2f}%)")
print(f"Frames with adult presence: {only_adult_present + both_present:,} ({(only_adult_present + both_present) / TOTAL_FRAMES * 100:.2f}%)")

In [51]:
def extract_segments_efficient(results_df):
    """
    Efficient approach: Create mutually exclusive segments using state changes.
    This ensures no overlapping segments and is much faster.
    """
    print("Creating mutually exclusive segments using state changes...")
    
    all_segments = []
    
    for video_id, video_df in results_df.groupby('video_id'):
        video_df = video_df.sort_values('frame_number').reset_index(drop=True)
        
        if len(video_df) == 0:
            continue
            
        print(f"Processing video {video_id} with {len(video_df)} frames...")
        
        # Get interaction states
        states = video_df['interaction_category'].values
        frame_numbers = video_df['frame_number'].values
        video_name = video_df['video_name'].iloc[0]
        
        # Find state changes (where interaction category changes)
        state_changes = []
        current_state = states[0]
        segment_start = frame_numbers[0]
        
        for i in range(1, len(states)):
            if states[i] != current_state:
                # State changed - end current segment
                segment_end = frame_numbers[i-1]
                
                # Only keep segments longer than minimum duration (5 seconds)
                segment_duration = (segment_end - segment_start) / FPS
                if segment_duration >= 5.0:  # 5 second minimum
                    all_segments.append({
                        'video_id': video_id,
                        'video_name': video_name,
                        'category': current_state,
                        'segment_start': segment_start,
                        'segment_end': segment_end,
                        'start_time_sec': segment_start / FPS,
                        'end_time_sec': segment_end / FPS,
                        'duration_sec': segment_duration
                    })
                
                # Start new segment
                current_state = states[i]
                segment_start = frame_numbers[i]
        
        # Handle the final segment
        segment_end = frame_numbers[-1]
        segment_duration = (segment_end - segment_start) / FPS
        if segment_duration >= 5.0:  # 5 second minimum
            all_segments.append({
                'video_id': video_id,
                'video_name': video_name,
                'category': current_state,
                'segment_start': segment_start,
                'segment_end': segment_end,
                'start_time_sec': segment_start / FPS,
                'end_time_sec': segment_end / FPS,
                'duration_sec': segment_duration
            })
    
    # Convert to DataFrame
    if all_segments:
        segments_df = pd.DataFrame(all_segments)
        segments_df = segments_df.sort_values(['video_id', 'start_time_sec']).reset_index(drop=True)
        
        # Verify no overlaps within each video
        print("\nVerifying segment integrity...")
        for video_id, video_segments in segments_df.groupby('video_id'):
            video_segments = video_segments.sort_values('start_time_sec')
            for i in range(len(video_segments) - 1):
                current_end = video_segments.iloc[i]['end_time_sec']
                next_start = video_segments.iloc[i + 1]['start_time_sec']
                if current_end >= next_start:
                    print(f"WARNING: Overlap detected in video {video_id}")
    else:
        segments_df = pd.DataFrame(columns=['video_id', 'video_name', 'category',
                                          'segment_start', 'segment_end', 
                                          'start_time_sec', 'end_time_sec', 'duration_sec'])
    
    return segments_df

def extract_segments_with_smoothing(results_df, min_segment_duration=10.0):
    """
    Alternative approach: Apply smoothing to remove noise, then extract segments.
    This reduces rapid state changes and creates more stable segments.
    """
    print(f"Creating segments with smoothing (min duration: {min_segment_duration}s)...")
    
    all_segments = []
    
    for video_id, video_df in results_df.groupby('video_id'):
        video_df = video_df.sort_values('frame_number').reset_index(drop=True)
        
        if len(video_df) == 0:
            continue
            
        # Apply median filter to smooth rapid changes (window = 3 seconds worth of frames)
        smoothing_window = int(3 * FPS / SAMPLE_RATE)  # 3 seconds
        
        # Convert categories to numeric for smoothing
        category_map = {'Alone': 0, 'Co-present Silent': 1, 'Interacting': 2}
        reverse_map = {0: 'Alone', 1: 'Co-present Silent', 2: 'Interacting'}
        
        numeric_states = video_df['interaction_category'].map(category_map)
        
        # Apply rolling median (for smoothing)
        smoothed_states = numeric_states.rolling(
            window=smoothing_window, 
            center=True, 
            min_periods=1
        ).median().round().astype(int)
        
        # Convert back to categories
        smoothed_categories = smoothed_states.map(reverse_map)
        
        # Now find segments using the smoothed data
        states = smoothed_categories.values
        frame_numbers = video_df['frame_number'].values
        video_name = video_df['video_name'].iloc[0]
        
        # Find state changes
        current_state = states[0]
        segment_start = frame_numbers[0]
        
        for i in range(1, len(states)):
            if states[i] != current_state:
                # State changed - end current segment
                segment_end = frame_numbers[i-1]
                segment_duration = (segment_end - segment_start) / FPS
                
                # Only keep segments longer than minimum duration
                if segment_duration >= min_segment_duration:
                    all_segments.append({
                        'video_id': video_id,
                        'video_name': video_name,
                        'category': current_state,
                        'segment_start': segment_start,
                        'segment_end': segment_end,
                        'start_time_sec': segment_start / FPS,
                        'end_time_sec': segment_end / FPS,
                        'duration_sec': segment_duration
                    })
                
                # Start new segment
                current_state = states[i]
                segment_start = frame_numbers[i]
        
        # Handle the final segment
        segment_end = frame_numbers[-1]
        segment_duration = (segment_end - segment_start) / FPS
        if segment_duration >= min_segment_duration:
            all_segments.append({
                'video_id': video_id,
                'video_name': video_name,
                'category': current_state,
                'segment_start': segment_start,
                'segment_end': segment_end,
                'start_time_sec': segment_start / FPS,
                'end_time_sec': segment_end / FPS,
                'duration_sec': segment_duration
            })
    
    # Convert to DataFrame
    if all_segments:
        segments_df = pd.DataFrame(all_segments)
        segments_df = segments_df.sort_values(['video_id', 'start_time_sec']).reset_index(drop=True)
    else:
        segments_df = pd.DataFrame(columns=['video_id', 'video_name', 'category',
                                          'segment_start', 'segment_end', 
                                          'start_time_sec', 'end_time_sec', 'duration_sec'])
    
    return segments_df

print("Available approaches:")
print("1. extract_segments_efficient() - Fast, based on state changes")
print("2. extract_segments_with_smoothing() - Applies smoothing to reduce noise")
print("Both ensure mutually exclusive segments!")

Configuration:
- Window size: 90 sampled frames (30 seconds)
- Decision threshold: 60.0% of window
- Gap merging: 5.0 seconds
- Sample rate: every 10th frame


In [52]:
# Use the efficient approach for mutually exclusive segments
merged_df = extract_segments_efficient(results_df)

print(f"\n=== SEGMENT ANALYSIS RESULTS ===")
print(f"Total segments found: {len(merged_df)}")

if len(merged_df) > 0:
    # Summary by category
    category_summary = merged_df.groupby('category').agg({
        'duration_sec': ['count', 'sum', 'mean', 'median']
    }).round(1)
    category_summary.columns = ['count', 'total_duration', 'mean_duration', 'median_duration']
    print(f"\nSegment summary by category:")
    print(category_summary)
    
    # Summary by video
    video_summary = merged_df.groupby('video_id').agg({
        'duration_sec': 'sum',
        'category': 'count'
    }).round(1)
    video_summary.columns = ['total_duration', 'segment_count']
    print(f"\nSegment summary by video:")
    print(video_summary.head())
    
    # Show first few segments
    print(f"\nFirst 10 segments:")
    print(merged_df[['video_id', 'video_name', 'category', 'start_time_sec', 'end_time_sec', 'duration_sec']].head(10))
else:
    print("No segments found!")

Computing rolling windows for each video...
Extracting segments by category...
Extracting segments by category...
  Processing 114571 frames for category: Interacting
    Found 1060 segments for Interacting
  Processing 395 frames for category: Co-present Silent
    Found 67 segments for Co-present Silent
  Processing 114571 frames for category: Interacting
    Found 1060 segments for Interacting
  Processing 395 frames for category: Co-present Silent
    Found 67 segments for Co-present Silent
  Processing 640509 frames for category: Alone
    Found 1505 segments for Alone
   video_id                              video_name     category  \
0         1  quantex_at_home_id254922_2022_04_12_01        Alone   
1         1  quantex_at_home_id254922_2022_04_12_01  Interacting   
2         1  quantex_at_home_id254922_2022_04_12_01        Alone   
3         1  quantex_at_home_id254922_2022_04_12_01        Alone   
4         1  quantex_at_home_id254922_2022_04_12_01  Interacting   

   segment

In [53]:
merged_df[merged_df['video_id'] == 10]

Unnamed: 0,video_id,video_name,category,segment_start,segment_end,start_time_sec,end_time_sec,duration_sec
127,10,quantex_at_home_id255237_2022_05_08_02,Interacting,0,1170,0.0,39.0,39.0
128,10,quantex_at_home_id255237_2022_05_08_02,Alone,630,1780,21.0,59.333333,38.333333
129,10,quantex_at_home_id255237_2022_05_08_02,Alone,1100,2890,36.666667,96.333333,59.666667
130,10,quantex_at_home_id255237_2022_05_08_02,Interacting,2180,3590,72.666667,119.666667,47.0
131,10,quantex_at_home_id255237_2022_05_08_02,Alone,2890,6880,96.333333,229.333333,133.0
132,10,quantex_at_home_id255237_2022_05_08_02,Alone,6260,8270,208.666667,275.666667,67.0
133,10,quantex_at_home_id255237_2022_05_08_02,Alone,8140,10310,271.333333,343.666667,72.333333
134,10,quantex_at_home_id255237_2022_05_08_02,Alone,9890,14610,329.666667,487.0,157.333333
135,10,quantex_at_home_id255237_2022_05_08_02,Alone,14210,15800,473.666667,526.666667,53.0
136,10,quantex_at_home_id255237_2022_05_08_02,Alone,15080,16070,502.666667,535.666667,33.0


In [58]:
def extract_segments_efficient(results_df):
    """
    Efficient approach: Create mutually exclusive segments using state changes.
    This ensures no overlapping segments and is much faster.
    """
    print("Creating mutually exclusive segments using state changes...")
    
    all_segments = []
    
    for video_id, video_df in results_df.groupby('video_id'):
        video_df = video_df.sort_values('frame_number').reset_index(drop=True)
        
        if len(video_df) == 0:
            continue
                    
        # Get interaction states
        states = video_df['interaction_category'].values
        frame_numbers = video_df['frame_number'].values
        video_name = video_df['video_name'].iloc[0]
        
        # Find state changes (where interaction category changes)
        current_state = states[0]
        segment_start = frame_numbers[0]
        
        for i in range(1, len(states)):
            if states[i] != current_state:
                # State changed - end current segment
                segment_end = frame_numbers[i-1]
                
                # Only keep segments longer than minimum duration (5 seconds)
                segment_duration = (segment_end - segment_start) / FPS
                if segment_duration >= 5.0:  # 5 second minimum
                    all_segments.append({
                        'video_id': video_id,
                        'video_name': video_name,
                        'category': current_state,
                        'segment_start': segment_start,
                        'segment_end': segment_end,
                        'start_time_sec': segment_start / FPS,
                        'end_time_sec': segment_end / FPS,
                        'duration_sec': segment_duration
                    })
                
                # Start new segment
                current_state = states[i]
                segment_start = frame_numbers[i]
        
        # Handle the final segment
        segment_end = frame_numbers[-1]
        segment_duration = (segment_end - segment_start) / FPS
        if segment_duration >= 5.0:  # 5 second minimum
            all_segments.append({
                'video_id': video_id,
                'video_name': video_name,
                'category': current_state,
                'segment_start': segment_start,
                'segment_end': segment_end,
                'start_time_sec': segment_start / FPS,
                'end_time_sec': segment_end / FPS,
                'duration_sec': segment_duration
            })
    
    # Convert to DataFrame
    if all_segments:
        segments_df = pd.DataFrame(all_segments)
        segments_df = segments_df.sort_values(['video_id', 'start_time_sec']).reset_index(drop=True)
        
        # Verify no overlaps within each video
        print("\nVerifying segment integrity...")
        for video_id, video_segments in segments_df.groupby('video_id'):
            video_segments = video_segments.sort_values('start_time_sec')
            for i in range(len(video_segments) - 1):
                current_end = video_segments.iloc[i]['end_time_sec']
                next_start = video_segments.iloc[i + 1]['start_time_sec']
                if current_end >= next_start:
                    print(f"WARNING: Overlap detected in video {video_id}")
    else:
        segments_df = pd.DataFrame(columns=['video_id', 'video_name', 'category',
                                          'segment_start', 'segment_end', 
                                          'start_time_sec', 'end_time_sec', 'duration_sec'])
    
    return segments_df

# Run the efficient segmentation
merged_df = extract_segments_efficient(results_df)

print(f"\n=== SEGMENT ANALYSIS RESULTS ===")
print(f"Total segments found: {len(merged_df)}")

if len(merged_df) > 0:
    # Summary by category
    category_summary = merged_df.groupby('category').agg({
        'duration_sec': ['count', 'sum', 'mean', 'median']
    }).round(1)
    category_summary.columns = ['count', 'total_duration', 'mean_duration', 'median_duration']
    print(f"\nSegment summary by category:")
    print(category_summary)
    
    # Summary by video
    video_summary = merged_df.groupby('video_id').agg({
        'duration_sec': 'sum',
        'category': 'count'
    }).round(1)
    video_summary.columns = ['total_duration', 'segment_count']    
    
    # Save to CSV
    output_path = "/home/nele_pauline_suffo/projects/naturalistic-social-analysis/src/results/rq_01/social_interaction_segments.csv"
    merged_df.to_csv(output_path, index=False)
    print(f"\n✅ Segments saved to: {output_path}")
else:
    print("No segments found!")

Creating mutually exclusive segments using state changes...


KeyError: 'frame_number'

In [55]:
results_df = pd.read_csv("/home/nele_pauline_suffo/projects/naturalistic-social-analysis/src/results/rq_01/social_interaction_segments.csv")

In [57]:
results_df[results_df["video_id"]==1]

Unnamed: 0,video_id,video_name,category,segment_start,segment_end,start_time_sec,end_time_sec,duration_sec
0,1,quantex_at_home_id254922_2022_04_12_01,Interacting,0,230,0.0,7.666667,7.666667
1,1,quantex_at_home_id254922_2022_04_12_01,Co-present Silent,240,430,8.0,14.333333,6.333333
2,1,quantex_at_home_id254922_2022_04_12_01,Alone,600,790,20.0,26.333333,6.333333
3,1,quantex_at_home_id254922_2022_04_12_01,Alone,1470,1700,49.0,56.666667,7.666667
4,1,quantex_at_home_id254922_2022_04_12_01,Alone,1760,1920,58.666667,64.0,5.333333
5,1,quantex_at_home_id254922_2022_04_12_01,Interacting,1930,2160,64.333333,72.0,7.666667
6,1,quantex_at_home_id254922_2022_04_12_01,Alone,2640,3020,88.0,100.666667,12.666667
7,1,quantex_at_home_id254922_2022_04_12_01,Alone,3340,3840,111.333333,128.0,16.666667
8,1,quantex_at_home_id254922_2022_04_12_01,Alone,4060,5350,135.333333,178.333333,43.0
9,1,quantex_at_home_id254922_2022_04_12_01,Alone,5630,5910,187.666667,197.0,9.333333


In [1]:
import sqlite3
import pandas as pd

# Connect to the database
db_path = '/home/nele_pauline_suffo/ProcessedData/quantex_annotations/annotations.db'
interaction_conn = sqlite3.connect(db_path)

# Define the video IDs you want to filter
video_ids = [5, 6, 7, 8, 11, 23]

# Query annotations with selected columns
query_annotations = f"""
SELECT video_id, image_id, bbox, person_age
FROM annotations
WHERE video_id IN ({', '.join(map(str, video_ids))}) 
AND category_id = 10
AND outside = 0
"""

# Query videos table for file names
query_videos = f"""
SELECT id, file_name
FROM videos
WHERE id IN ({', '.join(map(str, video_ids))})
"""

# Load data into DataFrames
face_detections_gt = pd.read_sql(query_annotations, interaction_conn)
videos_df = pd.read_sql(query_videos, interaction_conn)

# Close the DB connection
interaction_conn.close()

# Merge the two DataFrames on video_id
gt_df = face_detections_gt.merge(videos_df, left_on="video_id", right_on="id", how="left")

# Create combined image_id: file_name + "_" + zero-padded image_id
gt_df["image_id"] = gt_df.apply(lambda row: f"{row['file_name'].replace('.mp4','')}_{int(row['image_id']):06d}", axis=1)

# map person_age infant to child
gt_df["person_age"] = gt_df["person_age"].replace({"infant": "child"})

# Drop the extra 'id' column if not needed
gt_df.drop(columns=["id", "file_name", "video_id"], inplace=True)
gt_df.rename(columns={"bbox": "bbox_gt", "person_age": "person_age_gt"}, inplace=True)

gt_df["gt_idx"] = gt_df.groupby("image_id").cumcount() + 1

gt_df_wide = gt_df.pivot(index="image_id", columns="gt_idx")
gt_df_wide.columns = [f"{col[0]}_{col[1]}" for col in gt_df_wide.columns]
gt_df_wide.reset_index(inplace=True)

print(gt_df_wide.head())

                                        image_id  \
0  quantex_at_home_id255944_2022_03_08_01_000030   
1  quantex_at_home_id255944_2022_03_08_01_000060   
2  quantex_at_home_id255944_2022_03_08_01_000090   
3  quantex_at_home_id255944_2022_03_08_01_000120   
4  quantex_at_home_id255944_2022_03_08_01_000360   

                            bbox_gt_1 bbox_gt_2 bbox_gt_3 person_age_gt_1  \
0   [1911.28, 500.33, 2304.0, 1079.9]       NaN       NaN           adult   
1     [1297.97, 20.78, 1736.4, 575.0]       NaN       NaN           adult   
2     [1927.6, 190.42, 2304.0, 780.5]       NaN       NaN           adult   
3      [2045.0, 66.45, 2304.0, 565.2]       NaN       NaN           adult   
4  [1097.67, 496.01, 1222.22, 645.69]       NaN       NaN           adult   

  person_age_gt_2 person_age_gt_3  
0             NaN             NaN  
1             NaN             NaN  
2             NaN             NaN  
3             NaN             NaN  
4             NaN             NaN  


In [9]:
import json
import pandas as pd
from datetime import datetime

# Load JSON
with open("/home/nele_pauline_suffo/outputs/face_detections/yolo12l_validation_20250822_112418/predictions.json", "r") as f:
    data = json.load(f)

# Convert to DataFrame
df_pred = pd.DataFrame(data)

# Extract video_id and frame_number from image_id
def parse_video_id(image_id):
    # example: quantex_at_home_id255944_2022_03_08_01_000000 return 000000 without trailing zeros
    parts = image_id.split("_")
    # video_id from idXXXXX
    vid = int(parts[3].replace("id", ""))  # "id255944" → 255944
    return vid

def parse_frame_number(image_id):
    # last part is frame number
    return int(image_id.split("_")[-1])

df_pred["frame_number"] = df_pred["image_id"].apply(parse_frame_number)
# filter to only keep predictions with score greater than 0.25
df_pred = df_pred[df_pred["score"] >= 0.25]

# Rename columns
df_pred.rename(columns={"score": "confidence_score", "category_id": "person_age_pred", "bbox": "bbox_pred"}, inplace=True)

# Add extra columns
df_pred["person_age_pred"] = df_pred["person_age_pred"].replace({1: "child", 2: "adult"})
# Reorder columns
final_columns = ["image_id", "bbox_pred", "person_age_pred"]
df_pred = df_pred[final_columns]

df_pred["gt_idx"] = df_pred.groupby("image_id").cumcount() + 1

df_pred_wide = df_pred.pivot(index="image_id", columns="gt_idx")
df_pred_wide.columns = [f"{col[0]}_{col[1]}" for col in df_pred_wide.columns]
df_pred_wide.reset_index(inplace=True)

print(df_pred_wide.head())

                                        image_id  \
0  quantex_at_home_id255944_2022_03_08_01_000030   
1  quantex_at_home_id255944_2022_03_08_01_000060   
2  quantex_at_home_id255944_2022_03_08_01_000090   
3  quantex_at_home_id255944_2022_03_08_01_000120   
4  quantex_at_home_id255944_2022_03_08_01_000360   

                             bbox_pred_1 bbox_pred_2 bbox_pred_3 bbox_pred_4  \
0   [1928.706, 534.306, 375.294, 547.87]         NaN         NaN         NaN   
1   [1292.824, 33.548, 434.423, 543.073]         NaN         NaN         NaN   
2  [1929.726, 192.167, 372.645, 562.045]         NaN         NaN         NaN   
3   [2050.465, 64.385, 253.136, 498.399]         NaN         NaN         NaN   
4   [1113.687, 496.063, 109.06, 156.085]         NaN         NaN         NaN   

  person_age_pred_1 person_age_pred_2 person_age_pred_3 person_age_pred_4  
0             adult               NaN               NaN               NaN  
1             adult               NaN               Na

In [10]:
# join both tables on image_id (outer join)
df_final = df_pred_wide.merge(gt_df_wide, on="image_id", how="outer")

In [11]:
df_final[df_final["bbox_gt_1"].isna()]

Unnamed: 0,image_id,bbox_pred_1,bbox_pred_2,bbox_pred_3,bbox_pred_4,person_age_pred_1,person_age_pred_2,person_age_pred_3,person_age_pred_4,bbox_gt_1,bbox_gt_2,bbox_gt_3,person_age_gt_1,person_age_gt_2,person_age_gt_3
13,quantex_at_home_id255944_2022_03_08_01_002010,"[2100.492, 1188.071, 86.245, 105.115]",,,,child,,,,,,,,,
41,quantex_at_home_id255944_2022_03_08_01_003690,"[1991.526, 0.33, 125.2, 123.789]",,,,adult,,,,,,,,,
46,quantex_at_home_id255944_2022_03_08_01_011220,"[127.639, 286.176, 71.385, 75.641]",,,,child,,,,,,,,,
47,quantex_at_home_id255944_2022_03_08_01_011460,"[115.714, 0.0, 1091.078, 425.623]",,,,child,,,,,,,,,
54,quantex_at_home_id255944_2022_03_08_01_011850,"[2002.212, 0.0, 301.549, 530.796]",,,,adult,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668,quantex_at_home_id260275_2022_04_28_01_008310,"[2251.084, 1105.558, 52.916, 172.597]",,,,adult,,,,,,,,,
673,quantex_at_home_id260275_2022_04_28_01_009270,"[2248.686, 101.383, 55.314, 189.439]",,,,adult,,,,,,,,,
675,quantex_at_home_id260275_2022_04_28_01_009630,"[2086.594, 0.0, 217.204, 161.988]",,,,child,,,,,,,,,
676,quantex_at_home_id260275_2022_04_28_01_010290,"[396.361, 0.0, 745.816, 467.368]",,,,adult,,,,,,,,,


In [20]:
missclassified_df = df_final[
    (df_final["person_age_pred_1"] != df_final["person_age_gt_1"]) &
    df_final["person_age_pred_1"].notna() &
    df_final["person_age_gt_1"].notna() &
    df_final["person_age_gt_2"].isna()
]

In [None]:
missclassified_df[["image_id", "person_age_pred_1", "persovn_age_gt_1"]]

Unnamed: 0,image_id,person_age_pred_1,person_age_gt_1
39,quantex_at_home_id255944_2022_03_08_01_003630,adult,child
65,quantex_at_home_id255944_2022_03_08_01_014160,adult,child
109,quantex_at_home_id255944_2022_03_10_01_009660,adult,child
622,quantex_at_home_id260275_2022_04_28_01_001140,child,adult
625,quantex_at_home_id260275_2022_04_28_01_001290,child,adult
629,quantex_at_home_id260275_2022_04_28_01_001560,child,adult
634,quantex_at_home_id260275_2022_04_28_01_001830,child,adult
639,quantex_at_home_id260275_2022_04_28_01_002280,child,adult
640,quantex_at_home_id260275_2022_04_28_01_002340,child,adult
644,quantex_at_home_id260275_2022_04_28_01_002550,child,adult
