In [3]:
import sqlite3
from pathlib import Path
import pandas as pd
import logging

def get_balanced_face_samples(
    db_path='/home/nele_pauline_suffo/outputs/detection_pipeline_results/detection_results.db',
    output_path='/home/nele_pauline_suffo/outputs/detection_pipeline_results/proximity_samples.csv',
    samples_per_bin=10,
    frames_root='/home/nele_pauline_suffo/ProcessedData/quantex_videos_processed/',
) -> pd.DataFrame:
    """Sample detections evenly across age group and proximity bin.

    Reads the ``Detections`` and ``Videos`` tables, keeps detections whose
    ``object_class`` is 2 (labelled ``'child'``) or 3 (labelled ``'adult'``)
    and whose ``proximity`` lies in ``[0, 1]``, assigns each one to one of ten
    equal-width proximity bins, and draws up to ``samples_per_bin`` random
    rows per (age group, bin). The result is written to ``output_path`` as CSV
    and returned.

    Args:
        db_path: Path (str or ``Path``) of the SQLite detection database.
        output_path: Path (str or ``Path``) of the CSV file to write; missing
            parent directories are created.
        samples_per_bin: Maximum number of rows kept per (age group, bin).
        frames_root: Prefix prepended to ``<video_path>/<video_path>_<frame>.jpg``
            when building ``frame_file_name``. It is concatenated verbatim, so
            it should end with a path separator.

    Returns:
        DataFrame with the columns ``frame_file_name``, ``confidence_score``,
        ``proximity``, ``proximity_bin``, ``age_group`` and ``proximity_range``,
        ordered by age group, bin and proximity.

    Raises:
        ValueError: If ``samples_per_bin`` is smaller than 1.
        FileNotFoundError: If ``db_path`` does not point to an existing file.

    Note:
        Rows are picked with SQLite's ``RANDOM()``, which is not seeded here,
        so every call draws a different sample.
    """
    samples_per_bin = int(samples_per_bin)
    if samples_per_bin < 1:
        raise ValueError(f"samples_per_bin must be >= 1, got {samples_per_bin}")

    # sqlite3.connect() would silently create an empty database for a missing
    # path and only fail later with "no such table"; fail early instead.
    db_file = Path(db_path)
    if not db_file.is_file():
        raise FileNotFoundError(f"Detection database not found: {db_file}")

    query = """
    WITH RankedFaces AS (
        SELECT
            v.video_path,
            d.frame_number,
            d.confidence_score,
            d.proximity,
            d.object_class,
            -- proximity = 1.0 passes the BETWEEN filter but would land in an
            -- eleventh bin (10, labelled '1.0-1.1'); clamp it into the last bin.
            MIN(CAST(d.proximity * 10 AS INTEGER), 9) AS proximity_bin
        FROM Detections d
        JOIN Videos v ON d.video_id = v.video_id
        WHERE d.object_class IN (2, 3)
            AND d.proximity BETWEEN 0 AND 1
    ),
    SampledFaces AS (
        SELECT *,
            ROW_NUMBER() OVER (
                PARTITION BY
                    CASE WHEN object_class = 3 THEN 'adult'
                         WHEN object_class = 2 THEN 'child'
                    END,
                    proximity_bin
                ORDER BY RANDOM()
            ) AS rn
        FROM RankedFaces
    )
    SELECT
        ? || video_path || '/' ||
        video_path || '_' || printf('%06d', frame_number) || '.jpg' AS frame_file_name,
        confidence_score,
        proximity,
        proximity_bin,
        CASE WHEN object_class = 3 THEN 'adult'
             WHEN object_class = 2 THEN 'child'
        END AS age_group,
        CAST(proximity_bin/10.0 AS TEXT) || '-' || CAST((proximity_bin + 1)/10.0 AS TEXT) AS proximity_range
    FROM SampledFaces
    WHERE rn <= ?
    ORDER BY age_group, proximity_bin, proximity;
    """

    conn = sqlite3.connect(str(db_file))
    try:
        # Placeholders are bound in order: frames_root, then samples_per_bin.
        df = pd.read_sql_query(query, conn, params=(str(frames_root), samples_per_bin))
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Save to CSV
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)

    logging.info(
        "Samples per bin:\n%s",
        df.groupby(['age_group', 'proximity_range']).size(),
    )

    return df

In [4]:
# Draw a sample (the call also writes it to CSV) and preview the first rows.
# Sampling uses ORDER BY RANDOM(), so re-running this cell yields different rows.
df = get_balanced_face_samples()
df.head()

Unnamed: 0,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
0,/home/nele_pauline_suffo/ProcessedData/quantex...,0.387124,0.057428,0,adult,0.0-0.1
1,/home/nele_pauline_suffo/ProcessedData/quantex...,0.604948,0.082024,0,adult,0.0-0.1
2,/home/nele_pauline_suffo/ProcessedData/quantex...,0.631498,0.084016,0,adult,0.0-0.1
3,/home/nele_pauline_suffo/ProcessedData/quantex...,0.415078,0.085001,0,adult,0.0-0.1
4,/home/nele_pauline_suffo/ProcessedData/quantex...,0.284179,0.085001,0,adult,0.0-0.1


In [35]:
# Restrict the sample to rows labelled 'child' and preview them.
# NOTE(review): the saved output of this and the following cells lists
# `video_path` and `frame_number` columns that the query in
# get_balanced_face_samples does not select, and the execution counts are out
# of order -- re-run with Restart & Run All to refresh the outputs.
child_df = df.loc[df['age_group'].eq('child')]
child_df.head()

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
110,quantex_at_home_id264089_2023_05_14_01,30580,quantex_at_home_id264089_2023_05_14_01_frame_0...,0.27809,0.10182,1,child,0.1-0.2
111,quantex_at_home_id264089_2023_05_14_01,30290,quantex_at_home_id264089_2023_05_14_01_frame_0...,0.273533,0.137346,1,child,0.1-0.2
112,quantex_at_home_id260176_2022_11_06_02,29380,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.344681,0.167893,1,child,0.1-0.2
113,quantex_at_home_id260176_2022_11_06_02,31500,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.733888,0.176972,1,child,0.1-0.2
114,quantex_at_home_id264089_2023_05_14_01,15770,quantex_at_home_id264089_2023_05_14_01_frame_0...,0.331446,0.178066,1,child,0.1-0.2


In [38]:
# Child rows in the closest proximity bin (0.0-0.1); the saved output shows none.
child_df.loc[child_df['proximity_bin'].eq(0)]

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range


In [22]:
# Restrict the sample to rows labelled 'adult' and preview them.
adult_df = df.loc[df['age_group'].eq('adult')]
adult_df.head()

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
0,quantex_at_home_id263284_2023_06_25_06,51080,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.387124,0.057428,0,adult,0.0-0.1
1,quantex_at_home_id263284_2023_06_25_06,42670,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.286812,0.062577,0,adult,0.0-0.1
2,quantex_at_home_id263284_2023_06_25_04,28350,quantex_at_home_id263284_2023_06_25_04_frame_0...,0.342625,0.076558,0,adult,0.0-0.1
3,quantex_at_home_id263284_2023_06_25_06,46900,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.446824,0.077946,0,adult,0.0-0.1
4,quantex_at_home_id263284_2023_06_25_06,29270,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.415343,0.078634,0,adult,0.0-0.1


In [23]:
# Adult rows in proximity bin 0 (range 0.0-0.1).
adult_df.loc[adult_df['proximity_bin'].eq(0)]

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
0,quantex_at_home_id263284_2023_06_25_06,51080,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.387124,0.057428,0,adult,0.0-0.1
1,quantex_at_home_id263284_2023_06_25_06,42670,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.286812,0.062577,0,adult,0.0-0.1
2,quantex_at_home_id263284_2023_06_25_04,28350,quantex_at_home_id263284_2023_06_25_04_frame_0...,0.342625,0.076558,0,adult,0.0-0.1
3,quantex_at_home_id263284_2023_06_25_06,46900,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.446824,0.077946,0,adult,0.0-0.1
4,quantex_at_home_id263284_2023_06_25_06,29270,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.415343,0.078634,0,adult,0.0-0.1
5,quantex_at_home_id263284_2023_06_25_06,39260,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.350767,0.082358,0,adult,0.0-0.1
6,quantex_at_home_id263284_2023_06_25_06,38070,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.645364,0.089816,0,adult,0.0-0.1
7,quantex_at_home_id263284_2023_06_25_06,29250,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.34881,0.09537,0,adult,0.0-0.1
8,quantex_at_home_id263284_2023_06_25_06,39380,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.345138,0.096871,0,adult,0.0-0.1
9,quantex_at_home_id263284_2023_06_25_06,39310,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.358212,0.096871,0,adult,0.0-0.1


In [24]:
# Adult rows in proximity bin 1 (range 0.1-0.2).
adult_df.loc[adult_df['proximity_bin'].eq(1)]

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
10,quantex_at_home_id260176_2022_11_06_02,28750,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.333979,0.109117,1,adult,0.1-0.2
11,quantex_at_home_id263284_2023_06_25_04,37270,quantex_at_home_id263284_2023_06_25_04_frame_0...,0.391213,0.115484,1,adult,0.1-0.2
12,quantex_at_home_id263284_2023_06_25_06,18430,quantex_at_home_id263284_2023_06_25_06_frame_0...,0.655851,0.154566,1,adult,0.1-0.2
13,quantex_at_home_id260176_2022_11_06_02,53040,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.738301,0.162024,1,adult,0.1-0.2
14,quantex_at_home_id260275_2022_04_16_01,21930,quantex_at_home_id260275_2022_04_16_01_frame_0...,0.537715,0.168908,1,adult,0.1-0.2
15,quantex_at_home_id260176_2022_11_06_02,27740,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.604216,0.17163,1,adult,0.1-0.2
16,quantex_at_home_id260275_2022_04_16_01,32510,quantex_at_home_id260275_2022_04_16_01_frame_0...,0.400234,0.179788,1,adult,0.1-0.2
17,quantex_at_home_id260176_2022_11_06_02,28540,quantex_at_home_id260176_2022_11_06_02_frame_0...,0.477549,0.182139,1,adult,0.1-0.2
18,quantex_at_home_id263284_2023_06_25_04,25260,quantex_at_home_id263284_2023_06_25_04_frame_0...,0.307953,0.194009,1,adult,0.1-0.2
19,quantex_at_home_id271611_2024_09_03_01,24010,quantex_at_home_id271611_2024_09_03_01_frame_0...,0.253398,0.197508,1,adult,0.1-0.2


In [34]:
# Adult rows in proximity bin 9 (range 0.9-1.0).
adult_df.loc[adult_df['proximity_bin'].eq(9)]

Unnamed: 0,video_path,frame_number,frame_file_name,confidence_score,proximity,proximity_bin,age_group,proximity_range
90,quantex_at_home_id263204_2025_01_06_01,43230,quantex_at_home_id263204_2025_01_06_01_frame_0...,0.922022,0.905905,9,adult,0.9-1.0
91,quantex_at_home_id262179_2023_01_22_03,27590,quantex_at_home_id262179_2023_01_22_03_frame_0...,0.902986,0.911335,9,adult,0.9-1.0
92,quantex_at_home_id261609_2022_04_01_01,3080,quantex_at_home_id261609_2022_04_01_01_frame_0...,0.909957,0.930715,9,adult,0.9-1.0
93,quantex_at_home_id260772_2022_08_17_01,10,quantex_at_home_id260772_2022_08_17_01_frame_0...,0.947399,0.938435,9,adult,0.9-1.0
94,quantex_at_home_id273855_2023_06_09_01,44570,quantex_at_home_id273855_2023_06_09_01_frame_0...,0.662327,0.941956,9,adult,0.9-1.0
95,quantex_at_home_id263293_2022_08_29_01,42770,quantex_at_home_id263293_2022_08_29_01_frame_0...,0.938678,0.942627,9,adult,0.9-1.0
96,quantex_at_home_id267863_2022_08_23_01,48710,quantex_at_home_id267863_2022_08_23_01_frame_0...,0.521613,0.960146,9,adult,0.9-1.0
97,quantex_at_home_id262179_2023_01_22_03,27810,quantex_at_home_id262179_2023_01_22_03_frame_0...,0.7759,0.970065,9,adult,0.9-1.0
98,quantex_at_home_id263204_2025_01_06_01,48590,quantex_at_home_id263204_2025_01_06_01_frame_0...,0.950059,0.984002,9,adult,0.9-1.0
99,quantex_at_home_id262200_2024_12_08_01,70,quantex_at_home_id262200_2024_12_08_01_frame_0...,0.95973,0.985335,9,adult,0.9-1.0
