## Object Interaction (Object Present Yes or No)

In [2]:
import sqlite3
import pandas as pd

In [None]:

# Build the object-presence dataset: one row per (sampled frame, object type)
# carrying a binary `object_present` flag and the frame's social context.
#
# NOTE: sampling uses SQLite's RANDOM(), which cannot be seeded from SQL, so
# every execution of this cell draws a different sample.
# NOTE(review): RandomSubjects and RandomFrames are each referenced more than
# once. SQLite versions before 3.35 may evaluate a CTE separately for every
# reference, which would re-draw RANDOM() per reference -- confirm the bundled
# version with `sqlite3.sqlite_version`.
query = """
WITH RandomSubjects AS (
    -- Select 5 random subject/video rows
    SELECT DISTINCT s.child_id, s.video_name, v.video_id, s.age_at_recording
    FROM Subjects s
    JOIN Videos v ON s.video_name = v.video_path
    ORDER BY RANDOM()
    LIMIT 5
),
CandidateFrames AS (
    -- Every distinct frame that has at least one detection in a selected video
    SELECT DISTINCT d.frame_number, d.video_id
    FROM Detections d
    JOIN RandomSubjects rs ON d.video_id = rs.video_id
),
RankedFrames AS (
    -- Shuffle the frames independently inside each video
    -- (window functions require SQLite >= 3.25)
    SELECT
        cf.frame_number,
        cf.video_id,
        ROW_NUMBER() OVER (PARTITION BY cf.video_id ORDER BY RANDOM()) as frame_rank
    FROM CandidateFrames cf
),
RandomFrames AS (
    -- Keep up to 50 random frames per selected video. A plain LIMIT 250 over
    -- the pooled frames would over-sample videos with more detected frames.
    SELECT rk.frame_number, rk.video_id
    FROM RankedFrames rk
    WHERE rk.frame_rank <= 50
),
SocialContext AS (
    -- Label each sampled frame by which person classes were detected in it
    SELECT 
        d.frame_number,
        d.video_id,
        CASE
            WHEN MAX(CASE WHEN d.object_class IN (1,3) THEN 1 ELSE 0 END) = 1 
            AND MAX(CASE WHEN d.object_class IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child and adult present'
            WHEN MAX(CASE WHEN d.object_class IN (1,3) THEN 1 ELSE 0 END) = 1 
            THEN 'adult present'
            WHEN MAX(CASE WHEN d.object_class IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child present'
            ELSE 'alone'
        END as social
    FROM Detections d
    JOIN RandomFrames rf ON d.frame_number = rf.frame_number AND d.video_id = rf.video_id
    GROUP BY d.frame_number, d.video_id
),
ObjectTypes AS (
    -- Create all possible object types
    SELECT 
        'book' as object_type, 5 as object_class UNION ALL
        SELECT 'toy', 6 UNION ALL
        SELECT 'kitchenware', 7 UNION ALL
        SELECT 'screen', 8 UNION ALL
        SELECT 'food', 9 UNION ALL
        SELECT 'other_object', 10
),
FrameObjects AS (
    -- Get object presence per frame and object type; the GROUP BY already
    -- yields exactly one row per (frame, video, object type)
    SELECT
        rf.frame_number,
        rf.video_id,
        ot.object_type,
        ot.object_class,
        MAX(CASE WHEN d.object_class IS NOT NULL THEN 1 ELSE 0 END) as object_present
    FROM RandomFrames rf
    CROSS JOIN ObjectTypes ot
    LEFT JOIN Detections d ON 
        rf.frame_number = d.frame_number 
        AND rf.video_id = d.video_id 
        AND ot.object_class = d.object_class
    GROUP BY rf.frame_number, rf.video_id, ot.object_type, ot.object_class
)

SELECT DISTINCT
    fo.video_id,
    fo.frame_number as frame_id,
    rs.child_id as ID,
    fo.object_present,
    fo.object_type,
    rs.age_at_recording as age,
    COALESCE(sc.social, 'alone') as social
FROM 
    FrameObjects fo
    JOIN RandomSubjects rs ON fo.video_id = rs.video_id
    LEFT JOIN SocialContext sc ON fo.frame_number = sc.frame_number AND fo.video_id = sc.video_id
ORDER BY 
    rs.child_id, fo.video_id, fo.frame_number, fo.object_type;
"""

# Load data. `with sqlite3.connect(...)` only wraps a transaction and leaves
# the connection open, so close it explicitly once the frame is loaded.
db_path = '/home/nele_pauline_suffo/outputs/detection_pipeline_results/detection_results.db'
conn = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Convert categorical variables to factors
for factor_col in ('social', 'object_type', 'ID'):
    df[factor_col] = pd.Categorical(df[factor_col])

print("Data shape:", df.shape)
print("\nNumber of unique subjects:", df['ID'].nunique())
print("\nVariable types:")
print(df.dtypes)
print("\nSample data (showing first 12 rows to see multiple object types per frame):")
display(df.head(12))

Data shape: (1500, 7)

Number of unique subjects: 5

Variable types:
video_id             int64
frame_id             int64
ID                category
object_present       int64
object_type       category
age                float64
social            category
dtype: object

Sample data (showing first 12 rows to see multiple object types per frame):


Unnamed: 0,video_id,frame_id,ID,object_present,object_type,age,social
0,137,2370,255944,0,book,5.92,child present
1,137,2370,255944,0,food,5.92,child present
2,137,2370,255944,0,kitchenware,5.92,child present
3,137,2370,255944,0,other_object,5.92,child present
4,137,2370,255944,0,screen,5.92,child present
5,137,2370,255944,0,toy,5.92,child present
6,137,2870,255944,0,book,5.92,adult present
7,137,2870,255944,0,food,5.92,adult present
8,137,2870,255944,0,kitchenware,5.92,adult present
9,137,2870,255944,0,other_object,5.92,adult present


In [4]:
# Cast the factor columns to pandas' categorical dtype. `social_context` is
# written as a categorical copy of the existing `social` column.
for target_col, source_col in (('object_type', 'object_type'),
                               ('social_context', 'social')):
    df[target_col] = df[source_col].astype('category')

In [5]:
# Report how many missing (NaN) entries each column contains.
nan_counts_per_column = df.isnull().sum()
print("\nChecking for NaN values:")
print(nan_counts_per_column)



Checking for NaN values:
video_id          0
frame_id          0
ID                0
object_present    0
object_type       0
age               0
social            0
social_context    0
dtype: int64


In [6]:
# Persist the frame as CSV; the pandas row index is left out of the file.
results_csv_path = '/home/nele_pauline_suffo/outputs/detection_pipeline_results/object_test_results.csv'
df.to_csv(results_csv_path, index=False)

## Social Context (Alone or Not)

In [None]:
# Build the social-context dataset: one row per sampled frame with a binary
# `alone` flag, the kind of person detected, and the mean proximity.
#
# NOTE: sampling uses SQLite's RANDOM(), which cannot be seeded from SQL, so
# every execution of this cell draws a different sample.
# NOTE(review): RandomSubjects is referenced more than once. SQLite versions
# before 3.35 may evaluate a CTE separately for every reference, which would
# re-draw RANDOM() per reference -- confirm the bundled version with
# `sqlite3.sqlite_version`.
query = """
WITH RandomSubjects AS (
    -- Select 5 random subject/video rows
    SELECT DISTINCT s.child_id, s.video_name, v.video_id, s.age_at_recording
    FROM Subjects s
    JOIN Videos v ON s.video_name = v.video_path
    ORDER BY RANDOM()
    LIMIT 5
),
CandidateFrames AS (
    -- Every distinct frame that has at least one detection in a selected video
    SELECT DISTINCT d.frame_number, d.video_id
    FROM Detections d
    JOIN RandomSubjects rs ON d.video_id = rs.video_id
),
RankedFrames AS (
    -- Shuffle the frames independently inside each video
    -- (window functions require SQLite >= 3.25)
    SELECT
        cf.frame_number,
        cf.video_id,
        ROW_NUMBER() OVER (PARTITION BY cf.video_id ORDER BY RANDOM()) as frame_rank
    FROM CandidateFrames cf
),
RandomFrames AS (
    -- Keep up to 50 random frames per selected video. A plain LIMIT 250 over
    -- the pooled frames would over-sample videos with more detected frames.
    SELECT rk.frame_number, rk.video_id
    FROM RankedFrames rk
    WHERE rk.frame_rank <= 50
),
PersonContext AS (
    SELECT 
        -- Key on the sampled frame (rf), not on the LEFT JOINed detection (d):
        -- d.* is NULL for frames without any person detection, which would
        -- merge all such frames into a single NULL group that the final join
        -- then discards.
        rf.frame_number,
        rf.video_id,
        -- Create binary alone/not_alone. COUNT(*) is never 0 after a LEFT JOIN,
        -- so count a detection column, which is NULL when nothing matched.
        CASE WHEN COUNT(d.object_class) = 0 THEN 1 ELSE 0 END as alone,
        -- Determine person type present
        CASE
            WHEN MAX(CASE WHEN d.object_class IN (1,3) THEN 1 ELSE 0 END) = 1 
            AND MAX(CASE WHEN d.object_class IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'both'
            WHEN MAX(CASE WHEN d.object_class IN (1,3) THEN 1 ELSE 0 END) = 1 
            THEN 'adult'
            WHEN MAX(CASE WHEN d.object_class IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child'
            ELSE 'none'
        END as person_type,
        -- Average proximity for the frame (values outside [0, 1] are ignored)
        AVG(CASE WHEN d.proximity BETWEEN 0 AND 1 THEN d.proximity ELSE NULL END) as avg_proximity
    FROM RandomFrames rf
    LEFT JOIN Detections d ON rf.frame_number = d.frame_number 
        AND rf.video_id = d.video_id
        AND d.object_class IN (0,1,2,3)  -- Only person/face classes
    GROUP BY rf.frame_number, rf.video_id
)

SELECT DISTINCT
    pc.video_id,
    pc.frame_number as frame_id,
    rs.child_id as ID,
    rs.age_at_recording as age,
    pc.alone,
    pc.person_type,
    -- NOTE(review): frames without a valid proximity are reported as 0, which
    -- cannot be told apart from a measured proximity of 0 -- confirm intended.
    COALESCE(pc.avg_proximity, 0) as proximity
FROM 
    PersonContext pc
    JOIN RandomSubjects rs ON pc.video_id = rs.video_id
ORDER BY 
    rs.child_id, pc.video_id, pc.frame_number;
"""

# Load data. `with sqlite3.connect(...)` only wraps a transaction and leaves
# the connection open, so close it explicitly once the frame is loaded.
db_path = '/home/nele_pauline_suffo/outputs/detection_pipeline_results/detection_results.db'
conn = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Convert categorical variables
for factor_col in ('person_type', 'ID'):
    df[factor_col] = pd.Categorical(df[factor_col])

print("Data shape:", df.shape)
print("\nNumber of unique subjects:", df['ID'].nunique())
print("\nVariable types:")
print(df.dtypes)
print("\nSample distribution:")
print("\nPerson type distribution:")
print(df['person_type'].value_counts())
# The query returns `alone`; no `not_alone` column exists, so indexing it
# raised a KeyError.
print("\nAlone distribution (1 = no person detected):")
print(df['alone'].value_counts())
print("\nProximity statistics:")
print(df['proximity'].describe())