In [None]:
import sqlite3
import pandas as pd

query = """
WITH RandomSubjects AS (
    -- Select 10 random subjects
    SELECT DISTINCT s.child_id, s.video_name, v.video_id
    FROM Subjects s
    JOIN Videos v ON s.video_name = v.video_path
    ORDER BY RANDOM()
    LIMIT 5
),
RandomFrames AS (
    -- Select 50 random frames per subject
    SELECT d.frame_number, d.video_id
    FROM Detections d
    JOIN RandomSubjects rs ON d.video_id = rs.video_id
    GROUP BY d.frame_number, d.video_id
    ORDER BY RANDOM()
    LIMIT 250
),
SocialContext AS (
    SELECT 
        d.frame_number,
        d.video_id,
        CASE
            WHEN MAX(CASE WHEN d.category_id IN (1,3) THEN 1 ELSE 0 END) = 1 
            AND MAX(CASE WHEN d.category_id IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child and adult present'
            WHEN MAX(CASE WHEN d.category_id IN (1,3) THEN 1 ELSE 0 END) = 1 
            THEN 'adult present'
            WHEN MAX(CASE WHEN d.category_id IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child present'
            ELSE 'alone'
        END as social
    FROM Detections d
    JOIN RandomFrames rf ON d.frame_number = rf.frame_number AND d.video_id = rf.video_id
    GROUP BY d.frame_number, d.video_id
),
ObjectTypes AS (
    -- Create all possible object types
    SELECT 
        'book' as object_type, 5 as category_id UNION ALL
        SELECT 'toy', 6 UNION ALL
        SELECT 'kitchenware', 7 UNION ALL
        SELECT 'screen', 8 UNION ALL
        SELECT 'food', 9 UNION ALL
        SELECT 'other_object', 10
),
FrameObjects AS (
    -- Cross join frames with all possible object types
    SELECT 
        rf.frame_number,
        rf.video_id,
        ot.object_type,
        ot.category_id,
        CASE WHEN d.category_id IS NOT NULL THEN 1 ELSE 0 END as object_present
    FROM RandomFrames rf
    CROSS JOIN ObjectTypes ot
    LEFT JOIN Detections d ON 
        rf.frame_number = d.frame_number 
        AND rf.video_id = d.video_id 
        AND ot.category_id = d.category_id
)

SELECT
    fo.frame_number as frame_id,
    s.child_id as ID,
    fo.object_present,
    fo.object_type,
    s.age_months as age,
    COALESCE(sc.social, 'alone') as social
FROM 
    FrameObjects fo
    JOIN RandomSubjects rs ON fo.video_id = rs.video_id
    JOIN Subjects s ON rs.subject_id = s.child_id
    LEFT JOIN SocialContext sc ON fo.frame_number = sc.frame_number AND fo.video_id = sc.video_id
ORDER BY 
    s.child_id, fo.frame_number, fo.object_type;
"""

# Load data
with sqlite3.connect('/home/nele_pauline_suffo/outputs/detection_pipeline_results/detection_results.db') as conn:
    df = pd.read_sql_query(query, conn)

# Convert categorical variables to factors
df['social'] = pd.Categorical(df['social'])
df['object_type'] = pd.Categorical(df['object_type'])
df['ID'] = pd.Categorical(df['ID'])

print("Data shape:", df.shape)
print("\nNumber of unique subjects:", df['ID'].nunique())
print("\nVariable types:")
print(df.dtypes)
print("\nSample data (showing first 12 rows to see multiple object types per frame):")
display(df.head(12))

DatabaseError: Execution failed on sql '
WITH RandomSubjects AS (
    -- Select 10 random subjects
    SELECT DISTINCT s.child_id, s.video_name, v.video_id
    FROM Subjects s
    JOIN Videos v ON s.video_name = v.video_path
    ORDER BY RANDOM()
    LIMIT 5
),
RandomFrames AS (
    -- Select 50 random frames per subject
    SELECT d.frame_number, d.video_id
    FROM Detections d
    JOIN RandomSubjects rs ON d.video_id = rs.video_id
    GROUP BY d.frame_number, d.video_id
    ORDER BY RANDOM()
    LIMIT 250
),
SocialContext AS (
    SELECT 
        d.frame_number,
        d.video_id,
        CASE
            WHEN MAX(CASE WHEN d.category_id IN (1,3) THEN 1 ELSE 0 END) = 1 
            AND MAX(CASE WHEN d.category_id IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child and adult present'
            WHEN MAX(CASE WHEN d.category_id IN (1,3) THEN 1 ELSE 0 END) = 1 
            THEN 'adult present'
            WHEN MAX(CASE WHEN d.category_id IN (0,2) THEN 1 ELSE 0 END) = 1 
            THEN 'child present'
            ELSE 'alone'
        END as social
    FROM Detections d
    JOIN RandomFrames rf ON d.frame_number = rf.frame_number AND d.video_id = rf.video_id
    GROUP BY d.frame_number, d.video_id
),
ObjectTypes AS (
    -- Create all possible object types
    SELECT 
        'book' as object_type, 5 as category_id UNION ALL
        SELECT 'toy', 6 UNION ALL
        SELECT 'kitchenware', 7 UNION ALL
        SELECT 'screen', 8 UNION ALL
        SELECT 'food', 9 UNION ALL
        SELECT 'other_object', 10
),
FrameObjects AS (
    -- Cross join frames with all possible object types
    SELECT 
        rf.frame_number,
        rf.video_id,
        ot.object_type,
        ot.category_id,
        CASE WHEN d.category_id IS NOT NULL THEN 1 ELSE 0 END as object_present
    FROM RandomFrames rf
    CROSS JOIN ObjectTypes ot
    LEFT JOIN Detections d ON 
        rf.frame_number = d.frame_number 
        AND rf.video_id = d.video_id 
        AND ot.category_id = d.category_id
)

SELECT
    fo.frame_number as frame_id,
    s.child_id as ID,
    fo.object_present,
    fo.object_type,
    s.age_months as age,
    COALESCE(sc.social, 'alone') as social
FROM 
    FrameObjects fo
    JOIN RandomSubjects rs ON fo.video_id = rs.video_id
    JOIN Subjects s ON rs.subject_id = s.child_id
    LEFT JOIN SocialContext sc ON fo.frame_number = sc.frame_number AND fo.video_id = sc.video_id
ORDER BY 
    s.child_id, fo.frame_number, fo.object_type;
': no such column: d.category_id

In [12]:
# Convert columns to categorical type
df['object_type'] = df['object_type'].astype('category')
df['social_context'] = df['social_context'].astype('category')