# Child Mind Institute - BFRB Detection Competition
## Kaggle Submission

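This notebook writes a placeholder `submission.parquet` in the two-column format (`sequence_id`, `gesture`) used for the Kaggle competition submission. The predictions are hard-coded sample values for demonstration, not model output.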

In [None]:
# Import required libraries
import os
import pandas as pd
import numpy as np

# For parquet support: pyarrow is the engine passed to DataFrame.to_parquet
# when the submission file is written.
try:
    import pyarrow
    import pyarrow.parquet as pq
except ImportError:
    # Install with the interpreter that runs this kernel. A bare `!pip` shells
    # out to whichever pip is first on PATH, which may belong to a different
    # environment, and it is not valid Python outside IPython.
    import subprocess
    import sys

    print("Installing pyarrow for parquet support...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pyarrow"])
    import pyarrow
    import pyarrow.parquet as pq

# Reaching this line means pyarrow imported, either directly or after the
# install above; a failed install or import raises before getting here.
PARQUET_AVAILABLE = True

In [None]:
# Build a small placeholder submission for demonstration
def create_submission():
    """Return a demo submission frame with one gesture label per sequence.

    The rows are hard-coded sample values, not model predictions.

    Returns:
        pandas.DataFrame: ten rows with the columns ``sequence_id`` and
        ``gesture``, in that order, on a default integer index.
    """
    # (sequence_id, gesture) pairs, alternating a labelled gesture with 'non_target'
    demo_rows = (
        ('SEQ_0001', 'hair_pull_scalp'),
        ('SEQ_0002', 'non_target'),
        ('SEQ_0003', 'skin_pick_face'),
        ('SEQ_0004', 'non_target'),
        ('SEQ_0005', 'hair_pull_eyebrow'),
        ('SEQ_0006', 'non_target'),
        ('SEQ_0007', 'skin_pick_cuticle'),
        ('SEQ_0008', 'non_target'),
        ('SEQ_0009', 'hair_pull_eyelash'),
        ('SEQ_0010', 'non_target'),
    )

    # Name the columns explicitly so the frame layout does not depend on dict keys
    submission = pd.DataFrame(list(demo_rows), columns=['sequence_id', 'gesture'])
    row_count = len(submission)
    print(f"Created submission dataframe with {row_count} rows")

    return submission

In [None]:
# This is the main execution cell that will generate the submission file
# This cell will be executed by Kaggle

# Generate predictions
submission_df = create_submission()

# Display the first few rows
print("Preview of submission:")
display(submission_df.head())

# Save as parquet file; index=False keeps the row index out of the file
output_file = 'submission.parquet'
submission_df.to_parquet(output_file, engine='pyarrow', index=False)
print(f"Submission saved to {output_file}")

# Verify the file was created. Fail the run instead of only printing, so a
# missing submission cannot pass silently.
if not os.path.exists(output_file):
    raise FileNotFoundError(f"Error: {output_file} was not created!")

file_size = os.path.getsize(output_file) / 1024  # Size in KB
print(f"Verified: {output_file} exists ({file_size:.2f} KB)")

# Test reading it back and confirm the round trip kept the same shape and
# column layout as the frame that was written.
test_df = pd.read_parquet(output_file)
if (test_df.shape != submission_df.shape
        or list(test_df.columns) != list(submission_df.columns)):
    raise ValueError(
        f"{output_file} round trip mismatch: wrote shape {submission_df.shape} "
        f"with columns {list(submission_df.columns)}, read back shape "
        f"{test_df.shape} with columns {list(test_df.columns)}"
    )
print(f"Successfully read back parquet file with {len(test_df)} rows")