# 🔍 CMI Data Exploration - Find Correct Paths

This notebook explores the data available in the Kaggle environment to find the correct file paths.

In [None]:
import os
import pandas as pd
import numpy as np

# Show every dataset mounted under the Kaggle input root, if that root exists.
print("🔍 Exploring Kaggle data directory...")
print("Available input directories:")
input_root = '/kaggle/input'
if not os.path.exists(input_root):
    # Outside a Kaggle session the mount point is absent.
    print("  /kaggle/input not found")
else:
    for entry in os.listdir(input_root):
        print(f"  {entry}")

In [None]:
# Check the CMI competition directory
def format_entry(directory, name):
    """Return one indented display line for entry ``name`` inside ``directory``.

    Regular files are shown with their size in MB (one decimal place);
    anything else is labelled as a directory.
    """
    entry_path = os.path.join(directory, name)
    if os.path.isfile(entry_path):
        size_mb = os.path.getsize(entry_path) / (1024 * 1024)
        return f"  {name} ({size_mb:.1f} MB)"
    return f"  {name} (directory)"


cmi_path = '/kaggle/input/cmi-detect-behavior-with-sensor-data'
# isdir (not exists) so that os.listdir below cannot fail on a plain file.
if os.path.isdir(cmi_path):
    print(f"📁 Files in {cmi_path}:")
    # Sorted so the listing is stable between runs.
    for item in sorted(os.listdir(cmi_path)):
        print(format_entry(cmi_path, item))
else:
    print(f"❌ {cmi_path} not found")
    print("Checking alternative paths...")

    # Try common alternatives. cmi_path itself is deliberately not repeated
    # here: it has just been shown to be missing.
    alternatives = [
        '/kaggle/input/cmi-bfrb-detection',
        '/kaggle/input/cmi-sensor-data',
    ]

    for path in alternatives:
        if os.path.isdir(path):
            print(f"✅ Found: {path}")
            for item in sorted(os.listdir(path)):
                print(format_entry(path, item))
            break
    else:
        # Loop finished without a break: make the negative result explicit
        # instead of ending silently.
        print("❌ No alternative dataset directory found")

In [None]:
# Probe every expected dataset under each candidate extension and preview
# whatever can be read.
data_path = '/kaggle/input/cmi-detect-behavior-with-sensor-data'

file_formats = ['parquet', 'csv', 'feather']
datasets = ['train', 'test', 'sample_submission']

# Extension -> pandas reader; replaces a per-format if/elif chain.
readers = {
    'parquet': pd.read_parquet,
    'csv': pd.read_csv,
    'feather': pd.read_feather,
}

for dataset in datasets:
    for fmt in file_formats:
        file_path = f"{data_path}/{dataset}.{fmt}"

        # Guard first: nothing to read when the file is absent.
        if not os.path.exists(file_path):
            print(f"❌ Not found: {file_path}")
            continue

        print(f"✅ Found: {file_path}")
        try:
            df = readers[fmt](file_path)

            print(f"  Shape: {df.shape}")
            print(f"  Columns: {list(df.columns[:10])}...")
            print(f"  Sample:\n{df.head(3)}")
            print()

        except Exception as e:
            # Exploratory probe: report the failure and move on to the next file.
            print(f"  ❌ Error reading: {e}")

In [None]:
# If we found the data, show detailed structure
def load_train_test(base_dir, fmt):
    """Load ``train.<fmt>`` and ``test.<fmt>`` from ``base_dir``.

    Args:
        base_dir: Directory that holds the two files.
        fmt: File extension to read, either ``'csv'`` or ``'parquet'``.

    Returns:
        A ``(train_df, test_df)`` tuple of DataFrames.

    Raises:
        ValueError: If ``fmt`` is not one of the supported extensions.
        Exception: Any error raised by the underlying pandas reader
            (for example a missing file) is propagated unchanged.
    """
    readers = {'csv': pd.read_csv, 'parquet': pd.read_parquet}
    if fmt not in readers:
        raise ValueError(f"Unsupported format: {fmt!r}")
    reader = readers[fmt]
    return reader(f"{base_dir}/train.{fmt}"), reader(f"{base_dir}/test.{fmt}")


def describe_frames(train_df, test_df):
    """Print shapes, column names and target value counts of the two frames.

    Value counts are printed for the ``behavior`` and ``gesture`` columns of
    ``train_df`` when those columns are present.
    """
    print(f"Train shape: {train_df.shape}")
    print(f"Test shape: {test_df.shape}")

    print(f"\nTrain columns: {list(train_df.columns)}")
    print(f"\nTest columns: {list(test_df.columns)}")

    # Check for behavior/target column
    if 'behavior' in train_df.columns:
        print(f"\nBehavior values:")
        print(train_df['behavior'].value_counts())

    if 'gesture' in train_df.columns:
        print(f"\nGesture values:")
        print(train_df['gesture'].value_counts())


competition_dir = '/kaggle/input/cmi-detect-behavior-with-sensor-data'
train_df = test_df = None

# Only the reads sit inside the try blocks, so an error while summarising
# the data is not misreported as a load failure and does not trigger a
# pointless retry with the other format.
try:
    # Try the most likely format
    train_df, test_df = load_train_test(competition_dir, 'csv')
except Exception as e:
    print(f"❌ Could not load data: {e}")
    print("\nTrying parquet format...")

    try:
        train_df, test_df = load_train_test(competition_dir, 'parquet')
    except Exception as e2:
        print(f"❌ Parquet also failed: {e2}")
    else:
        print("✅ Parquet format works!")
else:
    print("📊 Data structure confirmed:")

# Same detailed summary regardless of which format succeeded.
if train_df is not None:
    describe_frames(train_df, test_df)