# 🔍 Kaggle File Explorer - Find Actual Data Paths

This notebook explores the Kaggle environment to find the correct file paths and structure.

In [None]:
import os
import glob

# Banner for the exploration output.
print("🔍 Exploring Kaggle Environment")
print("=" * 40)

# Report whether the /kaggle root is present and, if so, show its
# immediate children (one level only, no recursion).
if not os.path.exists('/kaggle'):
    print("❌ /kaggle directory not found")
else:
    print("✅ /kaggle directory exists")

    print("\n📁 Contents of /kaggle:")
    for entry in os.listdir('/kaggle'):
        # Directories get a folder icon and a trailing slash; anything
        # else is shown as a plain file.
        is_folder = os.path.isdir(os.path.join('/kaggle', entry))
        print(f"  📂 {entry}/" if is_folder else f"  📄 {entry}")

In [None]:
# Inspect /kaggle/input: print every top-level entry and, for each entry
# that is a directory, a one-level listing with file sizes in MB.
if not os.path.exists('/kaggle/input'):
    print("❌ /kaggle/input not found")
else:
    print("✅ /kaggle/input exists")
    print("\n📁 Available datasets:")

    for item in os.listdir('/kaggle/input'):
        print(f"  📦 {item}")

        dataset_path = os.path.join('/kaggle/input', item)
        if not os.path.isdir(dataset_path):
            # A loose file directly under /kaggle/input has nothing to list.
            continue

        print(f"    📁 Contents of {item}:")
        try:
            # The try wraps the whole listing: the first failure stops the
            # listing for this dataset and is reported once below.
            for child in os.listdir(dataset_path):
                child_path = os.path.join(dataset_path, child)
                if not os.path.isfile(child_path):
                    print(f"      📂 {child}/")
                    continue
                megabytes = os.path.getsize(child_path) / (1024*1024)
                print(f"      📄 {child} ({megabytes:.1f} MB)")
        except PermissionError:
            print("      ❌ Permission denied")
        except Exception as e:
            print(f"      ❌ Error: {e}")
        # Blank line separates one dataset's listing from the next.
        print()

In [None]:
# Look for likely competition files by name, using shell-style globs
# one or two levels below /kaggle/input.
print("🔍 Searching for CMI-related files...")

# Patterns cover train/test files in any dataset plus datasets whose
# directory name mentions cmi, behavior or sensor.
search_patterns = [
    '/kaggle/input/*/train.*',
    '/kaggle/input/*/test.*',
    '/kaggle/input/*cmi*/*',
    '/kaggle/input/*behavior*/*',
    '/kaggle/input/*sensor*/*',
    '/kaggle/input/*/*train*',
    '/kaggle/input/*/*test*',
]

# A set de-duplicates paths matched by more than one pattern.
found_files = set()
for glob_pattern in search_patterns:
    found_files.update(glob.glob(glob_pattern))

if not found_files:
    print("❌ No relevant files found with common patterns")
else:
    print("\n✅ Found relevant files:")
    for hit in sorted(found_files):
        if not os.path.isfile(hit):
            print(f"  📂 {hit}/")
            continue
        megabytes = os.path.getsize(hit) / (1024*1024)
        print(f"  📄 {hit} ({megabytes:.1f} MB)")

In [None]:
# Recursively collect every CSV, Parquet, Feather and JSON file under
# /kaggle/input and list them from largest to smallest.
print("\n🔍 Searching for all CSV and Parquet files...")


def size_in_mb(path):
    """Return the size of *path* in MB (bytes / 1024**2), or 0.0 on failure.

    Only ``OSError`` is handled (missing file, broken symlink, permission
    problem), so one unreadable entry does not abort the scan. Other
    exceptions, including ``KeyboardInterrupt``, propagate normally.
    """
    try:
        return os.path.getsize(path) / (1024 * 1024)
    except OSError:
        return 0.0


data_files = []
for root, _dirs, files in os.walk('/kaggle/input'):
    for file in files:
        # str.endswith accepts a tuple, so one call covers every extension.
        if file.endswith(('.csv', '.parquet', '.feather', '.json')):
            file_path = os.path.join(root, file)
            data_files.append((file_path, size_in_mb(file_path)))

if data_files:
    print("\n📊 Found data files:")
    # Largest files first.
    for file_path, size in sorted(data_files, key=lambda x: x[1], reverse=True):
        print(f"  📄 {file_path} ({size:.1f} MB)")
else:
    print("❌ No data files found")

In [None]:
# Show where the notebook is running and what sits next to it.
print("\n📍 Current working directory:")
print(f"  {os.getcwd()}")

print("\n📁 Contents of current directory:")
for entry in os.listdir('.'):
    # Folders get a trailing slash; everything else is shown as a file.
    print(f"  📂 {entry}/" if os.path.isdir(entry) else f"  📄 {entry}")

# Gather data files that live directly in the working directory, grouped
# by extension in the order csv, parquet, feather.
local_data = []
for ext_glob in ('*.csv', '*.parquet', '*.feather'):
    local_data.extend(glob.glob(ext_glob))

if local_data:
    print("\n📊 Data files in current directory:")
    for data_name in local_data:
        megabytes = os.path.getsize(data_name) / (1024*1024)
        print(f"  📄 {data_name} ({megabytes:.1f} MB)")

In [None]:
# Report Kaggle-related environment variables and decide whether this
# notebook is running inside a Kaggle kernel.


def describe_env_var(name, environ=None):
    """Return a printable description of environment variable *name*.

    Args:
        name: The environment variable to describe.
        environ: Mapping to read from; defaults to ``os.environ``.

    Returns:
        ``'Not set'`` when the variable is absent, ``'Set (value hidden)'``
        when its name ends in ``_TOKEN``, otherwise the variable's value.
    """
    environ = os.environ if environ is None else environ
    if name not in environ:
        return 'Not set'
    # Variables named *_TOKEN are treated as credentials. Notebook output is
    # saved with the notebook, so only report that they are present.
    if name.endswith('_TOKEN'):
        return 'Set (value hidden)'
    return environ[name]


print("\n🌍 Relevant environment variables:")
env_vars = ['KAGGLE_KERNEL_RUN_TYPE', 'KAGGLE_DATA_PROXY_TOKEN', 'KAGGLE_USER_SECRETS_TOKEN']
for var in env_vars:
    print(f"  {var}: {describe_env_var(var)}")

# Any one of these signals is taken as evidence of a Kaggle kernel:
# the /kaggle root exists, the kernel run-type variable is set, or the
# working directory is under /kaggle.
is_kaggle = any([
    os.path.exists('/kaggle'),
    'KAGGLE_KERNEL_RUN_TYPE' in os.environ,
    os.getcwd().startswith('/kaggle')
])

print(f"\n🎯 Running in Kaggle environment: {is_kaggle}")

if not is_kaggle:
    print("\n⚠️ This appears to be running locally, not in Kaggle!")
    print("For local testing, you would need to download the dataset manually.")