In [None]:
# ====================================================================
# CREATE AND SAVE TRAIN/TEST SPLIT - RUN THIS ONCE
# ====================================================================

import os
import numpy as np
from datetime import datetime

# Mount Google Drive.
# The import is separated from the mount so that "not running in Colab"
# and "mount failed" are reported as the two different situations they are.
try:
    from google.colab import drive
except ImportError:
    # google.colab cannot be imported here, so there is nothing to mount.
    print("⚠️ Not in Colab or Drive already mounted")
else:
    try:
        # Skip the mount call when the mount point already exists.
        if not os.path.exists('/content/drive'):
            drive.mount('/content/drive')
        print("✅ Google Drive mounted")
    except Exception as exc:
        # Best-effort: report the real reason instead of hiding it, but keep
        # the notebook running. KeyboardInterrupt/SystemExit are deliberately
        # NOT caught so the user can abort the mount step.
        print(f"⚠️ Could not mount Google Drive: {exc}")

# Directory on Google Drive where the split files and metadata are written
GDRIVE_PROJECT_PATH = '/content/drive/MyDrive/segmentation-project'

def save_train_test_split(data_file_path, test_size=0.3, random_state=42, output_dir=None):
    """Create a reproducible train/test split of a file list and save it to disk.

    The input file is read as one path per line (blank lines are ignored) and
    every occurrence of ``neurite-oasis.v1.0/`` in a path is rewritten to
    ``segmentation_data/``. The paths are shuffled with a generator seeded by
    ``random_state`` and the first ``n_test`` shuffled entries become the test
    set; the remainder is the training set.

    Three files are written into the output directory:
    ``train_split.txt``, ``test_split.txt`` and ``split_metadata.txt``.

    Args:
        data_file_path: Path to the text file listing the dataset files.
        test_size: Requested test fraction. The resulting count is clamped to
            the range 5..20 files, and never exceeds the number of available
            files, so the effective fraction can differ from this value.
        random_state: Seed for the shuffle. The same seed yields the same
            split as seeding NumPy's legacy global generator with it.
        output_dir: Directory to write the split files to. Defaults to
            ``GDRIVE_PROJECT_PATH`` when ``None``.

    Returns:
        ``(train_files, test_files)`` as NumPy arrays of paths, or
        ``(None, None)`` if ``data_file_path`` does not exist.
    """
    if output_dir is None:
        output_dir = GDRIVE_PROJECT_PATH

    # Create project directory
    os.makedirs(output_dir, exist_ok=True)

    # Load all files; only the read itself is expected to raise FileNotFoundError
    try:
        with open(data_file_path, 'r') as f:
            original_files = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"❌ {data_file_path} not found!")
        return None, None

    # Update paths from old format to new format
    all_files = [path.replace('neurite-oasis.v1.0/', 'segmentation_data/') for path in original_files]

    print(f"Loaded {len(all_files)} files from {data_file_path}")
    print("📝 Updated paths: neurite-oasis.v1.0/ → segmentation_data/")

    # Shuffle with a private generator: produces the same permutation as
    # np.random.seed(random_state) + np.random.permutation(...) but does not
    # reseed the process-wide NumPy RNG as a hidden side effect.
    rng = np.random.RandomState(random_state)
    shuffled_files = rng.permutation(all_files)

    # Calculate split: requested fraction, clamped to 5..20 test files ...
    n_total = len(shuffled_files)
    n_test = min(20, max(5, int(n_total * test_size)))
    # ... and never more than what is actually available, so the counts
    # reported below (and n_train) cannot become inconsistent or negative.
    n_test = min(n_test, n_total)
    n_train = n_total - n_test

    if n_train == 0:
        print(f"⚠️ Only {n_total} files available: training split is empty")

    # Split the data
    test_files = shuffled_files[:n_test]
    train_files = shuffled_files[n_test:]

    print(f"📊 Dataset split: {n_total} total → {n_train} train, {n_test} test")

    # Save the two splits, one path per line
    train_split_path = os.path.join(output_dir, 'train_split.txt')
    test_split_path = os.path.join(output_dir, 'test_split.txt')

    with open(train_split_path, 'w') as f:
        f.write('\n'.join(train_files))

    with open(test_split_path, 'w') as f:
        f.write('\n'.join(test_files))

    # Save metadata describing how the split was produced
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    metadata_path = os.path.join(output_dir, 'split_metadata.txt')
    with open(metadata_path, 'w') as f:
        f.write(f"Split created: {timestamp}\n")
        f.write(f"Random seed: {random_state}\n")
        f.write(f"Test size: {test_size}\n")
        f.write(f"Total files: {n_total}\n")
        f.write(f"Train files: {n_train}\n")
        f.write(f"Test files: {n_test}\n")
        f.write(f"Original data file: {data_file_path}\n")

    print(f"✅ Train split saved: {train_split_path}")
    print(f"✅ Test split saved: {test_split_path}")
    print(f"✅ Metadata saved: {metadata_path}")

    return train_files, test_files

✅ Google Drive mounted


In [None]:
# ====================================================================
# RUN THE SPLIT - MODIFY THESE PATHS AS NEEDED
# ====================================================================

# Text file listing the dataset files to split
data_file_path = 'train_npy_copy.txt'

# Build the split and write it to disk
print("🔄 Creating train/test split...")
train_files, test_files = save_train_test_split(data_file_path, test_size=0.3, random_state=42)

# A None result means the split could not be created
if train_files is None:
    print("❌ Split creation failed!")
else:
    # Success report, emitted as one newline-joined message
    print("\n".join((
        "🎉 SPLIT CREATED AND SAVED SUCCESSFULLY!",
        f"📁 Files saved in: {GDRIVE_PROJECT_PATH}",
        "🔒 Your test set is now locked for final evaluation only!",
        "",
        "Next steps:",
        "1. Use train_split.txt for training your model",
        "2. Use test_split.txt ONLY for final evaluation",
    )))

🔄 Creating train/test split...
Loaded 414 files from train_npy_copy.txt
📝 Updated paths: neurite-oasis.v1.0/ → segmentation_data/
📊 Dataset split: 414 total → 394 train, 20 test
✅ Train split saved: /content/drive/MyDrive/segmentation-project/train_split.txt
✅ Test split saved: /content/drive/MyDrive/segmentation-project/test_split.txt
✅ Metadata saved: /content/drive/MyDrive/segmentation-project/split_metadata.txt
🎉 SPLIT CREATED AND SAVED SUCCESSFULLY!
📁 Files saved in: /content/drive/MyDrive/segmentation-project
🔒 Your test set is now locked for final evaluation only!

Next steps:
1. Use train_split.txt for training your model
2. Use test_split.txt ONLY for final evaluation
