# In [None]:
import os
import pandas as pd
from main_pipeline import process_single_sample

# Batch feature extraction.
#
# Runs `process_single_sample` once per sample ID, collects every non-empty
# result, and writes the collected records to a single CSV in `output_dir`.
# The run is best-effort: a sample that fails is reported and skipped, and
# the IDs of skipped samples are kept so the run can be audited afterwards.

# Input locations handed to `process_single_sample`. The paths below are
# placeholders and must be replaced before running. Expected file naming
# convention for each directory is given next to its entry.
file_addresses = {
    # Address where raw cell images are stored
    'raw_dir': 'path/to/raw_cell_images',  # Images must be named: raw-<sample_id>.npy

    # Address where Dino model predictions are stored
    'dino_dir': 'path/to/dino_predictions',  # Files must be named: probs-<sample_id>.npy

    # Address where SimCLR model predictions are stored
    'simclr_dir': 'path/to/simclr_predictions',  # Files must be named: probs-<sample_id>.npy

    # Address where Z-disc CSVs and contour data are stored
    'zdiscs_dir': 'path/to/zdisc_data',  # Z-disc CSVs: zdiscs-<sample_id>.csv
                                        # Contours: contours-<sample_id>.npy

    # Address where the cell masks are stored
    'mask_dir': 'path/to/cell_masks',  # Mask files must be named: mask-<sample_id>.npy
}

# Output directory; it is also passed to `process_single_sample` and receives
# the final features CSV.
output_dir = "sarcgraph-extracted-features"
os.makedirs(output_dir, exist_ok=True)

# Sample IDs to process. Adjust based on your dataset size.
sample_ids = range(100)

# One entry per successfully processed sample.
all_features = []

# Bookkeeping for samples that contributed nothing to the output.
failed_samples = []  # processing raised an exception
empty_samples = []   # processing returned a falsy result

for sample_id in sample_ids:
    try:
        features = process_single_sample(sample_id, file_addresses, output_dir=output_dir)
        # The truthiness test stays inside the try so that a result with no
        # well-defined truth value is reported as a skipped sample instead of
        # aborting the whole batch.
        has_features = bool(features)
    except Exception as e:
        # Deliberately broad: one bad sample must not stop the batch.
        print(f"Skipping sample {sample_id}: {e}")
        failed_samples.append(sample_id)
        continue

    if has_features:
        all_features.append(features)
    else:
        # Previously dropped without any trace; now reported and recorded.
        print(f"Skipping sample {sample_id}: no features returned")
        empty_samples.append(sample_id)

# Convert collected features into a Pandas DataFrame and save it as CSV.
# The file is written even when nothing was extracted, so downstream steps
# that expect it to exist keep working.
features_df = pd.DataFrame(all_features)
output_csv = os.path.join(output_dir, "features.csv")
features_df.to_csv(output_csv, index=False)

# Summarize the run so a fully failed batch is not mistaken for a success.
n_attempted = len(all_features) + len(failed_samples) + len(empty_samples)
print(
    f"Processed {len(all_features)} of {n_attempted} samples "
    f"({len(failed_samples)} failed, {len(empty_samples)} returned no features)."
)
if not all_features:
    print(
        "Warning: no features were extracted; the CSV is empty. "
        "Check the paths in file_addresses."
    )

print(f"Feature extraction completed. Features saved to: {output_csv}")