## Spike sorting: loop through all data

This demo shows how to loop through every entry under the `results/` directory and apply the spike-sorting pipeline (dimensionality reduction, clustering, and plotting) to each one.


In [1]:
import grnsuite.spike_detection as spikes
import grnsuite.spike_sorting as sorting
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import numpy as np
import os
import glob

# Select the "TkAgg" backend explicitly. Presumably this makes figures open in
# separate Tk windows instead of rendering inline in the notebook -- TODO confirm
# that this is intended for batch processing.
matplotlib.use("TkAgg")  # Set the backend before creating any figures

In [3]:
def process_file(results_dir):
    """Sort the detected spikes of one results directory and report unit counts.

    Reads ``waveforms.csv``, ``detected_spikes.csv`` and ``processed_data.csv``
    from ``results_dir``, reduces the waveforms to three components, estimates
    the number of clusters, clusters the spikes, and writes
    ``sorted_spikes.csv`` (the detected spikes plus a ``unit`` column) back
    into the same directory. Two plotting helpers are then called with
    ``save_path`` pointing at ``clustering_summary.png`` and
    ``sorted_spikes.png`` in that directory. Finally the number of spikes
    assigned to each unit is printed.

    Parameters
    ----------
    results_dir : str
        Directory containing the three input CSV files; outputs are written
        to the same directory.

    Notes
    -----
    Errors from pandas or ``grnsuite`` (missing files, missing attributes, ...)
    are not caught here; they propagate to the caller.
    """
    print(f"\nProcessing {os.path.basename(results_dir)}...")

    # Load data
    waveforms = pd.read_csv(os.path.join(results_dir, 'waveforms.csv')).values
    spikes_df = pd.read_csv(os.path.join(results_dir, 'detected_spikes.csv'))
    processed_data = pd.read_csv(os.path.join(results_dir, 'processed_data.csv'))

    # Reduce dimensions and cluster
    reduced_waveforms = sorting.reduce_dimensions(waveforms, n_components=3)
    n_clusters = sorting.estimate_clusters(reduced_waveforms)
    labels = sorting.cluster_spikes(reduced_waveforms, n_clusters=n_clusters)

    # Create unit labels (cluster label k becomes "unit_{k + 1}")
    unit_labels = [f'unit_{label + 1}' for label in labels]
    spikes_df['unit'] = unit_labels

    # Save sorted spikes before plotting so the table exists even if a plot fails
    spikes_df.to_csv(os.path.join(results_dir, 'sorted_spikes.csv'), index=False)

    # Create figure with 3 panels showing clustering.
    # NOTE(review): the output recorded in this notebook shows
    # "module 'grnsuite.spike_sorting' has no attribute 'plot_clustering_summary'";
    # confirm the installed grnsuite version provides this function.
    sorting.plot_clustering_summary(
        waveforms,
        reduced_waveforms,
        labels,
        save_path=os.path.join(results_dir, 'clustering_summary.png')
    )

    # Create second figure for time series
    sorting.plot_sorted_spikes(
        processed_data,
        spikes_df,
        save_path=os.path.join(results_dir, 'sorted_spikes.png')
    )

    # Print statistics. Iterate over the distinct cluster labels in numeric
    # order (sorting the unit names as strings would put unit_10 before unit_2).
    print("\nUnit Statistics:")
    for label in sorted(set(labels)):
        unit = f'unit_{label + 1}'
        count = int((spikes_df['unit'] == unit).sum())
        print(f"{unit}: {count} spikes")

# Get all result directories and process each one.
# glob matches plain files as well as directories and returns them in
# arbitrary order, so keep only directories and sort them for a
# reproducible processing order.
base_dir = "results"
result_dirs = sorted(
    path
    for path in glob.glob(os.path.join(base_dir, "*"))
    if os.path.isdir(path)
)

for results_dir in result_dirs:
    try:
        process_file(results_dir)
    except Exception as e:
        # Best effort: report the failure and move on to the next directory
        print(f"Error processing {results_dir}: {e}")


Processing 20231103-M04-sucr-100-Gal-A1-02...
Explained variance with 3 components: 98.37%
Clusters: 2, Silhouette Score: 0.245
Clusters: 3, Silhouette Score: 0.594
Clusters: 4, Silhouette Score: 0.263
Clusters: 5, Silhouette Score: 0.388
Clusters: 6, Silhouette Score: 0.266
Clusters: 7, Silhouette Score: 0.255
Clusters: 8, Silhouette Score: 0.257
Clusters: 9, Silhouette Score: 0.256
Clusters: 10, Silhouette Score: 0.266

Optimal number of clusters: 3
Cluster 0 size: 179
Cluster 1 size: 16
Cluster 2 size: 13
Error processing results\20231103-M04-sucr-100-Gal-A1-02: module 'grnsuite.spike_sorting' has no attribute 'plot_clustering_summary'

Processing 20231103-M04-sucr-100-Gal-A2-02...
Explained variance with 3 components: 98.66%
Clusters: 2, Silhouette Score: 0.401
Clusters: 3, Silhouette Score: 0.396
Clusters: 4, Silhouette Score: 0.286
Clusters: 5, Silhouette Score: 0.292
Clusters: 6, Silhouette Score: 0.277
Clusters: 7, Silhouette Score: 0.268
Clusters: 8, Silhouette Score: 0.237
C