In [None]:
# Z-score transformation of fluorescence traces.
# Z-score is calculated based on median and median absolute deviation (MAD)
# Saves Z-score transformed data to a new CSV file

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os

# Path of the input CSV holding the time column and the 'cell-*' traces.
filename = "path_to_your_file.csv"  # Change to your actual file path

# Load the CSV. On a known failure, report it and stop the run.
# SystemExit is raised directly instead of calling exit(): exit() is an
# interactive helper injected by the `site` module (it may be missing in
# some interpreters and shuts down the kernel in Jupyter), and it would
# report success (status 0) even though loading failed.
try:
    data = pd.read_csv(filename)
except FileNotFoundError:
    print(f"Error: File {filename} not found.")
    raise SystemExit(1) from None
except pd.errors.EmptyDataError:
    print(f"Error: File {filename} is empty.")
    raise SystemExit(1) from None
except pd.errors.ParserError:
    print(f"Error: File {filename} is not in a valid CSV format.")
    raise SystemExit(1) from None

# Quick visual sanity check of what was loaded
print("First few rows of the data:")
print(data.head())

# Every column whose header begins with 'cell-' is treated as a trace
cell_columns = [name for name in data.columns if name.startswith('cell-')]
print("Cell columns identified:", cell_columns)

if not cell_columns:
    raise ValueError("No cell columns found. Columns should start with 'cell-'.")

# The time axis is taken from the first column of the file
time = data.iloc[:, 0]

# Cleaning pipeline for the traces:
#   1. coerce every value to a number (unparseable entries become NaN)
#   2. turn +/-inf into NaN
#   3. fill gaps forward, then backward
F_clean = (
    data[cell_columns]
    .apply(pd.to_numeric, errors='coerce')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
    .bfill()
)

# Rows where no cell has a value even after filling are removed,
# and the same rows are removed from the time axis.
all_nan_rows = F_clean.isna().all(axis=1)
if all_nan_rows.any():
    drop_count = int(all_nan_rows.sum())
    print(f"Warning: dropping {drop_count} rows with all-NaN cell values after cleaning.")
    F_clean = F_clean.loc[~all_nan_rows]
    time = time.loc[F_clean.index]

# Renumber both objects from 0 so they stay positionally aligned
F_clean, time = F_clean.reset_index(drop=True), time.reset_index(drop=True)

# Function to calculate median-based Z-score

def median_zscore(F_cell, eps=1e-9):
    """
    Calculate the median-based Z-score for a given fluorescence trace.

    The trace is centred on its median and divided by its median absolute
    deviation (MAD). Non-finite samples (NaN, +/-inf) are ignored when
    computing both statistics and come back as NaN in the result.

    Parameters
    ----------
    F_cell : array-like
        Fluorescence samples of one cell (list, NumPy array, pandas Series).
        The input is never modified.
    eps : float, optional
        Lower bound applied to the MAD so that a flat trace does not cause
        a division by zero.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as the input holding
        ``(F - median) / max(MAD, eps)``. If the trace contains no finite
        sample at all, every element is NaN.
    """
    # np.array (unlike np.asarray) always copies, so the masking below can
    # neither overwrite the caller's data nor fail on a read-only buffer.
    arr = np.array(F_cell, dtype=float)
    finite = np.isfinite(arr)
    arr[~finite] = np.nan

    # Nothing to normalise: return the all-NaN (or empty) array directly
    # instead of letting nanmedian emit an "All-NaN slice" RuntimeWarning.
    if not finite.any():
        return arr

    median_F = np.nanmedian(arr)
    mad_F = np.nanmedian(np.abs(arr - median_F))

    # Floor the MAD so constant traces map to 0 instead of dividing by zero.
    if not np.isfinite(mad_F) or mad_F < eps:
        mad_F = eps

    return (arr - median_F) / mad_F

# Function to plot the Z-score signal

def plot_zscore_signal(time, z_score, cell_name, specific_time=150):
    """
    Plot the Z-score signal for a given cell.

    Draws the trace as a red line and, optionally, a vertical red dotted
    marker at ``specific_time`` spanning the finite range of the trace.
    The figure is displayed with ``fig.show()``; nothing is returned.

    Parameters
    ----------
    time : array-like
        X-axis values passed to the plot as-is.
    z_score : array-like
        Z-score values to plot. If none of them is finite, a message is
        printed and no figure is produced.
    cell_name : str
        Label used in the trace name, the title and the skip message.
    specific_time : float or None, optional
        X position of the vertical marker. Defaults to 150, the value that
        used to be hard-coded. Pass ``None`` to draw no marker.
    """
    # Nothing sensible to draw when every sample is NaN/inf.
    if not np.any(np.isfinite(z_score)):
        print(f"Skipping plot for {cell_name}: all values are NaN after cleaning.")
        return

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=time,
        y=z_score,
        mode='lines',
        name=f'Z-score ({cell_name})',
        line=dict(color='red', width=1)
    ))

    if specific_time is not None:
        # The marker spans exactly the min/max of the plotted values
        # (NaN samples ignored).
        y_min = float(np.nanmin(z_score))
        y_max = float(np.nanmax(z_score))

        # Add a vertical red dotted line at the specific time
        fig.add_shape(
            type="line",
            x0=specific_time, x1=specific_time,
            y0=y_min, y1=y_max,
            line=dict(color="red", width=2, dash="dot")
        )

    # Update layout for better visualization
    fig.update_layout(
        title=f"Z-score Signal for {cell_name}",
        xaxis_title='Time (s)',
        yaxis_title='Median-based Z-score',
        legend_title='Signal Type'
    )
    fig.show()

# Z-scored trace of every cell, keyed by its column name
Z_dict = {}

# For each cell: normalise the cleaned trace, show it, then keep it
for cell in cell_columns:
    Z = median_zscore(F_clean[cell])
    plot_zscore_signal(time, Z, cell)
    Z_dict[cell] = Z

# Assemble the output table with the time axis as the first column
df_final = pd.DataFrame(Z_dict)
df_final.insert(loc=0, column='time', value=time)

# Destination of the Z-score table
output_dir = "path_to_your_directory"  # Change to your actual directory path
output_filename = "z-score_file.csv"  # Change to your file name
output_filepath = os.path.join(output_dir, output_filename)

# to_csv does not create missing folders, so make sure the target directory
# exists first. An empty output_dir means "current directory" and needs no
# creation (os.makedirs('') would raise).
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Save the Z-score transformed data to a CSV file (without the row index)
df_final.to_csv(output_filepath, index=False)
print(f"Z-score data saved to {output_filepath}")

First few rows of the data:
       time   cell-00   cell-01   cell-02   cell-04   cell-05   cell-06  \
0  0.000000  6.858635  36.35141  31.75613  58.71564  15.51496  31.14989   
1  0.049960  5.964797  33.24638  30.78296  56.43075  12.73443  31.65451   
2  0.099920  8.158339  34.86550  30.40846  57.77472  13.06069  34.52333   
3  0.149880  4.340286  35.08062  29.46883  55.83646  12.16381  32.55184   
4  0.199841  5.791439  39.21795  30.30135  57.81745  12.76484  33.10281   

    cell-10   cell-12   cell-13   cell-14    cell-15   cell-16   cell-17  
0  61.40060  14.16967  19.48089  34.54708  10.466510  69.89262  34.89869  
1  58.28848  10.89028  18.18399  34.38356   9.435056  66.10366  35.44971  
2  55.15540  10.66403  19.05337  35.45826   9.706398  65.37921  33.79178  
3  53.16874  10.47113  15.40462  33.61523   9.256608  65.78354  34.11390  
4  51.57838  11.47072  15.50725  34.12380  10.801440  64.16599  33.09161  
Cell columns identified: ['cell-00', 'cell-01', 'cell-02', 'cell-04', '

Z-score data saved to C:/Users/Labo/Desktop/199-trial2-control-zscore_cells.csv
