# PLAID to Echo Protocol Generator (Modular Version)

**Date**: 2025-12-04  
**Version**: 2.0 (Modularized)

This notebook converts PLAID optimization results into Echo liquid handler protocols.

## Workflow
1. Configure experiment parameters
2. Load PLAID layouts
3. Load compound library
4. Calculate optimal stock concentrations
5. Generate source plates dynamically
6. Calculate compound volumes and backfill
7. Create Echo protocol
8. Generate visualizations and reports

## Key Features
- Uses modular `src/` functions (reusable!)
- Centralized configuration via `config.py`
- Automatic source plate generation
- Interactive visualizations
- Comprehensive PDF reports

## Setup

In [None]:
import os
import glob
import pandas as pd
import numpy as np

# Import our custom modules
from src import strip_zeros, strip_spaces, stockfinder
from src import generate_source_plates
from src import create_plate_visualization, generate_experiment_report
from config import ExperimentConfig

# Display settings: wide text columns, many columns, but only a short row preview
display_options = {
    "max_colwidth": 200,
    "display.max_columns": 100,
    "display.max_rows": 10,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Show the working directory the notebook is running from
working_dir = os.getcwd()
print(f"Current directory: {working_dir}")

## 1. Experiment Configuration

In [None]:
# Build the configuration object for this experiment
config = ExperimentConfig(exp_name='colo8-v1-VP-organoid-48h-P1')

# Print the configuration summary into the notebook output
config.print_summary()

# Make sure both output locations exist (no error if they are already there)
for required_dir in (config.output_dir, config.support_dir):
    os.makedirs(required_dir, exist_ok=True)

## 2. Load PLAID Files

In [None]:
# Load all PLAID layout files from the configured folder and combine them.
# Files are processed in sorted order so the combined table is reproducible.
filelist = sorted(glob.glob(os.path.join(config.plaid_folder, '*.csv')))
if not filelist:
    # Fail early with a clear message instead of a KeyError on 'plateID' below
    raise FileNotFoundError(
        f"No PLAID layout CSV files found in {config.plaid_folder!r}"
    )

plaid_frames = []
for file in filelist:
    # Read only the columns used downstream; strip spaces from cmpdnum
    df = pd.read_csv(
        file,
        index_col=False,
        converters={'cmpdnum': strip_spaces},
        usecols=['plateID', 'well', 'cmpdname', 'CONCuM', 'cmpdnum']
    )

    # Assign plate name from filename (literal replacement, not a regex)
    new_plate_name = os.path.splitext(os.path.basename(file))[0]
    df['plateID'] = df['plateID'].str.replace('plate_1', new_plate_name, regex=False)

    plaid_frames.append(df)

# Concatenate once: avoids re-copying the growing frame on every iteration and
# the pandas warning about concatenating with an empty DataFrame
df_combined = pd.concat(plaid_frames, ignore_index=True)

# Remove placeholder rows (plateID made of dashes); literal match, not a regex
df_combined = df_combined[~df_combined["plateID"].str.contains("----------", regex=False)]

print(f"✓ Loaded {len(df_combined)} wells from PLAID")
print(f"  Unique compounds: {df_combined['cmpdname'].nunique()}")
print(f"  Plates: {df_combined['plateID'].nunique()}")

## 3. Process Well Notations

In [None]:
# Add well volume (one configured value applied to every well)
df_combined['well_vol_uL'] = config.well_vol_uL

# Process well notations.
# 'well_w_zero' is created here so its column position stays stable; its
# final, normalized value is rebuilt further down.
df_combined['well_w_zero'] = df_combined['well']
df_combined['well'] = df_combined['well'].map(strip_zeros)

# Split each well into row letters and column number. `[A-Z]+` keeps
# multi-letter rows (e.g. 'AA' on 1536-well plates) intact; a single `[A-Z]`
# would silently collapse them onto row 'A'.
df_combined['well_letter'] = df_combined['well'].str.extract(r'([A-Z]+)', expand=False)
df_combined['well_number'] = df_combined['well'].str.extract(r'(\d+)', expand=False)

# Convert well number to int (drops any leading zeros), then format consistently:
#   well        -> 'A1'   (no padding)
#   well_w_zero -> 'A01'  (column zero-padded to two digits)
#   well_number -> '01'   (zero-padded string)
df_combined = df_combined.astype({'well_number': int})
column_text = df_combined['well_number'].astype(str)
column_padded = column_text.str.zfill(2)
df_combined['well'] = df_combined['well_letter'] + column_text
df_combined['well_w_zero'] = df_combined['well_letter'] + column_padded
df_combined['well_number'] = column_padded

print(f"✓ Processed well notations for {len(df_combined)} wells")

## 4. Load Compound Library

In [None]:
# Read the compound library CSV from the configured import directory
library_file = 'colo8-list'
library_path = f"{config.import_dir}/{library_file}.csv"
df_medchem = pd.read_csv(library_path, index_col=False)

# Report how many library rows were read and show the table
library_size = len(df_medchem)
print(f"✓ Loaded {library_size} compounds from library")
display(df_medchem)

## 5. Map Compound Names

In [None]:
# Lookup table: library compound code -> product name
code_to_product = df_medchem.set_index('cmpd-code')['ProductName']

# Translate codes to product names; names with no library entry stay unchanged
plaid_names = df_combined['cmpdname']
df_combined['cmpdname'] = plaid_names.map(code_to_product).fillna(plaid_names)

# Write the combined PLAID table to the support directory
plaid_combined_filename = 'colo8-plates'
plaid_combined_path = f"{config.support_dir}/{plaid_combined_filename}.csv"
df_combined.to_csv(plaid_combined_path, index=False)

print("✓ Mapped compound names")
print(f"✓ Saved combined PLAID: {plaid_combined_path}")

## 6. Prepare Library for Stockfinder

In [None]:
# Parse the numeric stock concentration and its unit out of the library's
# 'Concentration' text column.
# Raw string: '\d' in a normal string literal is an invalid escape sequence
# (SyntaxWarning on recent Python versions).
number_pattern = r'(\d+\.\d+|\d+)'

df_library = df_medchem.copy()
# Splitting on the captured number yields [text before, number, text after];
# the text after the first number (index 2) is kept as the unit, including any
# leading space (e.g. ' mM'), which the volume calculation compares against.
df_library['stock_unit'] = df_library['Concentration'].str.split(number_pattern, expand=True)[2]
df_library['max_stock'] = df_library['Concentration'].str.extract(number_pattern).astype('float')
df_library['solvent'] = df_library['Solvent']

# Attach the library information to every PLAID well (wells whose compound
# name has no library match keep NaN in the library columns)
df_forstockfinder = df_combined.merge(
    df_library[['max_stock', 'solvent', 'stock_unit', 'ProductName']],
    left_on='cmpdname',
    right_on='ProductName',
    how='left'
)

print(f"✓ Prepared {len(df_forstockfinder)} wells for stockfinder")

## 7. Calculate Optimal Stock Concentrations

In [None]:
# Work on a copy with numeric concentrations and a non-null unit string
df_with_stock = df_forstockfinder.copy()
df_with_stock = df_with_stock.astype({'CONCuM': 'float', 'max_stock': 'float'})
df_with_stock['stock_unit'] = df_with_stock['stock_unit'].fillna('')


def _stock_for_row(row):
    """Call stockfinder for one well using that row's values and the configured limits."""
    return stockfinder(
        row.CONCuM,
        row.max_stock,
        row.solvent,
        row.stock_unit,
        config.dmso_max_perc,
        config.h2o_max_perc,
        config.well_vol_uL
    )


# Each stockfinder result is expanded into the two stock columns
stock_columns = ['stock_conc_mM', 'availstocks_mM']
df_with_stock[stock_columns] = df_with_stock.apply(
    _stock_for_row,
    axis=1,
    result_type='expand'
)

# max_stock is no longer needed; build the "name[stock]" label used to match source wells
df_with_stock = df_with_stock.drop(columns='max_stock')
stock_label = "[" + df_with_stock['stock_conc_mM'].astype(str) + "]"
df_with_stock['cmpd_w_stock'] = df_with_stock['cmpdname'] + stock_label

# Report wells for which no stock concentration was returned
unassigned_mask = df_with_stock['stock_conc_mM'].isnull()
df_stock_conc_zero = df_with_stock[unassigned_mask]
if df_stock_conc_zero.empty:
    print(f"✓ Successfully assigned stock concentrations for all {len(df_with_stock)} wells")
else:
    print("⚠ Warning - following rows could not be assigned a Stock Concentration:")
    display(df_stock_conc_zero)

## 8. Calculate Compound Volumes

In [None]:
# Start from the stock table and make sure the arithmetic columns are floats
df_w_cmpd = df_with_stock.copy()
df_w_cmpd = df_w_cmpd.astype({
    'CONCuM': float,
    'well_vol_uL': float,
    'stock_conc_mM': float,
})

# Candidate volumes (nL) for the two calculation modes
# Dilution from a concentrated stock: target conc * well volume / stock conc
dilution_vol_nL = (
    df_w_cmpd['CONCuM'] * df_w_cmpd['well_vol_uL']
) / df_w_cmpd['stock_conc_mM']
# Percentage of the well volume (CONCuM holds a percent value for ' %' stocks)
percent_vol_nL = (
    df_w_cmpd['well_vol_uL'] * 1000
) * (df_w_cmpd['CONCuM'] / 100)

# Pick the formula by stock unit; the unit strings carry a leading space.
# Rows with any other unit are left without a volume (NaN).
volume_by_unit = (
    (' mM', dilution_vol_nL),
    (' mg/mL', dilution_vol_nL),
    (' %', percent_vol_nL),
)
for unit_label, unit_volume_nL in volume_by_unit:
    df_w_cmpd.loc[df_w_cmpd['stock_unit'] == unit_label, 'CompVol_nL'] = unit_volume_nL

# Same volume expressed in µL
df_w_cmpd['CompVol_uL'] = df_w_cmpd['CompVol_nL'] / 1000

print(f"✓ Calculated compound volumes for {len(df_w_cmpd)} wells")
print("  Sample of calculated volumes:")
preview_columns = ['well', 'cmpdname', 'CONCuM', 'stock_conc_mM', 'CompVol_nL']
display(df_w_cmpd[preview_columns].head(10))

## 9. Generate Source Plates Dynamically

In [None]:
# The experiment prefix is the part of exp_name before the first '-'
# (e.g., 'colo8-v1-VP-organoid-48h-P1' -> 'colo8')
exp_prefix = config.exp_name.partition('-')[0]

# Build the source plate(s) from the per-well stock/volume table
source_plates = generate_source_plates(
    exp_prefix=exp_prefix,
    support_dir=config.support_dir,
    df_w_cmpd=df_w_cmpd,
)

## 10. Load Source Plate(s)

In [None]:
# Read the DMSO source plate CSV from the support directory
# (comma is the default separator of read_csv)
source_file = f"{config.support_dir}/{exp_prefix}-SOURCE-dmso.csv"
df_source = pd.read_csv(source_file)

print(f"✓ Loaded source plate: {source_file}")
print(f"  Total wells in source plate: {len(df_source)}")
print(f"  Compounds: {df_source['Compound'].nunique()}")

# Preview the first source wells
source_preview_columns = ['well', 'Compound', 'CONCmM']
display(df_source[source_preview_columns].head(10))

## 11. Calculate DMSO Backfill

In [None]:
# Calculate maximum DMSO allowed per well; every DMSO well is topped up to this
maxDMSO = (config.dmso_max_perc / 100) * config.well_vol_uL  # in µL
target_vol_DMSO_per_well = maxDMSO

print(f"Max allowed DMSO in well: {maxDMSO} µL ({maxDMSO * 1000} nL)")

# Only DMSO transfers count against the DMSO budget, and only wells that
# receive at least one DMSO transfer are backfilled.
dmso_transfers = df_w_cmpd[df_w_cmpd['solvent'] == 'dmso']

# Sum DMSO volume per destination well. Grouping by plate AND well keeps the
# same well position on different plates separate and yields exactly one row
# per destination well (no duplicated backfill for multi-compound wells).
df_backfill = (
    dmso_transfers
    .groupby(['plateID', 'well'], as_index=False)
    .agg({'CompVol_uL': 'sum'})
)
df_backfill['solvent'] = 'dmso'

# Calculate backfill volume
df_backfill['backfill_vol_uL'] = target_vol_DMSO_per_well - df_backfill['CompVol_uL']

# Check for negative backfill (well already exceeds the DMSO limit)
df_backfill_negative = df_backfill[df_backfill['backfill_vol_uL'] < 0]
if df_backfill_negative.empty:
    print("✓ No negative backfill found")
else:
    print("⚠ WARNING! Negative backfill results found - setting to 0")
    display(df_backfill_negative)

# Keep only wells that actually need a top-up; wells with zero or negative
# backfill get no backfill transfer. `.copy()` makes the result an
# independent frame so the column edits below do not act on a slice.
df_backfill = df_backfill[df_backfill['backfill_vol_uL'] > 0].copy()

# Prepare backfill rows in the same shape as the compound transfers
df_backfill['CompVol_nL'] = df_backfill['backfill_vol_uL'] * 1000  # Convert to nL
df_backfill['cmpdname'] = 'dmso'
df_backfill['cmpd_w_stock'] = 'dmso[100.0]'
# plateID comes from the grouped data, so each backfill row targets the same
# destination plate as the compound transfers of that well.
df_backfill = df_backfill[
    ['well', 'solvent', 'CompVol_nL', 'cmpdname', 'cmpd_w_stock', 'plateID']
]

# Combine compound transfers with backfill
df_w_cmpd_out = pd.concat([df_w_cmpd, df_backfill], ignore_index=True)

print(f"\n✓ Generated protocol with {len(df_w_cmpd_out)} transfers")
print(f"  Compounds: {len(df_w_cmpd)} | DMSO backfill: {len(df_backfill)}")

## 12. Create Echo Protocol

In [None]:
# Column order of the Echo protocol file
echo_columns = [
    'Compound', 'Source_plate', 'Source well', 'Destination plate',
    'destination well', 'Transfer Volume', 'Source Plate Type',
    'Destination Plate Type', 'Destination Well X Offset',
    'Destination Well Y Offset'
]

# One protocol row per transfer, sharing the index of df_w_cmpd_out
print_echo = pd.DataFrame({
    'Compound': df_w_cmpd_out['cmpd_w_stock'],
    'Destination plate': df_w_cmpd_out['plateID'],
    'destination well': df_w_cmpd_out['well'],
    'Transfer Volume': df_w_cmpd_out['CompVol_nL'],
})

# Constant plate settings from the configuration
print_echo['Source Plate Type'] = config.source_plate_type
print_echo['Destination Plate Type'] = config.dest_plate_type
print_echo['Destination Well X Offset'] = config.dest_well_x_offset
print_echo['Destination Well Y Offset'] = config.dest_well_y_offset

# Map source plate locations.
# Build a lookup keyed by "name[stock]" once, keeping the FIRST source row for
# each key, instead of scanning the whole source table for every transfer.
source_lookup = (
    df_source
    .dropna(subset=['cmpd_w_stock'])
    .drop_duplicates(subset='cmpd_w_stock', keep='first')
    .set_index('cmpd_w_stock')
)
print_echo['Source_plate'] = print_echo['Compound'].map(source_lookup['sourceID'])
print_echo['Source well'] = print_echo['Compound'].map(source_lookup['well_source'])

# Transfers without a source well keep empty source fields; report each
# affected compound once
has_source = print_echo['Compound'].isin(source_lookup.index)
for cmpd in print_echo.loc[~has_source, 'Compound'].unique():
    print(f'⚠ No source found for compound: {cmpd}')

print_echo = print_echo[echo_columns]

print(f"\n✓ Created Echo protocol with {len(print_echo)} transfers")
display(print_echo.head(10))

## 13. Save Echo Protocol

In [None]:
# Write the Echo protocol as CSV (without the index) into the output directory
print_echo_filename = f'print_echo_{config.exp_name}'
echo_csv_path = f"{config.output_dir}/{print_echo_filename}.csv"
print_echo.to_csv(echo_csv_path, index=False)

print(f"✓ Saved Echo protocol: {echo_csv_path}")

## 14. Generate Visualizations

In [None]:
# Arguments for the plate visualization
visualization_args = {
    'df_w_cmpd': df_w_cmpd,
    'output_dir': config.output_dir,
    'exp_name': config.exp_name,
    'show': False,  # Set to True to display in notebook
}

# Create interactive plate visualization
create_plate_visualization(**visualization_args)

## 15. Generate Experiment Report

In [None]:
# Inputs for the experiment report: the transfer tables, the source plate,
# the key experiment settings and the locations of the generated files
report_args = {
    'df_w_cmpd': df_w_cmpd,
    'df_backfill': df_backfill,
    'print_echo': print_echo,
    'df_source': df_source,
    'exp_name': config.exp_name,
    'well_vol_uL': config.well_vol_uL,
    'dmso_max_perc': config.dmso_max_perc,
    'output_dir': config.output_dir,
    'support_dir': config.support_dir,
    'plaid_combined_filename': plaid_combined_filename,
    'save_pdf': True,
}

# Generate comprehensive experiment report
report_text = generate_experiment_report(**report_args)

## Summary Statistics

In [None]:
# Count how many transfer rows each compound name has
rows_by_compound = df_w_cmpd.groupby(['cmpdname'])
number_of_replicates = rows_by_compound['cmpdname'].count()

print("Number of replicates per compound:")
# Temporarily lift the row limit so the full list is shown
with pd.option_context('display.max_rows', None):
    display(number_of_replicates)

---

## ✓ Pipeline Complete!

**Output Files:**
- Echo protocol: `echo-protocols/print_echo_{exp_name}.csv`
- Plate visualization: `echo-protocols/{exp_name}.html`
- Experiment report: `echo-protocols/report_{exp_name}.pdf`
- Source plate layout: `support-files/{exp_prefix}-SOURCE-dmso.csv`
- Combined PLAID: `support-files/{plaid_combined_filename}.csv`

**Ready for the liquid handler!**