# Pipeline d'Automatisation du Gating FACS
## Tutoriel Jupyter Notebook

Ce notebook démontre l'utilisation du pipeline pour analyser des données de cytométrie en flux.

**Auteur**: Pipeline FACS Autogating  
**Date**: Décembre 2024  
**Version**: 1.0

---
## 1. Import des modules

In [None]:
# Imports standards
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Pipeline FACS
from facs_autogating import FCSGatingPipeline, example_standard_workflow
from facs_workflows_advanced import BatchFCSAnalysis, AdvancedGatingStrategies

# Plotting defaults applied to every figure created later in the notebook.
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['figure.dpi'] = 100
sns.set_style('whitegrid')

# Confirmation message shown once all imports and settings have succeeded.
print("✅ Modules importés avec succès")

---
## 2. Exemple 1: Analyse simple d'un fichier FCS

### 2.1 Chargement du fichier

In [None]:
# Path of the FCS file to analyse -- adjust to your own data.
fcs_path = 'data/sample_pbmc.fcs'

# Build the pipeline, passing compensate=True and transform='logicle'.
pipeline = FCSGatingPipeline(
    fcs_path,
    compensate=True,
    transform='logicle'
)

# Summary of what was loaded: file name, event count and channel count.
print(f"📊 Fichier chargé: {Path(fcs_path).name}")
print(f"   • Événements: {len(pipeline.data):,}")
print(f"   • Canaux: {len(pipeline.channels)}")

# Numbered list (starting at 1) of every channel exposed by the pipeline.
print("\n📋 Canaux disponibles:")
for i, channel in enumerate(pipeline.channels, 1):
    print(f"   {i:2d}. {channel}")

### 2.2 Visualisation initiale FSC/SSC

In [None]:
# FSC-A vs SSC-A scatter of every loaded event, drawn before any gate exists.
fig, ax = plt.subplots(figsize=(10, 8))

fsc_values = pipeline.data['FSC-A']
ssc_values = pipeline.data['SSC-A']
ax.scatter(
    fsc_values,
    ssc_values,
    s=1,
    c='blue',
    alpha=0.3,
    rasterized=True,
)

# Axis labelling and a light grid.
ax.set_title('Distribution FSC/SSC (avant gating)', fontsize=14, fontweight='bold')
ax.set_ylabel('SSC-A', fontsize=12)
ax.set_xlabel('FSC-A', fontsize=12)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### 2.3 Gating des singlets

In [None]:
# Compute the singlet gate from FSC-A vs FSC-H. The returned object is used
# below as a boolean row mask over pipeline.data (.loc selection, .sum()).
singlets_gate = pipeline.gate_singlets_fsc_ssc(
    fsc_channel='FSC-A',
    fsc_h_channel='FSC-H',
    method='linear_fit',
    threshold=2.5
)

# Two panels: every event (left) and singlets highlighted over them (right).
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: all events, no gate applied.
axes[0].scatter(pipeline.data['FSC-A'], pipeline.data['FSC-H'],
                s=1, c='lightgray', alpha=0.5, rasterized=True)
axes[0].set_title('Tous les événements')

# Right panel: all events in gray as background, gated singlets in red on top.
axes[1].scatter(pipeline.data['FSC-A'], pipeline.data['FSC-H'],
                s=1, c='lightgray', alpha=0.3, rasterized=True, label='Exclus')
axes[1].scatter(pipeline.data.loc[singlets_gate, 'FSC-A'],
                pipeline.data.loc[singlets_gate, 'FSC-H'],
                s=1, c='red', alpha=0.5, rasterized=True, label='Singlets')
# Title reports "events kept / total events".
axes[1].set_title(f'Singlets: {singlets_gate.sum():,} / {len(singlets_gate):,}')
axes[1].legend()

# Decoration shared by both panels.
for panel in axes:
    panel.set_xlabel('FSC-A')
    panel.set_ylabel('FSC-H')
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### 2.4 Suppression des débris

In [None]:
# Debris-removal gate on FSC-A/SSC-A, computed within the 'singlets' parent
# gate. The result is used below as a boolean row mask over pipeline.data.
viable_gate = pipeline.gate_debris_removal(
    fsc_channel='FSC-A',
    ssc_channel='SSC-A',
    percentile_low=2,
    parent_gate='singlets'
)

fig, ax = plt.subplots(figsize=(10, 8))

# Background layer: every event, in gray.
ax.scatter(pipeline.data['FSC-A'], pipeline.data['SSC-A'],
           s=1, c='lightgray', alpha=0.3, rasterized=True, label='Exclus')

# Foreground layer: events that pass the gate, in green.
ax.scatter(pipeline.data.loc[viable_gate, 'FSC-A'],
           pipeline.data.loc[viable_gate, 'SSC-A'],
           s=1, c='green', alpha=0.5, rasterized=True, label='Cellules viables')

ax.set_xlabel('FSC-A', fontsize=12)
ax.set_ylabel('SSC-A', fontsize=12)
# Title reports the number of events kept by the gate.
ax.set_title(f'Cellules viables: {viable_gate.sum():,} événements', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### 2.5 Gating sur marqueur (ex: CD3)

In [None]:
import re

# Find the CD3 channel (case-insensitive). The negative lookahead rejects a
# following digit: a plain 'CD3' substring test would also select CD33, CD34
# or CD38, which are different markers.
cd3_pattern = re.compile(r'CD3(?!\d)', re.IGNORECASE)
cd3_channels = [ch for ch in pipeline.channels if cd3_pattern.search(ch)]

if cd3_channels:
    # When several channels match, the first one is used.
    cd3_channel = cd3_channels[0]
    print(f"Canal CD3 détecté: {cd3_channel}")

    # Two-component 1D GMM gate keeping the 'positive' component, computed
    # within the 'singlets_viable' parent gate.
    cd3_gate = pipeline.gate_gmm_1d(
        channel=cd3_channel,
        n_components=2,
        select_component='positive',
        parent_gate='singlets_viable'
    )

    # Overlaid density histograms of the negative and positive populations.
    fig, ax = plt.subplots(figsize=(12, 6))

    # CD3-: events inside the parent gate that the CD3 gate did not keep.
    ax.hist(pipeline.data.loc[~cd3_gate & pipeline.gates['singlets_viable'], cd3_channel],
            bins=100, alpha=0.5, color='blue', label='CD3-', density=True)

    # CD3+: events kept by the CD3 gate.
    ax.hist(pipeline.data.loc[cd3_gate, cd3_channel],
            bins=100, alpha=0.5, color='red', label='CD3+', density=True)

    ax.set_xlabel(cd3_channel, fontsize=12)
    ax.set_ylabel('Densité', fontsize=12)
    ax.set_title('Distribution CD3', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ Aucun canal CD3 détecté dans ce fichier")

### 2.6 Analyse CD4/CD8 (quadrants)

In [None]:
import re

# Find the CD4 and CD8 channels (case-insensitive). The negative lookahead
# rejects a following digit: a plain substring test for 'CD4' also matches
# CD40/CD44/CD45, and 'CD8' also matches CD80/CD86.
cd4_pattern = re.compile(r'CD4(?!\d)', re.IGNORECASE)
cd8_pattern = re.compile(r'CD8(?!\d)', re.IGNORECASE)
cd4_channels = [ch for ch in pipeline.channels if cd4_pattern.search(ch)]
cd8_channels = [ch for ch in pipeline.channels if cd8_pattern.search(ch)]

# cd3_channels / cd3_channel are set by the CD3 gating cell (section 2.5);
# cd3_channel only exists when cd3_channels is non-empty, hence the guard.
if cd4_channels and cd8_channels and cd3_channels:
    cd4_channel = cd4_channels[0]
    cd8_channel = cd8_channels[0]

    print("Canaux détectés:")
    print(f"  • CD4: {cd4_channel}")
    print(f"  • CD8: {cd8_channel}")

    # Name under which the CD3+ gate is expected in pipeline.gates.
    # NOTE(review): this naming scheme is assumed from the pipeline's
    # behaviour -- the membership test below handles a mismatch.
    cd3_positive_gate = f"singlets_viable_{cd3_channel}_positive"

    if cd3_positive_gate in pipeline.gates:
        # Mapping of quadrant name -> row mask (iterated with .items() below).
        quadrants = pipeline.gate_quadrants(
            channel_x=cd4_channel,
            channel_y=cd8_channel,
            parent_gate=cd3_positive_gate
        )

        fig, ax = plt.subplots(figsize=(10, 10))

        # Expected quadrant names, built once and shared by both lookups.
        quad_dp = f"{cd3_positive_gate}_{cd4_channel}+{cd8_channel}+"
        quad_cd4 = f"{cd3_positive_gate}_{cd4_channel}+{cd8_channel}-"
        quad_cd8 = f"{cd3_positive_gate}_{cd4_channel}-{cd8_channel}+"
        quad_dn = f"{cd3_positive_gate}_{cd4_channel}-{cd8_channel}-"

        # Colour of each quadrant.
        colors = {
            quad_dp: 'red',      # DP
            quad_cd4: 'blue',    # CD4+
            quad_cd8: 'green',   # CD8+
            quad_dn: 'gray',     # DN
        }

        # Legend text of each quadrant.
        labels = {
            quad_dp: 'CD4+ CD8+ (DP)',
            quad_cd4: 'CD4+ CD8- (Helper)',
            quad_cd8: 'CD4- CD8+ (Cytotoxic)',
            quad_dn: 'CD4- CD8- (DN)',
        }

        # One scatter layer per quadrant. Unknown quadrant names fall back to
        # black and their raw name instead of raising KeyError.
        for quad_name, mask in quadrants.items():
            ax.scatter(
                pipeline.data.loc[mask, cd4_channel],
                pipeline.data.loc[mask, cd8_channel],
                s=2, c=colors.get(quad_name, 'black'), alpha=0.6,
                label=f"{labels.get(quad_name, quad_name)} ({mask.sum():,})",
                rasterized=True
            )

        # Separator lines drawn at the median of the CD3+ parent population.
        # NOTE(review): assumes gate_quadrants splits at these same medians
        # -- confirm against the pipeline implementation.
        parent_data = pipeline.data[pipeline.gates[cd3_positive_gate]]
        cd4_thresh = parent_data[cd4_channel].median()
        cd8_thresh = parent_data[cd8_channel].median()

        ax.axvline(cd4_thresh, color='black', linestyle='--', linewidth=1, alpha=0.5)
        ax.axhline(cd8_thresh, color='black', linestyle='--', linewidth=1, alpha=0.5)

        ax.set_xlabel(cd4_channel, fontsize=12)
        ax.set_ylabel(cd8_channel, fontsize=12)
        ax.set_title('Quadrants CD4/CD8 sur cellules CD3+', fontsize=14, fontweight='bold')
        ax.legend(loc='upper right', fontsize=10)
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()
    else:
        print(f"⚠️ Gate {cd3_positive_gate} non trouvé")
else:
    print("⚠️ Canaux CD4/CD8/CD3 non détectés")

### 2.7 Statistiques globales

In [None]:
# Per-population statistics table; the columns used below are 'Population',
# 'Count' and 'Percentage_of_total'.
stats = pipeline.compute_statistics()

# Show the first 20 rows of the table.
print("\n📊 STATISTIQUES DES POPULATIONS\n")
print("=" * 80)
display(stats[['Population', 'Count', 'Percentage_of_total']].head(20))

# Horizontal bar chart of the 10 populations with the highest event count.
top_pops = stats.nlargest(10, 'Count')

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.barh(top_pops['Population'], top_pops['Percentage_of_total'])
ax.set_xlabel('Pourcentage du total (%)', fontsize=12)
ax.set_title('Top 10 des populations', fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Write each percentage just to the right of its bar, vertically centred.
for bar, pct in zip(bars, top_pops['Percentage_of_total']):
    ax.text(bar.get_width() + 0.5, bar.get_y() + bar.get_height() / 2,
            f'{pct:.1f}%', va='center', fontsize=9)

plt.tight_layout()
plt.show()

### 2.8 Export des résultats

In [None]:
# Output directory for every exported file; created if it does not exist.
output_dir = Path('./results')
output_dir.mkdir(parents=True, exist_ok=True)

# Excel export of the analysis, with include_populations=True.
excel_path = output_dir / 'analysis_results.xlsx'
pipeline.export_to_excel(str(excel_path), include_populations=True)

print(f"✅ Résultats exportés: {excel_path}")

# Save the FSC-A/SSC-A gate figure as a PNG in the same directory.
pipeline.plot_gates(
    'FSC-A', 'SSC-A',
    save_path=str(output_dir / 'gates_FSC_SSC.png')
)

print(f"✅ Figures sauvegardées dans: {output_dir}")

---
## 3. Exemple 2: Analyse par lot (Batch)

### 3.1 Configuration de l'analyse par lot

In [None]:
# FCS files to analyse as one batch.
fcs_files = [
    'data/control_1.fcs',
    'data/control_2.fcs',
    'data/treatment_1.fcs',
    'data/treatment_2.fcs'
]

# Sample names, listed in the same order as fcs_files.
sample_names = ['Control_1', 'Control_2', 'Treatment_1', 'Treatment_2']

# Batch analysis object built from both lists.
batch = BatchFCSAnalysis(fcs_files, sample_names)

print("📁 Analyse par lot configurée:")
print(f"   • {len(fcs_files)} fichiers")
print(f"   • Échantillons: {', '.join(sample_names)}")

### 3.2 Exécution du pipeline sur tous les fichiers

In [None]:
# Run the standard pipeline on every file of the batch. The result supports
# len(), used below to report how many files were processed.
pipelines = batch.run_standard_pipeline(
    compensate=True,
    transform='logicle',
    gate_strategy='standard'
)

print(f"\n✅ Traitement terminé: {len(pipelines)} fichiers analysés")

### 3.3 Comparaison entre échantillons

In [None]:
# Build the cross-sample comparison table and show its first 20 rows.
comparison = batch.compare_populations()

print("\n📊 TABLEAU COMPARATIF\n")
display(comparison.head(20))

### 3.4 Visualisation comparative

In [None]:
# Make sure the target directory exists so this example also works when the
# export cell of Example 1 has not been run.
Path('./results').mkdir(parents=True, exist_ok=True)

# Comparative bar plot, saved as a PNG.
batch.plot_comparative_barplot(
    populations=None,  # all populations
    save_path='./results/comparative_barplot.png'
)

### 3.5 Export comparatif

In [None]:
# Make sure the target directory exists so this example also works when the
# export cell of Example 1 has not been run.
Path('./results').mkdir(parents=True, exist_ok=True)

# Write the batch comparison to an Excel workbook.
batch.export_comparative_excel('./results/batch_comparison.xlsx')

print("✅ Analyse par lot exportée")

---
## 4. Exemple 3: Panel T cells complet

In [None]:
import re


def _first_matching_channel(channels, pattern):
    """Return the first name in *channels* matching *pattern*, or None.

    The match is case-insensitive and uses ``re.search``, so *pattern* may
    match anywhere inside the channel name.
    """
    matcher = re.compile(pattern, re.IGNORECASE)
    return next((ch for ch in channels if matcher.search(ch)), None)


def _require_marker_channel(channels, marker):
    """Return the first channel carrying *marker*.

    A digit directly after the marker is rejected, so 'CD4' does not select
    CD45 and 'CD8' does not select CD80.

    Raises:
        ValueError: if no channel name contains the marker.
    """
    channel = _first_matching_channel(channels, rf'{re.escape(marker)}(?!\d)')
    if channel is None:
        raise ValueError(f"Aucun canal {marker} détecté dans ce fichier")
    return channel


# Load the sample, passing compensate=True and transform='logicle'.
tcell_pipeline = FCSGatingPipeline(
    'data/tcell_panel.fcs',
    compensate=True,
    transform='logicle'
)

# 1. Singlets (pipeline defaults)
tcell_pipeline.gate_singlets_fsc_ssc()

# 2. Live/Dead -- only when a channel name contains 'LIVE' or 'DEAD'
viability_ch = _first_matching_channel(tcell_pipeline.channels, r'LIVE|DEAD')
if viability_ch is not None:
    AdvancedGatingStrategies.gate_live_dead(
        tcell_pipeline,
        viability_channel=viability_ch,
        parent_gate='singlets'
    )

# 3. Lymphocytes (rectangle on FSC-A/SSC-A)
# NOTE(review): the parent is 'singlets', so the live/dead gate above is not
# part of the downstream hierarchy -- confirm whether that is intended.
tcell_pipeline.gate_rectangle(
    'FSC-A', 'SSC-A',
    x_min=30000, x_max=150000,
    y_min=0, y_max=100000,
    parent_gate='singlets'
)

# 4. CD3+ T cells (two-component GMM, 'positive' component)
cd3_ch = _require_marker_channel(tcell_pipeline.channels, 'CD3')
tcell_pipeline.gate_gmm_1d(
    cd3_ch,
    n_components=2,
    select_component='positive',
    parent_gate='singlets_FSC-A_SSC-A_rect'
)

# 5. CD4/CD8 quadrants within the CD3+ gate
cd4_ch = _require_marker_channel(tcell_pipeline.channels, 'CD4')
cd8_ch = _require_marker_channel(tcell_pipeline.channels, 'CD8')

# NOTE(review): gate names here and in step 4 are assumed from the pipeline's
# naming scheme -- confirm against the gates it actually registers.
cd3_gate = f"singlets_FSC-A_SSC-A_rect_{cd3_ch}_positive"
tcell_pipeline.gate_quadrants(cd4_ch, cd8_ch, parent_gate=cd3_gate)

# 6. Statistics
tcell_stats = tcell_pipeline.compute_statistics()
display(tcell_stats[['Population', 'Count', 'Percentage_of_total']])

# 7. Export -- create the output directory first so this example can run on
# its own.
Path('./results').mkdir(parents=True, exist_ok=True)
tcell_pipeline.export_to_excel('./results/tcell_panel_analysis.xlsx')

print("\n✅ Analyse T cells complète exportée")

---
## 5. Conclusion

Ce notebook a démontré:
1. ✅ Analyse simple d'un fichier FCS
2. ✅ Gating automatisé (singlets, viables, marqueurs)
3. ✅ Analyse par lot avec comparaison
4. ✅ Panel T cells complet

**Prochaines √©tapes**:
- Adapter les paramètres de gating à vos panels spécifiques
- Créer des stratégies de gating personnalisées
- Intégrer dans vos workflows d'analyse

**Documentation complète**: Voir `README.md`