# Consensus Regulon Generation

This notebook generates consensus regulons from multiple pySCENIC runs with configurable thresholds:

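- **Individual cell lines** (H1, WTC, H9, WIBJ2, each processed separately): occur_threshold=20, size_threshold=0, output directory `regulons/consensus_20`
- **Combined analysis** (regulon files from all four cell lines pooled): occur_threshold=0, size_threshold=0, output directory `regulons/consensus_0`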

The consensus approach combines results from multiple runs to identify robust regulatory relationships.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import re
import pyscenic as ps
from pathlib import Path

# Import our consensus utilities
import sys
sys.path.append('../src')
from consensus_regulons import ConsensusRegulonGenerator

In [None]:
# Announce the start of the consensus stage. Nothing is instantiated in this
# cell; ConsensusRegulonGenerator objects are created in the cells below.
banner_lines = (
    "🔧 Initializing Consensus Regulon Generator",
    "Using the ConsensusRegulonGenerator class for robust consensus generation",
)
for banner_line in banner_lines:
    print(banner_line)

In [None]:
# Generate consensus regulons for individual cell lines
# Parameters: occur_threshold=20, size_threshold=0

results_folder = "../01_pyscenic_runs/results"


def list_regulon_files(directory):
    """Return the regulon pickle paths found directly inside *directory*.

    Only entries whose name starts with ``regulons_seed`` and ends with
    ``.pkl`` are kept. Names are sorted because ``os.listdir`` returns
    entries in arbitrary order, which would otherwise make the input order
    differ between machines and reruns.

    Args:
        directory: Path of an existing directory, as a string.

    Returns:
        A list of ``"{directory}/{name}"`` strings in ascending name order;
        empty when nothing matches.
    """
    return [
        f"{directory}/{name}"
        for name in sorted(os.listdir(directory))
        if name.startswith("regulons_seed") and name.endswith(".pkl")
    ]


for cell_line in ['H1', 'WTC', 'H9', 'WIBJ2']:
    print(f"\n{'='*50}")
    print(f"Processing cell line: {cell_line}")
    print(f"{'='*50}")

    # Find regulon files for this cell line
    cell_line_dir = f"{results_folder}/{cell_line}"
    # isdir rather than exists: a regular file with the cell line's name
    # would pass an existence check and then make os.listdir raise.
    if not os.path.isdir(cell_line_dir):
        print(f"❌ Directory not found: {cell_line_dir}")
        continue

    files = list_regulon_files(cell_line_dir)
    print(f"Found {len(files)} regulon files")

    if not files:
        print(f"❌ No regulon files found for {cell_line}")
        continue

    # Set consensus parameters
    occur_threshold = 20
    size_threshold = 0
    # The occurrence threshold is part of the output directory name, so
    # results produced with different thresholds land in different folders.
    output_dir = f"regulons/consensus_{occur_threshold}"

    # Use the ConsensusRegulonGenerator class
    consensus_gen = ConsensusRegulonGenerator(
        occur_threshold=occur_threshold,
        size_threshold=size_threshold
    )

    # Process regulons using the class method
    consensus_regulons = consensus_gen.generate_consensus(
        regulon_files=files,
        output_dir=output_dir,
        sample_name=cell_line
    )

    print(f"✅ Completed consensus for {cell_line}")
    print(f"   Generated {len(consensus_regulons)} consensus regulons")

In [None]:
# Generate consensus regulons for ALL cell lines combined
# Parameters: occur_threshold=0, size_threshold=0 (more permissive for combined analysis)
# NOTE: relies on `results_folder`, which is defined in the per-cell-line cell above.

print(f"\n{'='*50}")
print("Processing ALL cell lines combined")
print(f"{'='*50}")

# Collect all regulon files from all cell lines
all_files = []
for cell_line in ['H1', 'WTC', 'H9', 'WIBJ2']:
    cell_line_dir = f"{results_folder}/{cell_line}"
    # isdir rather than exists: a regular file with the cell line's name
    # would pass an existence check and then make os.listdir raise.
    if not os.path.isdir(cell_line_dir):
        # Report the gap instead of skipping silently, so a combined result
        # that is missing a whole cell line is visible in the cell output.
        print(f"❌ Directory not found: {cell_line_dir}")
        continue
    # os.listdir returns names in arbitrary order; sorting keeps the pooled
    # file list identical between machines and reruns.
    all_files.extend(
        f"{cell_line_dir}/{name}"
        for name in sorted(os.listdir(cell_line_dir))
        if name.startswith("regulons_seed") and name.endswith(".pkl")
    )

print(f"Found {len(all_files)} total regulon files across all cell lines")

if all_files:
    # Set parameters for combined analysis (more permissive)
    occur_threshold = 0
    size_threshold = 0
    # The occurrence threshold is part of the output directory name, so this
    # run does not share a folder with the per-cell-line results.
    output_dir = f"regulons/consensus_{occur_threshold}"

    # Use the ConsensusRegulonGenerator class
    consensus_gen = ConsensusRegulonGenerator(
        occur_threshold=occur_threshold,
        size_threshold=size_threshold
    )

    # Process regulons using the class method
    consensus_regulons = consensus_gen.generate_consensus(
        regulon_files=all_files,
        output_dir=output_dir,
        sample_name="ALL_CELLLINES"
    )

    print("✅ Completed consensus for ALL cell lines combined")
    print(f"   Generated {len(consensus_regulons)} consensus regulons")
else:
    print("❌ No regulon files found")

print("\n🎉 Consensus regulon generation complete!")
print("✨ Using clean class-based approach - no redundant functions!")
print("Next step: Stage 3 - Morphogen network analysis")