# Generate Region-Seed Combinations for pySCENIC Multi-Array Jobs

This notebook generates the region-seed combinations file used by the SLURM array job submission.

It is based on the original `generate_region_seed_combos.ipynb` notebook from `pyscenic_celline`.

In [None]:
import random

# Seed the global RNG so the draw below is reproducible (same seed as the original)
random.seed(42)

# Draw 100 distinct integers from the inclusive range 0..1000
random_integers = random.sample(range(1001), 100)
print(f"Generated {len(random_integers)} random integers")
print(f"First 10: {random_integers[:10]}")

# Seeds recorded from the original analysis; in this cell they are only
# compared against the fresh draw, not used as seeds themselves
used_integers = [37, 4, 18, 9, 56, 63, 98, 88, 75, 42]
print(f"\nUsed integers from original analysis: {used_integers}")

# Report which original seeds also occur in this draw (order of used_integers kept)
drawn_values = set(random_integers)
overlap = [candidate for candidate in used_integers if candidate in drawn_values]
print(f"Overlap with original: {overlap}")

# "Top 20" = the first 20 drawn values, sorted ascending.
# The slice is a copy, so random_integers keeps its draw order.
top_20_integers = random_integers[:20]
top_20_integers.sort()
print(f"\nTop 20 integers for this analysis: {top_20_integers}")

In [None]:
# Define the analysis regions and seeds
regions = ["cell_line"]  # Individual cell line analysis
seeds = top_20_integers

# Build one "<region> <seed>" entry per (region, seed) pair,
# iterating regions in the outer loop and seeds in the inner loop
combinations = [f"{region} {seed}" for region in regions for seed in seeds]

print(f"Generated {len(combinations)} combinations")

# Save to file, one combination per line.
# encoding and newline are pinned so the output is byte-identical on every
# platform: without newline="\n", Windows text mode would write "\r\n".
# NOTE(review): presumably the SLURM job script reads this file line by line,
# in which case a stray "\r" would end up in the seed field; confirm against the script.
with open("region_seed_combos.txt", "w", encoding="utf-8", newline="\n") as f:
    f.writelines(f"{combo}\n" for combo in combinations)

print("✅ Saved combinations to region_seed_combos.txt")

# Display first few combinations (numbered from 1)
print("\nFirst 10 combinations:")
for position, combo in enumerate(combinations[:10], start=1):
    print(f"{position}: {combo}")

print(f"\nTotal combinations for SLURM array: {len(combinations)}")
print("Each combination will run pySCENIC for all 4 cell lines (H1, H9, WIBJ2, WTC)")