# Extract Target Genes for a TF of Interest from CellOracle Links

In [None]:
import celloracle as co
import pandas as pd
import numpy as np

## 1. Load filtered Links object

In [None]:
# Location of the filtered Links file to analyse -- point this at your own file.
links_file = "celloracle_results/CTR9_combined_3K_HVG_filtered.celloracle.links"

# Load the saved object and show the clusters it contains.
links = co.load_hdf5(file_path=links_file)
print(f"Clusters: {links.cluster}")

In [None]:
# ---- User settings: the TF to inspect and the two clusters to compare ----
tf_of_interest = "CTR9"

# The first two entries of links.cluster are compared; change the indices
# below to pick a different pair.
cluster_names = links.cluster
cluster1 = cluster_names[0]
cluster2 = cluster_names[1]
print(f"Cluster 1: {cluster1}", f"Cluster 2: {cluster2}", sep="\n")

## 2. Get target genes per cluster

In [None]:
# Filtered link table for each of the two selected clusters.
df1 = links.filtered_links[cluster1]
df2 = links.filtered_links[cluster2]

# Keep the rows whose "source" is the TF, then collect the distinct "target"
# values of those rows for each cluster.
from_tf1 = df1["source"] == tf_of_interest
from_tf2 = df2["source"] == tf_of_interest
targets1 = set(df1.loc[from_tf1, "target"])
targets2 = set(df2.loc[from_tf2, "target"])

print(f"{cluster1}: {len(targets1)} targets")
print(f"{cluster2}: {len(targets2)} targets")

## 3. Compare target sets between clusters

In [None]:
# Split the TF's targets into three sorted lists: exclusive to each cluster,
# and shared by both.
only_cluster1 = sorted(targets1.difference(targets2))
only_cluster2 = sorted(targets2.difference(targets1))
in_both = sorted(targets1.intersection(targets2))

# For each group, print the count on one line and the gene list on the next.
print(f"Only in {cluster1}: {len(only_cluster1)}", only_cluster1, sep="\n")
print(f"\nOnly in {cluster2}: {len(only_cluster2)}", only_cluster2, sep="\n")
print(f"\nIn both clusters: {len(in_both)}", in_both, sep="\n")

## 4. Summary DataFrame with coefficients from both clusters

In [None]:
# Build one table of the TF's outgoing edges with the statistics from both
# clusters side by side, plus a "membership" column saying where each target
# was found ("both", "<cluster1>_only" or "<cluster2>_only").
edge_columns = ["target", "coef_mean", "coef_abs", "p"]

edges1 = df1.loc[df1["source"] == tf_of_interest, edge_columns].copy()
edges1.columns = ["target", f"coef_mean_{cluster1}", f"coef_abs_{cluster1}", f"p_{cluster1}"]

edges2 = df2.loc[df2["source"] == tf_of_interest, edge_columns].copy()
edges2.columns = ["target", f"coef_mean_{cluster2}", f"coef_abs_{cluster2}", f"p_{cluster2}"]

# indicator=True adds a "_merge" column ("left_only" / "right_only" / "both")
# recording which input each target came from. Membership is taken from that
# column rather than from NaNs in the coefficient columns, so an edge whose
# coefficient is missing is never mistaken for an absent edge.
merged = pd.merge(edges1, edges2, on="target", how="outer", indicator=True)

membership_labels = {
    "both": "both",
    "left_only": f"{cluster1}_only",
    "right_only": f"{cluster2}_only",
}
# "_merge" is categorical; cast to plain strings first so the labels sort
# alphabetically instead of by category order. This vectorised mapping also
# works when the TF has no edges in either cluster (empty table).
merged["membership"] = merged.pop("_merge").astype(str).map(membership_labels)

# Secondary key "target" makes the row order within each group deterministic.
merged = merged.sort_values(["membership", "target"])
merged

## 5. Export to CSV

In [None]:
# Build the output filename once so the file that is written and the path
# that is reported cannot drift apart.
output_csv = f"{tf_of_interest}_targets_cluster_comparison.csv"

# index=False: leave the DataFrame's row index out of the file.
merged.to_csv(output_csv, index=False)
print(f"Saved to {output_csv}")