# Count assayed genes and alleles

In [1]:
import polars as pl

In [2]:
# Relative directory paths for the sqY2H data set (inputs and generated outputs).
inputs_dir = "../1_inputs/sqY2H"
# The consensus-score CSV read in the counting cell lives under this directory.
outputs_dir = "../3_outputs/sqY2H"

In [3]:
# Summarise the screen in the LWH1 condition: number of interactions,
# interactor proteins, genes and alleles assayed, the share of interactions
# with a positive adjusted score, and the number of unique gene-gene pairs.

# only look in the LWH1 media (r03)
lwh1_scores = pl.read_csv(f"{outputs_dir}/3_normalized_qaqc_consensus_scores.csv").filter(pl.col("condition_name") == "LWH1")

# keep only scores where there is at least one matched reference-variant set
# NOTE(review): db_mut_id == 0 is treated as the reference allele throughout this cell —
# confirm against the database schema.
orfs_with_alleles = lwh1_scores.filter(pl.col("db_mut_id") != 0).select("db_orf_id").unique().to_series().to_list()
orfs = lwh1_scores.filter(pl.col("db_mut_id") == 0).select("db_orf_id").unique().to_series().to_list()
# Set membership keeps this linear; the result is still a list in the order of `orfs`.
allele_orf_ids = set(orfs_with_alleles)
complete_orf_pairs = [orf_id for orf_id in orfs if orf_id in allele_orf_ids]
lwh1_scores = lwh1_scores.filter(pl.col("db_orf_id").is_in(complete_orf_pairs))

# count interactions
num_interactions = lwh1_scores.height
num_interactors = lwh1_scores["ad_orf_id"].n_unique()
num_genes = lwh1_scores["db_orf_id"].n_unique()
# NOTE(review): this counts distinct db_mut_id values, so the id 0 (reference) is included
# in the figure reported as "variants" below — confirm that this is intended.
num_alleles = lwh1_scores["db_mut_id"].n_unique()

num_pos = lwh1_scores.filter(pl.col("adjusted_score") > 0).height
# Guard against an empty table (e.g. no LWH1 rows) so the summary prints 0% instead of
# raising ZeroDivisionError.
hit_percent = round((num_pos/num_interactions)*100) if num_interactions else 0

print(f"There are {num_interactions} interactions between {num_interactors} interactor proteins and {num_alleles} variants from {num_genes} genes.")
print(f"{num_pos} interactions of the {num_interactions} tested ({hit_percent}%) were greater than 0.")

# Look at unique gene-gene interactions
# Only the db_mut_id == 0 rows are used, so each AD-ORF/DB-ORF pair is counted once
# regardless of how many alleles were tested against it.
lwh1_wt = lwh1_scores.filter(pl.col("db_mut_id") == 0).with_columns(
        pl.concat_str([pl.col("ad_orf_id"), pl.col("db_orf_id")], separator="_").alias("unique_ppi")
    ).select("unique_ppi").to_series().unique()

num_unique_ppi = lwh1_wt.len()
print(f"There are {num_unique_ppi} unique PPIs assayed")

There are 3466 interactions between 368 interactor proteins and 558 variants from 82 genes.
2402 interactions of the 3466 tested (69%) were greater than 0.
There are 434 unique PPIs assayed


Using sequence-confirmed results:

- There are 3466 interactions between 368 interactor proteins and 558 variants from 82 genes.
- 2402 of the 3466 interactions tested (69%) had an adjusted score greater than 0.
- There are 434 unique PPIs assayed

Using raw scores pulled from the database (v1):
- There are 2899 interactions between 246 interactor proteins and 678 variants from 81 genes.
- 1913 interactions of the 2899 tested (66%) were greater than 0.
- There are 319 unique PPIs assayed