# Compute abundance change for each allele

This script takes in the single-cell FACS data as input, applies various filtering criteria, and then produces a z-score for the abundance of each allele relative to the reference on the same plate.

In [1]:
import pandas as pd
import numpy as np
from random import choice
import matplotlib.pyplot as plt
import polars as pl
import process_dualipa as proc

In [2]:
# Relative directory paths used by this notebook.
#   dualipa_outputs: the single-cell FACS parquet is read from here and the
#       z-score CSV files are written here.
#   meta_outputs: holds varchamp_alleles_with_pillar_annotations.tsv.
#   dualipa_inputs: not referenced in the cells visible here.
dualipa_inputs = "../1_inputs"
dualipa_outputs = "../3_outputs"
meta_outputs = "../../../1_allele_collection/3_outputs"

# Wells are kept only when their 'n_cells' value is strictly greater than this.
n_cell_threshold = 800
# NOTE(review): not referenced in the cells visible here; presumably a minimum
# wild-type GFP signal cutoff — confirm where (or whether) it is applied.
wt_gfp_threshold = 100

In [3]:
# Load the single-cell measurement table from the outputs directory.
facs_parquet_path = f"{dualipa_outputs}/facs_single_cell.parquet"
pDEST_DUAL_df = pd.read_parquet(facs_parquet_path)

# Retain only rows whose well has strictly more than n_cell_threshold cells.
has_enough_cells = pDEST_DUAL_df['n_cells'] > n_cell_threshold
pDEST_DUAL_df = pDEST_DUAL_df.loc[has_enough_cells]

In [4]:
# Build the per-well summary tables: select the well annotation columns plus
# the mean-based (or median-based) summary columns, then drop duplicate rows.
keep_cols = [
    'symbol', 'node_type', 'nt_change', 'aa_change', 'pla', 'well',
    'coordinates', 'n_cells', 'orf_id', 'mut_id', 'valid_well',
]
mean_summary_cols = ['avg_gfp', 'avg_mcherry', 'avg_GFP_mCherry_ratio']
median_summary_cols = ['median_gfp', 'median_mcherry', 'median_GFP_mCherry_ratio']

pDEST_DUAL_avg_df = pDEST_DUAL_df.loc[:, keep_cols + mean_summary_cols].drop_duplicates()
pDEST_DUAL_median_df = pDEST_DUAL_df.loc[:, keep_cols + median_summary_cols].drop_duplicates()

## 1. Compute the mean and median scores

__The functions used for these computations are in ```process_dualipa.py```__

Maxime's notes:

    Instead of using Georges' approach to compute the assay's variability, which uses a step with random pairings, 
    Luke suggested to compute a STD from the Log2FC of all individual WT measurements, relative to the mean of the WT of each gene.
    This is a more robust approach, as it does not rely on random pairings.
    This function computes the STD of the Log2FC of all individual WT measurements, relative to the mean of the WT of each gene.
    Returns a tuple with the mean and the STD of the Log2FC of all individual WT measurements, relative to the mean of the WT of each gene.

In [5]:
# Wild-type (WT) reference values, computed by helpers in process_dualipa.py
# (imported as `proc`): once from the per-well means, once from the medians.
#   *_d objects: mean or median GFP_mCherry ratio of each unique (WT ORF, plate) combination
#   *_l objects: list of WT:WT abundances (GFP:mCherry ratios) used to estimate assay variability
wt_avg_d, wt_ratio_l = proc.get_wt_variability_d(pDEST_DUAL_avg_df, pDEST_DUAL_df)
wt_median_d, wt_ratio_l_median = proc.get_wt_variability_d_median(pDEST_DUAL_median_df, pDEST_DUAL_df)

In [6]:
# Per the notes in this notebook, this summarises the log2 fold-change of the
# individual WT measurements relative to each gene's WT mean; both values are
# passed on to the mean-based allele scoring.
# NOTE(review): those notes describe the helper as returning the mean and then
# the STD, but the result is unpacked here as (wt_std, wt_mean) — confirm the
# return order in process_dualipa.py.
wt_std, wt_mean = proc.wt_log2fc_variability(pDEST_DUAL_avg_df, pDEST_DUAL_df)

In [7]:
# Build the allele-level tables with the helpers in process_dualipa.py, once
# from the mean-based and once from the median-based per-well summaries.
# There must be two WT replicates on the same plate to compute the ratio;
# otherwise a NaN is returned.
pDEST_DUAL_avg_allele_df = proc.get_pDEST_DUAL_avg_allele_df(pDEST_DUAL_avg_df, wt_avg_d, wt_ratio_l, wt_std, wt_mean)
pDEST_DUAL_median_allele_df = proc.get_pDEST_DUAL_median_allele_df(pDEST_DUAL_median_df, wt_median_d, wt_ratio_l_median)

In [8]:
# Move the allele-level score tables into polars.
mean_df = pl.DataFrame(pDEST_DUAL_avg_allele_df)
median_df = pl.DataFrame(pDEST_DUAL_median_allele_df)

# Load the allele metadata table. orf_id and mut_id are cast to Int64 because
# they are the keys on which the metadata is joined to the score tables.
metadata_path = f"{meta_outputs}/varchamp_alleles_with_pillar_annotations.tsv"
metadata = pl.read_csv(metadata_path, separator="\t", infer_schema_length=1000000)
metadata = metadata.with_columns(pl.col("orf_id", "mut_id").cast(pl.Int64))

In [24]:
# Display (n_rows, n_columns) of the median-based score table.
median_df.shape

(1380, 19)

In [23]:
# Display (n_rows, n_columns) of the mean-based score table.
mean_df.shape

(1380, 22)

## 1.1 Merge duplicate entries of the same allele

In [26]:
# Collapse the median-based table to one row per allele.
#
# Steps:
#   1. keep valid wells that have a z-score and attach the allele metadata;
#   2. split rows into alleles seen once and alleles seen several times
#      (same gene_variant);
#   3. average the score columns of the repeated alleles;
#   4. re-attach one metadata row to each averaged allele and stack both parts.

# Columns that together identify one allele.
allele_keys = ["symbol", "nt_change", "aa_change", "gene_variant"]

# Only metadata columns the score table does not already have are joined, so
# the left join cannot create clashing (suffixed) column names.
median_new_meta_cols = [c for c in metadata.columns if c not in median_df.columns]

median_meta_df = median_df.with_columns(
    pl.col("orf_id").cast(pl.Int64).alias("orf_id"),
    pl.col("mut_id").cast(pl.Int64).alias("mut_id"),
).filter(
    pl.col("valid_well"),
    pl.col("zscore_log2").is_not_null(),
).join(
    metadata.select(["orf_id", "mut_id"] + median_new_meta_cols),
    on=["orf_id", "mut_id"],
    how="left",
)
print(median_meta_df.shape)

median_is_repeated = pl.col("gene_variant").is_duplicated()
median_df_dup = median_meta_df.filter(median_is_repeated)
median_df_no_dup = median_meta_df.filter(~median_is_repeated)

# Score columns averaged across repeated measurements of an allele.
# NOTE(review): the "median" substring also matches zcat_median, so that
# column is averaged too — confirm that averaging it is intended.
median_score_cols = [
    col for col in median_df.columns if "median" in col or "zscore_log2" in col
]
# maintain_order=True keeps the output row order reproducible between runs.
median_df_dup = median_df_dup.group_by(allele_keys, maintain_order=True).agg(
    [pl.col(col).mean().alias(col) for col in median_score_cols]
)

# Metadata to re-attach to the averaged rows. It is reduced to one row per
# allele *before* the join, always keeping the first occurrence, so the
# metadata row chosen for a repeated allele no longer varies between runs
# (unique() with the default keep="any" gives no such guarantee).
median_extra_cols = [
    col
    for col in median_meta_df.columns
    if col not in median_df_dup.columns and col not in median_df.columns
]
median_dup_meta = median_meta_df.select(allele_keys + median_extra_cols).unique(
    subset=allele_keys, keep="first", maintain_order=True
)
median_df_dup = median_df_dup.join(median_dup_meta, on=allele_keys, how="left")

# The final table keeps only the allele keys, the score columns and the
# metadata columns (well-level columns of the single-measurement rows are dropped).
median_final_df = pl.concat([
    median_df_no_dup.select(median_df_dup.columns),
    median_df_dup,
], how="vertical_relaxed")

(777, 600)


In [25]:
# Collapse the mean-based table to one row per allele.
#
# Steps:
#   1. keep valid wells that have at least one of the two z-scores and attach
#      the allele metadata;
#   2. split rows into alleles seen once and alleles seen several times
#      (same gene_variant);
#   3. average the z-score columns of the repeated alleles;
#   4. re-attach one metadata row to each averaged allele and stack both parts.

# Columns that together identify one allele.
allele_keys = ["symbol", "nt_change", "aa_change", "gene_variant"]

# Only metadata columns that mean_df does not already have are joined, so the
# left join cannot create clashing (suffixed) column names. (This must be
# checked against mean_df, the table being joined, not median_df.)
mean_new_meta_cols = [c for c in metadata.columns if c not in mean_df.columns]

mean_meta_df = mean_df.with_columns(
    pl.col("orf_id").cast(pl.Int64).alias("orf_id"),
    pl.col("mut_id").cast(pl.Int64).alias("mut_id"),
).filter(
    pl.col("valid_well"),
    pl.col("allele_wt_log2fc_zscore").is_not_null() | pl.col("zscore_log2").is_not_null(),
).join(
    metadata.select(["orf_id", "mut_id"] + mean_new_meta_cols),
    on=["orf_id", "mut_id"],
    how="left",
)
print(mean_meta_df.shape)

mean_is_repeated = pl.col("gene_variant").is_duplicated()
mean_df_dup = mean_meta_df.filter(mean_is_repeated)
mean_df_no_dup = mean_meta_df.filter(~mean_is_repeated)

# Z-score columns averaged across repeated measurements of an allele.
mean_score_cols = [
    col
    for col in mean_df.columns
    if "allele_wt_log2fc_zscore" in col or "zscore_log2" in col
]
# maintain_order=True keeps the output row order reproducible between runs.
# The columns keep their original names (allele_wt_log2fc_zscore, zscore_log2):
# the earlier `.rename({"zscore_log2fc"})` passed a set rather than an
# old-name -> new-name mapping and could not run, so it has been removed.
mean_df_dup = mean_df_dup.group_by(allele_keys, maintain_order=True).agg(
    [pl.col(col).mean().alias(col) for col in mean_score_cols]
)

# Metadata to re-attach to the averaged rows. It is reduced to one row per
# allele *before* the join, always keeping the first occurrence, so the
# metadata row chosen for a repeated allele no longer varies between runs
# (unique() with the default keep="any" gives no such guarantee).
mean_extra_cols = [
    col
    for col in mean_meta_df.columns
    if col not in mean_df_dup.columns and col not in mean_df.columns
]
mean_dup_meta = mean_meta_df.select(allele_keys + mean_extra_cols).unique(
    subset=allele_keys, keep="first", maintain_order=True
)
mean_df_dup = mean_df_dup.join(mean_dup_meta, on=allele_keys, how="left")

# The final table keeps only the allele keys, the z-score columns and the
# metadata columns (well-level columns of the single-measurement rows are dropped).
mean_final_df = pl.concat([
    mean_df_no_dup.select(mean_df_dup.columns),
    mean_df_dup,
], how="vertical_relaxed")

(799, 603)


In [21]:
# Display the merged mean-based table, reduced to unique gene_variant values.
mean_final_df.unique("gene_variant")

symbol,nt_change,aa_change,gene_variant,allele_wt_log2fc_zscore,zscore_log2,ensembl_gene_id,ccsb_mutation_id,ccsb_allele_id,spdi,collection,entry_plate_orig,entry_well_orig,entry_plate_conso,entry_well_conso,entry_seq_pool,db_plate,db_well,n2h_plate,n2h_well,dualip_plate,dualip_well,mislocalization_plate,mislocalization_well,entry_sequenced,entry_sequence_confirmation_class,db_sequenced,db_sequence_confirmation_class,n2h_sequenced,n2h_sequence_confirmation_class,n2h_ref_sequence_confirmation_class,dualip_sequenced,dualip_sequence_confirmation_class,dualip_ref_sequence_confirmation_class,mislocalization_sequenced,mislocalization_sequence_confirmation_class,mislocalization_ref_sequence_confirmation_class,…,clinvar_star,auth_reported_rep_score,Mode of Inheritance_ClinGen_repo,Updated_Evidence Codes_ClinGen_repo,aa_change_pp,Evidence Repo Link_ClinGen_repo,Interval 2 name,Interval 6 range,Interval 1 range,Rationale_presumed_PMID:34273903,aa_alt,clinvar_date_last_reviewed,auth_transcript_id,Interval 2 range,Naive_prior_prob_PMID:31131967,Approval Date_ClinGen_repo,simplified_consequence,Expert Panel_ClinGen_repo,aa_pos,transcript_ref,Disease_ClinGen_repo,gnomad_MAF,auth_reported_score,Interval 1 MaveDB class,clinvar_sig,hg38_start,consequence,Interval 5 MaveDB class,REVEL,auth_reported_func_class,Interval 3 name,Family History LR_PMID:31131967,PubMed Articles_ClinGen_repo,Assay_type,Interval 4 range,Calcualted_post_prob_PMID:31131967,gene_variant_pp
str,str,str,str,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,str,str,str,f64,str,str,f64,f64,str,str,f64,str,str,f64,str,str,f64,str,str,str,f64,str
"""KCTD7""","""280C>T""","""Arg94Trp""","""KCTD7_Arg94Trp""",-3.957486,-2.426937,"""ENSG00000243335""","""CCSBVarC005604""","""ALE00005604""","""NC_000007.14:66633410:C:T""","""CEGS2""","""CegsMutGDEh1033""","""G11""","""GDEhDisVCh_40029""","""C12""","""1""","""CegsMutGDDh1033""","""G11""","""VUSN2Hmut_GDN2h_06""","""D06""","""VUSMutpDEST2_13""","""F11""","""VUSmut_GDEh06""","""D06""",1,1,1,1,1,1,1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""SMAD3""","""401T>C""","""Val134Ala""","""SMAD3_Val134Ala""",0.169296,0.009866,"""ENSG00000166949""","""CCSBVarC007717""","""ALE00007703""","""NC_000015.10:67165253:T:C""","""Edgotyping3""","""VUSMutGDEh004""","""G11""","""GDEhDisVCh_40014""","""G06""","""2""","""VUSMutGDDh004""","""G11""","""NULL""","""NULL""","""VUSMutpDEST2_09""","""E02""","""NULL""","""NULL""",1,1,1,5,0,,,1,1,1,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""RAD51D""","""494G>A""","""Arg165Gln""","""RAD51D_Arg165Gln""",2.862984,1.600448,"""ENSG00000185379""","""CCSBVarC002917""","""ALE00002917""","""NC_000017.11:35106468:C:T""","""RC4""","""NatVar_GDE_001""","""F05""","""GDEhDisVCh_40046""","""E03""","""2""","""NULL""","""NULL""","""NULL""","""NULL""","""VUSMutpDEST2_06""","""G01""","""VUSmut_GDEh12""","""E02""",1,1,0,,0,,,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""PKP2""","""14G>A""","""Gly5Asp""","""PKP2_Gly5Asp""",-0.361786,-0.30373,"""ENSG00000057294""","""CCSBVarC008053""","""ALE00008039""","""NC_000012.12:32896718:C:T""","""Edgotyping3""","""VUSMutGDEh015""","""C07""","""GDEhDisVCh_40015""","""F12""","""1""","""VUSMutGDDh015""","""C07""","""VUSN2Hmut_GDN2h_04""","""E08""","""VUSMutpDEST2_13""","""D01""","""VUSmut_GDEh04""","""E08""",1,1,1,1,1,1,1,1,1,1,1,1,6,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""KRT6A""","""511A>G""","""Asn171Asp""","""KRT6A_Asn171Asp""",0.988879,0.493818,"""ENSG00000205420""","""CCSBVarC003668""","""ALE00003668""","""NC_000012.12:52492678:T:C""","""CEGS2""","""CegsMutGDEh1045""","""A04""","""GDEhDisVCh_40048""","""D10""","""2""","""CegsMutGDDh1045""","""A04""","""VUSN2Hmut_GDN2h_09""","""C09""","""VUSMutpDEST2_04""","""E09""","""VUSmut_GDEh09""","""C09""",1,1,1,1,1,1,1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""KCTD1""","""98A>C""","""His33Pro""","""KCTD1_His33Pro""",0.057812,-0.055963,"""ENSG00000134504""","""CCSBVarC005804""","""ALE00005804""","""NC_000018.10:26501138:T:G""","""CEGS2""","""CegsMutGDEh1037""","""D07""","""GDEhDisVCh_40022""","""D07""","""2""","""CegsMutGDDh1037""","""D07""","""VUSN2Hmut_GDN2h_07""","""E08""","""VUSMutpDEST2_11""","""H07""","""VUSmut_GDEh07""","""E08""",1,1,1,1,1,1,1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""KRT2""","""1427A>T""","""Glu476Val""","""KRT2_Glu476Val""",-6.990519,-4.217898,"""ENSG00000172867""","""CCSBVarC003452""","""ALE00003452""","""NC_000012.12:52646782:T:A""","""CEGS2""","""CegsMutGDEh1037""","""D09""","""GDEhDisVCh_40073""","""G07""","""1""","""CegsMutGDDh1037""","""D09""","""VUSN2Hmut_GDN2h_07""","""B09""","""VUSMutpDEST2_13""","""A07""","""VUSmut_GDEh07""","""B09""",1,1,1,1,1,1,6,1,1,2,1,1,99,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""SMAD3""","""696G>C""","""Trp232Cys""","""SMAD3_Trp232Cys""",-11.432783,-6.840988,"""ENSG00000166949""","""CCSBVarC007722""","""ALE00007708""","""NC_000015.10:67181278:G:C""","""Edgotyping3""","""VUSMutGDEh004""","""E12""","""GDEhDisVCh_40014""","""D07""","""2""","""VUSMutGDDh004""","""E12""","""VUSN2Hmut_GDN2h_01""","""F10""","""VUSMutpDEST2_09""","""C03""","""VUSmut_GDEh01""","""F10""",1,1,1,1,1,1,1,1,1,1,1,1,2,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""GALE""","""956G>A""","""Gly319Glu""","""GALE_Gly319Glu""",-1.285818,-0.849357,"""ENSG00000117308""","""CCSBVarC000328""","""ALE00000328""","""NC_000001.11:23796183:C:T""","""RC4""","""RC4_Mut_GDEh1017""","""E06""","""GDEhDisVCh_40068""","""G10""","""2""","""RC4_Mut_GDDh1017""","""E06""","""NULL""","""NULL""","""VUSMutpDEST2_02""","""C10""","""NULL""","""NULL""",1,1,1,1,0,,,1,1,1,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [22]:
# Display the merged median-based table, reduced to unique gene_variant values.
median_final_df.unique("gene_variant")

symbol,nt_change,aa_change,gene_variant,median_gfp,median_mcherry,median_GFP_mCherry_ratio,wt_GFP_mCherry_ratio_median,allele_wt_ratio_median,zscore_median,zcat_median,zscore_log2,ensembl_gene_id,ccsb_mutation_id,ccsb_allele_id,spdi,collection,entry_plate_orig,entry_well_orig,entry_plate_conso,entry_well_conso,entry_seq_pool,db_plate,db_well,n2h_plate,n2h_well,dualip_plate,dualip_well,mislocalization_plate,mislocalization_well,entry_sequenced,entry_sequence_confirmation_class,db_sequenced,db_sequence_confirmation_class,n2h_sequenced,n2h_sequence_confirmation_class,n2h_ref_sequence_confirmation_class,…,clinvar_star,auth_reported_rep_score,Mode of Inheritance_ClinGen_repo,Updated_Evidence Codes_ClinGen_repo,aa_change_pp,Evidence Repo Link_ClinGen_repo,Interval 2 name,Interval 6 range,Interval 1 range,Rationale_presumed_PMID:34273903,aa_alt,clinvar_date_last_reviewed,auth_transcript_id,Interval 2 range,Naive_prior_prob_PMID:31131967,Approval Date_ClinGen_repo,simplified_consequence,Expert Panel_ClinGen_repo,aa_pos,transcript_ref,Disease_ClinGen_repo,gnomad_MAF,auth_reported_score,Interval 1 MaveDB class,clinvar_sig,hg38_start,consequence,Interval 5 MaveDB class,REVEL,auth_reported_func_class,Interval 3 name,Family History LR_PMID:31131967,PubMed Articles_ClinGen_repo,Assay_type,Interval 4 range,Calcualted_post_prob_PMID:31131967,gene_variant_pp
str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,str,str,str,f64,str,str,f64,f64,str,str,f64,str,str,f64,str,str,f64,str,str,str,f64,str
"""PRKAR1A""","""1004G>C""","""Arg335Pro""","""PRKAR1A_Arg335Pro""",361.559998,2132.549927,0.146988,0.178867,0.821771,-0.612698,0.0,-0.547913,"""ENSG00000108946""","""CCSBVarC004971""","""ALE00004971""","""NC_000017.11:68530307:G:C""","""CEGS2""","""CegsMutGDEh1032""","""B03""","""GDEhDisVCh_40076""","""C05""","""2""","""CegsMutGDDh1032""","""B03""","""VUSN2Hmut_GDN2h_05""","""F11""","""VUSMutpDEST2_01""","""F11""","""VUSmut_GDEh05""","""F11""",1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""HGD""","""688C>T""","""Pro230Ser""","""HGD_Pro230Ser""",304.52002,4594.449951,0.071102,0.245798,0.289271,-2.145362,-2.0,-3.752394,"""ENSG00000113924""","""CCSBVarC004567""","""ALE00004567""","""NC_000003.12:120644405:G:A""","""CEGS2""","""CegsMutGDEh1037""","""E02""","""GDEhDisVCh_40073""","""D07""","""1""","""CegsMutGDDh1037""","""E02""","""NULL""","""NULL""","""VUSMutpDEST2_11""","""G09""","""NULL""","""NULL""",1,1,1,1,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""AIPL1""","""244C>T""","""His82Tyr""","""AIPL1_His82Tyr""",1876.73999,4057.549927,0.50965,0.641451,0.794527,-0.691113,0.0,-0.651389,"""ENSG00000129221""","""CCSBVarC004941""","""ALE00004941""","""NC_000017.11:6433951:G:A""","""CEGS2""","""CegsMutGDEh1032""","""H10""","""GDEhDisVCh_40067""","""G07""","""1""","""CegsMutGDDh1032""","""H10""","""NULL""","""NULL""","""VUSMutpDEST2_05""","""D12""","""NULL""","""NULL""",1,1,1,5,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""DCX""","""139A>C""","""Ser47Arg""","""DCX_Ser47Arg""",1634.190002,1172.755005,0.980825,1.029343,0.952866,-0.235375,0.0,-0.093642,"""ENSG00000077279""","""CCSBVarC001371""","""ALE00001371""","""NC_000023.11:111410260:T:G""","""RC4""","""RC4_Mut_GDEh1018""","""E03""","""GDEhDisVCh_40049""","""G11""","""1""","""RC4_Mut_GDDh1018""","""E03""","""NULL""","""NULL""","""VUSMutpDEST2_07""","""F10""","""NULL""","""NULL""",1,1,1,1,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""TPM3""","""733A>G""","""Arg245Gly""","""TPM3_Arg245Gly""",1266.839966,4410.699707,0.295743,0.376148,0.786242,-0.714958,0.0,-0.68356,"""ENSG00000143549""","""CCSBVarC000885""","""ALE00000885""","""NC_000001.11:154170442:T:C""","""RC4""","""RC4_Mut_GDEh1014""","""C12""","""GDEhDisVCh_40086""","""B10""","""2""","""RC4_Mut_GDDh1014""","""C12""","""NULL""","""NULL""","""VUSMutpDEST2_05""","""D05""","""NULL""","""NULL""",1,1,1,1,0,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TH""","""917G>A""","""Arg306His""","""TH_Arg306His""",1427.085022,4393.199951,0.324439,0.504775,0.642739,-1.127995,-1.0,-1.302071,"""ENSG00000180176""","""CCSBVarC008285""","""ALE00008271""","""NC_000011.10:2166693:C:T""","""Edgotyping3""","""VUSMutGDEh015""","""A10""","""GDEhDisVCh_40019""","""H09""","""1""","""VUSMutGDDh015""","""A10""","""VUSN2Hmut_GDN2h_04""","""C11""","""VUSMutpDEST2_15""","""G07""","""VUSmut_GDEh04""","""C11""",1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""STXBP1""","""997A>G""","""Lys333Glu""","""STXBP1_Lys333Glu""",3001.110107,1629.599976,1.629567,1.265654,1.287529,0.727869,0.0,0.830189,"""ENSG00000136854""","""CCSBVarC007637""","""ALE00007623""","""NC_000009.12:127672084:A:G""","""Edgotyping3""","""VUSMutGDEh008""","""H09""","""GDEhDisVCh_40016""","""F07""","""2""","""VUSMutGDDh008""","""H09""","""VUSN2Hmut_GDN2h_02""","""F09""","""VUSMutpDEST2_06""","""D08""","""VUSmut_GDEh02""","""F09""",1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""STXBP1""","""1439C>T""","""Pro480Leu""","""STXBP1_Pro480Leu""",438.959991,2029.299927,0.189057,1.265654,0.149375,-2.548019,-2.0,-5.780813,"""ENSG00000136854""","""CCSBVarC007654""","""ALE00007640""","""NC_000009.12:127678510:C:T""","""Edgotyping3""","""VUSMutGDEh008""","""B12""","""GDEhDisVCh_40016""","""G09""","""2""","""VUSMutGDDh008""","""B12""","""VUSN2Hmut_GDN2h_02""","""E11""","""VUSMutpDEST2_06""","""F10""","""VUSmut_GDEh02""","""E11""",1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""TH""","""406G>A""","""Val136Met""","""TH_Val136Met""",2280.360107,4746.699707,0.492,0.504775,0.974691,-0.172556,0.0,-0.024136,"""ENSG00000180176""","""CCSBVarC008297""","""ALE00008283""","""NC_000011.10:2168572:C:T""","""Edgotyping3""","""VUSMutGDEh015""","""F11""","""GDEhDisVCh_40019""","""D11""","""1""","""VUSMutGDDh015""","""F11""","""VUSN2Hmut_GDN2h_05""","""B01""","""VUSMutpDEST2_15""","""D09""","""VUSmut_GDEh05""","""B01""",1,1,1,1,1,1,1,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [28]:
# Write the merged allele-level tables to the outputs directory as CSV,
# median-based table first, then the mean-based one.
zscore_tables_by_path = {
    f'{dualipa_outputs}/DUALIPA_median_zscore.csv': median_final_df,
    f'{dualipa_outputs}/DUALIPA_mean_zscore.csv': mean_final_df,
}
for csv_path, zscore_table in zscore_tables_by_path.items():
    zscore_table.write_csv(csv_path)

In [45]:
# mean_meta_df = mean_df.with_columns(
#     pl.col("orf_id").cast(pl.Int64).alias("orf_id"),
#     pl.col("mut_id").cast(pl.Int64).alias("mut_id")
# ).filter(
#     pl.col("valid_well"),
#     pl.col("zscore_log2").is_not_null()
# ).join(
#     metadata.select(pl.col(["orf_id", "mut_id"]+[c for c in metadata.columns if c not in mean_df.columns])), on=["orf_id", "mut_id"], how="left"
# )
# mean_df.write_csv(f'{dualipa_outputs}/DUALIPA_mean_zscore.csv')