In [1]:
# Imports
import pathlib
import pandas as pd
import numpy as np
import polars as pl

import black
import jupyter_black

jupyter_black.load(
    lab=False,
    line_length=79,
    verbosity="DEBUG",
    target_version=black.TargetVersion.PY310,
)

import warnings
warnings.filterwarnings("ignore")


DEBUG:jupyter_black:config: {'line_length': 79, 'target_versions': {<TargetVersion.PY310: 10>}}


<IPython.core.display.Javascript object>

In [2]:
# Define all paths and files
map_data_dir = pathlib.Path("/dgx1nas1/storage/data/jess/varchamp/sc_data/map_data/").resolve(strict=True)
bl_path = pathlib.Path(f'{map_data_dir}/bl_map_data.parquet')

In [4]:
# Read in data
bl = pl.read_parquet(bl_path)

feat_col = [i for i in bl.columns if "Metadata_" not in i] 
feat_col = [i for i in feat_col if i not in ['n_pos_pairs', 'n_total_pairs', 'average_precision']]

In [5]:
# Count the number of NaNs per cell
nd_cells = bl.select(feat_col).to_numpy()
nan_mask = np.isnan(nd_cells)
nan_per_cell = np.sum(nan_mask, axis=1).reshape(-1, 1) 

In [9]:
nan_s = pl.Series("nan_per_cell", nan_per_cell)
bl.insert_column(6, nan_s)

# clearly the NaN number do not match the average precision results
# next to check is the subsetting script

Metadata_Plate,Metadata_allele,Metadata_CellID,n_pos_pairs,n_total_pairs,average_precision,Metadata_Plate_right,nan_per_cell,Metadata_Well,Metadata_Batch,Metadata_allele_set,Metadata_Symbol,Metadata_entry_plate,Metadata_entry_well,Metadata_entry_numb,Metadata_dest_plate,Metadata_dest_well,Metadata_entry_ID,Metadata_node_type,Metadata_aa_change,Metadata_c96,Metadata_r96,Metadata_r384,Metadata_c384,Metadata_r384_letter,Metadata_384_well,Metadata_384ID,Metadata_allele_right,Metadata_batch,Metadata_control,Metadata_Gene,Metadata_MT,Metadata_Variant,Metadata_Sample_Unique,Metadata_batch_Plate,Metadata_TableNumber,Metadata_ImageNumber,…,Nuclei_Texture_Variance_AGP_5_03_256,Nuclei_Texture_Variance_DNA_10_00_256,Nuclei_Texture_Variance_DNA_10_01_256,Nuclei_Texture_Variance_DNA_10_02_256,Nuclei_Texture_Variance_DNA_10_03_256,Nuclei_Texture_Variance_DNA_20_00_256,Nuclei_Texture_Variance_DNA_20_01_256,Nuclei_Texture_Variance_DNA_20_02_256,Nuclei_Texture_Variance_DNA_20_03_256,Nuclei_Texture_Variance_DNA_5_00_256,Nuclei_Texture_Variance_DNA_5_01_256,Nuclei_Texture_Variance_DNA_5_02_256,Nuclei_Texture_Variance_DNA_5_03_256,Nuclei_Texture_Variance_GFP_10_00_256,Nuclei_Texture_Variance_GFP_10_01_256,Nuclei_Texture_Variance_GFP_10_02_256,Nuclei_Texture_Variance_GFP_10_03_256,Nuclei_Texture_Variance_GFP_20_00_256,Nuclei_Texture_Variance_GFP_20_01_256,Nuclei_Texture_Variance_GFP_20_02_256,Nuclei_Texture_Variance_GFP_20_03_256,Nuclei_Texture_Variance_GFP_5_00_256,Nuclei_Texture_Variance_GFP_5_01_256,Nuclei_Texture_Variance_GFP_5_02_256,Nuclei_Texture_Variance_GFP_5_03_256,Nuclei_Texture_Variance_Mito_10_00_256,Nuclei_Texture_Variance_Mito_10_01_256,Nuclei_Texture_Variance_Mito_10_02_256,Nuclei_Texture_Variance_Mito_10_03_256,Nuclei_Texture_Variance_Mito_20_00_256,Nuclei_Texture_Variance_Mito_20_01_256,Nuclei_Texture_Variance_Mito_20_02_256,Nuclei_Texture_Variance_Mito_20_03_256,Nuclei_Texture_Variance_Mito_5_00_256,Nuclei_Texture_Variance_Mito_5_01_256,Nuclei_Texture_Variance_Mito_5_02_256,Nuclei_Texture_Variance_Mito_5_03_256
str,str,str,i64,i64,f64,str,list[i64],str,f64,f64,str,str,str,f64,str,str,str,str,str,f64,f64,f64,f64,str,str,str,str,str,bool,str,str,str,str,str,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2023-05-26_B1A…","""FBP1_""","""2023-05-26_B1A…",35,181,0.351602,"""2023-05-26_B1A…",[0],"""A01""",1.0,1.0,"""FBP1""","""DisWTGDEh02""","""D10""",76.0,"""FVrry_P02_Q01""","""A01""","""DisWTGDEh02_D1…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""FBP1_""","""2023_05_30_B1A…",false,"""FBP1""",,"""FBP1""","""FBP1""","""2023_05_30_B1A…",1.3571e9,3.0,…,0.252525,2.090942,2.209651,2.158674,2.294071,2.169098,0.122449,2.325079,2.0713,2.009928,2.015056,2.029191,2.015241,4.238936,2.974101,4.280794,4.619897,4.378369,0.959184,5.740136,9.644569,4.480578,4.324951,4.707542,4.349165,36.631548,36.725805,43.091843,44.649221,18.075429,32.55102,80.888997,88.212474,42.580016,35.327662,38.009556,40.556283
"""2023-05-26_B1A…","""FBP1_""","""2023-05-26_B1A…",35,193,0.224551,"""2023-05-26_B1A…",[0],"""A01""",1.0,1.0,"""FBP1""","""DisWTGDEh02""","""D10""",76.0,"""FVrry_P02_Q01""","""A01""","""DisWTGDEh02_D1…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""FBP1_""","""2023_05_30_B1A…",false,"""FBP1""",,"""FBP1""","""FBP1""","""2023_05_30_B1A…",2.63337874e8,4.0,…,1.454455,0.795926,0.886689,0.814714,0.833461,0.898551,0.812274,0.878649,0.930965,0.757588,0.793652,0.767911,0.772761,0.337606,0.371714,0.354284,0.30636,0.373344,0.380545,0.360014,0.510513,0.333787,0.351109,0.324352,0.315952,6.88884,2.908941,7.994863,8.650932,6.940564,4.004784,12.592023,17.97869,6.528384,5.895417,6.897715,7.054662
"""2023-05-26_B1A…","""GFAP_Glu205Lys…","""2023-05-26_B1A…",15,189,0.150911,"""2023-05-26_B1A…",[3],"""A02""",1.0,1.0,"""GFAP""","""VUSmut_GDEh10""","""F01""",6.0,"""FVrry_P02_Q02""","""A01""","""VUSmut_GDEh10_…","""allele""","""Glu205Lys""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""GFAP_Glu205Lys…","""2023_05_30_B1A…",false,"""GFAP""","""Glu205Lys""","""GFAP Glu205Lys…","""GFAP Glu205Lys…","""2023_05_30_B1A…",2.9596e9,16.0,…,2.534944,0.57872,0.619109,0.60825,0.605351,0.583629,0.61437,0.729873,0.955981,0.558122,0.574416,0.545081,0.547292,4.464949,4.084821,5.336582,5.922374,4.484673,2.454185,8.923723,13.597341,4.221306,4.807722,4.362398,4.592364,10.573476,11.862023,11.818978,12.914687,11.24199,10.599282,19.018203,25.656542,11.931021,10.890998,10.534774,10.432102
"""2023-05-26_B1A…","""GFAP_Glu205Lys…","""2023-05-26_B1A…",15,161,0.103337,"""2023-05-26_B1A…",[0],"""A02""",1.0,1.0,"""GFAP""","""VUSmut_GDEh10""","""F01""",6.0,"""FVrry_P02_Q02""","""A01""","""VUSmut_GDEh10_…","""allele""","""Glu205Lys""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""GFAP_Glu205Lys…","""2023_05_30_B1A…",false,"""GFAP""","""Glu205Lys""","""GFAP Glu205Lys…","""GFAP Glu205Lys…","""2023_05_30_B1A…",2.0550e9,17.0,…,1.366404,0.762455,0.911409,0.799866,0.827875,0.810781,0.392857,1.229405,0.891486,0.720598,0.75145,0.728531,0.753265,0.553061,0.744264,0.581945,0.524833,0.708598,1.739796,1.180006,0.643088,0.498116,0.604426,0.581243,0.587546,1.892735,3.511775,2.743783,2.622015,2.519251,12.336735,7.191781,3.982413,2.155372,2.629256,2.318919,2.332629
"""2023-05-26_B1A…","""GFAP_Glu205Lys…","""2023-05-26_B1A…",15,190,0.099247,"""2023-05-26_B1A…",[0],"""A02""",1.0,1.0,"""GFAP""","""VUSmut_GDEh10""","""F01""",6.0,"""FVrry_P02_Q02""","""A01""","""VUSmut_GDEh10_…","""allele""","""Glu205Lys""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""GFAP_Glu205Lys…","""2023_05_30_B1A…",false,"""GFAP""","""Glu205Lys""","""GFAP Glu205Lys…","""GFAP Glu205Lys…","""2023_05_30_B1A…",2.0550e9,17.0,…,0.238012,0.92769,0.879547,0.879287,1.026635,1.077635,0.978465,1.063912,0.181947,0.847238,0.858083,0.833787,0.898102,0.989205,0.572382,0.928442,1.413872,1.858282,0.599639,1.338408,8.996219,0.788998,0.788801,0.789976,0.901218,1.336934,1.474125,1.521239,2.235957,1.160699,1.506862,2.325627,4.416352,1.42666,1.347653,1.348762,1.519655
"""2023-05-26_B1A…","""GFAP_Glu205Lys…","""2023-05-26_B1A…",15,165,0.115615,"""2023-05-26_B1A…",[0],"""A02""",1.0,1.0,"""GFAP""","""VUSmut_GDEh10""","""F01""",6.0,"""FVrry_P02_Q02""","""A01""","""VUSmut_GDEh10_…","""allele""","""Glu205Lys""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""GFAP_Glu205Lys…","""2023_05_30_B1A…",false,"""GFAP""","""Glu205Lys""","""GFAP Glu205Lys…","""GFAP Glu205Lys…","""2023_05_30_B1A…",2.8972e9,18.0,…,0.198287,1.865373,1.955528,1.884951,2.015727,1.935524,1.690139,2.067797,0.32133,1.769722,1.840348,1.755898,1.8437,53.496621,47.43308,47.150506,65.331319,78.2431,3.793354,68.255979,233.551939,47.271524,59.903199,45.69567,44.178435,10.564933,11.856852,10.649994,14.882466,11.205164,2.809164,14.686997,12.141274,11.600185,12.605974,10.181699,9.974999
"""2023-05-26_B1A…","""GFAP_Glu205Lys…","""2023-05-26_B1A…",15,206,0.096313,"""2023-05-26_B1A…",[0],"""A02""",1.0,1.0,"""GFAP""","""VUSmut_GDEh10""","""F01""",6.0,"""FVrry_P02_Q02""","""A01""","""VUSmut_GDEh10_…","""allele""","""Glu205Lys""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""GFAP_Glu205Lys…","""2023_05_30_B1A…",false,"""GFAP""","""Glu205Lys""","""GFAP Glu205Lys…","""GFAP Glu205Lys…","""2023_05_30_B1A…",2.8972e9,18.0,…,0.398355,1.085035,1.167056,0.981834,1.111848,0.489632,0.109375,1.027409,1.091956,0.943505,0.973507,0.988114,0.952713,26.819178,32.326158,21.060615,28.467299,89.026834,5.777344,20.065395,49.595733,19.23445,20.644767,27.522094,19.737113,5.021998,5.062674,4.706952,5.312403,15.744417,0.246094,3.493207,15.167289,3.763847,3.843027,4.85393,3.907483
"""2023-05-26_B1A…","""FBP1_Asn213Lys…","""2023-05-26_B1A…",15,178,0.206,"""2023-05-26_B1A…",[0],"""A03""",1.0,1.0,"""FBP1""","""VUSmut_GDEh13""","""F01""",6.0,"""FVrry_P02_Q01""","""A02""","""VUSmut_GDEh13_…","""allele""","""Asn213Lys""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""FBP1_Asn213Lys…","""2023_05_30_B1A…",false,"""FBP1""","""Asn213Lys""","""FBP1 Asn213Lys…","""FBP1 Asn213Lys…","""2023_05_30_B1A…",1.8441e9,19.0,…,0.190099,0.558212,0.575699,0.567422,0.514132,0.596238,0.819799,0.680542,0.645536,0.570188,0.532925,0.514905,0.514665,8.703689,7.669552,8.695668,8.054806,9.134741,9.790927,10.45779,11.48924,8.474879,8.139735,8.179271,7.833612,10.036319,9.215491,11.545623,9.603137,12.747118,14.015796,12.933567,12.358573,10.582324,8.203426,8.29518,8.974831
"""2023-05-26_B1A…","""FBP1_Asn213Lys…","""2023-05-26_B1A…",15,176,0.134109,"""2023-05-26_B1A…",[0],"""A03""",1.0,1.0,"""FBP1""","""VUSmut_GDEh13""","""F01""",6.0,"""FVrry_P02_Q01""","""A02""","""VUSmut_GDEh13_…","""allele""","""Asn213Lys""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""FBP1_Asn213Lys…","""2023_05_30_B1A…",false,"""FBP1""","""Asn213Lys""","""FBP1 Asn213Lys…","""FBP1 Asn213Lys…","""2023_05_30_B1A…",3.5809e9,20.0,…,0.344357,0.743111,0.76714,0.654601,0.715709,0.914303,0.760617,0.682484,0.842111,0.67068,0.666557,0.667001,0.650946,8.739469,8.853578,8.0253,7.356518,8.820238,10.532908,9.128138,7.478648,8.238118,7.778321,7.512358,7.265708,9.623358,10.946707,9.550027,10.37971,15.325122,17.084719,7.044214,18.367851,9.215095,8.018763,9.517477,7.96618
"""2023-05-26_B1A…","""FBP1_Asn213Lys…","""2023-05-26_B1A…",15,171,0.210243,"""2023-05-26_B1A…",[3],"""A03""",1.0,1.0,"""FBP1""","""VUSmut_GDEh13""","""F01""",6.0,"""FVrry_P02_Q01""","""A02""","""VUSmut_GDEh13_…","""allele""","""Asn213Lys""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""FBP1_Asn213Lys…","""2023_05_30_B1A…",false,"""FBP1""","""Asn213Lys""","""FBP1 Asn213Lys…","""FBP1 Asn213Lys…","""2023_05_30_B1A…",1.7066e9,21.0,…,0.099597,0.70383,0.776584,0.72093,0.697296,0.780646,1.194444,0.850169,0.83809,0.679737,0.702575,0.672557,0.680577,0.06243,0.076089,0.06483,0.064719,0.074399,0.143121,0.088309,0.085906,0.061508,0.064244,0.059874,0.062952,2.852163,3.457231,3.037473,2.535214,2.99591,5.964521,4.379395,3.130752,2.710036,2.960245,2.758515,2.676858
