In [1]:
# Imports
import pathlib
import pandas as pd
import numpy as np
import polars as pl

import black
import jupyter_black

jupyter_black.load(
    lab=False,
    line_length=79,
    verbosity="DEBUG",
    target_version=black.TargetVersion.PY310,
)

import warnings
warnings.filterwarnings("ignore")


DEBUG:jupyter_black:config: {'line_length': 79, 'target_versions': {<TargetVersion.PY310: 10>}}


<IPython.core.display.Javascript object>

In [2]:
# Define all paths and files
map_data_dir = pathlib.Path("/dgx1nas1/storage/data/jess/varchamp/sc_data/map_data/").resolve(strict=True)
bl_path = pathlib.Path(f'{map_data_dir}/norm_map_data.parquet')

In [3]:
# Read in data
bl = pl.read_parquet(bl_path)

feat_col = [i for i in bl.columns if "Metadata_" not in i] 
feat_col = [i for i in feat_col if i not in ['n_pos_pairs', 'n_total_pairs', 'average_precision']]

In [4]:
# Count the number of NaNs per cell
nd_cells = bl.select(feat_col).to_numpy()
nan_mask = np.isnan(nd_cells)
nan_per_cell = np.sum(nan_mask, axis=1).reshape(-1, 1) 

In [5]:
nan_s = pl.Series("nan_per_cell", nan_per_cell)
bl.insert_column(6, nan_s)

# clearly the NaN number do not match the average precision results
# next to check is the subsetting script

Metadata_Plate,Metadata_allele,Metadata_CellID,n_pos_pairs,n_total_pairs,average_precision,nan_per_cell,Metadata_Plate_right,Metadata_Well,Metadata_Batch,Metadata_allele_set,Metadata_Symbol,Metadata_entry_plate,Metadata_entry_well,Metadata_entry_numb,Metadata_dest_plate,Metadata_dest_well,Metadata_entry_ID,Metadata_node_type,Metadata_aa_change,Metadata_c96,Metadata_r96,Metadata_r384,Metadata_c384,Metadata_r384_letter,Metadata_384_well,Metadata_384ID,Metadata_allele_right,Metadata_batch,Metadata_control,Metadata_Gene,Metadata_MT,Metadata_Variant,Metadata_Sample_Unique,Metadata_batch_Plate,Metadata_TableNumber,Metadata_ImageNumber,…,Nuclei_Texture_Variance_AGP_5_03_256,Nuclei_Texture_Variance_DNA_10_00_256,Nuclei_Texture_Variance_DNA_10_01_256,Nuclei_Texture_Variance_DNA_10_02_256,Nuclei_Texture_Variance_DNA_10_03_256,Nuclei_Texture_Variance_DNA_20_00_256,Nuclei_Texture_Variance_DNA_20_01_256,Nuclei_Texture_Variance_DNA_20_02_256,Nuclei_Texture_Variance_DNA_20_03_256,Nuclei_Texture_Variance_DNA_5_00_256,Nuclei_Texture_Variance_DNA_5_01_256,Nuclei_Texture_Variance_DNA_5_02_256,Nuclei_Texture_Variance_DNA_5_03_256,Nuclei_Texture_Variance_GFP_10_00_256,Nuclei_Texture_Variance_GFP_10_01_256,Nuclei_Texture_Variance_GFP_10_02_256,Nuclei_Texture_Variance_GFP_10_03_256,Nuclei_Texture_Variance_GFP_20_00_256,Nuclei_Texture_Variance_GFP_20_01_256,Nuclei_Texture_Variance_GFP_20_02_256,Nuclei_Texture_Variance_GFP_20_03_256,Nuclei_Texture_Variance_GFP_5_00_256,Nuclei_Texture_Variance_GFP_5_01_256,Nuclei_Texture_Variance_GFP_5_02_256,Nuclei_Texture_Variance_GFP_5_03_256,Nuclei_Texture_Variance_Mito_10_00_256,Nuclei_Texture_Variance_Mito_10_01_256,Nuclei_Texture_Variance_Mito_10_02_256,Nuclei_Texture_Variance_Mito_10_03_256,Nuclei_Texture_Variance_Mito_20_00_256,Nuclei_Texture_Variance_Mito_20_01_256,Nuclei_Texture_Variance_Mito_20_02_256,Nuclei_Texture_Variance_Mito_20_03_256,Nuclei_Texture_Variance_Mito_5_00_256,Nuclei_Texture_Variance_Mito_5_01_256,Nuclei_Texture_Variance_Mito_5_02_256,Nuclei_Texture_Variance_Mito_5_03_256
str,str,str,i64,i64,f64,list[i64],str,str,f64,f64,str,str,str,f64,str,str,str,str,str,f64,f64,f64,f64,str,str,str,str,str,bool,str,str,str,str,str,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2023-05-30_B1A…","""SGCA_""","""2023-05-30_B1A…",15,133,1.0,[36],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",6.81371757e8,2.0,…,-2.646108,-1.84769,-2.009282,-1.945559,-2.013849,-1.709098,-1.265755,-1.704033,-1.273739,-1.997757,-2.025088,-1.985578,-1.974301,-0.331336,-0.287098,-0.267861,-0.250638,-0.065029,0.01195,-0.047914,-0.335462,-0.312453,-0.262383,-0.28019,-0.289452,0.591681,0.580494,0.594624,0.590686,0.604622,0.636227,0.610804,0.609371,0.60681,0.602345,0.607116,0.597077
"""2023-05-30_B1A…","""SGCA_""","""2023-05-30_B1A…",15,134,1.0,[36],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",2.2555093e8,4.0,…,0.116216,-1.158843,-1.586812,-1.438906,-1.395323,-1.709098,-1.265755,-1.704033,-1.273739,-1.314871,-1.312871,-1.326644,-1.406801,-0.331336,-0.287098,-0.267861,-0.250638,-0.065029,0.01195,-0.047914,-0.335462,-0.312453,-0.262383,-0.28019,-0.289452,3.462165,2.346083,2.364915,3.450576,0.601508,0.632423,0.607819,0.608675,3.401776,3.824682,3.263643,3.093475
"""2023-05-30_B1A…","""STXBP1_Asp207A…","""2023-05-30_B1A…",15,137,1.0,[42],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",1.3297e9,10.0,…,0.486254,-0.629614,-0.640392,-0.585283,-0.538356,-0.524619,-0.81021,-0.166246,0.013372,-0.701402,-0.641282,-0.686407,-0.665041,-34.744597,-33.919276,-34.843273,-34.114489,-38.304035,-38.352985,-37.365931,-39.123876,-35.748316,-35.287351,-35.771193,-35.098164,-0.329491,-0.385603,-0.355976,-0.401949,-0.352295,0.580561,0.11924,0.528511,-0.494999,-0.498839,-0.479323,-0.458016
"""2023-05-30_B1A…","""STXBP1_Asp207A…","""2023-05-30_B1A…",15,128,1.0,[42],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",3.8339e9,12.0,…,-1.066805,-0.92582,-0.926819,-0.946871,-0.886718,-0.777988,-1.515865,-0.680835,-0.497536,-1.034902,-0.984862,-1.008603,-0.98417,-34.754675,-33.92997,-34.855888,-34.117461,-38.307996,-38.415244,-37.399508,-39.11945,-35.759137,-35.296924,-35.780434,-35.108228,-0.494259,-0.296792,-0.34681,-0.219133,-0.147661,-0.498615,0.25046,0.570016,-0.556592,-0.416755,-0.474417,-0.419299
"""2023-05-30_B1A…","""STXBP1_Asp207A…","""2023-05-30_B1A…",15,126,1.0,[42],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",1.3865e9,13.0,…,8.607228,-0.874901,-0.809207,-0.858326,-0.85975,-1.796597,-1.583246,-1.691678,-1.430795,-0.969436,-0.880986,-0.967596,-0.926275,-34.761497,-33.938223,-34.860927,-34.126246,-38.324429,-38.415244,-37.404653,-39.137708,-35.764929,-35.30306,-35.786695,-35.114562,-0.460605,-0.140616,-0.407636,-0.386347,-1.200077,-0.791974,-1.101142,-0.823378,-0.543733,-0.44621,-0.546564,-0.468363
"""2023-05-30_B1A…","""STXBP1_Asp207A…","""2023-05-30_B1A…",15,117,1.0,[42],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",1.4634e9,17.0,…,-0.219725,7.246385,5.596378,7.517625,1.702828,0.431829,11.073431,6.834569,1.091558,7.549825,7.636388,7.54753,7.534541,-34.751849,-33.926911,-34.850615,-34.115623,-38.311335,-38.398449,-37.394206,-39.113571,-35.755624,-35.292609,-35.776174,-35.103996,4.495002,4.61296,5.120289,2.890554,1.043843,5.147643,5.317991,2.920967,5.362093,5.111246,5.232572,5.139688
"""2023-05-30_B1A…","""SH3BP2_Gly420G…","""2023-05-30_B1A…",15,139,1.0,[42],"""2023-05-30_B1A…","""A03""",1.0,1.0,"""SH3BP2""","""VUSmut_GDEh07""","""H11""",88.0,"""FVrry_P04_Q01""","""A02""","""VUSmut_GDEh07_…","""allele""","""Gly420Glu""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""SH3BP2_Gly420G…","""2023_05_30_B1A…",false,"""SH3BP2""","""Gly420Glu""","""SH3BP2 Gly420G…","""SH3BP2 Gly420G…","""2023_05_30_B1A…",3.6543e9,21.0,…,-1.251948,1.846573,0.570176,6.605719,5.95759,-1.159761,-1.171948,-0.980524,-1.331724,7.235204,8.206024,6.994635,6.54523,-4.205689,-4.232825,-4.251635,-4.240708,-4.892827,-4.353308,-4.873873,-4.462264,-4.300406,-4.23643,-4.272252,-4.25243,2.245186,3.147542,2.667184,1.780743,0.309819,0.291249,0.312479,0.316978,2.804552,2.937092,2.895503,2.438019
"""2023-05-30_B1A…","""SH3BP2_Gly420G…","""2023-05-30_B1A…",15,142,1.0,[42],"""2023-05-30_B1A…","""A03""",1.0,1.0,"""SH3BP2""","""VUSmut_GDEh07""","""H11""",88.0,"""FVrry_P04_Q01""","""A02""","""VUSmut_GDEh07_…","""allele""","""Gly420Glu""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""SH3BP2_Gly420G…","""2023_05_30_B1A…",false,"""SH3BP2""","""Gly420Glu""","""SH3BP2 Gly420G…","""SH3BP2 Gly420G…","""2023_05_30_B1A…",1.48263253e8,22.0,…,0.534139,-0.16331,-0.132631,-0.10075,-0.226874,-0.582911,-0.342001,-0.078305,-0.361807,-0.163828,-0.1704,-0.178387,-0.176676,-4.205689,-4.232825,-4.251635,-4.240708,-4.892827,-4.353308,-4.873873,-4.462264,-4.300406,-4.23643,-4.272252,-4.25243,1.516028,1.688321,1.720534,1.73287,1.592061,2.202329,3.482721,3.980293,1.626891,1.530785,1.5184,1.583669
"""2023-05-30_B1A…","""STXBP1_Pro480L…","""2023-05-30_B1A…",15,124,1.0,[41],"""2023-05-30_B1A…","""A04""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""E11""",85.0,"""FVrry_P04_Q02""","""A02""","""VUSmut_GDEh02_…","""allele""","""Pro480Leu""",2.0,1.0,1.0,4.0,"""A""","""A_4""","""1_4""","""STXBP1_Pro480L…","""2023_05_30_B1A…",false,"""STXBP1""","""Pro480Leu""","""STXBP1 Pro480L…","""STXBP1 Pro480L…","""2023_05_30_B1A…",3.3920e9,28.0,…,-0.240461,0.385202,0.445457,0.532069,0.39329,0.538507,-0.439083,0.513955,0.724368,0.46231,0.514601,0.520052,0.458577,0.531513,0.525803,0.539001,0.545274,0.516789,0.506303,0.540434,0.573309,0.52531,0.526879,0.528625,0.529088,0.343869,0.326425,0.332351,0.324762,0.288952,0.294061,0.287184,0.26313,0.355986,0.345099,0.352956,0.350039
"""2023-05-30_B1A…","""STXBP1_Pro480L…","""2023-05-30_B1A…",15,129,1.0,[41],"""2023-05-30_B1A…","""A04""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""E11""",85.0,"""FVrry_P04_Q02""","""A02""","""VUSmut_GDEh02_…","""allele""","""Pro480Leu""",2.0,1.0,1.0,4.0,"""A""","""A_4""","""1_4""","""STXBP1_Pro480L…","""2023_05_30_B1A…",false,"""STXBP1""","""Pro480Leu""","""STXBP1 Pro480L…","""STXBP1 Pro480L…","""2023_05_30_B1A…",3.3370e9,32.0,…,8.904545,11.607912,10.217837,11.652991,7.074573,-0.72589,-0.654102,-0.608403,-0.658307,10.142257,10.875133,10.409241,11.142958,0.531513,0.525803,0.539001,0.545274,0.516789,0.506303,0.540434,0.573309,0.52531,0.526879,0.528625,0.529088,0.343869,0.326425,0.332351,0.324762,0.288952,0.294061,0.287184,0.26313,0.355986,0.345099,0.352956,0.350039
