In [1]:
# Imports
import pathlib
import pandas as pd
import numpy as np
import polars as pl

import black
import jupyter_black

jupyter_black.load(
    lab=False,
    line_length=79,
    verbosity="DEBUG",
    target_version=black.TargetVersion.PY310,
)

import warnings
warnings.filterwarnings("ignore")


DEBUG:jupyter_black:config: {'line_length': 79, 'target_versions': {<TargetVersion.PY310: 10>}}


<IPython.core.display.Javascript object>

In [2]:
# Define all paths and files
map_data_dir = pathlib.Path("/dgx1nas1/storage/data/jess/varchamp/sc_data/map_data/").resolve(strict=True)
bl_path = pathlib.Path(f'{map_data_dir}/norm_map_data.parquet')

In [4]:
# Read in data
bl = pl.read_parquet(bl_path)

feat_col = [i for i in bl.columns if "Metadata_" not in i] 
feat_col = [i for i in feat_col if i not in ['n_pos_pairs', 'n_total_pairs', 'average_precision']]

In [5]:
# Count the number of NaNs per cell
nd_cells = bl.select(feat_col).to_numpy()
nan_mask = np.isnan(nd_cells)
nan_per_cell = np.sum(nan_mask, axis=1).reshape(-1, 1) 

In [6]:
nan_s = pl.Series("nan_per_cell", nan_per_cell)
bl.insert_column(6, nan_s)

Metadata_allele,Metadata_Plate,Metadata_CellID,n_pos_pairs,n_total_pairs,average_precision,nan_per_cell,Metadata_Plate_right,Metadata_Well,Metadata_Batch,Metadata_allele_set,Metadata_Symbol,Metadata_entry_plate,Metadata_entry_well,Metadata_entry_numb,Metadata_dest_plate,Metadata_dest_well,Metadata_entry_ID,Metadata_node_type,Metadata_aa_change,Metadata_c96,Metadata_r96,Metadata_r384,Metadata_c384,Metadata_r384_letter,Metadata_384_well,Metadata_384ID,Metadata_allele_right,Metadata_batch,Metadata_control,Metadata_Gene,Metadata_MT,Metadata_Variant,Metadata_Sample_Unique,Metadata_batch_Plate,Metadata_TableNumber,Metadata_ImageNumber,…,Nuclei_Texture_Variance_AGP_5_03_256,Nuclei_Texture_Variance_DNA_10_00_256,Nuclei_Texture_Variance_DNA_10_01_256,Nuclei_Texture_Variance_DNA_10_02_256,Nuclei_Texture_Variance_DNA_10_03_256,Nuclei_Texture_Variance_DNA_20_00_256,Nuclei_Texture_Variance_DNA_20_01_256,Nuclei_Texture_Variance_DNA_20_02_256,Nuclei_Texture_Variance_DNA_20_03_256,Nuclei_Texture_Variance_DNA_5_00_256,Nuclei_Texture_Variance_DNA_5_01_256,Nuclei_Texture_Variance_DNA_5_02_256,Nuclei_Texture_Variance_DNA_5_03_256,Nuclei_Texture_Variance_GFP_10_00_256,Nuclei_Texture_Variance_GFP_10_01_256,Nuclei_Texture_Variance_GFP_10_02_256,Nuclei_Texture_Variance_GFP_10_03_256,Nuclei_Texture_Variance_GFP_20_00_256,Nuclei_Texture_Variance_GFP_20_01_256,Nuclei_Texture_Variance_GFP_20_02_256,Nuclei_Texture_Variance_GFP_20_03_256,Nuclei_Texture_Variance_GFP_5_00_256,Nuclei_Texture_Variance_GFP_5_01_256,Nuclei_Texture_Variance_GFP_5_02_256,Nuclei_Texture_Variance_GFP_5_03_256,Nuclei_Texture_Variance_Mito_10_00_256,Nuclei_Texture_Variance_Mito_10_01_256,Nuclei_Texture_Variance_Mito_10_02_256,Nuclei_Texture_Variance_Mito_10_03_256,Nuclei_Texture_Variance_Mito_20_00_256,Nuclei_Texture_Variance_Mito_20_01_256,Nuclei_Texture_Variance_Mito_20_02_256,Nuclei_Texture_Variance_Mito_20_03_256,Nuclei_Texture_Variance_Mito_5_00_256,Nuclei_Texture_Variance_Mito_5_01_256,Nuclei_Texture_Variance_Mito_5_02_256,Nuclei_Texture_Variance_Mito_5_03_256
str,str,str,i64,i64,f64,list[i64],str,str,f64,f64,str,str,str,f64,str,str,str,str,str,f64,f64,f64,f64,str,str,str,str,str,bool,str,str,str,str,str,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""SGCA_""","""2023-05-30_B1A…","""2023-05-30_B1A…",15,151,0.222026,[0],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",9.18442321e8,3.0,…,-0.149773,-1.492684,-1.606854,-1.572236,-1.640447,-1.288093,-0.730511,-1.176142,-0.711019,-1.648421,-1.668214,-1.648521,-1.635086,-0.331336,-0.287098,-0.267861,-0.250638,-0.065029,0.01195,-0.047914,-0.335462,-0.312453,-0.262383,-0.28019,-0.289452,1.20401,1.323741,1.251662,1.154016,1.473899,3.105966,1.840996,1.247403,1.23819,1.209079,1.216536,1.236908
"""SGCA_""","""2023-05-30_B1A…","""2023-05-30_B1A…",15,139,0.11926,[0],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",9.18442321e8,3.0,…,-1.212465,-1.508272,-1.66275,-1.593375,-1.631005,-1.352689,-0.788987,-1.238401,-0.826313,-1.66453,-1.685227,-1.646784,-1.625174,-0.331336,-0.287098,-0.267861,-0.250638,-0.065029,0.01195,-0.047914,-0.335462,-0.312453,-0.262383,-0.28019,-0.289452,1.073172,1.044423,1.079382,0.987235,1.244935,1.363404,1.241244,1.847754,1.079297,1.078707,1.060516,1.016406
"""SGCA_""","""2023-05-30_B1A…","""2023-05-30_B1A…",15,137,0.090643,[0],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",4.0749e9,9.0,…,-0.442175,-0.202202,-0.251772,-0.367657,-0.253337,-0.285188,-0.458464,-0.106483,-0.178144,-0.538085,-0.4405,-0.51638,-0.41955,-0.330879,-0.286457,-0.267375,-0.250033,-0.063926,0.01195,-0.047914,-0.335462,-0.312091,-0.26197,-0.279694,-0.288908,2.052783,1.859879,1.822614,2.121436,2.980951,3.261539,1.718576,2.196109,1.954574,2.040526,1.852038,2.012003
"""SGCA_""","""2023-05-30_B1A…","""2023-05-30_B1A…",15,122,0.093566,[0],"""2023-05-30_B1A…","""A01""",1.0,1.0,"""SGCA""","""DisWTGDEh03""","""G02""",15.0,"""FVrry_P04_Q01""","""A01""","""DisWTGDEh03_G0…","""disease_wt""",,1.0,1.0,1.0,1.0,"""A""","""A_1""","""1_1""","""SGCA_""","""2023_05_30_B1A…",false,"""SGCA""",,"""SGCA""","""SGCA""","""2023_05_30_B1A…",4.0749e9,9.0,…,-1.184258,-0.93781,-1.072723,-1.045281,-0.975291,-0.638582,-0.056753,-0.583861,-0.326079,-1.159807,-1.167542,-1.141327,-1.089139,-0.331336,-0.287098,-0.267861,-0.250638,-0.065029,0.01195,-0.047914,-0.335462,-0.312453,-0.262383,-0.28019,-0.289452,1.47543,1.460369,1.290255,1.475528,1.985418,1.722662,1.201488,2.028792,1.382724,1.429929,1.439068,1.435422
"""STXBP1_Asp207A…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,142,0.108001,[0],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",1.3297e9,10.0,…,-0.323611,-0.447108,-0.449099,-0.501716,-0.433406,0.219447,0.375925,-0.358187,0.536197,-0.575462,-0.539367,-0.549884,-0.541343,-34.743619,-33.919626,-34.844139,-34.108659,-38.303545,-38.386475,-37.386153,-39.129026,-35.746577,-35.285938,-35.768467,-35.095054,-0.952348,-0.874962,-0.965419,-0.901464,-0.853765,0.00203,-0.883655,-0.626178,-1.020993,-1.009458,-1.023596,-1.003276
"""STXBP1_Asp207A…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,133,0.514448,[0],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",1.3297e9,10.0,…,-0.280568,-0.691679,-0.500098,-0.614817,-0.649927,-0.525695,-0.150329,-0.157443,-0.140059,-0.742501,-0.678263,-0.748076,-0.702375,-34.761226,-33.938223,-34.860629,-34.125976,-38.323942,-38.415244,-37.40433,-39.137189,-35.764696,-35.302992,-35.786452,-35.114326,-0.922088,-0.816713,-0.938542,-0.850433,-0.917527,-0.330841,-0.796971,-0.507361,-0.968541,-0.961472,-0.989048,-0.955618
"""STXBP1_Asp207A…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,137,0.427656,[0],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",3.8339e9,12.0,…,-0.475946,0.309543,0.285065,0.315395,0.677051,0.634716,1.191545,0.597454,1.066751,0.318405,0.298055,0.357544,0.362447,-34.761497,-33.938223,-34.860927,-34.126246,-38.324429,-38.415244,-37.404653,-39.137708,-35.764929,-35.30306,-35.786695,-35.114562,0.841817,0.800136,0.699382,0.994192,1.351941,1.719364,0.413671,1.508528,0.885487,0.907737,0.953875,0.899889
"""STXBP1_Asp207A…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,142,0.462364,[0],"""2023-05-30_B1A…","""A02""",1.0,1.0,"""STXBP1""","""VUSmut_GDEh02""","""D08""",60.0,"""FVrry_P04_Q02""","""A01""","""VUSmut_GDEh02_…","""allele""","""Asp207Asn""",1.0,1.0,1.0,2.0,"""A""","""A_2""","""1_2""","""STXBP1_Asp207A…","""2023_05_30_B1A…",false,"""STXBP1""","""Asp207Asn""","""STXBP1 Asp207A…","""STXBP1 Asp207A…","""2023_05_30_B1A…",2.2331e9,18.0,…,1.180349,-0.974859,-0.884736,-0.816282,-0.842834,-0.956392,-1.366649,-0.434723,-0.527613,-1.012768,-0.96467,-0.990978,-0.995133,-34.761497,-33.938223,-34.860927,-34.126246,-38.324429,-38.415244,-37.404653,-39.137708,-35.764929,-35.30306,-35.786695,-35.114562,0.447563,0.283896,0.50136,0.473952,0.661059,0.237296,0.664118,3.66376,0.391157,0.337928,0.334516,0.334255
"""SH3BP2_Gly420G…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,139,0.417637,[0],"""2023-05-30_B1A…","""A03""",1.0,1.0,"""SH3BP2""","""VUSmut_GDEh07""","""H11""",88.0,"""FVrry_P04_Q01""","""A02""","""VUSmut_GDEh07_…","""allele""","""Gly420Glu""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""SH3BP2_Gly420G…","""2023_05_30_B1A…",false,"""SH3BP2""","""Gly420Glu""","""SH3BP2 Gly420G…","""SH3BP2 Gly420G…","""2023_05_30_B1A…",4.0440e9,19.0,…,-0.473929,6.401682,6.641083,7.21359,4.755174,-1.159761,-1.171948,-0.980524,-1.331724,7.360787,4.983042,6.385035,6.080521,-4.205689,-4.232825,-4.251635,-4.240708,-4.892827,-4.353308,-4.873873,-4.462264,-4.300406,-4.23643,-4.272252,-4.25243,9.994259,8.504515,10.393965,12.082558,0.309819,0.291249,0.312479,0.316978,10.270793,9.166169,9.632175,10.663784
"""SH3BP2_Gly420G…","""2023-05-30_B1A…","""2023-05-30_B1A…",15,112,0.433566,[0],"""2023-05-30_B1A…","""A03""",1.0,1.0,"""SH3BP2""","""VUSmut_GDEh07""","""H11""",88.0,"""FVrry_P04_Q01""","""A02""","""VUSmut_GDEh07_…","""allele""","""Gly420Glu""",2.0,1.0,1.0,3.0,"""A""","""A_3""","""1_3""","""SH3BP2_Gly420G…","""2023_05_30_B1A…",false,"""SH3BP2""","""Gly420Glu""","""SH3BP2 Gly420G…","""SH3BP2 Gly420G…","""2023_05_30_B1A…",1.48263253e8,22.0,…,5.112497,1.039729,1.136688,0.965463,1.017666,0.70347,-1.104568,1.099589,1.013388,0.879528,0.921143,0.845329,0.866863,-4.205689,-4.232825,-4.251635,-4.240708,-4.892827,-4.353308,-4.873873,-4.462264,-4.300406,-4.23643,-4.272252,-4.25243,0.542701,0.491803,0.5451,0.554155,0.491329,0.313566,0.566526,0.717864,0.53605,0.520672,0.539749,0.546525
