## 1.load packages

In [1]:
import os
import numpy as np
import pandas as pd
import scvelo as scv

## 2.params

In [2]:
os.chdir("D:/Projects/CASH-Human/")
dir_for_ldata_CASH = "./data/loom/ldata_CASH.h5ad"
dir_for_obj_meta = "./data/obj_meta/"

## 3. Load ldata_CASH

In [3]:
ldata = scv.read(dir_for_ldata_CASH, cache=False)
ldata.var_names_make_unique()
print(ldata)
print(ldata.obs)
print(len(ldata.obs['barcode']))
print(len(ldata.obs['barcode'].unique()))

AnnData object with n_obs × n_vars = 26440 × 37487
    obs: 'patient', 'barcode', 'group', 'patient_id'
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand'
    layers: 'ambiguous', 'matrix', 'spliced', 'unspliced'
                       patient             barcode group patient_id
CellID                                                             
N3:AAAGTAGCACCAACCGx  Patient1  AAAGTAGCACCAACCG-1  CASH          1
N3:AAATGCCCATTAACCGx  Patient1  AAATGCCCATTAACCG-1  CASH          1
N3:AAAGTAGCATGTTCCCx  Patient1  AAAGTAGCATGTTCCC-1  CASH          1
N3:AAAGTAGTCAACTCTTx  Patient1  AAAGTAGTCAACTCTT-1  CASH          1
N3:AACCATGCAGTCACTAx  Patient1  AACCATGCAGTCACTA-1  CASH          1
...                        ...                 ...   ...        ...
N7:TTTGTCATCTACTCATx  Patient5  TTTGTCATCTACTCAT-5  CASH          5
N7:TTTGTCACAAGCCGCTx  Patient5  TTTGTCACAAGCCGCT-5  CASH          5
N7:TTTGTCATCGAACGGAx  Patient5  TTTGTCATCGAACGGA-5  CASH          5
N7:TTTGGTTGTGCATCTAx  Patient

## 4. subset ldata per cell

In [12]:
dir_for_ldata_sub = "./data/loom/ldata_sub"
if os.path.exists(dir_for_ldata_sub):
    print("directory has been created")
else:
    os.makedirs(dir_for_ldata_sub)
    print(f"Created directory: {dir_for_ldata_sub}")

for i_file in os.listdir(dir_for_obj_meta):
    print(i_file)
    cell = str.split(i_file, "_")[0]
    print(cell)

    ## load meta
    meta = pd.read_csv(os.path.join(dir_for_obj_meta, i_file))
    print(meta.head(1))

    ## subset
    ldata_sub = ldata[np.isin(ldata.obs.barcode, meta.barcode)]
    print(ldata_sub)
    print(ldata_sub.obs)

    ## add meta
    meta_matched = ldata_sub.obs[["barcode"]].merge(meta, on = "barcode", how = "left")
    print(meta_matched)
    ldata_sub.obs["cluster"] = meta_matched["sub_celltype"].values
    ldata_sub.obs["UMAP1"] = meta_matched["UMAP1"].values
    ldata_sub.obs["UMAP2"] = meta_matched["UMAP2"].values
    umap = ldata_sub.obs[["barcode", "UMAP1", "UMAP2"]]
    umap.set_index("barcode", inplace=True)
    ldata_sub.obsm["umap"] = umap.values 
    print(ldata_sub)
    print(ldata_sub.obs.head())
    print(ldata_sub.obsm)
    ldata_sub.write(os.path.join(dir_for_ldata_sub, f"{cell}_ldata.h5ad"), compression= "gzip")

directory has been created
B_obj_meta.csv
B
              barcode     orig.ident group   patient  sub_celltype     UMAP1  \
0  AAAGTAGCAGCTCGCA-1  CASH_Patient1  CASH  Patient1  c36_Bn_TCL1A -1.474113   

      UMAP2  
0 -1.295199  
View of AnnData object with n_obs × n_vars = 673 × 37487
    obs: 'patient', 'barcode', 'group', 'patient_id'
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand'
    layers: 'ambiguous', 'matrix', 'spliced', 'unspliced'
                       patient             barcode group patient_id
CellID                                                             
N3:AAAGTAGCAGCTCGCAx  Patient1  AAAGTAGCAGCTCGCA-1  CASH          1
N3:AACCATGAGTGCGATGx  Patient1  AACCATGAGTGCGATG-1  CASH          1
N3:AAGACCTGTAGCGATGx  Patient1  AAGACCTGTAGCGATG-1  CASH          1
N3:AATCCAGAGTAATCCCx  Patient1  AATCCAGAGTAATCCC-1  CASH          1
N3:AATCGGTTCTTAACCTx  Patient1  AATCGGTTCTTAACCT-1  CASH          1
...                        ...                 ...   ...      