# Used Data

|Cells     |DNA-SLX   |RNA-IDX |
|----------|----------|--------|
|NA12878   |SLX-24514 |SITTD8  |


## Setup dataset

In [None]:
import pandas as pd
from pathlib import Path
import subprocess

In [None]:
# --- User-supplied locations (fill in before running the notebook) ---

# Directory which will contain the profile TSVs.
profiles_dir = Path("")

# Absolute path to the root of the cell matching workflow (cell_matching).
workflow_dir = Path("")

# Base directory against which the gene reference file below is resolved.
# NOTE(review): presumably the root of the scRNA-seq processing workflow - confirm.
scrna_cnv_dir = Path("")

# Gene reference file (should be present in the scRNA-seq processing workflow).
genes_ref = scrna_cnv_dir.joinpath("resources/annotate_genes_hg19_update_20230126.txt")

# --- Sample identifiers and inputs ---

sample_name = "cell2cell"

# Absolute path to the scAbsolute rds object.
scabsolute_result = ""

# RNA sample name.
rna_idx = ""

# Samplesheet location, placed inside the profiles directory.
samplesheet_path = profiles_dir.joinpath("data_file.csv")

In [None]:
# Maps each data source name to a tuple ``(modes, output_path)``:
#   * modes: list of profile modes to create (at least one of "cn", "cat").
#   * output_path: path to the output dir for the method, which should
#     contain one directory for each SLX.
methods = {
    "scabsolute": (["cn"], scabsolute_result),
    # QC-filtered gene expression profiles.
    "rna": (["cn"], scrna_cnv_dir / "results/cellranger_post" / rna_idx),
    "copykat": (["cn"], scrna_cnv_dir / "results/copykat" / rna_idx),
    "copyvae": (["cn"], scrna_cnv_dir / "results/copyvae" / rna_idx),
}


## Extract all profiles (scAbsolute, gene expression and scRNA-seq based CNV) and write samplesheet

In [None]:
# For every (method, mode) pair: run the transform script on that method's
# own input, then record the expected profile TSV in the samplesheet.
with open(samplesheet_path, "wt") as outhandle:
    outhandle.write("Data_source,Data_file,Mode\n")
    for method, (modes, method_input) in methods.items():
        for mode in modes:
            # NOTE(review): `mode` is not forwarded to transform_data.py, and
            # "-n unfiltered" differs from the `sample_name` prefix used for
            # the samplesheet file names below - confirm against the script's
            # command-line interface.
            subprocess.run(
                [
                    str(workflow_dir / "cell_matching/scripts/transform_data.py"),
                    # Use this method's input and type instead of always
                    # re-running the scAbsolute transform.
                    "-i", str(method_input),
                    "-n", "unfiltered",
                    "-o", str(profiles_dir.absolute()),
                    "-t", method,
                    "-g", str(genes_ref),
                ],
                # Fail loudly so the samplesheet never lists a profile whose
                # transform step did not succeed.
                check=True,
            )
            outhandle.write(f"{method},{profiles_dir.absolute()/f'{sample_name}_{method}_{mode}.tsv'},{mode}\n")
        