### Generate JSON config file

Enter scGHOST settings

In [1]:
# ---- file and directory locations (placeholders: replace with real paths) ----
schic_directory = "/directory/of/higashi/imputed/maps"
label_info_path = "/path/to/label_info.pickle"
label_info_cell_type_key = "cluster label"
data_directory = "/directory/to/save/scghost/outputs/"

# One entry per chromosome number 1..NUM_CHROMOSOMES; each entry names the
# per-chromosome 'adj' and 'imputed' files and repeats the number as 'integer'.
NUM_CHROMOSOMES = 22
chromosomes = {
    number: dict(
        adj=f'chr{number}_sparse_adj.npy',
        imputed=f'chr{number}_exp1_nbr_5_impute.hdf5',
        integer=number,
    )
    for number in range(1, NUM_CHROMOSOMES + 1)
}

chrom_sizes = 'data/hg19.chrom.sizes'
chrom_indices = None
embeddings_path = "/path/to/exp1_0_origin.npy"
higashi_scab_path = "/path/to/higashi/scAB.hdf5"
cell_type = None

# ---- hyperparameters ----
# random-walk settings (grouped under 'random_walk' in the generated config)
random_walk_num_walks = 50
random_walk_ignore_top = 0.02
random_walk_top_percentile = 0.25

eps = 1e-8
num_clusters = 5
batch_size = 16
epochs = 5
resolution = 500_000
neighbor_contacts = False
kmeans_init = 1

# ---- miscellaneous switches ----
nearest_neighbor_override = None
gpu_uniques = True
cluster_gpu_caching = True

Generate the Python settings dictionary and write it to `tutorial.json`

In [2]:
import json

# Gather the settings defined in the previous cell into the nested layout that
# is serialized to the JSON config file.
settings_dict = {
    'schic_directory': schic_directory,
    'label_info': {
        'path': label_info_path,
        'cell_type_key': label_info_cell_type_key,
    },
    'data_directory': data_directory,
    'chromosomes': chromosomes,
    'chrom_sizes': chrom_sizes,
    'chrom_indices': chrom_indices,
    'embeddings_path': embeddings_path,
    'higashi_scab_path': higashi_scab_path,
    'cell_type': cell_type,
    'random_walk': {
        'num_walks': random_walk_num_walks,
        'ignore_top': random_walk_ignore_top,
        'top_percentile': random_walk_top_percentile,
    },
    # NOTE(review): this value was previously written only under 'epis', which
    # looks like a typo for 'eps'. Both spellings are emitted so a reader of
    # either key finds the value; drop 'epis' once the consumer is confirmed
    # to read 'eps'.
    'eps': eps,
    'epis': eps,
    'num_clusters': num_clusters,
    'batch_size': batch_size,
    'epochs': epochs,
    'resolution': resolution,
    'neighbor_contacts': neighbor_contacts,
    'nearest_neighbor_override': nearest_neighbor_override,
    'gpu_uniques': gpu_uniques,
    'cluster_gpu_caching': cluster_gpu_caching,
    'kmeans_init': kmeans_init,
}

# Write the config next to the notebook. The JSON encoder converts the integer
# chromosome keys to strings and None to null.
with open("tutorial.json", "w") as outfile:
    json.dump(settings_dict, outfile, indent=4)

### Run scGHOST

In [None]:
import subprocess
import sys

# Run scGHOST on the config file written earlier in this notebook.
# - The command is an argument list executed without a shell, so no shell
#   quoting/parsing is involved and it behaves the same across platforms.
# - sys.executable runs scghost.py with the same interpreter (and therefore the
#   same environment) as this kernel rather than whichever `python` is on PATH.
# - check=True raises subprocess.CalledProcessError when scghost.py exits with
#   a non-zero status instead of silently returning the exit code.
subprocess.run(
    [sys.executable, 'scghost.py', '--config', 'tutorial.json'],
    check=True,
).returncode

### Format scGHOST output

In [2]:
import pickle
import os
from tqdm import trange

# Converts scGHOST labels into one BED file per cell.
# NOTE: pickle.load can execute arbitrary code while loading; only point the
# two paths below at pickle files you trust.

# enter labels.pkl path
label_filepath = '/mnt/e/data/scghost_pfc_output/publication_results/labels.pkl'
with open(label_filepath, 'rb') as label_file:  # closed as soon as loading finishes
    labels = pickle.load(label_file)

# enter cropped_indices.pkl path
cropped_indices_filepath = '/mnt/e/data/scghost_pfc_output/publication_results/cropped_indices.pkl'
with open(cropped_indices_filepath, 'rb') as cropped_indices_file:
    cropped_indices = pickle.load(cropped_indices_file)

# enter resolution
resolution = 500000

# enter bed file output directory
bed_file_directory = 'bed_files'
chrom_prefix = 'chr' # change this to '' if chromosomes are labeled chr1,chr2,... instead of 1,2,...

sc_subcompartment_names = ['scA1','scA2','scB1','scB2','scB3'] # default for scGHOST k=5

os.makedirs(bed_file_directory, exist_ok=True)

# labels maps chromosome -> array indexed as [cell][locus]; the number of cells
# is read from the first chromosome's array.
num_cells = next(iter(labels.values())).shape[0]

for cell_num in trange(num_cells):

    bed_path = os.path.join(bed_file_directory, f'cell_{cell_num}.bed')
    with open(bed_path, 'w') as f:

        for chromosome in labels:

            annotations = labels[chromosome][cell_num]
            # Bin index of every locus kept for this chromosome; looked up once
            # per chromosome instead of once per locus.
            positions = cropped_indices[chromosome]

            for locus in range(len(annotations)):

                position = positions[locus]
                annotation = sc_subcompartment_names[ annotations[locus] ]

                # Bin index -> interval [position * resolution, (position + 1) * resolution)
                start = int(position * resolution)
                end = int((position + 1) * resolution)

                # Tab-separated line: chromosome, start, end, subcompartment name.
                f.write(f'{chrom_prefix}{chromosome}\t{start}\t{end}\t{annotation}\n')

100%|██████████| 4238/4238 [01:01<00:00, 69.12it/s]
