In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os

import warnings
warnings.filterwarnings('ignore')



In [2]:
method = 'BayesSpace'
pred_key = 'spatial.cluster'
dataset = 'BRCA1'

data_folder = f'../data/{dataset}'
input_dir = f'../Results/results_old/{dataset}/{method}'
output_dir = f'../Results/results_new/{dataset}/{method}'
os.makedirs(output_dir, exist_ok=True)

file = input_dir
print(method)

BayesSpace


In [3]:
def process_adata(adata):
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    sc.pp.pca(adata)
    sc.pp.neighbors(adata)
    return adata

def plot_trajectory(adata, out_path):
    fix, ax = plt.subplots(figsize=(6, 6))
    sc.tl.paga(adata, groups='pred')
    sc.pl.paga(
        adata, 
        show=False, 
        plot=True,
        fontsize=13,
        ax=ax,
        text_kwds={"color": "#555555"}
    )
    #  Remove plot borders (spines)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    
    # Remove axis labels and ticks
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("")
    ax.set_ylabel("")

    # Save the figure
    plt.savefig(os.path.join(out_path, 'trajectory.png'), dpi=300, bbox_inches='tight')
    plt.savefig(os.path.join(out_path, 'trajectory.pdf'), format='pdf', bbox_inches='tight')
    plt.show()

In [None]:
print(f'================= Processing {method} {dataset} =================')
out_path = output_dir
os.makedirs(out_path, exist_ok=True)
section_id = 'V1_Human_Breast_Cancer_Block_A_Section_1'

file_path = os.path.join(data_folder, section_id)

adata = sc.read_visium(file_path)
adata.var_names_make_unique()
metadata = pd.read_csv(file + '/cell_metadata.csv', index_col=0)
gt_metadata = pd.read_csv(os.path.join(file_path, 'metadata.tsv'), sep='\t')

adata.obs['gt'] = gt_metadata['fine_annot_type'].values
adata = adata[~pd.isnull(adata.obs['gt'])]

# Match adata and metadata 
# adata = adata[adata.obs.index.isin(metadata.index)]

# Sort metadata by index based on adata
metadata = metadata.loc[adata.obs.index]
gt_metadata = gt_metadata.loc[adata.obs.index]


pred = metadata[pred_key].values
if min(pred) == 0:
    pred += 1

adata.obs['pred'] = pred.astype(str)
adata.obs['gt'] = adata.obs['gt'].astype(str)

# Remove labels corresponding to Healthy_1 and Healthy_2 
adata = adata[~adata.obs['pred'].isin(['6', '7'])]

process_adata(adata)
plot_trajectory(adata, out_path)
print(f'    Results saved to {out_path}')
# break

print(f'================= Finished {method} {dataset} =================')
# print(df_metrics)



KeyError: "None of [Index(['AAACAAGTATCTCCCA-1', 'AAACACCAATAACTGC-1', 'AAACAGAGCGACTCCT-1',\n       'AAACAGGGTCTATATT-1', 'AAACAGTGTTCCTGGG-1', 'AAACATTTCCCGGATT-1',\n       'AAACCCGAACGAAATC-1', 'AAACCGGGTAGGTACC-1', 'AAACCTAAGCAGCCGG-1',\n       'AAACCTCATGAAGTTG-1',\n       ...\n       'TTGTGGTAGGAGGGAT-1', 'TTGTGGTGGTACTAAG-1', 'TTGTGTATGCCACCAA-1',\n       'TTGTTAGCAAATTCGA-1', 'TTGTTCAGTGTGCTAC-1', 'TTGTTGTGTGTCAAGA-1',\n       'TTGTTTCACATCCAGG-1', 'TTGTTTCATTAGTCTA-1', 'TTGTTTCCATACAACT-1',\n       'TTGTTTGTGTAAATTC-1'],\n      dtype='object', length=3798)] are in the [index]"

## Combine all metrics

In [174]:
input_dir = f'../Results/results_new/{dataset}'
output_path = f'../Results/results_new/{dataset}/metrics.csv'
input_files = glob.glob(input_dir + '/*')
input_files = [f for f in input_files if os.path.isdir(f)]

all_metrics = []
for file in input_files:
    method = file.split('/')[-1]
    df_metrics = pd.read_csv(os.path.join(file, 'metrics.csv'), index_col=0)
    all_metrics.append(df_metrics)

with open(output_path, 'w') as f:
    for df in all_metrics:
        df.to_csv(f, index=True, header=not f.tell()) 