In [None]:
import scanpy as sc
from matplotlib import pyplot as plt

# Figure defaults: scanpy plots at 100 dpi, matplotlib figures 6x4 by default.
sc.set_figure_params(dpi=100)
plt.rcParams["figure.figsize"] = [6, 4]

import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by later cells — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
from scimilarity.utils import lognorm_counts, align_dataset
from scimilarity import CellAnnotation

Load scRNA-seq data

In [None]:

# Load the tutorial data.
# Set data_path to the location of the tutorial dataset (an .h5ad file).
data_path = "../input/panc8_scina.h5ad"
# The loaded object is bound to `adams` and reused by every later cell.
adams = sc.read(data_path)

Import SCimilarity - Cell annotation object

In [None]:
# Location of the SCimilarity model files; adjust to where the model is stored.
model_path = "../models/model_v1.1"
# Annotation object used below for the gene order, embeddings and predictions.
ca = CellAnnotation(model_path=model_path)

Match feature space with SCimilarity models

In [None]:
# Align the dataset to the model's gene order (`ca.gene_order`).
# NOTE(review): gene_overlap_threshold=0 presumably disables the minimum
# gene-overlap check — confirm this is intended for this dataset.
adams = align_dataset(adams, ca.gene_order, gene_overlap_threshold=0)

Normalize data consistent with SCimilarity

In [None]:
# NOTE(review): the normalization step is disabled, so the embedding cell below
# receives `adams.X` without it. Confirm the input is already normalized the way
# SCimilarity expects, or re-enable the following line.
#adams = lognorm_counts(adams)

Compute embeddings

In [None]:



# Embed the expression matrix with the SCimilarity model and keep the result in
# `obsm["X_scimilarity"]`, where the neighbor-graph and prediction cells read it.
adams.obsm["X_scimilarity"] = ca.get_embeddings(adams.X)

Compute visualization of embeddings
Use UMAP to visualize SCimilarity embeddings

In [None]:


# Build the neighbor graph on the SCimilarity embedding (not the default
# representation), then compute the UMAP layout from that graph.
sc.pp.neighbors(adams, use_rep="X_scimilarity")
sc.tl.umap(adams)

Visualize

In [None]:
# List the per-cell annotation columns available for coloring the plots.
adams.obs.columns


In [None]:
# UMAP colored by the dataset's own `celltype` column (used later as the reference label).
sc.pl.umap(adams, color="celltype", legend_fontsize=5)

Unconstrained annotation
Cells can be classified as any type that is in the SCimilarity reference

In [None]:
# Run kNN-based prediction on the SCimilarity embedding.
# Only `predictions` is used in this notebook; the neighbor indices, distances
# and statistics are returned by the call but not read afterwards.
predictions, nn_idxs, nn_dists, nn_stats = ca.get_predictions_knn(
    adams.obsm["X_scimilarity"]
)
# Store the predicted labels per cell; `.values` drops the index of the result.
adams.obs["predictions_unconstrained"] = predictions.values

In [None]:
# Count how many cells received each unconstrained label.
unconstrained_labels = adams.obs["predictions_unconstrained"]
celltype_counts = unconstrained_labels.value_counts()

# Keep only the labels assigned to more than 20 cells.
is_well_represented = celltype_counts > 20
well_represented_celltypes = celltype_counts[is_well_represented].index

# Plot just the cells whose label passed the filter.
keep_cell = unconstrained_labels.isin(well_represented_celltypes)
sc.pl.umap(
    adams[keep_cell],
    color="predictions_unconstrained",
    legend_fontsize=5,
)

Constrained classification

In [None]:
# Cell-type labels passed to the safelist call below: first the short label
# spellings, then longer-form names for the same pancreatic cell types.
target_celltypes = [
    "acinar",
    "activated_stellate",  # fixed typo: was "activated_setllate"
    "alpha",
    "beta",
    "delta",
    "ductal",
    "endothelial",
    "epsilon",
    "gamma",
    "macrophage",
    "mast",
    "quiescent_stellate",
    "schwann",
    "endothelial cell",
    "mast cell",
    "pancreatic A cell",
    "pancreatic D cell",
    "pancreatic acinar cell",
    "pancreatic ductal cell",
    "pancreatic stellate cell",
    "type B pancreatic cell",
]

# Register `target_celltypes` as the safelist on the annotation object.
# NOTE(review): presumably this restricts later predictions to these labels — confirm against the SCimilarity API.
ca.safelist_celltypes(target_celltypes)

In [None]:
# Annotate the dataset after the safelist has been set.
# Later cells read `celltype_hint` and `min_dist`, presumably added by this call — TODO confirm.
adams = ca.annotate_dataset(adams)

In [None]:
# UMAP colored by the constrained prediction stored in `celltype_hint`.
sc.pl.umap(adams, color="celltype_hint", legend_fontsize=5)

Annotation QC

In [None]:
# UMAP colored by `min_dist`, with the color scale capped at 0.1.
sc.pl.umap(adams, color="min_dist", vmax=0.1)

In [None]:
# Display the per-cell annotation table with all columns accumulated so far.
adams.obs


#### I don't know which column indicates whether a prediction is correct, so all columns are shown above; pick the one you need.

In [None]:
# Pull the reference labels and the predictions straight from their columns
# (no per-row iteration), then print the distinct values found in each.
actual_value = adams.obs["celltype"].tolist()
hypothesis_value = adams.obs["celltype_hint"].tolist()
actual_value_set = set(actual_value)
hypothesis_value_set = set(hypothesis_value)
print(actual_value_set)
print(hypothesis_value_set)

In [None]:
import csv

# Each group lists the label spellings that count as the same cell type. The
# first entry of every group is the key used in the result dictionaries below.
mapping = [
    ["NA"],
    ["alpha", "pancreatic A cell"],
    ["acinar", "pancreatic acinar cell"],
    ["quiescent_stellate", "pancreatic stellate cell"],
    ["beta", "type B pancreatic cell"],
    # This group was missing: a correct "pancreatic D cell" prediction resolved
    # to 'NA' and was scored as a delta false negative plus an 'NA' false positive.
    ["delta", "pancreatic D cell"],
    ["epsilon"],
    ["endothelial", "endothelial cell"],
    ["ductal", "pancreatic ductal cell"],
    ["macrophage", "macrophage"],
    ["mast", "mast cell"],
    ["gamma"],
    ["schwann"],
    # NOTE(review): "pancreatic stellate cell" is grouped only with
    # quiescent_stellate, so an activated_stellate cell predicted as
    # "pancreatic stellate cell" is scored as a miss — confirm this is intended.
    ["activated_stellate"],
]

# Categories reported in the CSV, in output order.
result_categories = [
    'alpha', 'NA', 'acinar', 'quiescent_stellate', 'beta', 'delta', 'epsilon',
    'endothelial', 'ductal', 'macrophage', 'mast', 'gamma', 'schwann',
    'activated_stellate',
]

# Per-category counters: true positives, false positives, false negatives.
result_TP = dict.fromkeys(result_categories, 0)
result_FP = dict.fromkeys(result_categories, 0)
result_FN = dict.fromkeys(result_categories, 0)

# Scoring rule used below: when the actual and predicted labels fall in the same
# mapping group the cell is a true positive for that group; otherwise it is a
# false negative for the actual group and a false positive for the predicted one.
# Every group's first element must be a key of the result dictionaries.
result_keys = result_TP.keys()

# Group lookup used when scoring predictions.
def find_group(value, mapping, result_keys):
    """Resolve a label to the result-dictionary key it should be counted under.

    Args:
        value: The label to resolve.
        mapping: Iterable of groups (sequences of labels); the first element of
            a group is the key returned for every label in that group.
        result_keys: Collection of valid result keys, used as a fallback for
            labels that appear in no group.

    Returns:
        The first element of the first group containing ``value``; otherwise
        ``value`` itself when it is in ``result_keys``; otherwise ``'NA'``.
    """
    not_found = object()
    # First group that contains the label wins; its head is the result key.
    group_key = next(
        (members[0] for members in mapping if value in members),
        not_found,
    )
    if group_key is not not_found:
        return group_key
    # Not in any group: accept labels that are already result keys, else 'NA'.
    return value if value in result_keys else 'NA'

# Walk the reference and predicted labels in parallel and update the counters.
# Reading the two columns directly avoids building a Series per row with iterrows().
for actual_value, hypothesis_value in zip(
    adams.obs["celltype"], adams.obs["celltype_hint"]
):
    # Resolve both labels to their result keys.
    actual_group = find_group(actual_value, mapping, result_keys)
    hypothesis_group = find_group(hypothesis_value, mapping, result_keys)

    # Update TP, FP and FN.
    if actual_group == hypothesis_group:
        result_TP[actual_group] += 1
    else:
        # False negative: the actual group was not recognized.
        result_FN[actual_group] += 1
        # False positive: the predicted group was assigned wrongly.
        result_FP[hypothesis_group] += 1
print(result_TP)
print(result_FP)
print(result_FN)

import os

# Destination of the per-category precision / recall / F1 report.
report_path = '../output/output_report/result_panc8_scina.csv'
# Create the report directory first so open() does not fail when it is missing.
os.makedirs(os.path.dirname(report_path), exist_ok=True)

with open(report_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Header row.
    writer.writerow(['Category', 'Precision', 'Recall', 'F1 Score'])

    for key in result_TP:
        thisTP = result_TP[key]
        thisFP = result_FP[key]
        thisFN = result_FN[key]

        # Precision = TP / (TP + FP); reported as 0 when nothing was predicted for this category.
        pre = thisTP / (thisTP + thisFP) if (thisTP + thisFP) != 0 else 0

        # Recall = TP / (TP + FN); reported as 0 when the category has no actual cells.
        rec = thisTP / (thisTP + thisFN) if (thisTP + thisFN) != 0 else 0

        # F1 is the harmonic mean of precision and recall; 0 when both are 0.
        f1 = 2 * (pre * rec) / (pre + rec) if (pre + rec) != 0 else 0

        # One row per category, rounded to four decimals.
        writer.writerow([key, round(pre, 4), round(rec, 4), round(f1, 4)])
    


    

In [None]:
# Save the annotated object to disk.
# NOTE(review): this cell does not create the output directory — make sure it exists before running.
adams.write("../output/output_modified_h5ad/panc8_scina_mod.h5ad")