In [1]:
import os
import os.path as osp
import scanpy as sc
import scipy.sparse as sp
import torch
import time
import pandas as pd
import numpy as np
import diopy
working_dir = 'D:/博士/spatialid-main'
os.chdir(working_dir)
from torch.utils.data import DataLoader, Dataset
from basic_model.cell_type_ann_model import DNNModel
from basic_model.focal_loss import MultiCEFocalLoss


from basic_model.cell_type_ann_model import SpatialModelTrainer

In [19]:

class DNNTrainer:
    """Train, checkpoint and evaluate a ``DNNModel`` classifier.

    The trainer owns the target device, the model (``DNNModel`` with a fixed
    hidden size of 1024) and an Adam optimizer.
    """

    def __init__(self, input_dims, num_classes, gpu):
        """
        :param input_dims: number of input features passed to ``DNNModel``.
        :param num_classes: number of output classes passed to ``DNNModel``.
        :param gpu: GPU index (e.g. ``"0"``); ``None`` selects the CPU.
        """
        self.set_device(gpu)
        self.set_model(input_dims, hidden_dims=1024, output_dims=num_classes)
        self.set_optimizer()

    def set_model(self, input_dims, hidden_dims, output_dims):
        """Build the ``DNNModel`` and move it to ``self.device``."""
        self.model = DNNModel(input_dims, hidden_dims, output_dims).to(self.device)

    def set_optimizer(self):
        """Create the Adam optimizer (lr=1e-3, weight_decay=5e-4) over the model parameters."""
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3, weight_decay=5e-4)

    def set_device(self, gpu=None):
        """Use ``cuda:<gpu>`` when a GPU index is given and CUDA is available, else the CPU."""
        if gpu is not None and torch.cuda.is_available():
            self.device = torch.device("cuda:{}".format(gpu))
        else:
            self.device = torch.device("cpu")

    def save_model(self, marker_genes, batch_size, label_names, path):
        """Write a checkpoint holding the model, optimizer state and metadata to ``path``.

        The checkpoint is first written to ``<path>.tmp`` and then moved into
        place, so an interrupted or failed write (for example a full disk)
        never leaves a truncated file where the previous checkpoint was.

        :param marker_genes: gene names stored under ``'marker_genes'``.
        :param batch_size: batch size stored under ``'batch_size'``.
        :param label_names: class names stored under ``'label_names'``.
        :param path: destination file of the checkpoint.
        """
        # NOTE(review): the whole model object is pickled (not just its state_dict),
        # so loading requires the model class to be importable and the file to be trusted.
        state = {'model': self.model,
                 'optimizer': self.optimizer.state_dict(),
                 'marker_genes': marker_genes,
                 'batch_size': batch_size,
                 'label_names': label_names
                 }
        tmp_path = f"{path}.tmp"
        try:
            torch.save(state, tmp_path)
            os.replace(tmp_path, path)
        finally:
            # Only present when saving or moving failed; never leave partial files behind.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        print(f"  [{time.strftime('%Y-%m-%d %H:%M:%S')} Model is saved in: {path}]")

    def train(self, data_loader, marker_genes=None, class_nums=None, batch_size=4096, label_names=None, epochs=200, gamma=2, alpha=.25, path="dnn_CTX_cellbin.bgi"):
        """Train the model with a focal loss and checkpoint whenever the mean epoch loss improves.

        :param data_loader: iterable yielding ``(inputs, targets)`` batches.
        :param marker_genes: gene names stored in the checkpoint.
        :param class_nums: number of classes, forwarded to ``MultiCEFocalLoss``.
        :param batch_size: batch size stored in the checkpoint (not used for batching here).
        :param label_names: class names stored in the checkpoint.
        :param epochs: number of passes over ``data_loader``.
        :param gamma: ``gamma`` argument of ``MultiCEFocalLoss``.
        :param alpha: ``alpha`` argument of ``MultiCEFocalLoss``.
        :param path: checkpoint destination.
        """
        self.model.train()
        # The loss configuration is constant, so build the loss module once instead of per batch.
        criterion = MultiCEFocalLoss(class_num=class_nums, gamma=gamma, alpha=alpha, reduction="mean")
        best_loss = np.inf
        for epoch in range(epochs):
            epoch_acc = []
            epoch_loss = []
            for idx, data in enumerate(data_loader):
                inputs, targets = data
                inputs = inputs.to(self.device)
                targets = targets.long().to(self.device)
                output = self.model(inputs)
                loss = criterion(output, targets)
                train_loss = loss.item()

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Per-batch accuracy in percent.
                total = targets.size(0)
                prediction = output.argmax(1)
                correct = prediction.eq(targets).sum().item()

                accuracy = correct / total * 100.
                epoch_acc.append(accuracy)
                epoch_loss.append(train_loss)
            mean_loss = np.mean(epoch_loss)
            print(f"  [{time.strftime('%Y-%m-%d %H:%M:%S')} Epoch: {epoch+1:3d} Loss: {mean_loss:.5f}, acc: {np.mean(epoch_acc):.2f}%]")
            if mean_loss < best_loss:
                best_loss = mean_loss
                self.save_model(marker_genes, batch_size, label_names, path)

    def validation(self, data_loader, model_path):
        """Load the checkpoint at ``model_path`` and report per-batch and mean accuracy.

        :param data_loader: iterable yielding ``(inputs, targets)`` batches.
        :param model_path: checkpoint written by :meth:`save_model`.
        """
        # map_location lets a checkpoint written on a GPU be evaluated on this trainer's device.
        checkpoint = torch.load(model_path, map_location=self.device)
        label_names = checkpoint['label_names']
        dnn_model = checkpoint["model"].to(self.device)
        dnn_model.eval()

        dnn_predictions = []
        val_acc = []
        with torch.no_grad():
            for idx, data in enumerate(data_loader):
                inputs, targets = data
                inputs = inputs.to(self.device)
                targets = targets.long().to(self.device)
                outputs = dnn_model(inputs)
                dnn_predictions.append(outputs.detach().cpu().numpy())

                total = targets.size(0)
                prediction = outputs.argmax(1)
                correct = prediction.eq(targets).sum().item()
                accuracy = correct / total * 100.
                val_acc.append(accuracy)
                # Predicted class names of the batch that was just evaluated.
                pseudo_class = pd.Categorical([label_names[i] for i in dnn_predictions[-1].argmax(1)])
                print(f"  [{time.strftime('%Y-%m-%d %H:%M:%S')} accuracy: {accuracy:.2f}% \npseudo_class: {pseudo_class}")
            print(f"  [{time.strftime('%Y-%m-%d %H:%M:%S')} total accuracy: {np.mean(val_acc):.2f}%")


class DNNDataset(Dataset):
    """Torch dataset of L2-normalised expression rows and integer class codes.

    :param adata: AnnData-like object exposing ``X``, ``shape``, ``obs`` and ``var_names``.
    :param ann_key: categorical column of ``adata.obs`` holding the class labels.
    :param marker_genes: optional gene list; when given, the columns are selected in
        this order and genes absent from ``adata.var_names`` become all-zero columns.

    Note: a sparse ``adata.X`` is converted to a dense array in place.
    """

    def __init__(self, adata, ann_key, marker_genes=None):
        self.adata = adata
        self.shape = adata.shape
        self.ann_key = ann_key
        if sp.issparse(adata.X):
            adata.X = adata.X.toarray()

        if marker_genes is None:
            data = adata.X
        else:
            # get_indexer returns -1 for unknown genes; the zero column appended by
            # np.pad sits at position -1, so unknown genes read as zeros.
            gene_indices = adata.var_names.get_indexer(marker_genes)
            data = np.pad(adata.X, ((0, 0), (0, 1)))[:, gene_indices].copy()

        # Scale each row to unit L2 norm; all-zero rows are left as zeros.
        norm_factor = np.linalg.norm(data, axis=1, keepdims=True)
        norm_factor[norm_factor == 0] = 1
        self.data = data / norm_factor

        # Extract the label codes once as a plain array. Indexing the codes Series
        # with ``[idx]`` is label-based (it breaks for integer obs indexes and relies
        # on a deprecated positional fallback otherwise) and rebuilt the Series for
        # every sample.
        self.labels = adata.obs[ann_key].cat.codes.to_numpy()

    def __len__(self):
        return self.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, class_code)`` for the row at position ``idx``."""
        x = self.data[idx].squeeze()
        y = self.labels[idx]
        return x, y


def transform_data_loader(adata, ann_key, marker_genes=None, batch_size=4096):
    """Wrap ``adata`` in a ``DNNDataset`` and return a shuffling ``DataLoader`` over it.

    The loader drops the last incomplete batch and loads data in the main
    process (``num_workers=0``).
    """
    loader_options = dict(batch_size=batch_size, drop_last=True, shuffle=True, num_workers=0)
    return DataLoader(dataset=DNNDataset(adata, ann_key, marker_genes=marker_genes), **loader_options)


def dnn_workflow(data_path,
                 ann_key,
                 marker_genes=None,
                 batch_size=4096,
                 epochs=200,
                 gpu="0",
                 model_name="dnn_CTX_cellbin.bgi",
                 model_path="./output",
                 filter_mt=False,
                 cell_min_counts=300,
                 gene_min_cells=10,
                 cell_max_counts=98.):
    """
    Train a DNN classifier on an annotated dataset, save the best checkpoint and
    report its accuracy on the same data loader.

    :param data_path: path of the input file, read with ``diopy.input.read_h5``.
    :param ann_key: the annotation key in .obs.keys() list (must be categorical).
    :param marker_genes: whether to use marker list data to train the model. If None, all data is used to train the model. Default, None.
    :param batch_size: training batch size; must not exceed the number of cells.
    :param epochs: number of training epochs.
    :param gpu: whether to use GPU training model. If None, the CPU training model is used. If it is number, the corresponding GPU training model is invoked.
    :param model_name: file name of the checkpoint.
    :param model_path: save dnn model path (created if missing).
    :param filter_mt: whether to filter MT- gene. Currently unused: the filter is disabled below.
    :param cell_min_counts: currently unused: the filter is disabled below.
    :param gene_min_cells: genes detected in fewer cells are removed; values <= 0 disable the filter.
    :param cell_max_counts: filter cell counts outliers, Range: (0, 100). Currently unused: the filter is disabled below.
    :return: None
    """
    os.makedirs(model_path, exist_ok=True)
    # assert data_path.endswith(".h5ad"), "Error, Got an invalid DATA_PATH!"
    # adata = sc.read_h5ad(data_path)
    adata = diopy.input.read_h5(data_path)

    print(f"  [Data Info] \n {adata}")
    assert batch_size <= adata.shape[0], "Error, Batch size cannot be larger than the data set row."

    # The cell-level filters are intentionally disabled; only the gene filter runs.
    # if filter_mt:
    #     adata.var["mt"] = adata.var_names.str.startswith(["MT-", "mt-", "Mt-"])
    #     sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)
    #     adata = adata[adata.obs["pct_counts_mt"] < 10].copy()
    # if cell_min_counts > 0:
    #     sc.pp.filter_cells(adata, min_counts=cell_min_counts)
    if gene_min_cells > 0:
        sc.pp.filter_genes(adata, min_cells=gene_min_cells)
    # if cell_max_counts < 100:
    #     max_count = np.percentile(adata.obs["nCount_RNA"], cell_max_counts)
    #     sc.pp.filter_cells(adata, max_counts=max_count)

    print(f"  [After Preprocessing Data Info] \n {adata}")
    categories = adata.obs[ann_key].cat.categories
    label_names = categories.tolist()
    class_nums = len(categories)
    if marker_genes is None:
        marker_list = adata.var_names.tolist()
    else:
        marker_list = marker_genes

    data_loader = transform_data_loader(adata, ann_key, marker_genes, batch_size)

    # The dataset yields one feature per entry of the marker list (all genes when no
    # list is given), so the model input size must follow the list, not adata.shape[1].
    trainer = DNNTrainer(input_dims=len(marker_list),
                         num_classes=class_nums,
                         gpu=gpu)

    checkpoint_path = osp.join(model_path, model_name)
    trainer.train(data_loader, marker_genes=marker_list, class_nums=class_nums, batch_size=batch_size,
                  label_names=label_names, epochs=epochs, path=checkpoint_path)
    # NOTE(review): accuracy is measured on the training loader, not on held-out data.
    trainer.validation(data_loader, checkpoint_path)


if __name__ == '__main__':

    # Set the working directory to the folder holding the single-cell reference files.
    working_dir = 'E:/doctor/自研/mapping/data/20230814final/h5/sc/'
    os.chdir(working_dir)

    # Annotation column (in .obs) of each reference file. A lookup table replaces the
    # if/elif chain, which compared against "sc_HIP" (the file is "sc_HIP.h5") and
    # silently reused the previous iteration's key for any unmatched file.
    ann_keys = {
        "sc_HIP.h5": 'annotation',
        "sc_CB.h5": 'annotation',
        "sc_OB.h5": 'type',
        "sc_human_CTX.h5": 'Subclass',
        "sc_mouse_CTX.h5": 'Type',
    }

    # Make sure the folder for the timing reports exists before any training starts.
    time_dir = "E:/doctor/自研/mapping/data/20230814final/result/spatialID/"
    os.makedirs(time_dir, exist_ok=True)

    # Collect every regular file in the working directory.
    datalist = [f for f in os.listdir() if os.path.isfile(f)]

    # Iterate over the files. NOTE(review): the loop starts at index 2, so the first
    # two entries of ``datalist`` are skipped -- confirm this is still intended.
    for i in range(2, len(datalist)):
        print(datalist[i])

        # Path of the single-cell reference to train on.
        sc_path = os.path.join(working_dir, datalist[i])

        # Pick the annotation column for this reference; skip files that are not configured.
        ann_key = ann_keys.get(datalist[i])
        if ann_key is None:
            print(f"  [Skip] no annotation key configured for {datalist[i]}")
            continue

        start_time = time.time()
        dnn_workflow(sc_path,
                     ann_key,
                     marker_genes=None,
                     batch_size=100,
                     epochs=100,
                     gpu="0",
                     model_name="dnn_"+datalist[i]+".bgi",
                     model_path="E:/doctor/自研/mapping/data/20230814final/spatialID/model",
                     filter_mt=False,
                     cell_min_counts=300,
                     gene_min_cells=10,
                     cell_max_counts=98.)

        end_time = time.time()
        time_diff = end_time-start_time
        print(time_diff)

        # Record the wall-clock duration of this run.
        time_name = time_dir+datalist[i]+"_spatialID_prediction_time.csv"
        pd.Series([time_diff], name='time_diff').to_csv(time_name, header=True)

sc_human_CTX.h5
  [Data Info] 
 AnnData object with n_obs × n_vars = 17336 × 36517
    obs: 'Subclass', 'nFeature', 'nCount'
  [After Preprocessing Data Info] 
 AnnData object with n_obs × n_vars = 17336 × 31208
    obs: 'Subclass', 'nFeature', 'nCount'
    var: 'n_cells'
  [2023-08-22 21:59:57 Epoch:   1 Loss: 0.30655, acc: 40.94%]
  [2023-08-22 21:59:59 Model is saved in: E:/doctor/自研/mapping/data/20230814final/spatialID/model\dnn_sc_human_CTX.h5.bgi]
  [2023-08-22 22:00:02 Epoch:   2 Loss: 0.21117, acc: 58.45%]
  [2023-08-22 22:00:04 Model is saved in: E:/doctor/自研/mapping/data/20230814final/spatialID/model\dnn_sc_human_CTX.h5.bgi]
  [2023-08-22 22:00:07 Epoch:   3 Loss: 0.19495, acc: 60.58%]


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\caffe2\serialize\inline_container.cc:300] . unexpected pos 136310784 vs 136310712

In [12]:
# List every collected file name; ``i`` stays bound to the last index afterwards.
for i in range(len(datalist)):
    print(datalist[i])

sc_CB.h5
sc_HIP.h5
sc_human_CTX.h5
sc_mouse_CTX.h5
sc_OB.h5


In [11]:
# Display the file names collected by the most recent driver run.
datalist

['sc_CB.h5', 'sc_HIP.h5', 'sc_human_CTX.h5', 'sc_mouse_CTX.h5', 'sc_OB.h5']

In [11]:

import torch
import torch_geometric
# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed every random number generator used below with 0.
torch.cuda.manual_seed_all(0)
torch.cuda.manual_seed(0)
torch.manual_seed(0)
np.random.seed(0)


def load_data(data_path, filter_mt=True, min_cells=10, min_counts=300, max_percent=98.0):
    """
    Read a dataset with ``diopy.input.read_h5`` and make sure ``X`` is a dense array.

    All quality-control filters are currently commented out, so the four filter
    parameters are accepted but have no effect and both size reports print the
    same shape.

    :param data_path: Note that, the input data must be raw, can not do any preprocessing!
    :param filter_mt: Whether to filter MT- genes (currently unused). default, True
    :param min_cells: Gene filter threshold (currently unused). default, 10
    :param min_counts: Cell filter threshold (currently unused). default, 300
    :param max_percent: Cell count percentile, Range: (0, 100) (currently unused). default, 98.0
    :return: the loaded AnnData object with a dense ``X``.
    """

    print("======> Loading data...")
    adata = diopy.input.read_h5(data_path)
    # adata = sc.read_h5ad(data_path)
    n_cells, n_genes = adata.shape[0], adata.shape[1]
    print('  Original Data Info: %d cells × %d genes.' % (n_cells, n_genes))

    # Disabled quality-control filters, kept for reference:
    # if filter_mt:
    #     adata.var["mt"] = adata.var_names.str.startswith(("MT-", "Mt-", "mt-"))
    #     sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)
    #     adata = adata[adata.obs["pct_counts_mt"] < 10].copy()
    # if min_counts > 0:
    #     sc.pp.filter_cells(adata, min_counts=min_counts)
    # if min_cells > 0:
    #     sc.pp.filter_genes(adata, min_cells=min_cells)
    # if max_percent < 100:
    #     max_counts = np.percentile(adata.X.sum(1), max_percent)
    #     sc.pp.filter_cells(adata, max_counts=max_counts)

    # adata_X_sparse_backup = adata.X.copy()
    n_cells, n_genes = adata.shape[0], adata.shape[1]
    print('  After Preprocessing Data Info: %d cells × %d genes.' % (n_cells, n_genes))

    # Nothing left to do when the matrix is already dense.
    if not sp.issparse(adata.X):
        return adata
    adata.X = adata.X.toarray()
    return adata


def transfer_from_sc_data(adata, dnn_path, gpu="0"):
    """
    Annotate ``adata`` with the predictions of a pre-trained DNN checkpoint.

    Writes the raw model outputs to ``adata.obsm['psuedo_label']``, the arg-max
    class names to ``adata.obs['psuedo_class']`` and the list of class names to
    ``adata.uns['psuedo_classes']``.

    :param adata: dataset to annotate; ``adata.X`` must be a dense array.
    :param dnn_path: Pre-trained DNN model save path
    :param gpu: gpu number; ``None`` (or no available CUDA) selects the CPU.
    :return: the same ``adata`` object, annotated in place.
    """
    print("========> Transfering from sc-dataset...")
    if gpu is not None and torch.cuda.is_available():
        device = torch.device("cuda:{}".format(gpu))
    else:
        device = torch.device("cpu")

    # map_location makes a checkpoint written on a GPU loadable on the device chosen
    # above, which is required for the CPU fallback to work.
    # NOTE(review): the checkpoint pickles a full model object; only load trusted files.
    checkpoint = torch.load(dnn_path, map_location=device)
    dnn_model = checkpoint["model"].to(device)
    dnn_model.eval()

    # Reorder the columns to the training gene order. get_indexer yields -1 for genes
    # missing from this dataset, which selects the zero column appended by np.pad.
    marker_genes = checkpoint["marker_genes"]
    gene_indices = adata.var_names.get_indexer(marker_genes)
    adata_X = np.pad(adata.X, ((0, 0), (0, 1)))[:, gene_indices].copy()
    # Scale each row to unit L2 norm; all-zero rows are left as zeros.
    norm_factor = np.linalg.norm(adata_X, axis=1, keepdims=True)
    norm_factor[norm_factor == 0] = 1
    dnn_inputs = torch.Tensor(adata_X / norm_factor).split(checkpoint["batch_size"])
    # Inference with DNN model.
    dnn_predictions = []
    with torch.no_grad():
        for batch_idx, inputs in enumerate(dnn_inputs):
            inputs = inputs.to(device)
            outputs = dnn_model(inputs)
            dnn_predictions.append(outputs.detach().cpu().numpy())
    label_names = checkpoint['label_names']
    adata.obsm['psuedo_label'] = np.concatenate(dnn_predictions)
    adata.obs['psuedo_class'] = pd.Categorical([label_names[i] for i in adata.obsm['psuedo_label'].argmax(1)])
    adata.uns['psuedo_classes'] = label_names
    return adata


def _resolve_spatial_coordinates(adata, dataset_name=None):
    """Fill ``adata.obsm['spatial']`` from the coordinate columns of ``adata.obs`` and return it.

    Resolution order: the per-file table below (keyed by ``dataset_name``), then an
    already present ``adata.obsm['spatial']``, then whichever known column pair
    exists in ``adata.obs``. Raises ``ValueError`` when none applies.
    """
    coordinate_columns = {
        ("coor_x", "coor_y"): ("st_HIP_starmap.h5", "C2D2_HIP_cellbin.h5", "C2D2_OB_cellbin.h5", "C2D2_CB_cellbin.h5"),
        ("imagerow", "imagecol"): ("st_human_CTX.h5", "st_mouse_CTX.h5", "HIP_slide_seq.h5", "HIP_test1.h5"),
    }
    columns = next((pair for pair, names in coordinate_columns.items() if dataset_name in names), None)
    if columns is None and 'spatial' not in adata.obsm:
        columns = next((pair for pair in coordinate_columns
                        if all(column in adata.obs.columns for column in pair)), None)
        if columns is None:
            raise ValueError("Cannot locate spatial coordinates: expected adata.obsm['spatial'] or "
                             "obs columns ('coor_x', 'coor_y') / ('imagerow', 'imagecol').")
    if columns is not None:
        # The coordinate columns may be stored as strings/categories; convert them to numbers first.
        for column in columns:
            adata.obs[column] = pd.to_numeric(adata.obs[column], errors='coerce')
        x_column, y_column = columns
        adata.obsm['spatial'] = np.array(pd.DataFrame({"coor_x": adata.obs[x_column], "coor_y": adata.obs[y_column]}))
    return adata.obsm['spatial']


def distribution_fine_tune(adata, pca_dim=200, k_graph=30, edge_weight=True, epochs=200, w_cls=20, w_dae=1., w_gae=1.,
                           gpu="0", save_path="./output", dataset_name=None):
    """
    Refine the DNN pseudo labels with a model trained on a spatial k-NN graph and
    write the predictions to ``adata.obs['celltype_pred']`` and to disk.

    :param adata: dataset carrying ``obsm['psuedo_label']`` and ``uns['psuedo_classes']``; modified in place.
    :param pca_dim: PCA dims, default=200
    :param k_graph: neighbors number, default=30
    :param edge_weight: Add edge weight to the graph model, default=True
    :param epochs: GCN training epochs, default=200
    :param w_cls: class num weight, default=20
    :param w_dae: dnn weight
    :param w_gae: gcn weight
    :param gpu: gpu number; ``None`` (or no available CUDA) selects the CPU.
    :param save_path: prefix of the output files; ``"model.bgi"`` and ``"model.csv"``
        are appended to it directly (no path separator is inserted).
    :param dataset_name: file name used to choose the coordinate columns. When omitted,
        the notebook globals ``datalist[i]`` are consulted if they exist (legacy
        behaviour); otherwise the columns are detected from ``adata``.
    :return: None
    """
    # Same device selection as transfer_from_sc_data: fall back to the CPU instead of
    # failing when CUDA is unavailable or no GPU index is given.
    if gpu is not None and torch.cuda.is_available():
        device = torch.device("cuda:{}".format(gpu))
    else:
        device = torch.device("cpu")

    print("========> Model Training...")
    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
    sc.pp.log1p(adata)
    adata.X += 0.0001
    # Standardise every gene. Genes with zero variance would otherwise produce 0/0 = NaN
    # columns that poison the PCA, so their divisor is replaced by 1.
    gene_std = adata.X.std(0)
    gene_std[gene_std == 0] = 1
    adata.X = (adata.X - adata.X.mean(0)) / gene_std
    print("std/n")
    print(adata.X.std(0))
    print("mean/n")
    print(adata.X.mean(0))
    gene_mat = torch.Tensor(adata.X)

    # Project the expression matrix onto its leading ``pca_dim`` principal directions.
    _, _, v = torch.pca_lowrank(gene_mat, pca_dim)
    gene_mat = torch.matmul(gene_mat, v)

    # Coordinate handling added on tqy's advice.
    if dataset_name is None:
        # Legacy behaviour: earlier versions read the driver loop's globals directly.
        try:
            dataset_name = datalist[i]
        except (NameError, IndexError, KeyError, TypeError):
            dataset_name = None
    cell_coo = torch.Tensor(_resolve_spatial_coordinates(adata, dataset_name))

    # cell_coo = torch.Tensor(adata.obsm['X_spatial'])
    data = torch_geometric.data.Data(x=gene_mat, pos=cell_coo)
    data = torch_geometric.transforms.KNNGraph(k=k_graph, loop=True)(data)
    data.y = torch.Tensor(adata.obsm['psuedo_label'])

    # Make distances as edge weights.
    if edge_weight:
        data = torch_geometric.transforms.Distance()(data)
        data.edge_weight = 1 - data.edge_attr[:, 0]
    else:
        data.edge_weight = torch.ones(data.edge_index.size(1))

    # Train self-supervision model.
    input_dim = data.num_features
    num_classes = len(adata.uns['psuedo_classes'])
    trainer = SpatialModelTrainer(input_dim, num_classes, device=device)
    trainer.train(data, epochs, w_cls, w_dae, w_gae)

    # save_path="D:/博士/spatialid-main/SPATIALID/output/HIP_binsize"
    # (Author note: consider not saving the checkpoint at all.)
    #trainer.save_checkpoint(osp.join(save_path+"model.bgi"))
    trainer.save_checkpoint(save_path + "model.bgi")
    # Inference.
    print('\n==> Inferencing...')
    predictions = trainer.valid(data)
    celltype_pred = pd.Categorical([adata.uns['psuedo_classes'][i] for i in predictions])

    # Save results.
    result = pd.DataFrame({'cell': adata.obs_names.tolist(), 'celltype_pred': celltype_pred})
    #result.to_csv(osp.join(save_path, "model.csv"), index=False)
    result.to_csv((save_path+"model.csv"), index=False)
    adata.obs['celltype_pred'] = pd.Categorical(celltype_pred)
    # adata.X = adata_X_sparse_backup

    # # --------------------------------------------------
    # adata.obsm["X_pca"] = gene_mat.detach().cpu().numpy()
    # #adata.write(osp.join(save_path, "adata.h5ad"))
    #
    # # Save visualization.
    # spot_size = 30
    # psuedo_top100 = adata.obs['psuedo_class'].to_numpy()
    # other_classes = list(pd.value_counts(adata.obs['psuedo_class'])[100:].index)
    # psuedo_top100[adata.obs['psuedo_class'].isin(other_classes)] = 'Others'
    # adata.obs['psuedo_top100'] = pd.Categorical(psuedo_top100)
    # sc.pl.spatial(adata, img_key=None, color=['psuedo_top100'], spot_size=spot_size, show=False)
    # plt.savefig(osp.join(save_path, "psuedo_top100.pdf"), bbox_inches='tight', dpi=150)
    # sc.pl.spatial(adata, img_key=None, color=['celltype_pred'], spot_size=spot_size, show=False)
    # plt.savefig(osp.join(save_path, "celltype_pred.pdf"), bbox_inches='tight', dpi=150)
    print("Done!")


def spatial_classification_tool(data_path, filter_mt, min_cells, min_counts, max_percent, pca_dim,
                                k_graph, edge_weight, epochs, w_cls, w_dae, w_gae,
                                dnn_path, save_path, gpu="0"):
    """Run the full pipeline on one file: load, transfer DNN pseudo labels, fine-tune.

    The first group of arguments is forwarded to ``load_data``, ``dnn_path`` to
    ``transfer_from_sc_data`` and the remaining ones to ``distribution_fine_tune``.
    """
    load_options = dict(filter_mt=filter_mt, min_cells=min_cells,
                        min_counts=min_counts, max_percent=max_percent)
    fine_tune_options = dict(pca_dim=pca_dim, k_graph=k_graph, edge_weight=edge_weight,
                             epochs=epochs, w_cls=w_cls, w_dae=w_dae, w_gae=w_gae,
                             gpu=gpu, save_path=save_path)

    adata = load_data(data_path, **load_options)
    adata = transfer_from_sc_data(adata, dnn_path, gpu=gpu)
    distribution_fine_tune(adata, **fine_tune_options)


if __name__ == "__main__":
    # Set the working directory to the folder holding the spatial datasets.
    working_dir = 'E:/doctor/自研/mapping/data/20230814final/h5/st/'
    os.chdir(working_dir)

    # Single-cell reference whose pre-trained DNN is used for each spatial file. A lookup
    # table replaces the if/elif chain, which left ``name_anno`` undefined (or stale from
    # an earlier run) for any file it did not list.
    reference_for = {
        "st_HIP_starmap.h5": 'sc_HIP.h5',
        "C2D2_HIP_cellbin.h5": 'sc_HIP.h5',
        "HIP_slide_seq.h5": 'sc_HIP.h5',
        "HIP_test1.h5": 'sc_HIP.h5',
        "C2D2_CB_cellbin.h5": 'sc_CB.h5',
        "C2D2_OB_cellbin.h5": 'sc_OB.h5',
        "st_human_CTX.h5": 'sc_human_CTX.h5',
        "st_mouse_CTX.h5": 'sc_mouse_CTX.h5',
    }

    # Make sure the output folders exist before the long-running training starts.
    result_dir = "E:/doctor/自研/mapping/data/20230814final/spatialID/result/"
    time_dir = "E:/doctor/自研/mapping/data/20230814final/result/spatialID2/"
    os.makedirs(result_dir, exist_ok=True)
    os.makedirs(time_dir, exist_ok=True)

    # Collect every regular file in the working directory.
    datalist = [f for f in os.listdir() if os.path.isfile(f)]

    # NOTE(review): only index 5 is processed; use range(0, len(datalist)) for a full run.
    # ``datalist`` and ``i`` must remain module-level names because
    # distribution_fine_tune looks them up to choose the coordinate columns.
    for i in range(5, 6):
        print(datalist[i])

        # Path of the spatial dataset to annotate.
        st_path = os.path.join(working_dir, datalist[i])

        # Choose the single-cell reference that matches this spatial dataset.
        name_anno = reference_for.get(datalist[i])
        if name_anno is None:
            print(f"  [Skip] no single-cell reference configured for {datalist[i]}")
            continue

        data_path_circle = st_path
        dnn_path_circle = "E:/doctor/自研/mapping/data/20230814final/spatialID/model/" + "dnn_"+name_anno+".bgi"
        save_path_circle = result_dir + datalist[i]

        start_time = time.time()
        spatial_classification_tool(data_path=data_path_circle,
                                    filter_mt=False,
                                    min_cells=10,
                                    min_counts=200,
                                    max_percent=98.,
                                    pca_dim=50,
                                    k_graph=39,
                                    edge_weight=True,
                                    epochs=500,
                                    w_cls=20,
                                    w_dae=1,
                                    w_gae=1,
                                    dnn_path=dnn_path_circle,
                                    save_path=save_path_circle,
                                    gpu="0")

        end_time = time.time()
        time_diff = end_time-start_time
        print(time_diff)

        # Record the wall-clock duration of this run.
        time_name = time_dir+datalist[i]+"_spatialID_prediction_time.csv"
        pd.Series([time_diff], name='time_diff').to_csv(time_name, header=True)
    

st_HIP_starmap.h5
  Original Data Info: 4632 cells × 5413 genes.
  After Preprocessing Data Info: 4632 cells × 5413 genes.
std/n
[1.0000015 1.0000418 0.9999959 ... 1.0000038 1.0000205 1.0000216]
mean/n
[ 3.9133138e-06  2.0202751e-06  1.7764295e-07 ...  3.6029228e-06
 -3.7510779e-06  1.5031241e-06]
  [Epoch   1] Loss: 8.90255, Time: 0.14 s, Psuedo-Acc: 5.16%
  [Epoch   2] Loss: 8.08281, Time: 0.27 s, Psuedo-Acc: 7.53%
  [Epoch   3] Loss: 7.51573, Time: 0.41 s, Psuedo-Acc: 12.50%
  [Epoch   4] Loss: 7.12634, Time: 0.53 s, Psuedo-Acc: 19.95%
  [Epoch   5] Loss: 6.83398, Time: 0.65 s, Psuedo-Acc: 28.13%
  [Epoch   6] Loss: 6.58420, Time: 0.79 s, Psuedo-Acc: 35.19%
  [Epoch   7] Loss: 6.37379, Time: 0.92 s, Psuedo-Acc: 41.21%
  [Epoch   8] Loss: 6.16213, Time: 1.04 s, Psuedo-Acc: 44.62%
  [Epoch   9] Loss: 5.95993, Time: 1.17 s, Psuedo-Acc: 49.31%
  [Epoch  10] Loss: 5.76960, Time: 1.31 s, Psuedo-Acc: 53.02%
  [Epoch  11] Loss: 5.58425, Time: 1.44 s, Psuedo-Acc: 56.89%
  [Epoch  12] Loss: 5

KeyboardInterrupt: 

In [23]:
# Display the spatial file names collected by the most recent driver run.
datalist

['C2D2_CB_cellbin.h5',
 'C2D2_HIP_cellbin.h5',
 'C2D2_OB_cellbin.h5',
 'HIP_slide_seq.h5',
 'HIP_test1.h5',
 'st_HIP_starmap.h5',
 'st_human_CTX.h5',
 'st_mouse_CTX.h5']

In [34]:
# Display the loop index left over from the last driver run.
i

5

In [38]:
# Reload the spatial file at ``st_path`` to inspect its contents.
adata = diopy.input.read_h5(st_path)
adata

AnnData object with n_obs × n_vars = 4632 × 5413
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'x', 'y', 'predict', 'celltype', 'coor_x', 'coor_y', 'celltype2'

In [39]:
# Inspect the x-coordinate column (the recorded output shows a categorical of digit strings).
adata.obs['coor_x']

index
T_139116    24029
T_139120    23534
T_139142    23959
T_139143    23339
T_139144    23980
            ...  
T_169293    46211
T_169294    46094
T_169295    46054
T_169297    45991
T_169298    45884
Name: coor_x, Length: 4632, dtype: category
Categories (4215, object): ['22912', '23018', '23104', '23291', ..., '47464', '47466', '47470', '47483']

In [None]:
# Cast the x-coordinate column to integers.
# NOTE(review): this cell has no execution count, so it was never run; distribution_fine_tune
# converts the same column with pd.to_numeric(..., errors='coerce') -- confirm which is intended.
adata.obs["coor_x"] = adata.obs["coor_x"].astype(int)
