In [2]:
# 1. 加载必要的库
library(Seurat)
library(SeuratDisk) # 用于 SaveH5Seurat 和 Convert
# 如果你没有安装 zellkonverter，建议安装：BiocManager::install("zellkonverter")

# 2. 读取数据并创建对象
pbmc3k.data <- Read10X(data.dir = "3k")
pbmc3k <- CreateSeuratObject(counts = pbmc3k.data, project = "PBMC3K", min.features = 200)

pbmc5k.data <- Read10X(data.dir = "5k")
pbmc5k <- CreateSeuratObject(counts = pbmc5k.data, project = "PBMC5K", min.features = 200)

# 3. 合并数据 (Merge)
# 注意：在 Seurat v5 中，合并后的数据会以 layers 形式存储（例如 counts.PBMC3K）
pbmc.combined <- merge(pbmc3k, y = pbmc5k, add.cell.ids = c("3K", "5K"))

# 4. 关键修复步骤：合并图层 (Join Layers)
# 这是解决 "source object not found" 报错的核心！
# 它把各样本的 counts 拼成一个大矩阵，存放在标准的 "counts" 路径下
pbmc.combined <- JoinLayers(pbmc.combined)

# 5. 标准预处理 (scGPT 等模型通常需要规范化的数据)
pbmc.combined[["percent.mt"]] <- PercentageFeatureSet(pbmc.combined, pattern = "^MT-")
pbmc.combined <- subset(pbmc.combined, subset = nFeature_RNA > 200 & percent.mt < 5)

# 标准化与寻找高变基因
pbmc.combined <- NormalizeData(pbmc.combined)
pbmc.combined <- FindVariableFeatures(pbmc.combined)
pbmc.combined <- ScaleData(pbmc.combined)

# 6. 导出数据
# 设置默认 Assay 确保万无一失
DefaultAssay(pbmc.combined) <- "RNA"

# 导出为 h5Seurat 中间格式
SaveH5Seurat(pbmc.combined, filename = "pbmc_for_gpt.h5Seurat", overwrite = TRUE)

# 转换为 h5ad
# 此时因为执行了 JoinLayers，Convert 函数能顺利找到 "X" (data) 槽位
Convert("pbmc_for_gpt.h5Seurat", dest = "h5ad", overwrite = TRUE)

message("转换完成！文件已保存为 pbmc_for_gpt.h5ad")

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Normalizing layer: counts

Finding variable features for layer counts

Centering and scaling data matrix

Creating h5Seurat file for version 3.1.5.9900

Validating h5Seurat file

Adding data from RNA as X



ERROR: Error in assay.group$obj_copy_to(dst_loc = dfile, dst_name = "X", src_name = x.data): HDF5-API Errors:
    error #000: ../../src/H5Ocopy.c in H5Ocopy(): line 240: unable to copy object
        class: HDF5
        major: Object header
        minor: Unable to copy object

    error #001: ../../src/H5VLcallback.c in H5VL_object_copy(): line 5495: object copy failed
        class: HDF5
        major: Virtual Object Layer
        minor: Unable to copy object

    error #002: ../../src/H5VLcallback.c in H5VL__object_copy(): line 5456: object copy failed
        class: HDF5
        major: Virtual Object Layer
        minor: Unable to copy object

    error #003: ../../src/H5VLnative_object.c in H5VL__native_object_copy(): line 125: unable to copy object
        class: HDF5
        major: Object header
        minor: Unable to copy object

    error #004: ../../src/H5Ocopy.c in H5O__copy(): line 291: source object not found
        class: HDF5
        major: Symbol table
        minor: Object not found

    error #005: ../../src/H5Gloc.c in H5G_loc_find(): line 442: can't fin


In [4]:
library(SingleCellExperiment)
library(zellkonverter)

# 假设你的对象名是 pbmc.combined
# 1. 确保图层已合并
pbmc.combined <- JoinLayers(pbmc.combined)

# 2. 转换为 SingleCellExperiment 对象 (这是 zellkonverter 的中间格式)
sce <- as.SingleCellExperiment(pbmc.combined)

# 3. 直接保存为 h5ad (这种方式生成的 h5ad 结构非常标准)
writeH5AD(sce, file = "pbmc_for_gpt_fixed.h5ad")

message("转换完成！请在 Python 中读取 pbmc_for_gpt_fixed.h5ad")

Registered S3 method overwritten by 'zellkonverter':
  method                                             from      
  py_to_r.pandas.core.arrays.categorical.Categorical reticulate

+ "C:/Users/ASUS/.basilisk/1.18.0/0/condabin/conda.bat" create --yes --prefix "C:/Users/ASUS/.basilisk/1.18.0/zellkonverter/1.16.0/zellkonverterAnnDataEnv-0.10.9" "python=3.12.7" --quiet -c conda-forge --override-channels

+ "C:/Users/ASUS/.basilisk/1.18.0/0/condabin/conda.bat" install --yes --prefix "C:/Users/ASUS/.basilisk/1.18.0/zellkonverter/1.16.0/zellkonverterAnnDataEnv-0.10.9" "python=3.12.7" -c conda-forge --override-channels

+ "C:/Users/ASUS/.basilisk/1.18.0/0/condabin/conda.bat" install --yes --prefix "C:/Users/ASUS/.basilisk/1.18.0/zellkonverter/1.16.0/zellkonverterAnnDataEnv-0.10.9" -c conda-forge "python=3.12.7" "anndata=0.10.9" "h5py=3.12.1" "hdf5=1.14.3" "natsort=8.4.0" "numpy=2.1.2" "packaging=24.1" "pandas=2.2.3" "python=3.12.7" "scipy=1.14.1" --override-channels

[36mℹ[39m Using the [3