In [2]:
import scanpy as sc
import glob
import os

# 1. 指定输入文件夹
input_dir = "/root/Desktop/my_pan/workspace/Data/lung_h5ad_output"

# 2. 搜索所有 .h5ad 文件
h5ad_paths = sorted(glob.glob(os.path.join(input_dir, "*.h5ad")))

# 3. 读取并收集 AnnData 对象
adatas = [sc.read_h5ad(p) for p in h5ad_paths]

# 4. 提取批次名称（文件名去掉后缀）
batch_names = [os.path.splitext(os.path.basename(p))[0] for p in h5ad_paths]

# 5. 合并
adata_merged = adatas[0].concatenate(
    adatas[1:],
    batch_key="batch",
    batch_categories=batch_names
)

# 6. 保存为新的 h5ad
out_path = os.path.join(input_dir, "merged_lung_batches.h5ad")
adata_merged.write(out_path)

print(f"✅ 合并完成，输出：{out_path}")


✅ 合并完成，输出：/root/Desktop/my_pan/workspace/Data/lung_h5ad_output/merged_lung_batches.h5ad


  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
AnnData expects .var.index to contain strings, but got values like:
    []

    Inferred to be: empty

  value_idx = self._prep_dim_index(value.index, attr)
AnnData expects .var.index to contain strings, but got values like:
    []

    Inferred to be: empty

  value_idx = self._prep_dim_index(value.index, attr)


In [6]:
import scanpy as sc
adata = sc.read_h5ad("/root/Desktop/my_pan/workspace/Data/lung_h5ad_output/merged_lung_batches.h5ad")
print(adata)

print(adata.obs_names[:5])   # 打印细胞名（行索引）
print(adata.var_names[:5])  

AnnData object with n_obs × n_vars = 161211 × 9116
    obs: 'batch', 'original_batch'
Index(['00R_AC107638.2', '0610005C13Rik', '0610007P14Rik', '0610009B22Rik',
       '0610009E02Rik'],
      dtype='object')
Index(['Lung_1.AAAACGAGATGGAGGACT', 'Lung_1.AAAACGAGTTTACGTGGC',
       'Lung_1.AAAACGATACAGTCACTT', 'Lung_1.AAAACGATACAGTTTAGG',
       'Lung_1.AAAACGATCTCTCATCCC'],
      dtype='object')


  utils.warn_names_duplicates("obs")


In [5]:
import scanpy as sc
import glob
import os

# 输入目录
input_dir = "/root/Desktop/my_pan/workspace/Data/lung_h5ad_output"

# 找到所有 .h5ad 文件
h5ad_paths = sorted(glob.glob(os.path.join(input_dir, "*.h5ad")))

# 读取并打上 batch 标签
adatas = []
for i, fp in enumerate(h5ad_paths, start=1):
    ad = sc.read_h5ad(fp)
    batch_name = f"lung_batch{i}"
    ad.obs["batch"] = batch_name
    ad.obs["original_batch"] = batch_name
    adatas.append(ad)

# 合并所有
adata_merged = adatas[0].concatenate(
    adatas[1:],
    batch_key="batch",
    batch_categories=[f"lung_batch{i}" for i in range(1, len(adatas)+1)],
    index_unique=None,
    join="outer"
)

# 缺失值填 0
adata_merged.X = adata_merged.X.toarray() if hasattr(adata_merged.X, "toarray") else adata_merged.X
adata_merged.X[adata_merged.X != adata_merged.X] = 0  # NaN -> 0

# 保存输出
out_path = os.path.join(input_dir, "merged_lung_batches.h5ad")
adata_merged.write(out_path)
print("✅ 合并完成，输出文件：", out_path)

  utils.warn_names_duplicates("obs")
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
  utils.warn_names_duplicates("obs")


✅ 合并完成，输出文件： /root/Desktop/my_pan/workspace/Data/lung_h5ad_output/merged_lung_batches.h5ad
