In [None]:
import scanpy as sc
import pandas as pd

# Load the dataset from an .h5ad file at a path relative to the working directory.
# NOTE(review): presumably a PBMC RNA dataset, judging by the file name — confirm provenance.
adata = sc.read_h5ad("data/rna_PBMC.h5ad")

In [None]:
# Optional: filter low-quality cells and genes.
# The return values are not captured, so these calls rely on scanpy's default
# in-place behaviour; re-running this cell filters the already-filtered object.
sc.pp.filter_cells(adata, min_genes=200)  # keep cells with at least 200 detected genes
sc.pp.filter_genes(adata, min_cells=3)    # keep genes detected in at least 3 cells

# Extract raw counts, falling back to adata.X when no .raw snapshot is stored.
# Use an explicit None check instead of relying on the truthiness of the Raw object.
# NOTE(review): the gene filter above appears to leave adata.raw untouched (the
# shape check in this notebook reports more genes in .raw than in adata), so
# raw_counts may still contain genes that were filtered out — confirm this is intended.
raw_counts = adata.raw.to_adata().X if adata.raw is not None else adata.X

In [12]:
# Sanity-check the dimensions after filtering.
print(adata.shape)               # (cells, genes)
if adata.raw is not None:
    print(adata.raw.shape)       # (cells, genes) stored in .raw
else:
    # Without this guard, adata.raw.shape raises AttributeError when .raw is unset.
    print("adata.raw is not set")
print(adata.var_names.shape)     # Should match genes in adata

(11331, 26222)
(11331, 36601)
(26222,)


In [None]:
import scipy.sparse as sp

# Pick the count matrix and its gene labels: use .raw when it is stored,
# otherwise fall back to the main matrix.
if adata.raw is None:
    counts, genes = adata.X, adata.var_names
else:
    raw_data = adata.raw
    counts, genes = raw_data.X, raw_data.var_names

# Densify sparse input so it can be transposed into a plain DataFrame.
# Note: this materialises the full cells x genes matrix in memory.
counts = counts.toarray() if sp.issparse(counts) else counts

# Transpose to genes x cells (rows = genes, columns = cell barcodes) and write to CSV.
df = pd.DataFrame(data=counts.T, index=genes, columns=adata.obs_names)
df.to_csv("data/pbmc_counts.csv")


In [18]:
# Write the 'predicted.id' column of the per-cell metadata as a headerless,
# tab-separated file; the obs index is written as the first column.
adata.obs.loc[:, ['predicted.id']].to_csv(
    "data/cell_types.txt",
    sep="\t",
    header=False,
)