In [None]:
import scanpy as sc
import pandas as pd

# Load the PBMC RNA dataset from an .h5ad file (path is relative to the
# notebook's working directory). Every later cell works on this `adata` object.
adata = sc.read_h5ad("data/rna_PBMC.h5ad")

In [None]:
# Optional QC: drop cells with fewer than 200 detected genes and genes seen in
# fewer than 3 cells. The gene filter shrinks `adata` only; `adata.raw` keeps
# its full gene set, so the two can have different numbers of genes.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Extract raw counts: take the matrix stored in `.raw` when it exists,
# otherwise fall back to `.X`. An explicit None test is used instead of relying
# on the truthiness of the `.raw` container.
# NOTE(review): assumes `.raw` holds un-normalised counts — confirm upstream.
if adata.raw is not None:
    raw_counts = adata.raw.to_adata().X
else:
    raw_counts = adata.X

In [12]:
# Sanity-check the dimensions of the filtered object and of `.raw`.
print(adata.shape)               # (cells, genes)
if adata.raw is not None:        # `.raw` is optional; skip rather than raise AttributeError
    print(adata.raw.shape)       # (cells, genes) stored in .raw
print(adata.var_names.shape)     # Should match genes in adata

(11331, 26222)
(11331, 36601)
(26222,)


In [None]:
import scipy.sparse as sp

# Pick the matrix to export together with its matching gene labels:
# `.raw` when it is present, otherwise the main `.X` / `.var_names`.
if adata.raw is None:
    counts, genes = adata.X, adata.var_names
else:
    raw_data = adata.raw
    counts, genes = raw_data.X, raw_data.var_names

# Sparse matrices are densified so they can be wrapped in a regular DataFrame.
counts = counts.toarray() if sp.issparse(counts) else counts

# Transpose to a genes x cells table (rows labelled by gene, columns by the
# obs names of `adata`) and write it out as CSV.
df = pd.DataFrame(data=counts.T, index=genes, columns=adata.obs_names)
df.to_csv("data/pbmc_counts.csv")


In [18]:
# Write one line per cell: the obs index followed by its 'predicted.id' value,
# tab-separated and without a header row.
(
    adata.obs
    .loc[:, ['predicted.id']]
    .to_csv("data/cell_types.txt", sep="\t", header=False)
)

In [23]:
import pandas as pd

# 1) load
# Expression matrix is indexed by its first column (gene names); the two
# network tables must provide 'TF' and 'gene' columns.
expr = pd.read_csv('test/06hHep_ExpressionDataOrdered.csv', index_col=0)
known = pd.read_csv('test/known_network.csv')
unknown = pd.read_csv('test/unknown_network.csv')

# 2) intersect genes
# Keep only genes that have expression data AND appear (as TF or target) in
# the known network.
genes = set(expr.index) & (set(known['TF']) | set(known['gene']))
# Select rows with a boolean mask instead of `.loc[list(genes)]`: iterating a
# set of strings yields a different order in every kernel session, which would
# shuffle the rows of the exported files. The mask keeps the input file's
# row order and makes the output reproducible.
expr = expr.loc[expr.index.isin(genes)]
# An edge survives only when both of its endpoints are in the common gene set.
known = known[known['TF'].isin(genes) & known['gene'].isin(genes)]
unknown = unknown[unknown['TF'].isin(genes) & unknown['gene'].isin(genes)]

# 3a) dump STGRNS files
expr.to_csv('test/expression_stgrns_common.csv')
known.to_csv('test/known_network_stgrns_common.csv', index=False)
unknown.to_csv('test/unknown_network_stgrns_common.csv', index=False)

# 3b) dump DeepRIG files
expr.to_csv('test/Expression_deeprig_common.csv')  # keeps gene names as first column
# DeepRIG edge list uses the column names Gene1 (regulator) / Gene2 (target).
net_dr = known[['TF', 'gene']].rename(columns={'TF': 'Gene1', 'gene': 'Gene2'})
net_dr.to_csv('test/network_deeprig_common.csv', index=False)


In [24]:
# Re-load the common-gene expression table, using its first column as index.
# NOTE(review): this reads from 'test/stgrns/', while the export cell writes
# 'test/expression_stgrns_common.csv' directly under 'test/' — confirm the file
# is moved/copied there before this cell runs.
df2 = pd.read_csv(
    'test/stgrns/expression_stgrns_common.csv',
    index_col=0,
)

# Store it in a fresh HDF5 file (mode='w' overwrites any existing file) under
# the group name the script expects.
df2.to_hdf(
    path_or_buf='data/mesc_cell_expression.h5',
    key='mesc_cell_expression',
    mode='w',
)
