In [2]:
import scanpy as sc
import pandas as pd
ad = sc.read_text("exprMatrix.tsv.gz")
meta = pd.read_csv("meta.tsv", sep="\t")
ad.var = meta

In [3]:
print(ad)

Index(['cellId', 'nCount_RNA', 'nFeature_RNA', 'Cluster', 'Cluster_name',
       'Annotation', 'Dataset', 'Protocol', 'Age'],
      dtype='object')
AnnData object with n_obs × n_vars = 2000 × 190022
    var: 'cellId', 'nCount_RNA', 'nFeature_RNA', 'Cluster', 'Cluster_name', 'Annotation', 'Dataset', 'Protocol', 'Age'


First, we find the list of cluster names:

In [4]:
clusterNameSet = set(ad.var['Cluster_name'])
print('List of Cluster Names:')
print(clusterNameSet)

List of Cluster Names:
{'ME', 'CN2', 'NEC1', 'CBC', 'PGC1', 'Inter', 'BRC', 'NEC2', 'PGC2', 'NEC4', 'UPRC2', 'NEC3', 'CN1', 'OPC/OL', 'IN', 'AS1', 'UPRC1', 'Neuron', 'CN4', 'AS3', 'AS2', 'GPC', 'CN5', 'CN3'}


Second, we need a way to get the average expression value for a gene for all cells in a cluster:

In [None]:
def getAvgExprForGeneInCluster(gene, cluster):
    exprFrameForCluster = ad[ad.obs.index == gene, ad.var['Cluster_name'] == cluster].X
    return exprFrameForCluster.mean() if not exprFrameForCluster.size == 0 else 0

print(getAvgExprForGeneInCluster('SOX2','CN3'))
print(ad[ad.obs.index == 'SOX2', ad.var['Cluster_name'] == 'CN3'].X.tolist())

Third, we combine the average expression values for a gene for a cluster in a dataframe:

In [27]:
import numpy as np

geneList = ['AQP4', 'SCL1A3', 'HepaCAM1', 'CD44', 'NCAM1', 'CD24', 'FUT4', \
               'CXCR4', 'FOXO4', 'PDGFRA', 'ITGB2', 'TFRC', 'PROM1', 'NKX2-2']

dataFrame = pd.DataFrame(np.array([[getAvgExprForGeneInCluster(gene, cluster) \
                                    for gene in geneList] for cluster in clusterNameSet]), \
                                    columns = geneList, index=clusterNameSet)



print(dataFrame)

            AQP4  SCL1A3  HepaCAM1      CD44     NCAM1  CD24  FUT4     CXCR4  \
ME      0.007709     0.0       0.0  0.129248  0.381037   0.0   0.0  0.406132   
CN2     0.006827     0.0       0.0  0.012715  0.964154   0.0   0.0  0.103462   
NEC1    0.035137     0.0       0.0  0.342560  0.463413   0.0   0.0  1.173386   
CBC     0.007456     0.0       0.0  0.086671  0.081521   0.0   0.0  0.230967   
PGC1    0.019188     0.0       0.0  0.250737  0.130685   0.0   0.0  0.578163   
Inter   0.017490     0.0       0.0  0.153802  0.524793   0.0   0.0  0.563521   
BRC     0.018997     0.0       0.0  0.217212  0.121197   0.0   0.0  0.432291   
NEC2    0.010201     0.0       0.0  0.212694  0.450097   0.0   0.0  1.258110   
PGC2   -0.002075     0.0       0.0  0.291515  0.158913   0.0   0.0  0.689419   
NEC4    0.028726     0.0       0.0  0.228243  0.318951   0.0   0.0  0.959562   
UPRC2  -0.009971     0.0       0.0  0.373038  0.137948   0.0   0.0  0.312741   
NEC3    0.039304     0.0       0.0  0.36