In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import scvelo as scv

In [2]:
adata = scv.datasets.pancreas()
adata

AnnData object with n_obs × n_vars = 3696 × 27998
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score'
    var: 'highly_variable_genes'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_umap'
    layers: 'spliced', 'unspliced'
    obsp: 'distances', 'connectivities'

In [3]:
scv.pp.filter_genes(adata, min_shared_counts=20)
scv.pp.normalize_per_cell(adata)
scv.pp.filter_genes_dispersion(adata, n_top_genes=2000)
scv.pp.log1p(adata)

Filtered out 20801 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.


In [4]:
def volcano_data(adata,clusters, cell_name):
    s = adata.to_df(layer='spliced')
    u = adata.to_df(layer='unspliced')
    df = pd.DataFrame(adata.obs)
    cell = df.index[df[clusters] == cell_name].tolist()
    dfs = s.loc[cell].T
    dfu = u.loc[cell].T
    mean_s = dfs.mean(axis=1)
    mean_u = dfu.mean(axis=1)
    gene = list(dfs.index)
    ge = pd.DataFrame(gene, columns=['Gene'])
    l2fc = np.array(np.log2(mean_u/mean_s))
    ge["l2fc"] = l2fc
    p_value = stats.ttest_ind(np.array(u.loc[cell]),np.array(s.loc[cell]))
    ge["pv"]= p_value[1]
    ge.fillna(0, inplace=True)
    temp = ge[(ge.l2fc!=0) & (ge.pv!=0)]
    exgene = np.array(temp.index)
    res_list = [np.array(ge['Gene'])[i] for i in exgene]    
    EXG = pd.DataFrame(res_list, columns=['exp_Gene'])
    EXG.to_csv('expgene.csv')
    print("Data save as name expgene.csv")
    ge.to_csv('volcano.csv')
    print("Data save as name volcano.csv") 

In [5]:
volcano_data(adata,clusters='clusters', cell_name='Beta')

Data save as name expgene.csv
Data save as name volcano.csv


In [6]:
df = pd.read_csv("volcano.csv")
df

Unnamed: 0.1,Unnamed: 0,Gene,l2fc,pv
0,0,Sntg1,5.440889,0.000000e+00
1,1,Snhg6,-3.220835,9.868038e-17
2,2,Ncoa2,1.997536,9.787000e-42
3,3,Sbspon,-1.654909,3.039429e-01
4,4,Ube2w,3.273632,2.854878e-27
...,...,...,...,...
1995,1995,Tmem27,-6.345090,0.000000e+00
1996,1996,Uty,2.530546,1.671147e-11
1997,1997,Ddx3y,-0.159687,4.376616e-01
1998,1998,Eif2s3y,-0.877883,5.315068e-04


In [7]:
df2 = pd.read_csv("expgene.csv")
df2

Unnamed: 0.1,Unnamed: 0,exp_Gene
0,0,Snhg6
1,1,Ncoa2
2,2,Sbspon
3,3,Ube2w
4,4,Mcm3
...,...,...
1591,1591,Ap1s2
1592,1592,Uty
1593,1593,Ddx3y
1594,1594,Eif2s3y
