In [1]:
import pandas as pd
import numpy as np
import os
import json
import stream as st
import networkx as nx
import shutil

In [2]:
# Load the previously saved STREAM result and point the session at its
# working directory.
# NOTE(review): file_format='pkl' presumably means the file is unpickled on
# load — only read files that come from a trusted source.
adata = st.read(
    './stream_result.pkl',
    file_format='pkl',
    workdir='./stream_result',
)

Saving results in: ./stream_result


In [3]:
# Bare expression: the notebook shows the loaded object's repr as the cell output.
adata

AnnData object with n_obs × n_vars = 1656 × 4762 
    obs: 'label', 'label_color', 'node', 'branch_id', 'branch_id_alias', 'branch_lam', 'branch_dist', 'S0_pseudotime', 'S3_pseudotime', 'S2_pseudotime', 'S5_pseudotime', 'S4_pseudotime', 'S1_pseudotime'
    var: 'n_counts', 'n_cells'
    uns: 'experiment', 'workdir', 'label_color', 'var_genes', 'trans_mlle', 'epg', 'flat_tree', 'seed_epg', 'seed_flat_tree', 'ori_epg', 'epg_obj', 'ori_epg_obj', 'subwaymap_S4', 'scaled_gene_expr', 'leaf_genes_all', 'leaf_genes'
    obsm: 'var_genes', 'X_mlle', 'X_dr', 'X_vis_umap', 'X_spring', 'X_subwaymap_S4'

In [4]:
# Directory that receives every JSON file written by the cells below; its
# contents are zipped in the final cell.
reportdir = './stream_report'

In [5]:
# Create the report directory (including any missing parents).
# exist_ok=True keeps the cell safe to re-run and avoids the race between a
# separate os.path.exists() check and the creation call.
os.makedirs(reportdir, exist_ok=True)

In [6]:
# One record per cell: its id plus the first three columns of the 'X_dr'
# coordinates stored in adata.obsm.
# Indexing the coordinate array yields NumPy scalars; they are cast to the
# built-in float because json.dump (used on this list afterwards) rejects
# NumPy types such as np.float32.
coords_dr = adata.obsm['X_dr']
list_cells = [
    {
        'cell_id': adata.obs_names[i],
        'x': float(coords_dr[i, 0]),
        'y': float(coords_dr[i, 1]),
        'z': float(coords_dr[i, 2]),
    }
    for i in range(adata.shape[0])
]

In [7]:
# Serialise the per-cell coordinate records to <reportdir>/scatter.json.
scatter_path = os.path.join(reportdir,'scatter.json')
with open(scatter_path, 'w') as scatter_file:
    json.dump(list_cells, scatter_file)

In [8]:
# Export the branches of the flat tree: one record per edge, holding the
# branch id and the 3-D positions of the graph nodes along that edge.
list_curves = []

epg = adata.uns['epg']
flat_tree = adata.uns['flat_tree']
dict_nodes_pos = nx.get_node_attributes(epg,'pos')
dict_nodes_label = nx.get_node_attributes(flat_tree,'label')

for edge_i in flat_tree.edges():
    # The edge's 'nodes' attribute lists the ids of the epg nodes that make
    # up this branch; look up each node's position in the epg graph.
    branch_i_nodes = flat_tree.edges[edge_i]['nodes']
    branch_i_pos = np.array([dict_nodes_pos[node] for node in branch_i_nodes])

    # Column-wise coordinate lists. ndarray.tolist() returns plain Python
    # numbers, which json.dump can always serialise.
    xs, ys, zs = (branch_i_pos[:, axis].tolist() for axis in range(3))

    dict_coord_curves = dict()
    # Branch id is '<label of first endpoint>_<label of second endpoint>'.
    dict_coord_curves['branch_id'] = dict_nodes_label[edge_i[0]] + '_' + dict_nodes_label[edge_i[1]]
    # Point-wise form: one {'x', 'y', 'z'} dict per node on the branch.
    dict_coord_curves['xyz'] = [{'x': x, 'y': y, 'z': z} for x, y, z in zip(xs, ys, zs)]
    # Column-wise form. The x column used to be computed and then discarded
    # (the assignment was missing), leaving records with 'y' and 'z' but no 'x'.
    dict_coord_curves['x'] = xs
    dict_coord_curves['y'] = ys
    dict_coord_curves['z'] = zs
    list_curves.append(dict_coord_curves)

In [9]:
# Serialise the per-branch curve records to <reportdir>/stream.json.
stream_path = os.path.join(reportdir,'stream.json')
with open(stream_path, 'w') as stream_file:
    json.dump(list_curves, stream_file)

In [10]:
# One metadata record per cell: id, label and label colour.
# The obs columns are converted to lists once, up front; rebuilding them with
# .tolist() inside the loop made the cell quadratic in the number of cells.
cell_labels = adata.obs['label'].tolist()
cell_label_colors = adata.obs['label_color'].tolist()

list_metadata = [
    {'cell_id': cell_id, 'label': label, 'label_color': label_color}
    for cell_id, label, label_color in zip(adata.obs_names, cell_labels, cell_label_colors)
]

In [11]:
# Serialise the per-cell metadata records to <reportdir>/metadata.json.
metadata_path = os.path.join(reportdir,'metadata.json')
with open(metadata_path, 'w') as metadata_file:
    json.dump(list_metadata, metadata_file)

In [12]:
# Expression table built from adata.raw: rows are labelled with the raw
# observation names and columns with the raw variable names.
df_genes = pd.DataFrame(
    data=adata.raw.X,
    index=adata.raw.obs_names,
    columns=adata.raw.var_names,
)

In [13]:
# Preview the entries at positions 1-9 of the 'leaf_genes_all' index.
# NOTE(review): the slice starts at 1, so the entry at position 0 is not
# shown — confirm that skipping it is intended.
adata.uns['leaf_genes_all'].index[1:10].tolist()

['Gata1', 'Mpl', 'Emb', 'Vamp5', 'Prtn3', 'Epx', 'Klf1', 'Lcp1', 'Limd2']

In [14]:
# Genes to export: the entries at positions 1-29 of the 'leaf_genes_all' index.
# NOTE(review): the slice starts at 1, so the entry at position 0 is
# excluded — confirm that skipping it is intended.
top_genes = adata.uns['leaf_genes_all'].index[1:30].tolist()

In [15]:
# Write one JSON file per selected gene (<reportdir>/gene_<name>.json), each a
# list of {'cell_id': ..., <gene name>: value} records covering every cell.
for g in top_genes:
    # Look the gene's column up once instead of once per cell.
    gene_expr = df_genes[g]
    # Built-in float replaces np.float, which was only an alias of it; the
    # alias was deprecated in NumPy 1.20 and removed in 1.24, so the old
    # spelling raises AttributeError on current NumPy. The cast is still
    # needed so json.dump receives a plain Python number.
    list_genes = [
        {'cell_id': x, g: float(gene_expr.loc[x])}
        for x in adata.obs_names
    ]
    with open(os.path.join(reportdir,'gene_'+g+'.json'), 'w') as f:
        json.dump(list_genes, f)

In [16]:
# Zip the contents of the report directory into 'stream_report.zip'; the call
# returns the archive's path, which the notebook shows as the cell output.
shutil.make_archive(base_name='stream_report', format='zip', root_dir=reportdir)

'/Users/huidong/Projects/SinglecellVR/stream_report.zip'