This notebook creates treemap images of RNA expression data for many patient samples and genes. The method used to create these images is described in https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0230536&type=printable. The treemap plotting reference can be found at https://plotly.com/python/treemaps/.

In [86]:
import pandas as pd
import numpy as np
import json
import plotly.express as px
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.io as pio

### Loading the small and large gene-mapping files

In [2]:
# Small gene mapping table; every plotting cell and helper in this notebook reads from it.
gene_mapping = pd.read_csv('SmallHierarchicalMap_for_TreeFrame.csv')

In [3]:
# Larger gene mapping table, loaded from the non-"Small" CSV.
large_gene_mapping = pd.read_csv('HierarchicalMap_for_TreeFrame.csv')

In [4]:
# Number of distinct gene names in the small mapping
gene_mapping['GeneName'].nunique(dropna=False)

1314

In [5]:
# Preview the first rows of the small mapping to check its column layout.
gene_mapping.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 0_x,SRR1782685,SRR1782686,SRR1782687,SRR1782688,SRR1782689,SRR1782690,SRR1782691,SRR1782692,...,SRR1783003,SRR1783004,SRR1783005,SRR1783006,GeneName,keggBriteId,Unnamed: 0_y,Functional Annotation Group,Functional Annotation Subgroup,Functional Annotation
0,0,0,5.064846e-05,1.4e-05,8.582364e-05,4.8e-05,2.264291e-05,5.6e-05,5e-05,5.633922e-05,...,3.859068e-05,5.708651e-05,8.075028e-05,5.1e-05,TSPAN6,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
1,1,6,9.827314e-05,0.000225,0.0001748377,8.9e-05,0.0001891272,9.1e-05,0.000185,7.452947e-05,...,0.0001135467,0.0001103236,0.0001155178,7.7e-05,CFH,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
2,2,7,7.948245e-05,6.7e-05,7.657127e-05,7.8e-05,6.452575e-05,7.3e-05,5.9e-05,6.720284e-05,...,6.469774e-05,6.73244e-05,6.668016e-05,9e-05,FUCA2,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
3,6,94,2.159849e-07,1e-06,6.38094e-07,0.0,5.235355e-07,0.0,2e-06,2.526423e-07,...,3.256597e-07,1.638063e-07,4.078297e-07,0.0,MPO,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
4,8,107,3.336967e-05,7.7e-05,5.264275e-05,2.5e-05,8.808484e-05,6.2e-05,0.00021,4.623353e-05,...,0.000132435,5.651319e-05,5.067284e-05,3e-05,ITGAL,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome


In [6]:
# Preview the first rows of the large mapping to check its column layout.
large_gene_mapping.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 0_x,SRR1782685,SRR1782686,SRR1782687,SRR1782688,SRR1782689,SRR1782690,SRR1782691,SRR1782692,...,SRR1783003,SRR1783004,SRR1783005,SRR1783006,GeneName,keggBriteId,Unnamed: 0_y,Functional Annotation Group,Functional Annotation Subgroup,Functional Annotation
0,0,0,5.1e-05,1.4e-05,8.6e-05,4.8e-05,2.3e-05,5.6e-05,5e-05,5.6e-05,...,3.9e-05,5.7e-05,8.1e-05,5.1e-05,TSPAN6,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
1,1,6,9.8e-05,0.000225,0.000175,8.9e-05,0.000189,9.1e-05,0.000185,7.5e-05,...,0.000114,0.00011,0.000116,7.7e-05,CFH,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
2,2,7,7.9e-05,6.7e-05,7.7e-05,7.8e-05,6.5e-05,7.3e-05,5.9e-05,6.7e-05,...,6.5e-05,6.7e-05,6.7e-05,9e-05,FUCA2,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
3,3,45,7.8e-05,6.1e-05,7.4e-05,5.9e-05,7.2e-05,0.0001,7e-05,8.3e-05,...,9.2e-05,7.3e-05,8.9e-05,0.000103,ARF5,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome
4,4,49,0.000105,6.6e-05,0.000108,0.000119,8.2e-05,0.000115,0.000125,0.00014,...,0.000153,0.000149,0.000157,0.000183,AK2,4147,42,Genes and Proteins,Protein families: signaling and cellular proce...,Exosome


### Creating Small Mapping

In [7]:
# Pull the gene labels, their keggBriteId group ids and one sample's
# expression values out of the small mapping as plain Python lists.
names, matching_keggBriteId, curr_patient = (
    list(gene_mapping[column])
    for column in ('GeneName', 'keggBriteId', 'SRR1782685')
)
# Sanity check: labels and group ids must line up one-to-one.
(len(names), len(matching_keggBriteId))

(1314, 1314)

In [8]:
# keggBriteId is the grouping key, so its distinct values are the functional groups.
print('Number of unique functional groups: ', len(np.unique(matching_keggBriteId)))

Number of unique functional groups:  39


In [9]:
# Positions i where the group id at row i differs from the one at row i + 1,
# i.e. the last row of each contiguous run of identical ids.
v = np.array(matching_keggBriteId)
np.flatnonzero(v[:-1] != v[1:])

array([ 156,  172,  218,  358,  474,  533,  579,  599,  749,  816,  820,
        854,  863,  890,  905,  952,  972, 1045, 1054, 1082, 1110, 1145,
       1160, 1196, 1212, 1224, 1232, 1249, 1266, 1275, 1276, 1286, 1289,
       1291, 1295, 1304, 1307, 1310])

In [10]:
# Row range to plot; these bounds cover every row of the small mapping.
ind1, ind2 = 0, len(gene_mapping)

# Bundle gene labels, group ids and the sample's expression values over that
# row range into one (labels, groups, values) tuple.
plt_values = tuple(
    column[ind1:ind2]
    for column in (names, matching_keggBriteId, curr_patient)
)

In [11]:
# plt_values

In [100]:
# Unpack the per-gene labels, group ids and expression values.
genes, brites, values = plt_values

# One row per gene; the constant "all" column gives the treemap a single root node.
new_df = pd.DataFrame(
    {"genes": genes, "brites": brites, "values": values}
).assign(all="all")

# Hierarchy is root -> group id -> gene, with tiles coloured by expression value.
fig3 = px.treemap(new_df, path=['all', 'brites', 'genes'], color='values')
fig3.show()

In [127]:
# Min-max scale the expression values into [0, 1]; the result is used as the
# treemap tile colours further down.
x = np.asarray(values, dtype=float)
x_min, x_max = np.nanmin(x), np.nanmax(x)
# Guard the degenerate case: when every value is identical the range is 0 and
# the plain formula would compute 0 / 0 and fill the colours with NaN.
normalized = (x - x_min) / (x_max - x_min) if x_max != x_min else np.zeros_like(x)

In [129]:
# go.Treemap takes flat, parallel lists: first one entry per gene, then one
# entry per distinct group id. Group entries hang off the 'all' root and get
# a colour value of 0.0.
genes2 = [*genes, *np.unique(brites)]
parents2 = [*brites, *(['all'] * len(np.unique(brites)))]
values2 = [*normalized, *([0.0] * len(np.unique(brites)))]

In [135]:
# Treemap trace coloured by the normalised expression values, colour bar hidden.
fig2 = go.Treemap(
    labels=genes2,
    parents=parents2,
    marker=dict(colors=values2, showscale=False),
)

# Single-cell figure that hosts the trace.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(fig2)

fig.show()

In [131]:
# Inspect the traces attached to the figure (labels, parents and marker settings).
fig.data

(Treemap({
     'labels': [TSPAN6, CFH, FUCA2, ..., 4147, 4515, 4812],
     'marker': {'colors': [0.004589220713139456, 0.008904458100121335,
                           0.0072018474304278044, ..., 0.0, 0.0, 0.0],
                'showscale': False},
     'parents': [4147, 4147, 4147, ..., all, all, all]
 }),)

In [119]:
# px.treemap links the trace colours to the shared layout colour axis
# (marker.coloraxis is 'coloraxis'), so the colour bar has to be switched off
# on the layout; the trace-level flag on its own does not remove it.
fig3.data[0]['marker']['showscale'] = False
fig3.update_layout(coloraxis_showscale=False)
fig3.show()

In [120]:
# Inspect the marker settings of the Plotly Express trace (colour axis, colours, showscale).
fig3.data[0]['marker']

treemap.Marker({
    'coloraxis': 'coloraxis',
    'colors': array([6.78192630e-04, 4.65447489e-05, 7.76465765e-05, ..., 1.99102093e-04,
                     2.32615752e-04, 1.95666963e-04]),
    'showscale': False
})

### Saving Images

In [15]:
# Create the output folder, including the parent "images/" folder if needed.
# os.mkdir raises FileNotFoundError when "images/" itself is missing, and
# exist_ok=True keeps the cell safe to re-run.
os.makedirs("images/small/", exist_ok=True)

In [136]:
# Export the current treemap figure as a PNG named after the sample.
fig.write_image("images/small/{}.png".format('SRR1782685'))

In [17]:
# pio.orca.config.executable = '/Users/pkarnati/anaconda/envs/ps2-env-2/lib/python3.7/site-packages/plotly/io/_orca.py'

In [18]:
# pio.__file__

In [19]:
# pio.orca.config

In [20]:
# Show the state of the orca server that plotly reports (executable, version, port).
pio.orca.status

orca status
-----------
    state: running
    executable: /usr/local/bin/orca
    version: 1.3.1
    port: 52762
    pid: 83434
    command: ['/usr/local/bin/orca', 'serve', '-p', '52762', '--plotly', '/Users/pkarnati/anaconda/envs/ps2-env-2/lib/python3.7/site-packages/plotly/package_data/plotly.min.js', '--graph-only', '--mathjax', 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js']


### Small mapping for all patients

In [146]:
def get_info(patient_name):
    """Collect the treemap inputs for one sample.

    Reads the notebook-level ``gene_mapping`` DataFrame and returns a tuple
    ``(names, matching_keggBriteId, curr_patient)`` of three plain lists:
    the ``GeneName`` column, the ``keggBriteId`` column and the column named
    ``patient_name``. A progress line is printed first. A ``patient_name``
    that is not a column raises ``KeyError``.
    """
    print('getting info for {}...'.format(patient_name))
    wanted_columns = ('GeneName', 'keggBriteId', patient_name)
    # Columns are materialised in the order labels -> group ids -> values.
    return tuple(list(gene_mapping[column]) for column in wanted_columns)

def prepare_input(plt_values):
    """Build the flat label / parent / colour lists passed to ``go.Treemap``.

    Parameters
    ----------
    plt_values : sequence
        ``(genes, brites, values)``: the gene labels, the group id each gene
        belongs to, and one numeric value per gene. Only the first three
        items are read.

    Returns
    -------
    tuple of list
        ``(labels, parents, colours)``. ``labels`` holds the genes followed
        by the distinct group ids; ``parents`` holds each gene's group id
        followed by ``'all'`` for every group; ``colours`` holds the min-max
        scaled values followed by ``0.0`` for every group.

    Raises
    ------
    ValueError
        If ``values`` is empty.
    """
    print('preparing input...')
    # Copy into lists so the concatenations below work for any sequence type.
    genes = list(plt_values[0])
    brites = list(plt_values[1])
    values = plt_values[2]

    x = np.asarray(values, dtype=float)
    if x.size == 0:
        raise ValueError('prepare_input needs at least one value to normalise')

    # NaN entries are skipped when finding the range, so one missing value
    # does not decide the scale on its own.
    x_min, x_max = np.nanmin(x), np.nanmax(x)
    span = x_max - x_min
    if span == 0:
        # Every value is identical: the plain formula would compute 0 / 0 and
        # hand NaN colours to the plot, so fall back to a flat 0.0 instead.
        normalized = np.zeros_like(x)
    else:
        normalized = (x - x_min) / span

    # Computed once; the group ids feed all three output lists.
    unique_brites = list(np.unique(brites))
    n_groups = len(unique_brites)

    genes2 = genes + unique_brites
    parents2 = brites + ['all'] * n_groups
    values2 = normalized.tolist() + [0.0] * n_groups

    return genes2, parents2, values2

def create_plot(genes, parents, values):
    """Assemble a one-trace treemap figure without displaying it.

    ``genes`` are the tile labels, ``parents`` names the parent label of each
    tile and ``values`` supplies one colour value per tile. The colour bar is
    switched off. Returns the figure created by ``make_subplots`` with the
    treemap trace attached.
    """
    treemap_trace = go.Treemap(
        labels=genes,
        parents=parents,
        marker=dict(colors=values, showscale=False),
    )

    canvas = make_subplots(rows=1, cols=1)
    canvas.add_trace(treemap_trace)
    return canvas

def save_fig(fig, patient_name, out_dir="images/small/"):
    """Write ``fig`` as ``<patient_name>.png`` inside ``out_dir``.

    Parameters
    ----------
    fig : object
        Anything exposing ``write_image(path)``.
    patient_name : str
        Sample identifier, used as the file name without extension.
    out_dir : str, optional
        Destination folder. The default keeps the original
        ``images/small/`` location, so existing two-argument calls are
        unaffected.
    """
    # Create the folder on demand so the function does not depend on an
    # earlier notebook cell having been run first.
    os.makedirs(out_dir, exist_ok=True)
    fig.write_image(os.path.join(out_dir, "{}.png".format(patient_name)))

In [147]:
# Run the whole pipeline once on a single sample.
# Note: this rebinds the notebook-level genes / values to the prepare_input
# outputs, which also contain the appended group entries, and replaces fig.
patient_name = 'SRR1782685'
plt_values = get_info(patient_name)
genes, parents, values = prepare_input(plt_values)
fig = create_plot(genes, parents, values)
save_fig(fig, patient_name)

getting info for SRR1782685...
preparing input...


In [157]:
# Sample columns are picked by position: the first two and the last six
# column names are skipped.
# NOTE(review): presumably those are leading index columns and trailing
# annotation columns - confirm against gene_mapping.columns, since any change
# in column layout silently alters this list.
patients = list(gene_mapping)[2:-6]

# Render and save one treemap per sample. plt_values, genes, parents, values
# and fig are rebound on every pass and end up holding the last sample's data.
for patient_name in patients:
    plt_values = get_info(patient_name)
    genes, parents, values = prepare_input(plt_values)
    fig = create_plot(genes, parents, values)
    save_fig(fig, patient_name)

getting info for SRR1782685...
preparing input...
getting info for SRR1782686...
preparing input...
getting info for SRR1782687...
preparing input...
getting info for SRR1782688...
preparing input...
getting info for SRR1782689...
preparing input...
getting info for SRR1782690...
preparing input...
getting info for SRR1782691...
preparing input...
getting info for SRR1782692...
preparing input...
getting info for SRR1782693...
preparing input...
getting info for SRR1782694...
preparing input...
getting info for SRR1782695...
preparing input...
getting info for SRR1782696...
preparing input...
getting info for SRR1782697...
preparing input...
getting info for SRR1782698...
preparing input...
getting info for SRR1782699...
preparing input...
getting info for SRR1782700...
preparing input...
getting info for SRR1782701...
preparing input...
getting info for SRR1782702...
preparing input...
getting info for SRR1782703...
preparing input...
getting info for SRR1782704...
preparing input...


getting info for SRR1782849...
preparing input...
getting info for SRR1782850...
preparing input...
getting info for SRR1782851...
preparing input...
getting info for SRR1782852...
preparing input...
getting info for SRR1782853...
preparing input...
getting info for SRR1782854...
preparing input...
getting info for SRR1782855...
preparing input...
getting info for SRR1782856...
preparing input...
getting info for SRR1782857...
preparing input...
getting info for SRR1782858...
preparing input...
getting info for SRR1782859...
preparing input...
getting info for SRR1782860...
preparing input...
getting info for SRR1782861...
preparing input...
getting info for SRR1782862...
preparing input...
getting info for SRR1782863...
preparing input...
getting info for SRR1782864...
preparing input...
getting info for SRR1782865...
preparing input...
getting info for SRR1782866...
preparing input...
getting info for SRR1782867...
preparing input...
getting info for SRR1782868...
preparing input...
