# Visualizing tissue specific metabolite profiles with matplotlib

## Generating leaf-root time series metabolite concentration figures


In [1]:
import pandas as pd

In [2]:
leafdf=pd.read_csv('Col0_leaf-KEGG.csv')
rootdf=pd.read_csv('Col0_root-KEGG.csv')

this experiment has 8 timepoints and several replicates for each timepoint.

In [3]:
kegg = leafdf['KEGG']

In [4]:
leafdf.filter(regex="^[0-9]+")

Unnamed: 0,14_L03_full_1,14_L04_full_1,14_L06_full_1,14_L08_full_1,14_L11_full_1,14_L14_full_1,14_L15_full_1,14_L16_full_1,16_L02_full_1,16_L03_full_1,...,26_L12_full_1,26_L16_full_1,28_L01_full_1,28_L03_full_1,28_L05_full_1,28_L07_full_1,28_L10_full_1,28_L11_full_1,28_L12_full_1,28_L16_full_1
0,2203756.0,3824887.0,2344465.0,3792906.0,4841002.0,4434021.0,2732656.0,4564863.0,5266019.0,3600278.0,...,2943271.0,2071103.0,2454579.0,2397335.0,1734449.0,2204660.0,1167445.0,1914662.0,1970794.0,1605533.0
1,13561129.0,50437850.0,91753760.0,60463460.0,66567170.0,45412260.0,13722980.0,24082220.0,60042700.0,45303610.0,...,58815650.0,22991871.0,37523700.0,52447060.0,39081728.0,49191520.0,31938336.0,24822914.0,32476930.0,32291218.0
2,16057.0,31637.0,32402.0,57312.0,74466.0,39796.0,20284.0,40833.0,52812.0,43187.0,...,18248.0,16370.0,21695.0,26408.0,15584.0,18835.0,17585.0,20245.0,16166.0,17971.0
3,5079051.0,7011010.0,5109477.0,2931404.0,6928890.0,7095492.0,6624830.0,6502495.0,5946074.0,7899030.0,...,733280.0,164654.0,2067595.0,0.0,9800.0,151148.0,0.0,211766.0,322111.0,62312.0
4,25791284.0,53463280.0,26378440.0,53995600.0,57847140.0,45349220.0,46691260.0,73980560.0,56044040.0,64167640.0,...,56847220.0,55757367.0,89740820.0,91961840.0,68547782.0,65454540.0,65470115.0,88872250.0,80325607.0,48711723.0
5,66359806.0,58549480.0,12580370.0,47641270.0,9224643.0,54006940.0,124018800.0,63091490.0,85599020.0,36824480.0,...,106044500.0,83031234.0,88648400.0,100862300.0,71438299.0,61775640.0,84378360.0,97747239.0,75847567.0,53400658.0
6,3296.0,2536.0,5965.0,13158.0,13257.0,3069.0,6860.0,2269.0,11260.0,9221.0,...,2587.0,2160.0,2408.0,2865.0,1919.0,2661.0,2586.0,1960.0,1207.0,2477.0
7,773335.0,1517080.0,1159387.0,1981835.0,2988896.0,1851033.0,1002323.0,1838016.0,3784283.0,2298809.0,...,1525744.0,1143921.0,1810272.0,1400014.0,1050251.0,1350022.0,829497.0,1086525.0,1078238.0,1227341.0
8,43010.0,89659.0,60910.0,111675.0,158361.0,98004.0,51511.0,97722.0,199588.0,118907.0,...,85228.0,67197.0,101266.0,78281.0,60862.0,76860.0,52032.0,60671.0,62204.0,71802.0
9,247880.0,649325.0,1047389.0,547008.0,1628296.0,939781.0,371836.0,1955726.0,1479960.0,1078240.0,...,613182.0,490747.0,620310.0,519569.0,413446.0,565879.0,377578.0,434685.0,524628.0,489080.0


In [5]:
timepoints=["14","16","18","20","22","24","26","28"]

In [6]:
kegg.name='KEGG'
leafmat=pd.concat([kegg])
rootmat=pd.concat([kegg])
for i in timepoints:
    leafprofile = leafdf.filter(regex="^"+i)
    rootprofile = rootdf.filter(regex="^"+i)
    leafmean = leafprofile.mean(axis=1)
    rootmean = rootprofile.mean(axis=1)
    leafmat=pd.concat([leafmat,leafmean],axis=1)
    rootmat=pd.concat([rootmat,rootmean],axis=1)

In [7]:
leafmat.columns = ["KEGG"] + timepoints
rootmat.columns = ["KEGG"] + timepoints

In [8]:
leafsum = leafmat.groupby('KEGG').sum()
rootsum = rootmat.groupby('KEGG').sum()

In [11]:
for i in leafsum.index:
    foo = pd.concat([leafsum.ix[i], rootsum.ix[i]], axis=1)
    foo.columns = ['leaf', 'root']
    tmp = foo.plot(kind='bar', title=i)
    fig = tmp.get_figure()
    fig.savefig('./' + i + '.png')
    fig.clear()

## Mapping profile barcharts on KEGG pathway

### Get all compounds in this metabolic profile

In [26]:
cpds = leafdf['KEGG'].append(rootdf['KEGG']).unique()

In [28]:
cpds

array(['C00025', 'C00037', 'C00042', 'C00047', 'C00049', 'C00065',
       'C00073', 'C00085', 'C00089', 'C00095', 'C00099', 'C00121',
       'C00122', 'C00123', 'C00134', 'C00137', 'C00148', 'C00158',
       'C00160', 'C00183', 'C00208', 'C00249', 'C00258', 'C00267',
       'C00315', 'C00429', 'C00492', 'C00493', 'C01595', 'C01753',
       'C01789', 'C00022', 'C00026', 'C00078', 'C00181', 'C00253',
       'C00334', 'C00407', 'C01083', 'C02938', 'C05422'], dtype=object)

### The keggutil package

To search KEGG pathway mappable compounds, you can use keggutil package (http://github.com/kozo2/keggutil).  
This function is not supported by KEGG mapper(http://www.genome.jp/kegg/mapper.html).

In [29]:
import keggutil

In [30]:
def addind(x):
    return x.replace('C', 'cpd:C')
cpdsFor = keggutil.search_pathway_object(map(addind, cpds))

path:map00010
path:map00020
path:map00030
path:map00040
path:map00051
path:map00052
path:map00053
path:map00061
path:map00062
path:map00071
path:map00072
path:map00073
path:map00100
path:map00120
path:map00121
path:map00130
path:map00140
path:map00190
path:map00195
path:map00230
path:map00231
path:map00232
path:map00240
path:map00250
path:map00253
path:map00254
path:map00260
path:map00270
path:map00280
path:map00281
path:map00290
path:map00300
path:map00310
path:map00311
path:map00330
path:map00331
path:map00332
path:map00340
path:map00350
path:map00351
path:map00360
path:map00361
path:map00362
path:map00363
path:map00364
path:map00365
path:map00380
path:map00400
path:map00401
path:map00402
path:map00403
path:map00410
path:map00430
path:map00440
path:map00450
path:map00460
path:map00471
path:map00472
path:map00473
path:map00480
path:map00500
path:map00510
path:map00512
path:map00520
path:map00521
path:map00522
path:map00523
path:map00524
path:map00531
path:map00532
path:map00534
path:m

In [31]:
cpdsFor

{'notFound': set(),
 'path:map00010': {'cpd:C00022', 'cpd:C00267'},
 'path:map00020': {'cpd:C00022',
  'cpd:C00026',
  'cpd:C00042',
  'cpd:C00122',
  'cpd:C00158'},
 'path:map00030': {'cpd:C00022', 'cpd:C00121', 'cpd:C00258'},
 'path:map00040': {'cpd:C00022', 'cpd:C00026', 'cpd:C00181'},
 'path:map00051': {'cpd:C00095', 'cpd:C00267'},
 'path:map00052': {'cpd:C00089',
  'cpd:C00095',
  'cpd:C00137',
  'cpd:C00267',
  'cpd:C00492'},
 'path:map00053': {'cpd:C00022', 'cpd:C00026', 'cpd:C00137', 'cpd:C05422'},
 'path:map00061': {'cpd:C00249'},
 'path:map00062': {'cpd:C00249'},
 'path:map00071': {'cpd:C00249'},
 'path:map00073': {'cpd:C00249'},
 'path:map00100': {'cpd:C01753', 'cpd:C01789'},
 'path:map00120': {'cpd:C00037'},
 'path:map00190': {'cpd:C00042', 'cpd:C00122'},
 'path:map00230': {'cpd:C00037'},
 'path:map00240': {'cpd:C00099', 'cpd:C00429'},
 'path:map00250': {'cpd:C00022',
  'cpd:C00025',
  'cpd:C00026',
  'cpd:C00042',
  'cpd:C00049',
  'cpd:C00122',
  'cpd:C00158',
  'cpd:C003

In [37]:
cpdsFor['path:map01230']

{'cpd:C00022',
 'cpd:C00025',
 'cpd:C00026',
 'cpd:C00037',
 'cpd:C00047',
 'cpd:C00049',
 'cpd:C00065',
 'cpd:C00073',
 'cpd:C00078',
 'cpd:C00123',
 'cpd:C00148',
 'cpd:C00158',
 'cpd:C00183',
 'cpd:C00407',
 'cpd:C00493'}

### Mapping tissue specific  profiles on KEGG *Biosynthesis of amino acids* pathway

#### creating table data file for matplotlib figures

In [74]:
import os
def apath(x):
    foo = 'file:' + os.path.abspath('.') + "\\" + x.replace("cpd:", "") + ".png"
    return foo
pngpath = map(apath, cpdsFor['path:map01230'])

def rmcpdhead(x):
    return x.replace("cpd:", "")
cpdid = map(rmcpdhead, cpdsFor['path:map01230'])

In [75]:
pngpath

['file:C:\\Users\\knishida\\projects\\togotrial2014\\C00049.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00493.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00407.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00026.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00078.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00037.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00025.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00022.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00148.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00158.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00123.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00073.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00047.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00183.png',
 'file:C:\\Users\\knishida\\projects\\togotrial2014\\C00065.png']

In [59]:
d = {'cgraph' : pd.Series(pngpath), 'plotcpd' : pd.Series(cpdid)}

In [60]:
df = pd.DataFrame(d)

In [61]:
df

Unnamed: 0,cgraph,plotcpd
0,file:C:\Users\knishida\projects\togotrial2014C...,C00049
1,file:C:\Users\knishida\projects\togotrial2014C...,C00493
2,file:C:\Users\knishida\projects\togotrial2014C...,C00407
3,file:C:\Users\knishida\projects\togotrial2014C...,C00026
4,file:C:\Users\knishida\projects\togotrial2014C...,C00078
5,file:C:\Users\knishida\projects\togotrial2014C...,C00037
6,file:C:\Users\knishida\projects\togotrial2014C...,C00025
7,file:C:\Users\knishida\projects\togotrial2014C...,C00022
8,file:C:\Users\knishida\projects\togotrial2014C...,C00148
9,file:C:\Users\knishida\projects\togotrial2014C...,C00158


#### Convert the DataFrame to JSON and send it to Cytoscape

In [63]:
import json
profile = json.loads(df.to_json(orient="records"))
print(json.dumps(profile, indent=4))

[
    {
        "plotcpd": "C00049", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00049.png"
    }, 
    {
        "plotcpd": "C00493", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00493.png"
    }, 
    {
        "plotcpd": "C00407", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00407.png"
    }, 
    {
        "plotcpd": "C00026", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00026.png"
    }, 
    {
        "plotcpd": "C00078", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00078.png"
    }, 
    {
        "plotcpd": "C00037", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00037.png"
    }, 
    {
        "plotcpd": "C00025", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00025.png"
    }, 
    {
        "plotcpd": "C00022", 
        "cgraph": "file:C:\\Users\\knishida\\projects\\togotrial2014C00022.png"
    }, 
    {


In [65]:
import requests

PORT_NUMBER = 1234
BASE_URL = "http://localhost:" + str(PORT_NUMBER) + "/v1/"
HEADERS = {'Content-Type': 'application/json'}

# Make sure Cytoscape  RESTful API App is running!
cytoscape_state = requests.get(BASE_URL)
print(json.dumps(json.loads(cytoscape_state.content), indent=4))

{
    "memoryStatus": {
        "usedMemory": 595, 
        "freeMemory": 2845, 
        "maxMemory": 6288, 
        "totalMemory": 3441
    }, 
    "numberOfCores": 4, 
    "apiVersion": "v1"
}


#### Loading a KGML pathway data file from KEGG REST API

In [67]:
pathway_location = "http://rest.kegg.jp/get/rn01230/kgml"
res1 = requests.post(BASE_URL + "networks?source=url", data=json.dumps([pathway_location]), headers=HEADERS)
result = json.loads(res1.content)
pathway_suid = result[0]["networkSUID"][0]
print("Pathway SUID = " + str(pathway_suid))

Pathway SUID = 2296


In [68]:
new_table_data = {
    "key": "KEGG_NODE_LABEL",
    "dataKey": "plotcpd",
    "data": df
}

update_table_url =  BASE_URL + "networks/" + str(pathway_suid) + "/tables/defaultnode"
requests.put(update_table_url, data=json.dumps(new_table_data), headers=HEADERS)

TypeError:                                                cgraph plotcpd
0   file:C:\Users\knishida\projects\togotrial2014C...  C00049
1   file:C:\Users\knishida\projects\togotrial2014C...  C00493
2   file:C:\Users\knishida\projects\togotrial2014C...  C00407
3   file:C:\Users\knishida\projects\togotrial2014C...  C00026
4   file:C:\Users\knishida\projects\togotrial2014C...  C00078
5   file:C:\Users\knishida\projects\togotrial2014C...  C00037
6   file:C:\Users\knishida\projects\togotrial2014C...  C00025
7   file:C:\Users\knishida\projects\togotrial2014C...  C00022
8   file:C:\Users\knishida\projects\togotrial2014C...  C00148
9   file:C:\Users\knishida\projects\togotrial2014C...  C00158
10  file:C:\Users\knishida\projects\togotrial2014C...  C00123
11  file:C:\Users\knishida\projects\togotrial2014C...  C00073
12  file:C:\Users\knishida\projects\togotrial2014C...  C00047
13  file:C:\Users\knishida\projects\togotrial2014C...  C00183
14  file:C:\Users\knishida\projects\togotrial2014C...  C00065 is not JSON serializable