In [0]:
import pandas as pd

# Import Rhea

In [0]:
# Rhea -> KEGG reaction cross-reference table (tab-separated), read directly from the FTP URL.
rhea2kegg = pd.read_csv(
  "ftp://ftp.expasy.org/databases/rhea/tsv/rhea2kegg_reaction.tsv",
  delimiter="\t",
)

In [67]:
# Preview the first rows of the Rhea -> KEGG mapping.
rhea2kegg.head()

Unnamed: 0,RHEA_ID,DIRECTION,MASTER_ID,ID
0,10003,BI,10000,R02938
1,10007,BI,10004,R04010
2,10011,BI,10008,R07180
3,10015,BI,10012,R07170
4,10019,BI,10016,R02381


# Lipid metabolism reaction IDs in KEGG


In [0]:
# KEGG lipid-metabolism pathway list as one run-together string: each entry is
# a five-digit pathway ID immediately followed by the pathway name. Adjacent
# literals are joined by the parser, so the value is a single string.
keggtext = (
  "00061Fatty acid biosynthesis"
  "00062Fatty acid elongation"
  "00071Fatty acid degradation"
  "00072Synthesis and degradation of ketone bodies"
  "00073Cutin, suberine and wax biosynthesis"
  "00100Steroid biosynthesis"
  "00120Primary bile acid biosynthesis"
  "00121Secondary bile acid biosynthesis"
  "00140Steroid hormone biosynthesis"
  "00561Glycerolipid metabolism"
  "00564Glycerophospholipid metabolism"
  "00565Ether lipid metabolism"
  "00600Sphingolipid metabolism"
  "00590Arachidonic acid metabolism"
  "00591Linoleic acid metabolism"
  "00592alpha-Linolenic acid metabolism"
  "01040Biosynthesis of unsaturated fatty acids"
)

In [0]:
import re

# Pull every run of exactly five digits out of the pasted pathway list;
# the result is a list of ID strings in order of appearance.
fiveDigitRun = re.compile(r"[0-9]{5}")
pathwayIds = fiveDigitRun.findall(keggtext)


In [10]:
# Show the extracted pathway IDs.
pathwayIds

['00061',
 '00062',
 '00071',
 '00072',
 '00073',
 '00100',
 '00120',
 '00121',
 '00140',
 '00561',
 '00564',
 '00565',
 '00600',
 '00590',
 '00591',
 '00592',
 '01040']

In [0]:
# Start from an empty frame; it will hold the KEGG pathway -> reaction link table.
lipidmaps = pd.DataFrame()

In [71]:
# Fetch the KEGG pathway -> reaction link table for every lipid pathway.
# The per-pathway frames are collected in a list and concatenated once, so
# re-running this cell rebuilds `lipidmaps` from scratch instead of appending
# duplicate rows to whatever it already held.
linkFrames = []
for pathwayId in pathwayIds:  # not `id`, which would shadow the builtin
  try:
    linkFrames.append(
      pd.read_csv("http://rest.kegg.jp/link/rn/map" + pathwayId, sep="\t", header=None)
    )
  except pd.errors.EmptyDataError:
    # read_csv found nothing to parse for this pathway; report it and move on.
    print("No reaction ID in map" + pathwayId)

# pd.concat raises on an empty list, so fall back to an empty frame.
lipidmaps = pd.concat(linkFrames) if linkFrames else pd.DataFrame()

No reaction ID in map00121


In [72]:
# Preview the link table: column 0 is the pathway, column 1 the reaction.
lipidmaps.head()

Unnamed: 0,0,1
0,path:map00061,rn:R00742
1,path:map00061,rn:R01280
2,path:map00061,rn:R01624
3,path:map00061,rn:R01626
4,path:map00061,rn:R01706


# How many KEGG lipid reaction IDs are in Rhea?

---



In [73]:
# Number of distinct reaction IDs (column 1) across all fetched lipid pathways.
lipidmaps.loc[:, 1].unique().shape

(764,)

In [74]:
# Size of the full Rhea -> KEGG mapping, for comparison.
rhea2kegg.shape

(6290, 4)

In [75]:
# Size of the subset of Rhea -> KEGG rows whose KEGG ID is a lipid-pathway reaction.
keggLipidIds = lipidmaps[1].str.replace("rn:", "")  # drop "rn:" so IDs compare equal to rhea2kegg["ID"]
rhea2kegg.loc[rhea2kegg["ID"].isin(keggLipidIds)].shape

(362, 4)

In [76]:
# Preview the Rhea -> KEGG rows whose KEGG ID is a lipid-pathway reaction.
keggLipidIds = lipidmaps[1].str.replace("rn:", "")  # drop "rn:" so IDs compare equal to rhea2kegg["ID"]
rhea2kegg.loc[rhea2kegg["ID"].isin(keggLipidIds)].head()

Unnamed: 0,RHEA_ID,DIRECTION,MASTER_ID,ID
45,10191,BI,10188,R01978
62,10275,BI,10272,R02240
70,10307,BI,10304,R04864
76,10335,BI,10332,R01319
79,10347,BI,10344,R03109


In [0]:
# Keep only the Rhea -> KEGG rows whose KEGG ID is a lipid-pathway reaction.
keggLipidIds = lipidmaps[1].str.replace("rn:", "")  # drop "rn:" so IDs compare equal to rhea2kegg["ID"]
isLipidReaction = rhea2kegg["ID"].isin(keggLipidIds)
lipidsInRhea2kegg = rhea2kegg.loc[isLipidReaction]

In [87]:
# Preview the lipid subset of the Rhea -> KEGG mapping.
lipidsInRhea2kegg.head()

Unnamed: 0,RHEA_ID,DIRECTION,MASTER_ID,ID
45,10191,BI,10188,R01978
62,10275,BI,10272,R02240
70,10307,BI,10304,R04864
76,10335,BI,10332,R01319
79,10347,BI,10344,R03109


In [0]:
# Distinct KEGG reaction IDs in the lipid subset (one KEGG ID may occupy several rows).
keggrids = lipidsInRhea2kegg.loc[:, "ID"].unique()

In [79]:
# Number of distinct KEGG lipid reaction IDs that have a Rhea mapping.
keggrids.shape

(351,)

# Are these 351 reactions in EcoCyc?

In [0]:
# Rhea -> EcoCyc reaction cross-reference table (tab-separated), read directly from the FTP URL.
ecocyc = pd.read_csv(
  "ftp://ftp.expasy.org/databases/rhea/tsv/rhea2ecocyc.tsv",
  delimiter="\t",
)

In [81]:
# Preview the first rows of the Rhea -> EcoCyc mapping.
ecocyc.head()

Unnamed: 0,RHEA_ID,DIRECTION,MASTER_ID,ID
0,10041,LR,10040,325-BISPHOSPHATE-NUCLEOTIDASE-RXN
1,10093,LR,10092,DEOXYNUCLEOTIDE-3-PHOSPHATASE-RXN
2,10117,LR,10116,RXN-9952
3,10121,LR,10120,AMINOCYL-TRNA-HYDROLASE-RXN
4,10137,LR,10136,GLYOCARBOLIG-RXN


In [82]:
# Distinct EcoCyc reaction IDs whose RHEA_ID also appears in the lipid Rhea -> KEGG rows.
sameRheaId = ecocyc["RHEA_ID"].isin(lipidsInRhea2kegg["RHEA_ID"])
ecocyc.loc[sameRheaId, "ID"].unique().shape

(11,)

In [83]:
# Distinct EcoCyc reaction IDs whose MASTER_ID also appears in the lipid Rhea -> KEGG rows.
sameMasterId = ecocyc["MASTER_ID"].isin(lipidsInRhea2kegg["MASTER_ID"])
ecocyc.loc[sameMasterId, "ID"].unique().shape

(33,)

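Of the 351 KEGG lipid reactions, only 11 EcoCyc reactions match on exact RHEA_ID and 33 on MASTER_ID, so we can see that the reaction representations in EcoCyc and KEGG are quite different.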

# KEGG lipid reactions not in EcoCyc

## Reaction IDs only in KEGG

In [0]:
# Rhea master IDs present both in EcoCyc and in the lipid Rhea -> KEGG rows.
sharedMaster = ecocyc["MASTER_ID"].isin(lipidsInRhea2kegg["MASTER_ID"])
masterIds = ecocyc.loc[sharedMaster, "MASTER_ID"].unique()

In [86]:
# Show the Rhea master IDs shared by EcoCyc and the KEGG lipid subset.
masterIds

array([10272, 10492, 10736, 11092, 11096, 11308, 12080, 12593, 13769,
       15177, 15221, 15313, 15325, 16089, 16105, 16229, 16913, 18381,
       19709, 19889, 20828, 21036, 21564, 21644, 22432, 23516, 23568,
       30119, 31451, 33751, 33763, 41788, 41792, 41932])

In [0]:
# KEGG reaction IDs whose Rhea master ID is also mapped to EcoCyc.
inEcocyc = lipidsInRhea2kegg["MASTER_ID"].isin(masterIds)
commons = lipidsInRhea2kegg.loc[inEcocyc, "ID"].unique()

In [109]:
# Show the KEGG reaction IDs that are also reachable in EcoCyc via Rhea.
commons

array(['R02240', 'R06871', 'R00623', 'R00842', 'R00844', 'R00742',
       'R10707', 'R01801', 'R01034', 'R02746', 'R01797', 'R00749',
       'R00851', 'R06870', 'R02685', 'R01799', 'R01800', 'R01012',
       'R02241', 'R00289', 'R02055', 'R00238', 'R00391', 'R00847',
       'R01778', 'R01514', 'R07639', 'R04014', 'R07390', 'R02029',
       'R07162', 'R01624', 'R01626', 'R01706'], dtype=object)

In [0]:
# KEGG lipid reaction IDs with no EcoCyc counterpart via Rhea.
# The lipid subset has more rows (362) than distinct KEGG IDs (351), so one
# KEGG ID can sit on several rows. Filtering rows by MASTER_ID alone would
# keep such an ID here whenever just one of its rows lacks an EcoCyc match,
# even though another row has one. Instead, exclude every ID that has at
# least one EcoCyc-matched row, so this set cannot overlap the shared IDs.
hasEcocycMaster = lipidsInRhea2kegg["MASTER_ID"].isin(masterIds)
idsInEcocyc = lipidsInRhea2kegg.loc[hasEcocycMaster, "ID"]
onlyInKegg = lipidsInRhea2kegg.loc[
  ~lipidsInRhea2kegg["ID"].isin(idsInEcocyc), "ID"
].unique()

In [100]:
# Show the KEGG reaction IDs that were not matched to EcoCyc.
onlyInKegg

array(['R01978', 'R04864', 'R01319', 'R03109', 'R04309', 'R01596',
       'R04469', 'R02266', 'R01312', 'R02251', 'R02056', 'R01802',
       'R03468', 'R03370', 'R03262', 'R04263', 'R01497', 'R01923',
       'R01039', 'R01021', 'R02265', 'R01318', 'R01468', 'R01500',
       'R05775', 'R01498', 'R05759', 'R00856', 'R02042', 'R05703',
       'R02002', 'R03718', 'R02893', 'R05333', 'R02402', 'R02242',
       'R03617', 'R04807', 'R01310', 'R10828', 'R07296', 'R05794',
       'R03199', 'R00639', 'R01281', 'R05731', 'R01836', 'R02691',
       'R01838', 'R01594', 'R02814', 'R07204', 'R03353', 'R02620',
       'R02976', 'R01011', 'R03325', 'R04472', 'R02350', 'R01030',
       'R02239', 'R04413', 'R01891', 'R03977', 'R01274', 'R02347',
       'R02349', 'R03719', 'R01593', 'R03415', 'R04126', 'R04473',
       'R02817', 'R02268', 'R02689', 'R03627', 'R01595', 'R01026',
       'R03059', 'R01013', 'R01461', 'R04452', 'R00748', 'R01495',
       'R03058', 'R03360', 'R04520', 'R03991', 'R02214', 'R034

In [0]:
from IPython.display import HTML

In [106]:
# Render one KEGG reaction image per KEGG-only reaction, each preceded by its ID.
# The markup is assembled with a single join rather than repeated string
# concatenation, and the generator variable stays local, so the builtin `id`
# is no longer shadowed in the notebook namespace after this cell runs.
imgsrcs = "".join(
  'This is ' + reactionId + '<img src="http://rest.kegg.jp/get/' + reactionId + '/image">'
  for reactionId in onlyInKegg
)
HTML(imgsrcs)