## This notebook prepares input for IPath3 visualisation

In [1]:
from pathlib import Path
from definitions import ROOT_DIR
import pandas as pd
import numpy as np

- Inputs

In [2]:
# Base directory for all inputs/outputs: the parent of ROOT_DIR.
p_root_dir = Path(ROOT_DIR).parent
# Build the path component by component. A literal backslash inside the string
# ("6_plots\q1_plots") is an invalid escape sequence and is only treated as a
# directory separator on Windows.
p_analysis = p_root_dir / "6_plots" / "q1_plots"

# Compounds used in the study
p_compounds = p_root_dir / "5_data_analysis" / "compounds_ids.csv"

# HMDB ID to KEGG ID mapping
p_kegg = p_analysis / "hmdb_kegg_2020-09-09.csv"

- Load data

In [3]:
# Load the compound table (indexed by `internal_id`) and, in the same step,
# drop the rows whose `hmdb_primary` is 'custom' (thermometer ions and
# fiducials) while keeping only the two columns needed for the KEGG lookup.
compounds = (
    pd.read_csv(p_compounds, index_col='internal_id')
    .loc[lambda tbl: tbl['hmdb_primary'] != 'custom', ['name_short', 'hmdb_primary']]
)

# HMDB -> KEGG lookup table
kegg = pd.read_csv(p_kegg)

- Map KEGG IDs to the compounds
- Note which compounds have no mapping

In [43]:
# Left join on the HMDB ID: every compound row is kept, and compounds with no
# match in `kegg` end up with NaN in the columns contributed by `kegg`.
df = compounds.merge(kegg, on='hmdb_primary', how='left')

In [44]:
# Names of the compounds that still have no KEGG ID after the merge.
# `.isna()` states the intent directly (the previous `x != x` comparison relied
# on NaN being unequal to itself).
unmapped_compounds = df.loc[df.kegg_id.isna(), 'name_short']

# Display a {name: nan} template that can be copied and filled in by hand.
# Built with dict.fromkeys instead of a dtype-less empty `pd.Series()`, which
# triggers a pandas DeprecationWarning.
dict.fromkeys(unmapped_compounds, np.nan)

  pd.Series(pd.Series(),index=unmapped_compounds).to_dict()


{'N-Acetylgalactosamine 6-phosphate': nan,
 'Cholesteryl acetate': nan,
 'TG 15:0-18:1-15:0': nan,
 'MG 18:1': nan,
 'Cardiolipin 18:1': nan,
 'DG 18:0-22:6': nan,
 'PS (POPS) 16:0-18:1': nan,
 'PG 16:0-18:1': nan,
 'Lyso PI 17:1': nan,
 'PI 16:0-18:1': nan,
 'PE 18:0-20:4': nan,
 'Lyso PE 18:0': nan,
 'Lyso PA 18:1': nan,
 'Lyso PG 16:0': nan,
 'Lyso PS 17:1': nan,
 'SM d18:1-16:0': nan,
 'PC (O) C16-18:1': nan,
 'Cholesteryl ester 17:0': nan}

In [45]:
# Hand-curated KEGG IDs for the compounds left unmapped by the HMDB -> KEGG
# table. Keys are `name_short` values, values are KEGG compound IDs.
# Note: 'Cholesteryl acetate' and 'Cholesteryl ester 17:0' share the same ID.
mapping = {
    'N-Acetylgalactosamine 6-phosphate': 'C06376',
    'Cholesteryl acetate': 'C02530',
    'TG 15:0-18:1-15:0': 'C00422',
    'MG 18:1': 'C01885',
    'Cardiolipin 18:1': 'C05980',
    'DG 18:0-22:6': 'C00165',
    'PS (POPS) 16:0-18:1': 'C02737',
    'PG 16:0-18:1': 'C00344',
    'Lyso PI 17:1': 'C03819',
    'PI 16:0-18:1': 'C01194',
    'PE 18:0-20:4': 'C00350',
    'Lyso PE 18:0': 'C04438',
    'Lyso PA 18:1': 'C00681',
    'Lyso PG 16:0': 'C18126',
    'Lyso PS 17:1': 'C18125',
    'SM d18:1-16:0': 'C00550',
    'PC (O) C16-18:1': 'C05212',
    'Cholesteryl ester 17:0': 'C02530',
}

- Complete the mapping manually

In [46]:
# Write each manually curated KEGG ID into the rows of `df` whose `name_short`
# matches the mapping key.
for compound_name, manual_kegg_id in mapping.items():
    is_compound = df.name_short == compound_name
    df.loc[is_compound, 'kegg_id'] = manual_kegg_id

- Print the KEGG IDs, then copy and paste them into the iPath3 website: [https://pathways.embl.de/ipath3.cgi?map=metabolic](https://pathways.embl.de/ipath3.cgi?map=metabolic)

In [51]:
# Keep the KEGG IDs themselves in `ids`. The previous list comprehension over
# `print(...)` stored a list of None, because print returns None.
ids = df.kegg_id.tolist()
# One ID per line, ready to be copied.
print(*ids, sep='\n')

C00221
C00092
C05378
C00111
C00597
C00074
C00022
C00024
C00186
C00345
C00199
C03736
C00184
C00257
C00508
C00158
C00417
C00026
C00042
C00122
C00149
C00036
C00029
C00167
C00352
C06376
C00043
C00140
C00137
C01041
C01177
C00249
C00418
C00187
C04025
C03761
C00154
C00318
C00695
C00037
C05122
C00951
C00152
C00049
C01042
C00064
C00025
C12270
C00334
C00079
C00082
C00355
C03758
C00788
C00164
C00135
C00386
C00785
C00388
C00062
C00327
C03406
C00077
C00169
C00148
C01157
C00134
C00315
C00750
C00300
C00791
C00179
C00504
C00101
C00073
C00491
C00127
C00051
C00245
C00019
C00021
C00155
C00097
C03793
C00183
C00864
C00831
C00010
C00065
C00078
C00328
C01717
C00632
C03722
C00003
C00004
C00153
C00253
C00780
C05635
C01598
C03150
C00455
C00262
C00020
C00147
C00385
C00242
C00008
C00002
C00575
C00438
C00337
C00295
C00299
C00106
C00178
C00055
C00475
C00380
C00015
C00114
C01996
C00378
C00255
C00314
C00120
C05443
C02477
C05850
C00061
C00016
C02305
C00670
C03017
C02571
C01005
C00366
C02530
C00463
C00329
C00246
C00093

- Save results

In [50]:
# Save the completed compound/KEGG table into the analysis directory.
# The DataFrame index is written as the first column (pandas default).
p_mapping_out = p_analysis / 'compound_kegg_mapping.csv'
df.to_csv(p_mapping_out)