In [1]:
import pandas as pnd

from helper_functions import *

# define biolog mappings

In [11]:
biolog_mappings = get_biolog_C_mappings()
print(biolog_mappings.shape)
biolog_mappings

(190, 7)


Unnamed: 0,source,PM,BiGG_ID,BiGG_exchange,SEED_ID,SEED_exchange,KEGG_ID
L-Arabinose,{'C'},{'PM01:A02'},arab__L,EX_arab__L_e,cpd00224,EX_cpd00224_e0,C00259
D-Saccharic Acid,{'C'},{'PM01:A04'},glcr,EX_glcr_e,cpd00571,EX_cpd00571_e0,C00818
D-Galactose,{'C'},{'PM01:A06'},gal,EX_gal_e,cpd00108,EX_cpd00108_e0,C00124
L-Aspartic Acid,"{'C', 'N'}","{'PM01:A07', 'PM05:A06', 'PM03:A10'}",asp__L,EX_asp__L_e,cpd00041,EX_cpd00041_e0,C00049
D-Alanine,"{'C', 'N'}","{'PM01:A09', 'PM05:D06', 'PM03:C03'}",ala__D,EX_ala__D_e,cpd00117,EX_cpd00117_e0,C00133


# 01_klebsiella

In [12]:
biolog_exp = pnd.read_excel('01_klebsiella/tables/Supplemental_Table_S4.xlsx', sheet_name='SuppTable4_strainAccuracyCalls_', skiprows=1)
print(biolog_exp.shape)
biolog_exp.head()

(3478, 5)


Unnamed: 0,strain,substrate,growth_call,model_value,call
0,K_variicola_variicola_01A065,3-(3-hydroxy-phenyl)propionate,1.0,1,TP
1,K_variicola_variicola_01A065,3-hydroxycinnamic acid,0.0,1,FP
2,K_variicola_variicola_01A065,gamma-aminobutyric acid,1.0,1,TP
3,K_variicola_variicola_01A065,L-Sorbose,1.0,1,TP
4,K_variicola_variicola_01A065,D-Galactarate,1.0,1,TP


In [13]:
# correction dictionary:
correct_dict = {
    'L-Arabanose': 'L-Arabinose',
    'D-Manose': 'D-Mannose',
    'Tricarballylate': 'Tricarballylic acid',
    'Citrate': 'Citric acid',
    'L-Alanyl-Glycine': 'Ala-Gly',
    'Glycyl-L-Proline': 'Gly-Pro',
    'Galacticol': 'Dulcitol',
    'gamma-aminobutyric acid': 'g-Amino-N-Butyric acid',
    'D,L-alpha-GlycerolPhosphate': 'D,L-a-Glycerol Phosphate',
    'D-Glucose-6-Phospate': 'D-Glucose-6-Phosphate',
    'D-Galactonic Acid-gamma-Lactone': 'D-Galactonic acid-g-Lactone',
    'alpha-Keto-Butyric Acid': 'a-Ketobutyric acid',
    '2-Deoxy Adenosine': '2`-Deoxyadenosine',
    'N-Acetyl-beta-D Mannosamine': 'N-Acetyl-D-Mannosamine',
    'p-Hydroxy Phenyl Acetic Acid': 'p-Hydroxyphenyl Acetic acid',
    'N-AcetylNeuraminic Acid': 'N-Acetyl-Neuraminic acid',
    '5-Keto-D Gluconic Acid': '5-Keto-D-Gluconic acid',
    '2-Oxoglutarate': 'a-Ketoglutaric acid',
    'D-Galactarate': 'Mucic acid',
    'Ribitol': 'Adonitol',
}

In [14]:
# apply correct_dict:
for index, row in biolog_exp.iterrows(): 
    if row['substrate'] in correct_dict.keys(): 
        biolog_exp.loc[index, 'substrate'] = correct_dict[row['substrate']]

In [15]:
# define the 94 substrates analyzed by Hawkey2022
# (names are according to biolog_mappings)

hawkey_substrates = set(list(biolog_exp['substrate'].unique()))
print('hawkey_substrates:', len(hawkey_substrates))


hawkey_substrates: 94


In [16]:
# some substrate are outside the scope of Biolog plates:

hawkey_substrates_pm = hawkey_substrates.intersection(set(biolog_mappings.index.to_list()))
hawkey_substrates_notpm = hawkey_substrates - hawkey_substrates_pm
print('hawkey_substrates_notpm:', len(hawkey_substrates_notpm))
print(hawkey_substrates_notpm)


hawkey_substrates_notpm: 4
{'3-(3-hydroxy-phenyl)propionate', 'L-ascorbate', 'Ethanolamine', '3-hydroxycinnamic acid'}


In [17]:
# remove the non-Biolog substrates and save the table:
biolog_exp = biolog_exp[biolog_exp['substrate'].isin(hawkey_substrates_notpm) ==False]
biolog_exp = biolog_exp[['strain', 'substrate', 'growth_call']]
biolog_exp


Unnamed: 0,strain,substrate,growth_call
2,K_variicola_variicola_01A065,g-Amino-N-Butyric acid,1.0
3,K_variicola_variicola_01A065,L-Sorbose,1.0
4,K_variicola_variicola_01A065,Mucic acid,1.0
5,K_variicola_variicola_01A065,Tricarballylic acid,0.0
8,K_pneumoniae_SB1067,g-Amino-N-Butyric acid,1.0
...,...,...,...
3473,K_pneumoniae_SB612,Dihydroxyacetone,1.0
3474,K_pneumoniae_SB615,Dihydroxyacetone,1.0
3475,K_pneumoniae_SB617,Dihydroxyacetone,1.0
3476,K_variicola_tropicalensis_CDC4241-71,Dihydroxyacetone,1.0


In [18]:
# adjust strain names:
for index, row in biolog_exp.iterrows(): 
    strain_id = row['strain']
    strain_id = strain_id.replace('K_variicola_tropicalensis_', 'Kvt_')
    strain_id = strain_id.replace('K_pneumoniae_', 'Kp_')
    strain_id = strain_id.replace('K_quasi_simil_', 'Kqs_')
    strain_id = strain_id.replace('K_africa_', 'Ka_')
    strain_id = strain_id.replace('K_quasi_quasi_', 'Kqq_')
    strain_id = strain_id.replace('K_quasivariicola_', 'Kqv_')
    strain_id = strain_id.replace('K_variicola_variicola_', 'Kvv_')
    
    strain_id = strain_id.replace('CDC4241-71', 'CDC4241_71')
    strain_id = strain_id.replace('NJST258-1', 'NJST258_1')
    strain_id = strain_id.replace('NTUH_K2044', 'NTUHK2044')
    strain_id = strain_id.replace('CIP52_145', 'CIP52.145')
    strain_id = strain_id.replace('03-9138-2', '03_9138_2')
    strain_id = strain_id.replace('At-22', 'At_22')
    
    biolog_exp.loc[index, 'strain']  = strain_id

    
print(biolog_exp.shape)
biolog_exp.to_csv('01_klebsiella/tables/biolog_exp.csv')
print(len(biolog_exp['substrate'].unique()))
biolog_exp

(3330, 3)
90


Unnamed: 0,strain,substrate,growth_call
2,Kvv_01A065,g-Amino-N-Butyric acid,1.0
3,Kvv_01A065,L-Sorbose,1.0
4,Kvv_01A065,Mucic acid,1.0
5,Kvv_01A065,Tricarballylic acid,0.0
8,Kp_SB1067,g-Amino-N-Butyric acid,1.0
...,...,...,...
3473,Kp_SB612,Dihydroxyacetone,1.0
3474,Kp_SB615,Dihydroxyacetone,1.0
3475,Kp_SB617,Dihydroxyacetone,1.0
3476,Kvt_CDC4241_71,Dihydroxyacetone,1.0


# 02_ralstonia

In [9]:


# import raw Biolog data:
gc_pm1 = pnd.read_excel('02_ralstonia/tables/rsol_WT_AsPM 1-_GrowthByStrain.xlsx', sheet_name='Sheet1')
gc_pm1 = gc_pm1.set_index('Strain', drop=True)
gc_pm1 = gc_pm1.rename(columns={i: i.replace(' PM 1-', '') for i in gc_pm1.columns})
gc_pm1 = gc_pm1.drop(columns=['Negative Control', 'Growth'])
gc_pm1


gc_pm2 = pnd.read_excel('02_ralstonia/tables/rsol_WT_AsPM 2-A_GrowthByStrain.xlsx', sheet_name='Sheet1')
gc_pm2 = gc_pm2.set_index('Strain', drop=True)
gc_pm2 = gc_pm2.rename(columns={i: i.replace(' PM 2-A', '') for i in gc_pm2.columns})
gc_pm2 = gc_pm2.drop(columns=['Negative Control', 'Growth'])
gc_pm2


carbon_sources = pnd.concat([gc_pm1.T, gc_pm2.T])
carbon_sources = carbon_sources.rename(columns={'Psi07': 'PSI07'})
carbon_sources.tail()

Strain,BA7,PSS4,GMI1000,R24,PSI07,CFBP2957,UW551,MOLK2,RUN2340,K60,BDBR229
Putrescine,0,0,0,0,0,0,0,0,0,0,0
Dihydroxyacetone,1,1,0,0,0,0,1,1,0,1,0
"2,3-Butanediol",0,0,0,0,0,0,0,0,0,0,0
"2,3-Butanedione",0,0,0,0,0,0,0,0,0,0,0
3-Hydroxy-2-butanone,0,0,0,0,0,0,0,0,0,0,0


In [10]:
correct_dict = {
    'D-Saccharic acid': 'D-Saccharic Acid',
    'Succinic acid': 'Succinic Acid',
    'L-Aspartic acid': 'L-Aspartic Acid',
    'D-Glucuronic acid': 'D-Glucuronic Acid',
    'D-Gluconic acid': 'D-Gluconic Acid',
    'DL-a-Glycerol Phosphate': 'D,L-a-Glycerol Phosphate',
    'L-Lactic acid': 'L-Lactic Acid',
    'Formic acid': 'Formic Acid',
    'L-Glutamic acid': 'L-Glutamic Acid',
    'DL-Malic acid': 'D,L-Malic Acid',
    'Acetic acid': 'Acetic Acid',
    'a-D-Glucose': 'a-d-glucose',
    'D-Aspartic acid': 'D-Aspartic Acid',
    '1,2-Propanediol': '1,2-propanediol',
    'a-D-Lactose': 'alpha-D-Lactose',
    'm-Tartaric acid': 'm-tartaric acid',
    'a-Hydroxybutyric acid': 'a-hydroxybutyric acid',
    'Gly-Asp': 'gly-asp',
    'Fumaric acid': 'Fumaric Acid',
    'Propionic acid': 'Propionic Acid',
    'Glycolic acid': 'Glycolic Acid',
    'Glyoxylic acid': 'Glyoxylic Acid',
    'Gly-Glu': 'gly-glu',
    'Acetoacetic acid': 'acetoacetic acid',
    'D-Malic acid': 'D-Malic Acid',
    'L-Malic acid': 'L-Malic Acid',
    'm-Hydroxyphenyl Acetic acid': 'm-hydroxyphenyl acetic acid',
    'Pyruvic acid': 'Pyruvic Acid',
    'L-Galactonic acid-g-Lactone': 'L-Galactonic Acid-gamma-Lactone',
    'D-Galacturonic acid': 'D-Galacturonic Acid',
    '2-Aminoethanol': '2-aminoethanol',
    'Chondroitin Sulfate C': 'chondroitin sulfate c',
    'Dextrin': 'dextrin',
    'Glycogen': 'glycogen',
    'Mannan': 'mannan',
    'Pectin': 'pectin',
    'N-Acetyl-D-Galactosamine': 'n-acetyl-d-galactosamine',
    'b-D-Allose': 'b-d-allose',
    'D-Arabinose': 'd-arabinose',
    'L-Arabitol': 'l-arabitol',
    '2-Deoxy-D-Ribose': '2-deoxy-d-ribose',
    'a-Methyl-D-Glucoside': 'a-methyl-d-glucoside',
    'b-Methyl-D-Galactoside': 'b-methyl-d-galactoside',
    'b-Methyl-D-Glucuronic acid': 'b-methyl-d-glucuronic acid',
    'Salicin': 'salicin',
    'Stachyose': 'stachyose',
    'D-Tagatose': 'D-tagatose',
    'Xylitol': 'xylitol',
    'Butyric acid': 'butyric acid',
    'Capric acid': 'capric acid',
    'Caproic acid': 'caproic acid',
    'Citraconic acid': 'citraconic acid',
    'Citramalic acid': 'citramalic acid',
    '2-Hydroxybenzoic acid': '2-hydroxybenzoic acid',
    '4-Hydroxybenzoic acid': '4-hydroxybenzoic acid',
    'b-Hydroxybutyric acid': 'b-hydroxybutyric acid',
    'g-Hydroxybutyric acid': 'g-hydroxybutyric acid',
    'Itaconic acid': 'itaconic acid',
    'Malonic acid': 'malonic acid',
    'Oxalic acid': 'oxalic acid',
    'Quinic acid': 'quinic acid',
    'Sebacic acid': 'sebacic acid',
    'D-Tartaric acid': 'd-tartaric acid',
    'L-Tartaric acid': 'L-Tartaric Acid',
    'Acetamide': 'acetamide',
    'N-Acetyl-L-Glutamic acid': 'n-acetyl-l-glutamic acid',
    'Glycine': 'glycine',
    'L-Homoserine': 'l-homoserine',
    'Hydroxy-L-Proline': 'hydroxy-l-proline',
    'L-Isoleucine': 'l-isoleucine',
    'L-Leucine': 'l-leucine',
    'L-Lysine': 'l-lysine',
    'L-Methionine': 'l-methionine',
    'L-Pyroglutamic acid': 'l-pyroglutamic acid',
    'L-Valine': 'l-valine',
    'D,L-Carnitine': 'd,l-carnitine',
    '2,3-Butanediol': '2,3-butanediol',
    '2,3-Butanedione': '2,3-butanedione',
    '3-Hydroxy-2-butanone': '3-hydroxy-2-butanone'
}

In [11]:
# covert to long table
biolog_exp = []
for i in carbon_sources.index: 
    for j in carbon_sources.columns:
        cell = carbon_sources.loc[i, j]
        substrate = i
        if substrate in correct_dict.keys(): 
            substrate = correct_dict[substrate]
        biolog_exp.append({'strain': j, 'substrate': substrate, 'growth_call': cell})
biolog_exp = pnd.DataFrame.from_records(biolog_exp)

print(biolog_exp.shape)
biolog_exp.to_csv('02_ralstonia/tables/biolog_exp.csv')
print(len(biolog_exp['substrate'].unique()))
biolog_exp

(2090, 3)
190


Unnamed: 0,strain,substrate,growth_call
0,BA7,L-Arabinose,0
1,PSS4,L-Arabinose,0
2,GMI1000,L-Arabinose,0
3,R24,L-Arabinose,0
4,PSI07,L-Arabinose,0
...,...,...,...
2085,UW551,3-hydroxy-2-butanone,0
2086,MOLK2,3-hydroxy-2-butanone,0
2087,RUN2340,3-hydroxy-2-butanone,0
2088,K60,3-hydroxy-2-butanone,0


# 03_pseudomonas

In [12]:

# import raw biolog data:
biolog_exp = pnd.read_excel('03_pseudomonas/tables/aem.02443-19-sd002.xlsx', sheet_name='Dataset S1', skiprows=3)
biolog_exp = biolog_exp.drop(columns=['Phylogenetic subgroups', 'Fig. 4 order', 'Phylogenetic order', 'Accession number'])

biolog_exp['Strain'] = biolog_exp['Strain'].astype(str)
metadata = pnd.read_csv('03_pseudomonas/tables/metadata.csv', index_col=0)
biolog_exp = biolog_exp[biolog_exp['Strain'].isin(metadata['Strain'].to_list())]

biolog_exp = biolog_exp.set_index('Strain', drop=True, verify_integrity=True)
biolog_exp = biolog_exp.T
biolog_exp = biolog_exp.rename(columns={i: str(i).replace(' ', '_') for i in biolog_exp.columns})
print(biolog_exp.shape)
biolog_exp.tail() 


(270, 36)


Strain,30-84,ATCC17415,TAMOak81,ATCC17411,ATCC17809,ChPhzS135,DTR133,SLPH10,ToZa7,ChPhzTR44,...,C50,DSM_6698,ChPhzS23,66,ChPhzS24,ChPhzTR38,ChPhzTR39,ChPhzTR18,PA23,O6
Val.Tyr.Val,-,+,-,+,+,+,-,-,-,-,...,+,+,+,+,-,+,-,-,-,-
Gly.Phe.Phe,-,+,+,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,-,+,+
Leu.Leu.Leu,+,+,-,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,-,-,-
Phe.Gly.Gly,-,+,-,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,-,-,-
Tyr.Gly.Gly,+,+,-,+,+,+,-,+,+,+,...,+,+,+,+,+,-,+,-,+,-


In [13]:
correct_dict = {
    'L.Arabinose': 'L-Arabinose',
    'N.Acetyl.D.Glucosamine': 'N-Acetyl-D-Glucosamine',
    'D.Saccharic.acid': 'D-Saccharic Acid',
    'Succinic.acid': 'Succinic Acid',
    'D.Galactose': 'D-Galactose',
    'L.Proline': 'L-Proline',
    'D.Alanine': 'D-Alanine',
    'D.Trehalose': 'D-Trehalose',
    'D.Mannose': 'D-Mannose',
    'D.Serine': 'D-Serine',
    'D.Sorbitol': 'D-Sorbitol',
    'L.Fucose': 'L-Fucose',
    'D.Glucuronic.acid': 'D-Glucuronic Acid',
    'D.L.a.Glycerol.Phosphate': 'D,L-a-Glycerol Phosphate',
    'D.Xylose': 'D-Xylose',
    'L.Lactic.acid': 'L-Lactic Acid',
    'Formic.acid': 'Formic Acid',
    'D.Mannitol': 'D-Mannitol',
    'D.Glucose.6.Phosphate': 'D-Glucose-6-Phosphate',
    'D.Galactonic.acid.g.Lactone': 'D-Galactonic acid-g-Lactone',
    'D.Ribose': 'D-Ribose',
    'Tween.20': 'Tween 20',
    'L.Rhamnose': 'L-Rhamnose',
    'D.Fructose': 'D-Fructose',
    'Acetic.acid': 'Acetic Acid',
    'a.D.Glucose': 'a-d-glucose',
    'D.Aspartic.acid': 'D-Aspartic Acid',
    'D.Glucosaminic.acid': 'D-Glucosaminic acid',
    'Tween.40': 'Tween 40',
    'a.Ketoglutaric.acid': 'a-Ketoglutaric acid',
    'm.Tartaric.acid': 'm-tartaric acid',
    'D.Fructose.6.Phosphate': 'D-Fructose-6-Phosphate',
    'Tween.80': 'Tween 80',
    'a.Hydroxyglutaric.acid.g.Lactone': 'a-Hydroxyglutaric acid-g-Lactone',
    'a.Hydroxybutyric.acid': 'a-hydroxybutyric acid',
    'Gly.Asp': 'gly-asp',
    'm.Inositol': 'm-Inositol',
    'Bromosuccinic.acid': 'Bromosuccinic acid',
    'Mucic.acid': 'Mucic acid',
    'Gly.Glu': 'gly-glu',
    'L.Threonine': 'L-Threonine',
    'L.Alanine': 'L-Alanine',
    'Ala.Gly': 'Ala-Gly',
    'Mono.Methylsuccinate': 'Mono-Methylsuccinate',
    'D.Malic.acid': 'D-Malic Acid',
    'Gly.Pro': 'Gly-Pro',
    'p.Hydroxyphenyl.Acetic.acid': 'p-Hydroxyphenyl Acetic acid',
    'Pyruvic.acid': 'Pyruvic Acid',
    'L.Galactonic.acid.g.Lactone': 'L-Galactonic Acid-gamma-Lactone',
    'D.Galacturonic.acid': 'D-Galacturonic Acid',
    'X2.Aminoethanol': '2-aminoethanol',
    'Dextrin': 'dextrin',
    'Pectin': 'pectin',
    'D.Arabitol': 'D-Arabitol',
    'L.Arabitol': 'l-arabitol',
    'X2.Deoxy.D.Ribose': '2-deoxy-d-ribose',
    'i.Erythritol': 'i-Erythritol',
    'Xylitol': 'xylitol',
    'd.Amino.Valeric.acid': 'd-Amino Valeric acid',
    'Butyric.acid': 'butyric acid',
    'Caproic.acid': 'caproic acid',
    'Citraconic.acid': 'citraconic acid',
    'D.L.Citramalic.acid': 'citramalic acid',
    'D.Glucosamine': 'D-Glucosamine',
    'X4.Hydroxybenzoic.acid': '4-hydroxybenzoic acid',
    'b.Hydroxybutyric.acid': 'b-hydroxybutyric acid',
    'g.Hydroxybutyric.acid': 'g-hydroxybutyric acid',
    'a.Keto.Valeric.acid': 'a-Keto-Valeric acid',
    'Itaconic.acid': 'itaconic acid',
    'X5.Keto.D.Gluconic.acid': '5-Keto-D-Gluconic acid',
    'D.Lactic.acid.Methyl.Ester': 'D-Lactic acid Methyl Ester',
    'Malonic.acid': 'malonic acid',
    'Quinic.acid': 'quinic acid',
    'Sebacic.acid': 'sebacic acid',
    'Sorbic.acid': 'Sorbic acid',
    'Succinamic.acid': 'Succinamic acid',
    'D.Tartaric.acid': 'Tartaric acid',
    'L.Tartaric.acid': 'L-Tartaric Acid',
    'L.Alaninamide': 'L-Alaninamide',
    'N.Acetyl.L.Glutamic.acid': 'n-acetyl-l-glutamic acid',
    'L.Arginine': 'L-Arginine',
    'L.Histidine': 'L-Histidine',
    'L.Homoserine': 'l-homoserine',
    'Hydroxy.L.Proline': 'hydroxy-l-proline',
    'L.Isoleucine': 'l-isoleucine',
    'L.Leucine': 'l-leucine',
    'L.Lysine': 'l-lysine',
    'L.Ornithine': 'L-Ornithine',
    'L.Phenylalanine': 'L-Phenylalanine',
    'L.Pyroglutamic.acid': 'l-pyroglutamic acid',
    'L.Valine': 'l-valine',
    'D.L.Carnitine': 'DL-Carnitine',
    'D.L.Octopamine': 'D,L-Octopamine',
    'Nitrite': 'nitrite',
    'L.Cysteine': 'l-cysteine',
    'L.Histidine.1': 'L-Histidine',
    'L.Leucine.1': 'l-leucine',
    'L.Lysine.1': 'l-lysine',
    'L.Methionine.1': 'l-methionine',
    'L.Tryptophan': 'l-tryptophan',
    'L.Tyrosine': 'l-tyrosine',
    'D.Asparagine': 'L-Asparagine',
    'D.Aspartic.acid.1': 'D-Aspartic Acid',
    'D.Glutamic.acid': 'L-Glutamic Acid',
    'D.Lysine': 'd-lysine',
    'D.Serine.1': 'D-Serine ',
    'D.Valine': 'd-valine',
    'L.Citrulline': 'l-citrulline',
    'L.Homoserine.1': 'l-homoserine',
    'N.Acetyl.L.Glutamic.acid.1': 'n-acetyl-l-glutamic acid',
    'N.Phthaloyl.L.Glutamic.acid': 'N-Phthaloyl-L-Glutamic acid',
    'L.Pyroglutamic.acid.1': 'l-pyroglutamic acid',
    'Hydroxylamine': 'hydroxylamine',
    'N.Amylamine': 'N-Amylamine',
    'N.Butylamine': 'N-Butylamine',
    'Histamine': 'histamine',
    'b.Phenylethylamine': 'b-phenylethylamine',
    'Tyramine.1': 'Tyramine',
    'Acetamide.1': 'acetamide',
    'Formamide': 'formamide',
    'Glucuronamide.1': 'Glucuronamide',
    'D.L.Lactamide': 'DL-Lactamide',
    'D.Glucosamine.1': 'D-Glucosamine',
    'D.Galactosamine': 'd-galactosamine',
    'D.Mannosamine': 'D-Mannosamine',
    'N.Acetyl.D.Glucosamine.1': 'N-Acetyl-D-Glucosamine',
    'N.Acetyl.D.Galactosamine.1': 'n-acetyl-d-galactosamine',
    'N.Acetyl.D.Mannosamine.1': 'N-Acetyl-D-Mannosamine',
    'Cytosine': 'cytosine',
    'Guanine': 'guanine',
    'Thymine': 'thymine',
    'Thymidine.1': 'Thymidine',
    'Uracil': 'uracil',
    'Uridine.1': 'Uridine',
    'Alloxan': 'alloxan',
    'Parabanic.acid': 'Parabanic acid',
    'e.Amino.N.Caproic.acid': 'e-amino-n-caproic acid',
    'D.L.a.Amino.Caprylic.acid': 'DL-a-Amino-Caprylic acid',
    'd.Amino.N.Valeric.acid': 'd-amino-n-valeric acid',
    'a.Amino.N.Valeric.acid': 'a-Amino-N-Valeric acid',
    'Tripolyphosphate': 'tripolyphosphate',
    'Adenosine.2..Monophosphate': 'adenosine 2`-monophosphate',
    'Adenosine.3..Monophosphate': 'adenosine 3`-monophosphate',
    'Carbamyl.Phosphate': 'carbamyl phosphate',
    'Phospho.Glycolic.acid': 'phospho-glycolic acid',
    'D.Glucose.6.Phosphate.1': 'D-Glucose-6-Phosphate',
    'Cytidine.5..Monophosphate': 'Cytidine 2`-Monophosphate',
    'Cytidine.2..3..Cyclic.Monophosphate': 'cytidine 2`,3`-cyclic monophosphate',
    'Cytidine.3..5..Cyclic.Monophosphate': 'cytidine 3`,5`-cyclic monophosphate',
    'D.Mannose.1.Phosphate': 'd-mannose-1-phosphate',
    'Cysteamine.S.Phosphate': 'Cysteamine-S-Phosphate',
    'Uridine.3..Monophosphate': 'Uridine 2`-Monophosphate',
    'Phosphoryl.Choline': 'phosphoryl choline',
    'Phosphono.Acetic.acid': 'phosphono acetic acid',
    'Inositol.Hexaphosphate': 'inositol hexaphosphate',
    'Thymidine.3..5..Cyclic.Monophosphate': 'Thymidine 3`,5`-Cyclic Monophosphate',
    'N.Acetyl.L.Cysteine': 'n-acetyl-l-cysteine',
    'L.Djenkolic.acid': 'l-djenkolic acid',
    'p.Aminobenzene.Sulfonic.acid': 'p-Aminobenzene Sulfonic acid',
    'Orotic.acid': 'Orotic acid',
    'Ala.Pro': 'Ala-Pro',
    'Asp.Asp': 'Asp-Asp',
    'Asp.Glu': 'Asp-Glu',
    'Asp.Phe': 'Asp-Phe',
    'Asp.Trp': 'Asp-Trp',
    'Glu.Asp': 'Glu-Asp',
    'Glu.Glu': 'Glu-Glu',
    'Glu.Trp': 'Glu-Trp',
    'Gly.Met.2': 'gly-met',
    'Gly.Pro.1': 'Gly-Pro',
    'Gly.Thr': 'Gly-Thr',
    'Gly.Trp': 'Gly-Trp',
    'His.Pro': 'His-Pro',
    'Ile.Met': 'Ile-Met',
    'Ile.Pro': 'Ile-Pro',
    'Leu.Leu': 'Leu-Leu',
    'Leu.Met': 'Leu-Met',
    'Lys.Pro': 'Lys-Pro',
    'Met.Glu': 'Met-Glu',
    'Met.Gly': 'Met-Gly',
    'Met.Ile': 'Met-Ile',
    'Met.Leu': 'Met-Leu',
    'Met.Met': 'Met-Met',
    'Met.Phe': 'Met-Phe',
    'Met.Pro': 'Met-Pro',
    'Met.Trp': 'Met-Trp',
    'Met.Val': 'Met-Val',
    'Phe.Phe': 'Phe-Phe',
    'Phe.Pro': 'Phe-Pro',
    'Phe.Trp': 'Phe-Trp',
    'Pro.Asp': 'Pro-Asp',
    'Pro.Gly': 'Pro-Gly',
    'Pro.Hyp': 'Pro-Hyp',
    'Pro.Leu': 'Pro-Leu',
    'Pro.Phe': 'Pro-Phe',
    'Pro.Pro': 'Pro-Pro',
    'Ser.Met': 'Ser-Met',
    'Ser.Pro': 'Ser-Pro',
    'Thr.Met': 'Thr-Met',
    'Thr.Pro': 'Thr-Pro',
    'Trp.Asp': 'Trp-Asp',
    'Trp.Phe': 'Trp-Phe',
    'Trp.Trp': 'Trp-Trp',
    'Trp.Tyr': 'Trp-Tyr',
    'Tyr.Phe': 'Tyr-Phe',
    'Tyr.Trp': 'Tyr-Trp',
    'Val.Asp': 'Val-Asp',
    'Asp.Gly': 'Asp-Gly',
    'Gly.Asp.1': 'gly-asp',
    'Gly.Ile': 'Gly-Ile',
    'His.His': 'His-His',
    'Leu.Pro': 'Leu-Pro',
    'Lys.Met': 'Lys-Met',
    'Met.Thr': 'Met-Thr',
    'Met.Tyr': 'Met-Tyr',
    'Phe.Met': 'Phe-Met',
    'Pro.Glu': 'Pro-Glu',
    'Pro.lle': 'Pro-lle',
    'Pro.Ser': 'Pro-Ser',
    'Pro.Trp': 'Pro-Trp',
    'Ser.Asp': 'Ser-Asp',
    'Ser.Glu': 'Ser-Glu',
    'Thr.Asp': 'Thr-Asp',
    'Thr.Phe': 'Thr-Phe',
    'Tyr.Ile': 'Tyr-Ile',
    'Val.Met': 'Val-Met',
    'Val.Pro': 'Val-Pro',
    'b.Ala.Ala': 'b-Ala-Ala',
    'b.Ala.Gly': 'b-Ala-Gly',
    'b.Ala.His': 'b-Ala-His',
    'Met.b.Ala': 'Met-b-Ala',
    'b.Ala.Phe': 'b-Ala-Phe',
    'D.Ala.D.Ala': 'D-Ala-D-Ala',
    'D.Ala.Gly': 'D-Ala-Gly',
    'D.Ala.Leu': 'D-Ala-Leu',
    'D.Leu.D.Leu': 'D-Leu-D-Leu',
    'D.Leu.Gly': 'D-Leu-Gly',
    'D.Leu.Tyr': 'D-Leu-Tyr',
    'g.D.Glu.Gly': 'g-D-Glu-Gly',
    'Gly.D.Ala': 'Gly-D-Ala',
    'Gly.D.Ser': 'Gly-D-Ser',
    'Leu.D.Leu': 'Leu-D-Leu',
    'Phe.b.Ala': 'Phe-b-Ala',
    'Ala.Ala.Ala': 'Ala-Ala-Ala',
    'D.Ala.Gly.Gly': 'D-Ala-Gly-Gly',
    'Gly.Gly.Ala': 'Gly-Gly-Ala',
    'Gly.Gly.D.Leu': 'Gly-Gly-D-Leu',
    'Gly.Gly.Gly': 'Gly-Gly-Gly',
    'Gly.Gly.Ile': 'Gly-Gly-Ile',
    'Gly.Gly.Leu': 'Gly-Gly-Leu',
    'Gly.Gly.Phe': 'Gly-Gly-Phe',
    'Val.Tyr.Val': 'Val-Tyr-Val',
    'Gly.Phe.Phe': 'Gly-Phe-Phe',
    'Leu.Leu.Leu': 'Leu-Leu-Leu',
    'Phe.Gly.Gly': 'Phe-Gly-Gly',
    'Tyr.Gly.Gly': 'Tyr-Gly-Gly'
}

In [14]:
# apply corrections to substrate names: 
col_list = list(biolog_exp.columns)   # remind the order
biolog_exp['correct_name'] = biolog_exp.index
biolog_exp['correct_name'] = [i if i not in correct_dict.keys() else correct_dict[i] for i in biolog_exp['correct_name']]
biolog_exp = biolog_exp[['correct_name'] + col_list] # reorder columns


# this dataset contains duplicated substrates, probably because they are used in different PM plates.
print(len(biolog_exp['correct_name']))
print(len(biolog_exp['correct_name'].unique()))

270
255


In [15]:
# Comparison is focused on carbon sources as in the other datasets.
# Assuming that the first accorrence is from PM1/PM2
to_keep = []
to_remove = []
for index, row in biolog_exp.iterrows(): 
    if row['correct_name'] not in to_keep:
        to_keep.append(row['correct_name'])
    else:
        to_remove.append(index)
        
print(to_remove)
biolog_exp = biolog_exp.drop(index=to_remove)
biolog_exp = biolog_exp[biolog_exp['correct_name'].isin(biolog_mappings.index)]
biolog_exp = biolog_exp.set_index('correct_name', drop=True)
biolog_exp

['L.Histidine.1', 'L.Leucine.1', 'L.Lysine.1', 'D.Aspartic.acid.1', 'L.Homoserine.1', 'N.Acetyl.L.Glutamic.acid.1', 'L.Pyroglutamic.acid.1', 'Tyramine.1', 'D.Glucosamine.1', 'N.Acetyl.D.Glucosamine.1', 'Thymidine.1', 'Uridine.1', 'D.Glucose.6.Phosphate.1', 'Gly.Pro.1', 'Gly.Asp.1']


Strain,30-84,ATCC17415,TAMOak81,ATCC17411,ATCC17809,ChPhzS135,DTR133,SLPH10,ToZa7,ChPhzTR44,...,C50,DSM_6698,ChPhzS23,66,ChPhzS24,ChPhzTR38,ChPhzTR39,ChPhzTR18,PA23,O6
correct_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L-Arabinose,+,+,-,-,-,-,-,-,-,-,...,+,+,+,+,+,+,+,+,+,+
N-Acetyl-D-Glucosamine,+,+,-,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,+,+,+
D-Saccharic Acid,+,+,+,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,+,+,+
Succinic Acid,+,+,+,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,+,+,+
D-Galactose,+,+,-,-,-,-,-,-,-,-,...,+,+,+,+,+,+,+,+,+,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
L-Glutamic Acid,+,+,+,+,+,+,+,+,+,+,...,+,+,+,+,+,+,+,+,+,+
acetamide,-,+,-,-,-,-,-,-,-,-,...,+,+,-,+,-,-,-,-,-,-
Glucuronamide,+,+,+,+,-,+,+,+,-,+,...,+,+,+,+,+,+,+,+,+,+
n-acetyl-d-galactosamine,-,+,+,+,-,+,-,-,+,-,...,+,+,+,+,-,+,-,-,+,-


In [16]:
# convert to long table:

long = []
for i in biolog_exp.index: 
    for j in biolog_exp.columns:
        cell = biolog_exp.loc[i, j]
        if cell=='+': cell=1
        if cell=='-': cell=0
        long.append({'strain': j, 'substrate': i, 'growth_call': cell})
biolog_exp = pnd.DataFrame.from_records(long)

print(biolog_exp.shape)
biolog_exp.to_csv('03_pseudomonas/tables/biolog_exp.csv')
print(len(biolog_exp['substrate'].unique()))
biolog_exp

(3960, 3)
110


Unnamed: 0,strain,substrate,growth_call
0,30-84,L-Arabinose,1
1,ATCC17415,L-Arabinose,1
2,TAMOak81,L-Arabinose,0
3,ATCC17411,L-Arabinose,0
4,ATCC17809,L-Arabinose,0
...,...,...,...
3955,ChPhzTR38,N-Acetyl-D-Mannosamine,1
3956,ChPhzTR39,N-Acetyl-D-Mannosamine,1
3957,ChPhzTR18,N-Acetyl-D-Mannosamine,1
3958,PA23,N-Acetyl-D-Mannosamine,1
