In [None]:
import pandas as pd
import re
import os
import numpy as np

In [None]:
from rtoru import model as rt

# Ids of every exchange reaction in the model, used to filter Biolog rows.
exchanges = {rxn.id for rxn in rt.exchanges}

In [None]:
from concerto.helpers.biolog_to_exchange import biolog_map

In [None]:
# Workbook that holds both the Biolog sheet and the RB-TDNA Seq sheet.
fname = 'raw_data/Table_2_Multi-Omics Driven Metabolic Network Reconstruction and Analysis of Lignocellulosic Carbon Utilization in Rhodosporidium toruloides.XLSX'

# Load the Biolog sheet and expose the combined growth call under the short
# column name `growth`.
bl_data = (
    pd.read_excel(fname, sheet_name='Biolog')
    .assign(
        growth=lambda sheet: sheet['Average > 0.1 and all replicates greater than negative control']
    )
)

In [None]:
# List the distinct PlateType values present in the Biolog sheet.
bl_data.PlateType.unique()

In [None]:
# Show the plate-well -> exchange-reaction lookup table imported from concerto.
biolog_map

In [None]:
# Build the "<PlateType>-<Well>" identifier and coerce the growth call to bool.
# NOTE(review): astype(bool) turns missing values into True - confirm the
# source column has no blanks.
bl_data = bl_data.assign(
    plate=lambda df: df['PlateType'] + '-' + df['Well'],
    growth=lambda df: df['growth'].astype(bool),
)
bl_data.head()

In [None]:
# Distinct plate types again; these are the values the plate subsets filter on.
bl_data.PlateType.unique()

In [None]:
# Distinct Experiment labels; the phosphorus and sulfur subsets filter on these.
bl_data.Experiment.unique()

In [None]:
# Plate ids are "<PlateType>-<Well>" (e.g. "PM3B-B1"), while biolog_map keys
# carry no revision letter after the plate number (e.g. "PM4-D6").
# The pattern removes only the single letter that directly follows "PM<n>".
# It is anchored at the start so the well is left untouched: a plain
# replace of 'A' or 'B' would also delete the row letter of wells such as
# A1 or B1 and those rows could never match biolog_map.
PLATE_REVISION_PATTERN = r'^(PM\d+)[A-Z](?=-)'

carbon_plate = bl_data.loc[bl_data.PlateType.isin(['PM1', 'PM2'])].copy()

nitrogren_plate = bl_data.loc[bl_data.PlateType.isin(['PM3B'])].copy()
nitrogren_plate['plate'] = nitrogren_plate['plate'].str.replace(
    PLATE_REVISION_PATTERN, r'\1', regex=True
)

# Phosphorus and sulfur rows are selected by their Experiment label. Each
# subset normalises its own `plate` column, so the cell does not depend on
# any frame defined outside it other than `bl_data`.
phosh_plate = bl_data.loc[bl_data.Experiment.isin(['Phosphorus'])].copy()
phosh_plate['plate'] = phosh_plate['plate'].str.replace(
    PLATE_REVISION_PATTERN, r'\1', regex=True
)

sulfer_plate = bl_data.loc[bl_data.Experiment.isin(['Sulfur'])].copy()
sulfer_plate['plate'] = sulfer_plate['plate'].str.replace(
    PLATE_REVISION_PATTERN, r'\1', regex=True
)

In [65]:
# Preview the first five sulfur rows to check the normalised `plate` ids.
sulfer_plate.head(5)

Unnamed: 0,PlateType,Experiment,Well,Compound,Average ABS (590nm - 750nm),All replicates > negtive control,Average > 0.1,Average > 0.1 and all replicates greater than negative control,pVal (T-test),growth,plate
348,PM4A,Sulfur,F1,Negative Control,0.0788,False,False,False,0.5,False,PM4-F1
349,PM4A,Sulfur,F2,Sulfate,0.2335,True,True,True,0.008887,True,PM4-F2
350,PM4A,Sulfur,F3,Thiosulfate,0.2565,True,True,True,0.000645,True,PM4-F3
351,PM4A,Sulfur,F4,Tetrathionate,0.194367,True,True,True,0.001838,True,PM4-F4
352,PM4A,Sulfur,F5,Thiophosphate,0.216967,True,True,True,1.1e-05,True,PM4-F5


In [None]:
# Show plate id, compound name and growth call for the nitrogen subset.
nitrogren_plate[['plate', 'Compound', 'growth']]

In [64]:
# Display the lookup table again to compare its `plate` keys with the subsets.
biolog_map

Unnamed: 0,plate,ex_rxn,compound_name,exchange
0,PM1-A10,EX_tre(e),D-Trehalose,EX_tre_e
1,PM1-A11,EX_man(e),D-Mannose,EX_man_e
2,PM1-A12,EX_galt(e),Dulcitol,EX_galt_e
3,PM1-A2,EX_arab_L(e),L-Arabinose,EX_arab__L_e
4,PM1-A3,EX_acgam(e),N-Acetyl-DGlucosamine,EX_acgam_e
...,...,...,...,...
153,PM4-D6,EX_pser_L(e),O-Phospho-L-Serine,EX_pser__L_e
154,PM4-D7,EX_thrp(e),O-Phospho-L-Threonine,EX_thrp_e
155,PM4-D9,EX_3ump(e),Uridine-3’-monophosphate,EX_3ump_e
156,PM4-E1,EX_tyrp(e),O-Phospho-D-Tyrosine,EX_tyrp_e


In [None]:
def merge_with_plate(plate, uptake=10):
    """Attach exchange reactions to a Biolog subset and keep those in the model.

    The subset is aligned with the module-level ``biolog_map`` on the
    ``plate`` identifier. Rows lacking either a growth call or a mapped
    exchange reaction are dropped, and the remainder is filtered with
    ``subset_to_in_model``.

    Parameters
    ----------
    plate : pandas.DataFrame
        Biolog rows with at least the columns ``plate`` and ``growth``.
    uptake : int or float, optional
        Constant written to the ``uptake`` column of the result
        (default 10, the value previously hard-coded here).

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``exchange``, ``growth`` and ``uptake``.
    """
    # Outer-align both tables on the plate id; unmatched rows get NaN and are
    # removed by the dropna below.
    merged = pd.concat(
        [plate.set_index('plate'), biolog_map.set_index('plate')],
        axis=1,
        ignore_index=False,
    )
    merged = merged.dropna(subset=['growth', 'ex_rxn']).reset_index()

    # Copy before adding a column so the assignment never targets a slice of
    # `merged` (avoids SettingWithCopyWarning).
    in_model = subset_to_in_model(merged)[['exchange', 'growth']].copy()
    in_model['uptake'] = uptake
    return in_model

def subset_to_in_model(plate, model_ex=exchanges):
    """Return the rows of ``plate`` whose ``exchange`` value is in ``model_ex``.

    ``model_ex`` defaults to the module-level ``exchanges`` set, captured when
    this function is defined.
    """
    is_in_model = plate['exchange'].isin(model_ex)
    return plate.loc[is_in_model]

# Map every nutrient subset onto model exchanges (same call order as before:
# carbon, nitrogen, sulfur, phosphorus).
carbon, nit, sulf, phos = (
    merge_with_plate(subset)
    for subset in (carbon_plate, nitrogren_plate, sulfer_plate, phosh_plate)
)

In [62]:
# Inspect the sulfur rows that map to an exchange reaction in the model.
sulf

Unnamed: 0,exchange,growth,uptake


In [61]:
# Inspect the phosphorus rows that map to an exchange reaction in the model.
phos

Unnamed: 0,exchange,growth,uptake
1,EX_gmp_e,True,10
3,EX_g1p_e,True,10
4,EX_g6p_e,True,10
5,EX_gam6p_e,True,10
7,EX_cmp_e,True,10
9,EX_man6p_e,True,10
10,EX_pser__L_e,True,10
13,EX_ump_e,True,10
16,EX_minohp_e,True,10


In [None]:
# Write one CSV per nutrient source; the frame index is not written.
growth_outputs = {
    'growth/carbon_biolog.csv': carbon,
    'growth/nit_biolog.csv': nit,
    'growth/phos_biolog.csv': phos,
    'growth/sulf_biolog.csv': sulf,
}
for out_path, table in growth_outputs.items():
    table.to_csv(out_path, index=False)

In [None]:
# Combined view of the phosphorus and sulfur results. No variable named
# `phos_sul` is defined in the cells above, so build the view from the two
# frames that do exist.
pd.concat([phos, sulf], ignore_index=True)

## Gene essentiality parsing

In [None]:
# Ids of every gene in the model.
genes = {gene.id for gene in rt.genes}

In [None]:
# Load the RB-TDNA Seq sheet and derive `genename` by prefixing the
# string-cast RTO4_ID with "RT_".
ge_data = pd.read_excel(fname, sheet_name='RB-TDNA Seq')
ge_data['RTO4_ID'] = ge_data['RTO4_ID'].astype(str)
ge_data['genename'] = 'RT_' + ge_data['RTO4_ID']
ge_data[['genename', 'M9_Glucose']]

In [None]:
# List the columns available in the RB-TDNA Seq sheet.
ge_data.columns

In [None]:
# Unique gene names that appear in the RB-TDNA Seq data.
genes_in_data = set(ge_data['genename'])
genes_in_data

In [None]:
# Report how the model's gene set and the data's gene set overlap.
model_only = genes - genes_in_data
data_only = genes_in_data - genes
print(f'genes in model {len(genes)}')
print(f'genes in data {len(genes_in_data)}')
print(f'model genes not in data {len(model_only)}')
print(f'data genes not in model {len(data_only)}')

In [None]:
# Keep only the data rows whose gene name is also a gene in the model.
is_model_gene = ge_data['genename'].isin(genes)
ge_in_our_data = ge_data[is_model_gene].copy()
ge_in_our_data

In [None]:
# Discard genes that have no M9_Glucose value.
ge_in_our_data = ge_in_our_data.dropna(subset=['M9_Glucose'])
ge_in_our_data

In [None]:
# Label each gene from its M9_Glucose value: above .5 -> True, below .5 ->
# False. A value that is neither above nor below (exactly .5, or missing)
# is left unlabeled.
# NOTE(review): confirm that values above .5 are the ones meant to be essential.
glucose_value = ge_in_our_data['M9_Glucose']
is_above = glucose_value > .5
is_below = glucose_value < .5
ge_in_our_data['essential'] = is_above.where(is_above | is_below).astype(object)
ge_in_our_data['comment'] = ''

In [None]:
# Independent copy holding only the three columns that are exported.
export_columns = ['genename', 'essential', 'comment']
ge_for_export = ge_in_our_data.loc[:, export_columns].copy()
ge_for_export

In [None]:
# Write the essentiality table to CSV without the frame index.
ge_for_export.to_csv('essentiality/ge_m9_glucose.csv', index=False)