In [1]:
import numpy as np
import pandas as pd

### Load data

In [2]:
# Schwanhausser dataset (Excel file nature10098-s5.xls)
df = pd.read_excel('nature10098-s5.xls')

# Recon gene symbols (SYMBOL column of genes.tsv), sorted in place
recon_genes = pd.read_table('../recon/genes.tsv')['SYMBOL'].tolist()
recon_genes.sort()

### k_sp and k_dp for each gene

In [3]:
# Average parameter values, one entry per row of the dataset.
# Gene names are forced to text with str(), so a missing (NaN) name
# becomes the literal text 'nan' rather than staying a float.
genes = list(map(str, df['Gene Names']))

# translation rate constant k_sp, taken directly from the table
ksp_average = list(map(float, df['translation rate constant (ksp) average [molecules/(mRNA*h)]']))

# degradation rate constant derived from the half-life: k_dp = ln(2) / t_half
kdp_average = [np.log(2) / float(half_life) for half_life in df['Protein half-life average [h]']]

# get values for Recon genes
#
# Build a symbol -> values index in a single pass over the dataset rows,
# instead of re-splitting and re-upper-casing every row for every Recon gene.
# Values are appended in row order, so each gene's list has the same contents
# and ordering as a row-by-row scan per gene would produce.
ksp_by_symbol = {}
kdp_by_symbol = {}
for j in range(len(genes)):
    # NOTE(review): if `genes` was built with str(), this guard never rejects
    # anything and a missing name arrives as the text 'nan' (indexed below as
    # 'NAN') -- confirm that no Recon symbol can collide with it.
    if not isinstance(genes[j], str):
        continue

    # A row may list several ';'-separated names. Dataset names are
    # upper-cased before comparison; Recon symbols are used as-is.
    # set(): a name repeated within one row contributes that row only once.
    for symbol in set(name.upper() for name in genes[j].split(';')):

        # if ksp value available
        if not np.isnan(ksp_average[j]):
            ksp_by_symbol.setdefault(symbol, []).append(ksp_average[j])

        # if kdp value available
        if not np.isnan(kdp_average[j]):
            kdp_by_symbol.setdefault(symbol, []).append(kdp_average[j])

# One list per entry of recon_genes, aligned by position (empty when the
# gene has no data). list() copies so that a symbol appearing more than once
# in recon_genes does not share one list object between its entries.
ksp_values = [list(ksp_by_symbol.get(symbol, [])) for symbol in recon_genes]
kdp_values = [list(kdp_by_symbol.get(symbol, [])) for symbol in recon_genes]

# Export the per-gene mean k_sp and k_dp as CSV, one row per Recon gene.
with open('parameters.csv','w') as csv_out:
    csv_out.write('GENE,KSP [1/hr],KDP [1/hr]\n')
    for symbol, ksp_list, kdp_list in zip(recon_genes, ksp_values, kdp_values):

        # a gene is only written when it has at least one value for both parameters
        if not ksp_list or not kdp_list:
            continue

        # SLC29A1 doesn't work well, predictions too low
        if symbol == 'SLC29A1':
            continue

        csv_out.write('%s,%f,%f\n' % (symbol, np.mean(ksp_list), np.mean(kdp_list)))

#### Total cellular protein number

In [4]:
# protein copy number for each row of the dataset
protein_number = df['Protein copy number average [molecules/cell]']

# write the sum over all rows (total cellular protein number) to a text file
with open('protein_number.txt','w') as out_file:
    total_protein = np.sum(protein_number)
    out_file.write('%f' % total_protein)

#### Total cellular mRNA number

In [5]:
# mRNA copy number for each row of the dataset
mrna_number = df['mRNA copy number average [molecules/cell]']

# write the sum over all rows (total cellular mRNA number) to a text file
with open('mrna_number.txt','w') as out_file:
    total_mrna = np.sum(mrna_number)
    out_file.write('%f' % total_mrna)