In [None]:
import pandas as pd
from cobra.io import read_sbml_model,load_yaml_model
from cobra.flux_analysis import double_gene_deletion
from tqdm import tqdm
from multiprocessing import freeze_support

In [None]:
# Input files. All paths are relative ("../data"); presumably resolved
# against the directory the notebook is run from.
data_path = "../data/data2016_in_model.xlsx"  # experimental table, loaded with pd.read_excel
generow_path = "../data/genelist_row.csv"  # gene list used for the first knockout position
genecol_path = "../data/genelist_col.csv"  # gene list used for the second knockout position
yeast_metatwin_gem_path = "../data/Yeast-MetaTwin.yml"  # model loaded with load_yaml_model
yeast8_gem_path = "../data/yeast-GEM.xml"  # model loaded with read_sbml_model

# Output files: one CSV of per-pair results for each model.
yeast_metatwin_results_path = "../results/science2016_double_gene_deletion_results_yeast_metatwin.csv"
yeast8_results_path = "../results/science2016_double_gene_deletion_results_yeast8.csv"

In [None]:
# Classify one simulated/experimental observation and update the running
# true-positive, true-negative, false-positive and false-negative counters.
def simu_exp(simu, exp, tp, tn, fp, fn, growth_cutoff=0.000001, score_cutoff=-0.35):
    """Update confusion-matrix counters for one prediction/observation pair.

    A "positive" is a pair whose experiment reports the ``'SL'`` label or a
    score in ``[-1000, score_cutoff]``; a "negative" is the ``'SS'`` label or
    a score above ``score_cutoff``.  The simulation predicts a positive when
    the simulated growth is below ``growth_cutoff``.

    Parameters
    ----------
    simu : float
        Simulated growth value.  Values below ``growth_cutoff`` count as
        "no growth", values at or above it as "growth".  NaN matches neither
        side, so the counters are returned unchanged.
    exp : str or float
        Experimental outcome: either a label (``'SL'`` / ``'SS'``) or a
        numeric score.  Other labels, NaN and scores below -1000 belong to
        neither class and leave the counters unchanged.
    tp, tn, fp, fn : int
        Current counter values.
    growth_cutoff : float, optional
        Growth threshold separating "no growth" from "growth".
    score_cutoff : float, optional
        Score threshold separating positive from negative observations.

    Returns
    -------
    tuple of int
        The updated ``(tp, tn, fp, fn)``.
    """
    if isinstance(exp, str):
        # Labels must never reach the numeric comparisons: ordering a str
        # against a float raises TypeError in Python 3.
        observed_positive = exp == 'SL'
        observed_negative = exp == 'SS'
    else:
        # -1000 is the lowest score still treated as a positive.
        observed_positive = -1000 <= exp <= score_cutoff
        observed_negative = exp > score_cutoff

    # Both sides are tested explicitly so that a NaN growth value falls
    # through without being counted, while a value exactly on the cutoff
    # is assigned to "growth" instead of being dropped.
    predicted_positive = simu < growth_cutoff
    predicted_negative = simu >= growth_cutoff

    if predicted_positive and observed_positive:
        tp += 1
    elif predicted_negative and observed_negative:
        tn += 1
    elif predicted_positive and observed_negative:
        fp += 1
    elif predicted_negative and observed_positive:
        fn += 1
    return tp, tn, fp, fn

In [None]:

def yeast_double_gene_knockout(yeast8, generow, genecol, data2016, processes=40):
    """Simulate pairwise gene knockouts and compare them with experimental data.

    Runs ``double_gene_deletion`` for every combination of ``generow`` and
    ``genecol``, classifies each pair with ``simu_exp`` against the matching
    entry of ``data2016``, prints the resulting tp/fp/tn/fn counts and
    returns one row per evaluated pair.

    Parameters
    ----------
    yeast8 : model
        Model handed to ``double_gene_deletion``.  Despite the parameter
        name, any model that function accepts can be passed.
    generow, genecol : sequence of str
        Gene identifiers for the first and second knockout position.  Pairs
        with identical genes, or with an empty identifier, are skipped.
    data2016 : pandas.DataFrame
        Experimental table, looked up as ``data2016.loc[gene1, gene2]``.
    processes : int, optional
        Passed through to ``double_gene_deletion``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Gene1'``, ``'Gene2'``, ``'Predicted Growth'`` and
        ``'genetic interactions scores'``.

    Raises
    ------
    KeyError
        If the deletion result has no entry for a requested gene pair, or
        ``data2016`` has no entry for it.
    """
    # Announce the long-running step before it starts, not after it returns.
    print('Run double gene deletion...')
    deletion_result = double_gene_deletion(yeast8, generow, genecol, processes=processes)

    # Index the growth values by knocked-out gene set once, instead of
    # scanning the whole result table for every pair.  setdefault keeps the
    # first occurrence, matching the previous ``.iloc[0]`` selection.
    growth_by_pair = {}
    for knocked_out, pair_growth in zip(deletion_result['ids'], deletion_result['growth']):
        growth_by_pair.setdefault(frozenset(knocked_out), pair_growth)

    tp = fp = fn = tn = 0
    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    records = []

    for g1_sys in tqdm(generow, desc='Double gene deletion'):
        for g2_sys in genecol:
            if g1_sys == g2_sys or not (g1_sys and g2_sys):
                continue

            try:
                growth = growth_by_pair[frozenset((g1_sys, g2_sys))]
            except KeyError:
                raise KeyError(
                    f'no double gene deletion result for pair ({g1_sys}, {g2_sys})'
                ) from None
            score = data2016.loc[g1_sys, g2_sys]

            # compare predicted growth with experimental results
            tp, tn, fp, fn = simu_exp(growth, score, tp, tn, fp, fn)

            records.append({
                'Gene1': g1_sys,
                'Gene2': g2_sys,
                'Predicted Growth': growth,
                'genetic interactions scores': score,
            })

    # Passing the columns explicitly keeps the header even when no pair was evaluated.
    results = pd.DataFrame(
        records,
        columns=['Gene1', 'Gene2', 'Predicted Growth', 'genetic interactions scores'],
    )

    print(f'tp={tp}\nfp={fp}\ntn={tn}\nfn={fn}')
    print('Finished')
    return results



In [None]:
# Read the experimental table, both gene lists and both models into memory.
print('prepare data and model')
data2016 = pd.read_excel(data_path, index_col=0)

# Each gene list is a header-less CSV read as strings; it is expected to hold
# a single column, which is squeezed and converted to a plain Python list.
generow, genecol = (
    pd.read_csv(gene_list_path, header=None, dtype=str).squeeze("columns").tolist()
    for gene_list_path in (generow_path, genecol_path)
)

yeast8 = read_sbml_model(yeast8_gem_path)
yeast_metatwin = load_yaml_model(yeast_metatwin_gem_path)

In [None]:
# Simulate every pairwise knockout on the yeast8 model, then save the
# per-pair table as CSV (without the row index).
yeast8_results = yeast_double_gene_knockout(
    yeast8,
    generow,
    genecol,
    data2016,
    processes=40,
)
yeast8_results.to_csv(yeast8_results_path, index=False)


In [None]:
# Simulate every pairwise knockout on the Yeast-MetaTwin model, then save the
# per-pair table as CSV (without the row index).
yeast_metatwin_results = yeast_double_gene_knockout(
    yeast_metatwin,
    generow,
    genecol,
    data2016,
    processes=40,
)
yeast_metatwin_results.to_csv(yeast_metatwin_results_path, index=False)
