In [11]:
import pandas as pd
import re
import os
import sys 
import glob
import warnings
import time
warnings.filterwarnings('ignore')
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'src/violin/')))
%load_ext autoreload
%autoreload 2
from in_out import preprocessing_model, preprocessing_reading, output
from scoring import score_reading
from network import node_edge_list
from visualize_violin import visualize 


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
## Parameters ## 
approach = '1'
kind_dict = {"strong corroboration" : 2, 
                "empty attribute" : 1,
                "indirect interaction" : 3,
                "path corroboration" : 5,
                "specification" : 7,
                "hanging extension" : 40, 
                "full extension" : 39, 
                "internal extension" : 38,  
                "dir contradiction" : 11,
                "sign contradiction" : 10, 
                "att contradiction" : 9,
                "dir mismatch" : 20,
                "path mismatch" : 19,
                "self-regulation" : 18}
match_dict = {"source present" : 1, 
                "target present" : 100, 
                "both present" : 10, 
                "neither present" : 0.1}
evidence_scoring_cols = ["Regulator Name", "Regulator Type", "Regulator Subtype", "Regulator HGNC Symbol", "Regulator Database", "Regulator ID", "Regulator Compartment", "Regulator Compartment ID",
                        "Regulated Name", "Regulated Type", "Regulated Subtype", "Regulated HGNC Symbol", "Regulated Database", "Regulated ID", "Regulated Compartment", "Regulated Compartment ID",
                        "Sign", "Connection Type", "Mechanism", "Site",
                        "Cell Line", "Cell Type", "Tissue Type", "Organism"]
attributes = ['Regulated Compartment ID', 'Regulator Compartment ID', 'Mechanism' , 'Cell Line', 'Cell Type', 'Tissue Type', 'Organism']

In [20]:
reader = 'INDRA'
model_files = ['input/models/SkMel133_biorecipe.xlsx', 'input/models/ModelB_discrete_biorecipe.xlsx']
reading_A_files = glob.glob(f'input/interactions/{reader}/RA*.xlsx')
reading_B_files = glob.glob(f'input/interactions/{reader}/RB*.xlsx')

model_A_df = preprocessing_model(model_files[0])
model_B_df = preprocessing_model(model_files[1])

graph_A = node_edge_list(model_A_df)
graph_B = node_edge_list(model_B_df)

In [21]:
for reading_file in reading_A_files:
    output_file = f'output/{reader}' + '/' + reading_file.split('/')[-1].split('_reading_BioRECIPE')[0]
    print(output_file)
    time1 = time.time()
    reading_df = preprocessing_reading(reading=reading_file, 
                                       evidence_score_cols=evidence_scoring_cols, 
                                       atts=attributes)
    counter_A = {'corroboration': [], 'contradiction': []}
    scored = score_reading(reading_df, 
                       model_A_df, 
                       graph_A, 
                       counter=counter_A,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach,
                       )
    output(scored, output_file, kind_values=kind_dict)
    print(time.time() - time1)
    print('corroboration in model: {}'.format(len(set(counter_A['corroboration']))))
    print('contradiction in model: {}'.format(len(set(counter_A['contradiction']))))

for reading_file in reading_B_files:
    output_file = f'output/{reader}' + '/' + reading_file.split('/')[-1].split('_reading_BioRECIPE')[0]
    print(output_file)
    time1 = time.time()
    reading_df = preprocessing_reading(reading=reading_file, 
                                       evidence_score_cols=evidence_scoring_cols, 
                                       atts=attributes)
    counter_B = {'corroboration': [], 'contradiction': []}
    scored = score_reading(reading_df, 
                       model_B_df, 
                       graph_B, 
                       counter=counter_B,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach)
    output(scored, output_file, kind_values=kind_dict)
    print(time.time() - time1)
    print('corroboration in model: {}'.format(len(set(counter_B['corroboration']))))
    print('contradiction in model: {}'.format(len(set(counter_B['contradiction']))))


output/INDRA/RA4
30
0.1679670810699463
corroboration in model: 0
contradiction in model: 0
output/INDRA/RA3
559
2.7341859340667725
corroboration in model: 0
contradiction in model: 0
output/INDRA/RA2
1524
8.907699823379517
corroboration in model: 5
contradiction in model: 3
output/INDRA/RA1
51628
165.43350100517273
corroboration in model: 21
contradiction in model: 24
output/INDRA/RB_star_1
787
2.1502530574798584
corroboration in model: 0
contradiction in model: 0
output/INDRA/RB_star_2
608
1.5508217811584473
corroboration in model: 0
contradiction in model: 0
output/INDRA/RB1
648
1.6062729358673096
corroboration in model: 0
contradiction in model: 0
output/INDRA/RB2
389
0.9906690120697021
corroboration in model: 0
contradiction in model: 0
output/INDRA/RB3
26
0.15742111206054688
corroboration in model: 0
contradiction in model: 0


### TESTING task 1 & 2

In [11]:
from translators.within_biorecipe.md_and_int import get_interactions_from_model

In [28]:
# Switch format
interactions_A = os.getcwd() + '/input/interactions/translated_SkeMel133_biorecipe.xlsx'
interactions_B = os.getcwd() + '/input/interactions/translated_ModelB_discrete_biorecipe.xlsx'

get_interactions_from_model(os.getcwd() + '/' + model_files[0], interactions_A)
get_interactions_from_model(os.getcwd() + '/' + model_files[1], interactions_B)

interactions_A_df = pd.read_excel(interactions_A, index_col=None)
interactions_B_df = pd.read_excel(interactions_B, index_col=None)

In [29]:
# Test in VIOLIN
reading_A_df = preprocessing_reading(reading=interactions_A, 
                                     evidence_score_cols=evidence_scoring_cols, 
                                     atts=attributes)
counter_A = {'corroboration': [], 'contradiction': []}
scored = score_reading(reading_A_df, 
                       model_A_df, 
                       graph_A, 
                       counter=counter_A,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach,
                       )
output_file = f'test/test_result' + '/' + interactions_A.split('/')[-1].split('_biorecipe')[0]
output(scored, output_file, kind_values=kind_dict)
print('corroboration in model: {}'.format(len(set(counter_A['corroboration']))))
print('contradiction in model: {}'.format(len(set(counter_A['contradiction']))))

reading_B_df = preprocessing_reading(reading=interactions_B,
                                     evidence_score_cols=evidence_scoring_cols,
                                     atts=attributes)
counter_B = {'corroboration': [], 'contradiction': []}
scored = score_reading(reading_B_df, 
                       model_B_df, 
                       graph_B, 
                       counter=counter_B,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach)
output_file = f'test/test_result' + '/' + interactions_B.split('/')[-1].split('_biorecipe')[0]
output(scored, output_file, kind_values=kind_dict)
print('corroboration in model: {}'.format(len(set(counter_B['corroboration']))))
print('contradiction in model: {}'.format(len(set(counter_B['contradiction']))))


266
corroboration in model: 264
contradiction in model: 0
72
corroboration in model: 71
contradiction in model: 0


### TESTING Task 3 & 4

In [42]:
# Randomly choose some interactions from modelA
import random 
random.seed(10)

random_A_df = reading_A_df.sample(n=50).reset_index()
random_B_df = reading_B_df.sample(n=25).reset_index()

counter_A = {'corroboration': [], 'contradiction': []}
scored = score_reading(random_A_df, 
                       model_A_df, 
                       graph_A, 
                       counter=counter_A,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach,
                       )
output_file = f'test/test_result' + '/' + interactions_A.split('/')[-1].split('_biorecipe')[0]
output(scored, output_file, kind_values=kind_dict)
print('corroboration in model: {}'.format(len(set(counter_A['corroboration']))))
print('contradiction in model: {}'.format(len(set(counter_A['contradiction']))))

counter_B = {'corroboration': [], 'contradiction': []}
scored = score_reading(random_B_df, 
                       model_B_df, 
                       graph_B, 
                       counter=counter_B,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach)
output_file = f'test/test_result' + '/' + interactions_B.split('/')[-1].split('_biorecipe')[0]
output(scored, output_file, kind_values=kind_dict)
print('corroboration in model: {}'.format(len(set(counter_B['corroboration']))))
print('contradiction in model: {}'.format(len(set(counter_B['contradiction']))))


50
corroboration in model: 50
contradiction in model: 0
25
corroboration in model: 25
contradiction in model: 0


### TESTING Task 5 & 6 

In [30]:
# import RA2.0.1.1 to verify the extension
extension_reading = os.getcwd() + '/'+ 'input/interactions/example/RA2_0_1_1_reading_BioRECIPE.xlsx'
extension_A_df = preprocessing_reading(reading=extension_reading, 
                                     evidence_score_cols=evidence_scoring_cols, 
                                     atts=attributes)
counter_A = {'corroboration': [], 'contradiction': []}
scored = score_reading(extension_A_df, 
                       model_A_df, 
                       graph_A, 
                       counter=counter_A, 
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach, 
                       )
output_file = f'test/test_result' + '/' + extension_reading.split('/')[-1].split('_reading_BioRECIPE')[0]
output(scored, output_file, kind_values=kind_dict)
print('corroboration in model: {}'.format(len(set(counter_A['corroboration']))))
print('contradiction in model: {}'.format(len(set(counter_A['contradiction']))))

1006
corroboration in model: 19
contradiction in model: 13
