In [1]:
import pandas as pd
import re
import os
import sys 
import glob
import warnings
import time
warnings.filterwarnings('ignore')
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'src/violin/')))
%load_ext autoreload
%autoreload 2
from in_out import preprocessing_model, preprocessing_reading, output
from scoring import score_reading
from network import node_edge_list
from visualize_violin import visualize 


In [2]:
## Parameters ## 
approach = '1'
kind_dict = {"strong corroboration" : 2, 
                "empty attribute" : 1,
                "indirect interaction" : 3,
                "path corroboration" : 5,
                "specification" : 7,
                "hanging extension" : 40, 
                "full extension" : 39, 
                "internal extension" : 38,  
                "dir contradiction" : 11,
                "sign contradiction" : 10, 
                "att contradiction" : 9,
                "dir mismatch" : 20,
                "path mismatch" : 19,
                "self-regulation" : 18}
match_dict = {"source present" : 1, 
                "target present" : 100, 
                "both present" : 10, 
                "neither present" : 0.1}
evidence_scoring_cols = ["Regulator Name", "Regulator Type", "Regulator Subtype", "Regulator HGNC Symbol", "Regulator Database", "Regulator ID", "Regulator Compartment", "Regulator Compartment ID",
                        "Regulated Name", "Regulated Type", "Regulated Subtype", "Regulated HGNC Symbol", "Regulated Database", "Regulated ID", "Regulated Compartment", "Regulated Compartment ID",
                        "Sign", "Connection Type", "Mechanism", "Site",
                        "Cell Line", "Cell Type", "Tissue Type", "Organism"]
attributes = ['Regulated Compartment ID', 'Regulator Compartment ID']

In [3]:
reader = 'GPT'
model_files = ['input/models/SkMel133_biorecipe.xlsx', 'input/models/ModelB_discrete_biorecipe.xlsx']
reading_A_files = glob.glob(f'input/interactions/{reader}/RA*.xlsx')
reading_B_files = glob.glob(f'input/interactions/{reader}/RB*.xlsx')

model_A_df = preprocessing_model(model_files[0])
model_B_df = preprocessing_model(model_files[1])

graph_A = node_edge_list(model_A_df)
graph_B = node_edge_list(model_B_df)

In [4]:
for reading_file in reading_A_files:
    output_file = f'output/{reader}' + '/' + reading_file.split('/')[-1].split('_reading_BioRECIPE')[0]
    print(output_file)
    time1 = time.time()
    reading_df = preprocessing_reading(reading=reading_file, 
                                       evidence_score_cols=evidence_scoring_cols, 
                                       atts=attributes)
    counter_A = {'corroboration': [], 'contradiction': []}
    scored = score_reading(reading_df, 
                       model_A_df, 
                       graph_A, 
                       counter=counter_A,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach,
                       )
    output(scored, output_file, kind_values=kind_dict)
    print(time.time() - time1)
    print('corroboration in model: {}'.format(len(set(counter_A['corroboration']))))
    print('contradiction in model: {}'.format(len(set(counter_A['contradiction']))))

for reading_file in reading_B_files:
    output_file = f'output/{reader}' + '/' + reading_file.split('/')[-1].split('_reading_BioRECIPE')[0]
    print(output_file)
    time1 = time.time()
    reading_df = preprocessing_reading(reading=reading_file, 
                                       evidence_score_cols=evidence_scoring_cols, 
                                       atts=attributes)
    counter_B = {'corroboration': [], 'contradiction': []}
    scored = score_reading(reading_df, 
                       model_B_df, 
                       graph_B, 
                       counter=counter_B,
                       kind_values=kind_dict, 
                       match_values=match_dict, 
                       attributes=attributes, 
                       classify_scheme=approach)
    output(scored, output_file, kind_values=kind_dict)
    print(time.time() - time1)
    print('corroboration in model: {}'.format(len(set(counter_B['corroboration']))))
    print('contradiction in model: {}'.format(len(set(counter_B['contradiction']))))


output/GPT/RA4
58
0.2749059200286865
corroboration in model: 1
contradiction in model: 0
output/GPT/RA3
1364
4.277546167373657
corroboration in model: 5
contradiction in model: 11
output/GPT/RA2
3604
13.514410018920898
corroboration in model: 28
contradiction in model: 40
output/GPT/RB_star_1
964
3.34342098236084
corroboration in model: 0
contradiction in model: 0
output/GPT/RB_star_2
437
1.4069311618804932
corroboration in model: 0
contradiction in model: 0
output/GPT/RB1
858
3.7983109951019287
corroboration in model: 2
contradiction in model: 2
output/GPT/RB2
385
1.3834002017974854
corroboration in model: 2
contradiction in model: 2
output/GPT/RB3
317
1.255737066268921
corroboration in model: 1
contradiction in model: 2
