In [1]:
import json
import pickle
import sys

import gensim
import numpy as np
import pandas as pd

# The parent directory must be on sys.path before the project-local `lib` imports below.
sys.path.append('..')
from lib.doctorailib.doctorailib import doctorai
from lib.doctorXAIlib.doctorXAIlib import doctorXAI
from lib.embedding_utils import EmbeddingUtils
from lib.NotesCleaning import NotesCleaning
from lib.semantic_enrichment import SemanticEnrichment, WindowStore
from lib.utils import Utils
from lib.utils_similarity import UtilsSimilarity

# Load all the Doctor XAI and Doctor AI files

In [2]:
# Build the Doctor AI predictor from the saved model file and its two code-to-index dictionaries.
model_file = '../doctorXAI/models/trained_doctorAI_output/2020_9_30_MIMIC_III_.44.npz'
dr = doctorai.DoctorAI(
    modelFile=model_file,
    ICD9_to_int_dict="../doctorXAI/preprocessing_doctorai/ICD9_to_int_dict",
    CCS_to_int_dict="../doctorXAI/preprocessing_doctorai/CCS_to_int_dict",
)

In [3]:
# All the ICD-9 sequences generated during preprocessing.
# NOTE: allow_pickle=True and pickle.load can execute arbitrary code; only load trusted files.
dataset_sequences = np.load('../doctorXAI/preprocessing_doctorai/mimic_sequences.npy', allow_pickle=True)
# Use the Doctor AI model as the black box to be explained.
black_box_oracle = dr
# Path of the ontology file.
ontology_path_file = '../lib/doctorXAIlib/ICD9_ontology.csv'
admission_mimic_sequences = np.load('../doctorXAI/preprocessing_doctorai/admission_mimic_sequences.npy', allow_pickle=True)
date_mimic_sequences = np.load('../doctorXAI/preprocessing_doctorai/date_mimic_sequences.npy', allow_pickle=True)
# Descriptions of the codes, kept at hand to look up their meaning.
# The files are opened in `with` blocks so the handles are closed right after loading.
with open('../doctorXAI/ICD9_description_dict.pkl', 'rb') as pickle_file:
    ICD9_description_dict = pickle.load(pickle_file)
with open('../doctorXAI/CCS_description_dict.pkl', 'rb') as pickle_file:
    CCS_description_dict = pickle.load(pickle_file)

### How Doctor XAI works

In [None]:
# Select the patient whose prediction will be explained.
patient_index = 70
patient_sequence = dataset_sequences[patient_index]

In [None]:
# Instantiate the explainer for the selected patient against the black-box model.
# syn_neigh_size is presumably the size of the synthetic neighbourhood — TODO confirm in doctorXAIlib.
drXAI = doctorXAI.DoctorXAI(
    patient_sequence=patient_sequence,
    dataset_sequences=dataset_sequences,
    black_box_oracle=black_box_oracle,
    ontology_path_file=ontology_path_file,
    syn_neigh_size=500,
)

In [None]:
# Unpack the nine values returned by the explainer's rule extraction.
(
    decision_rule,
    istance_string,
    list_split_conditions,
    code_names,
    fidelity_synth,
    hit_synth,
    features_names,
    labels_names,
    DT_synth,
) = drXAI.extract_rule(
    ICD9_description_dict=ICD9_description_dict,
    CCS_description_dict=CCS_description_dict,
)

In [None]:
# Keep only the code part of every split condition (text before " <=" / " >") and drop the dots.
ICD_9 = []
for condition in list_split_conditions:
    code_part = condition.split(" <=")[0].split(" >")[0]
    ICD_9.append(code_part.replace(".", ""))

## Load the relations

In [4]:
# Load the "finding site" relation embeddings and the relation dictionary itself.
# `with` closes each file handle as soon as the pickle has been read.
with open("../data/mapping_relations/relation_embeddings/finding_site.pkl", 'rb') as pickle_file:
    finding_site_embedding_dict = pickle.load(pickle_file)
with open("../data/mapping_relations/finding_site.pkl", 'rb') as pickle_file:
    finding_site_dict = pickle.load(pickle_file)

In [5]:
# Load the "due to" relation embeddings and the relation dictionary itself.
# `with` closes each file handle as soon as the pickle has been read.
with open("../data/mapping_relations/relation_embeddings/due_to.pkl", 'rb') as pickle_file:
    due_to_embedding_dict = pickle.load(pickle_file)
with open("../data/mapping_relations/due_to.pkl", 'rb') as pickle_file:
    due_to_dict = pickle.load(pickle_file)

In [6]:
# Load the "associated morphology" relation embeddings and the relation dictionary itself.
# `with` closes each file handle as soon as the pickle has been read.
with open("../data/mapping_relations/relation_embeddings/associated_morphology.pkl", 'rb') as pickle_file:
    associated_morphology_embedding_dict = pickle.load(pickle_file)
with open("../data/mapping_relations/associated_morphology.pkl", 'rb') as pickle_file:
    associated_morphology_dict = pickle.load(pickle_file)

In [7]:
# Load the "description" relation embeddings and the relation dictionary itself.
# `with` closes each file handle as soon as the pickle has been read.
with open("../data/mapping_relations/relation_embeddings/description.pkl", 'rb') as pickle_file:
    description_embedding_dict = pickle.load(pickle_file)
with open("../data/mapping_relations/description.pkl", 'rb') as pickle_file:
    description_dict = pickle.load(pickle_file)

# Load the diabetes dataset

In [8]:
# Notes table used by the extraction cells (read from only_diabete.csv).
df = pd.read_csv(filepath_or_buffer="../data/only_diabete.csv")

In [9]:
# Load the abbreviations dictionary; `with` closes the file handle after reading.
with open("../data/abbreviations/abbreviations_dict.pkl", "rb") as pickle_file:
    abbreviations = pickle.load(pickle_file)

# Make a prediction and an explanation with Doctor XAI

In [10]:
# Load the pickled patient-history dictionary; `with` closes the file handle after reading.
with open('../data/diabete_dictionary_patient_history.pkl', 'rb') as pickle_file:
    diabete_df_dictionary = pickle.load(pickle_file)

### Extract the most relevant part of the note

In [10]:
# Bundle the Doctor XAI inputs into the semantic-enrichment helper used by the extraction cells.
semantic_enrichment_args = (
    dataset_sequences,
    black_box_oracle,
    ontology_path_file,
    ICD9_description_dict,
    CCS_description_dict,
    admission_mimic_sequences,
)
se = SemanticEnrichment(*semantic_enrichment_args)

In [11]:
# Load the binary BioWordVec vectors; `limit` caps the number of vectors read at 40 million.
MAX_WORD_VECTORS = int(4E7)
word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(
    '../data/embeddings/BioWordVec_PubMed_MIMICIII_d200.vec.bin',
    binary=True,
    limit=MAX_WORD_VECTORS,
)

In [12]:
# Load the ICD-9 code -> description mapping; `with` closes the file handle after reading.
with open("../data/icd9_mapping/mapping_icd9_description.pkl", "rb") as pickle_file:
    mapping = pickle.load(pickle_file)

In [None]:
# Extract the information and create the dataframe with all the notes that we have to validate

threshold = 95
k = 1
results = []
subject_ids = [249, 1148, 1332, 2018]
notes = []
hadm_ids = []
codes = []
description_codes = []
sub_ids = []

# (relation label, relation dictionary, window size) for every relation type that is extracted.
relation_settings = [
    ("Finding_site", finding_site_dict, 7),
    ("Due_to", due_to_dict, 9),
    ("Associated_morphology", associated_morphology_dict, 7),
    ("Description", description_dict, 10),
]

for subject_id in subject_ids:
    # We get the relevant ICD9 codes for the patient and we use these codes and the relations taken from Snomed to
    # extract the most similar parts of the notes
    relevant_ICD9 = se.explain_and_get_most_relevant_ICD9(diabete_df_dictionary, subject_id)
    # The second return value gets its own name so the list being iterated is not rebound mid-loop.
    relevant_HADM_ID, relevant_subject_ids = se.get_relevant_HADM_ID(df, subject_id, relevant_ICD9)
    text, token, ICD_9 = se.get_text_and_tokens(df, relevant_HADM_ID)
    print(relevant_ICD9)

    extracted_by_relation = [
        (
            relation_label,
            se.extract_most_similar_part(
                relation_dict, text, token, ICD_9, relevant_ICD9, word2vec_model, window_size, threshold, k
            ),
        )
        for relation_label, relation_dict, window_size in relation_settings
    ]

    # One row per (admission, relevant code) pair: this is what has to be validated manually.
    for hadm_id, sub_id, note, icd9_list in zip(relevant_HADM_ID, relevant_subject_ids, text, ICD_9):
        for code in icd9_list:
            if code in relevant_ICD9:
                notes.append(note)
                sub_ids.append(sub_id)
                hadm_ids.append(hadm_id)
                codes.append(code)
                try:
                    description_codes.append(mapping[str(code)])
                except KeyError:
                    # Codes without a known description get an empty string.
                    description_codes.append("")

    # One result tuple per admission and relation type, in the order of `relation_settings`.
    for relation_label, best_substrings in extracted_by_relation:
        for HADM_ID, best_substring in zip(relevant_HADM_ID, best_substrings):
            results.append((subject_id, HADM_ID, relation_label, best_substring[1]))


list_tuples = list(zip(sub_ids, hadm_ids, codes, description_codes, notes))
notes_to_validate = pd.DataFrame(list_tuples, columns=['subject_id', 'hadm_id', 'codes', 'description', 'notes'])

In [None]:
# Preview the first rows of the notes selected for manual validation.
notes_to_validate.head()

In [None]:
# (rows, columns) of the validation table.
notes_to_validate.shape

In [None]:
# Persist the validation table (the DataFrame index is written as the first column).
notes_to_validate.to_csv(path_or_buf="../data/validation/to_validate.csv")

In [None]:
# Debugging aid: `results` is still the plain list built by the extraction cell at this point.
type(results)

In [None]:
# Create the dataframe with all the extracted relations

# Each entry of `results` is (subject id, admission id, relation type, extracted relations).
# One row is emitted per extracted window, and the subject/admission/relation-type values are
# repeated on every row so that all columns stay aligned.
relation_rows = []
for item in results:
    subject_id = item[0]
    hadm_id = item[1]
    relation_type = item[2]
    extracted_relations = item[3]
    for extracted_relation in extracted_relations:
        ICD_9 = extracted_relation[0]
        for exr in extracted_relation[1]:
            relation_rows.append(
                (subject_id, hadm_id, ICD_9, relation_type, exr.relation, exr.similarity, exr.best_substring)
            )

# NOTE: this rebinds `results` from the list of tuples to a DataFrame; re-run the extraction
# cell before running this cell a second time.
results = pd.DataFrame(
    relation_rows,
    columns=['subject_ID', 'hadm_id', 'icd_9', 'relation_type', 'relation', 'similarity', 'extracted_substring'],
)

In [None]:
# Preview the first rows of the extracted relations.
results.head()

In [None]:
# Persist the extracted relations without the DataFrame index.
results.to_csv(path_or_buf="../data/validation/result_sentence_extraction.csv", index=False)

# Manual annotations: JSON to CSV

In [None]:
# Load the manual annotations; `with` closes the file handle after parsing.
with open("../data/annotations.json", "rb") as annotations_file:
    manual_annotations = json.load(annotations_file)

In [None]:
# Flatten the manual annotations into one row per labelled span.
annotation_rows = []

for annotation in manual_annotations:
    note = annotation['data']['TEXT']
    ICD_9_code = annotation['data']['ICD9_CODE']
    HADM_ID = annotation['data']['HADM_ID']
    # Only the first annotation set of every task is used.
    for item in annotation['annotations'][0]['result']:
        start = item['value']['start']
        end = item['value']['end']
        relation = item['value']['labels'][0]
        # SECURITY NOTE(review): eval() executes whatever expression the annotation file contains.
        # The first key of the evaluated mapping is taken as the ICD-9 code; consider
        # ast.literal_eval if the field is always a plain dict literal.
        icd_9_code = list(eval(ICD_9_code).keys())[0]
        # NOTE(review): `end + 1` treats `end` as an inclusive offset — confirm against the
        # offset convention of the annotation tool that produced the export.
        annotation_rows.append((HADM_ID, icd_9_code, note[start:end+1], relation, start, end, note))

dframe = pd.DataFrame(
    annotation_rows,
    columns=['HADM_ID', 'icd_9_code', 'extracted_string', 'relation', 'start', 'end', 'note'],
)

In [None]:
# Keep every annotation whose relation label is not "description".
dframe = dframe.loc[dframe["relation"] != "description"]

In [None]:
# Persist the manually annotated spans (the DataFrame index is written as the first column).
dframe.to_csv(path_or_buf="../data/validation/manually_annotated_dataset.csv")

In [None]:
# Serialise the annotations as a list of records and write them through the json module.
manually_annotated_json = dframe.to_json(orient="records")
parsed = json.loads(manually_annotated_json)
# `with` guarantees the output file is flushed and closed once the dump has finished.
with open("../data/validation/manually_annotated_dataset.json", "w") as json_file:
    json.dump(parsed, json_file)

# Extract ICD-9 codes and HADM IDs

In [13]:
# Table with the codes, admissions and notes to run the extraction on.
notes_icd9_hadmid = pd.read_csv(filepath_or_buffer="../data/validation/simona_to_validate2.csv")

In [14]:
# Pull the four columns used by the extraction loop out of the table, one Series each.
ICD_9_extracted, hadm_id_extracted, notes_extracted, id_extracted = (
    notes_icd9_hadmid[column_name]
    for column_name in ('codes', 'hadm_id', 'notes', 'subject_id')
)

In [15]:
# Preview the notes table that the admissions are looked up in.
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,SUBJECT_ID,HADM_ID,CATEGORY,INSURANCE,ADMISSION_TYPE,INSURANCE.1,TEXT,Token,ICD9_CODE,DIAGNOSIS
0,293,293,28063,121936.0,Discharge summary,Medicare,EMERGENCY,Medicare,Admission Date: [**2125-2-9**] D...,"['service', 'medicine', 'allergies', 'zocor', ...","['42843', '41071', '5990', '4275', '5849', '50...",CONGESTIVE HEART FAILURE
1,705,705,2414,106238.0,Discharge summary,Private,EMERGENCY,Private,Admission Date: [**2186-6-7**] Discharge ...,"['date', 'birth', 'sex', 'service', 'history',...","['5781', '4280', '78039', '41401', '412', 'V45...",GASTROINTESTINAL BLEED
2,881,881,98046,139402.0,Discharge summary,Medicare,EMERGENCY,Medicare,Admission Date: [**2198-7-17**] ...,"['date', 'birth', 'sex', 'service', 'cardiotho...","['4241', '42843', '2761', '5849', '3342', '414...",CHEST PAIN
3,1088,1088,22180,116189.0,Discharge summary,Medicare,EMERGENCY,Medicare,Admission Date: [**2132-9-4**] D...,"['service', 'med', 'allergies', 'patient', 're...","['53140', '2800', '4280', '53501', '25000', '4...",R/O GASTROINTESTINAL BLEED
4,1093,1093,22180,162436.0,Discharge summary,Medicare,EMERGENCY,Medicare,Admission Date: [**2134-2-16**] ...,"['service', 'medicine', 'allergies', 'patient'...","['80600', '5849', '5990', '42731', '4280', '41...",SYNCOPE;TELEMETRY


In [16]:
# For every admission id take the TEXT of the first matching row of the notes table.
notes_extracted = [
    df.loc[df['HADM_ID'] == had, 'TEXT'].values[0]
    for had in hadm_id_extracted
]

In [17]:
# Clean every note; keep the original text and the cleaned, split version side by side.
original_note_list = []
notes_extracted_cleaned = []
for raw_note in notes_extracted:
    original_note, cleaned_note, cleaned_note_splitted = (
        NotesCleaning().clean_note_and_remove_abbreviations(raw_note, abbreviations)
    )
    original_note_list.append(original_note)
    notes_extracted_cleaned.append(cleaned_note_splitted)

# From here on `notes_extracted` holds the cleaned, split notes instead of the raw text.
notes_extracted = notes_extracted_cleaned

In [18]:
def get_k_most_similar_substring(
    embedding_relation,
    token,
    window_size,
    model,
    k,
):
    """Return the k rolling windows of `token` most similar to a relation embedding.

    The windows and their embeddings come from
    `UtilsSimilarity().rolling_window_embedding(token, window_size, model)`; every
    window embedding is scored against `embedding_relation` with
    `compute_cosine_similarity`.

    Returns:
        A list of at most `k` `WindowStore(similarity, window)` objects, ordered by
        decreasing similarity (ties keep the original window order).
    """
    list_windows, embedding_windows = UtilsSimilarity().rolling_window_embedding(
        token, window_size, model
    )

    scored_windows = [
        (
            UtilsSimilarity().compute_cosine_similarity(window_embedding, embedding_relation),
            window,
        )
        for window, window_embedding in zip(list_windows, embedding_windows)
    ]

    # sorted() is stable, so equally similar windows stay in their original order.
    ranked_windows = sorted(scored_windows, key=lambda scored: scored[0], reverse=True)
    return [WindowStore(score, window) for score, window in ranked_windows[:k]]



def _score_relation_windows(relation_dict, tokens, ICD_9, word2vec_model, window_size, threshold, k):
    """Collect the best windows for every relation of one code plus the percentile cut-off.

    Returns:
        `(candidates, cutoff)`. `candidates` holds every window returned by
        `get_k_most_similar_substring`, tagged with its relation via `add_relation`;
        `cutoff` is the `threshold`-th percentile of their similarities. When the code
        has no relations, or no window was produced, the result is `([], None)`.
    """
    relations = relation_dict.get(str(ICD_9), None)
    if not relations:
        return [], None

    candidates = []
    # We can have multiple relations for each ICD-9 code.
    for relation in relations:
        embedding_relation = EmbeddingUtils().compute_embeddings(word2vec_model, relation)
        for window in get_k_most_similar_substring(
            embedding_relation, tokens, window_size, word2vec_model, k
        ):
            # Store the relation next to the similarity value and the extracted substring.
            window.add_relation(relation)
            candidates.append(window)

    if not candidates:
        # No window at all: there is nothing to compute a percentile on.
        return [], None

    cutoff = np.percentile([candidate.similarity for candidate in candidates], threshold)
    return candidates, cutoff


def extract_most_similar_part(
    se,
    relation_dict,
    note,
    tokens,
    ICD_9,
    word2vec_model,
    window_size,
    threshold,
    k,
):
    """Return the relation windows whose similarity is at or above the percentile cut-off.

    Args:
        se: Unused; kept so existing calls keep working.
        relation_dict: Maps `str(ICD_9)` to the relations of that code.
        note: Unused; kept so existing calls keep working.
        tokens: Tokens of the note, scanned with rolling windows.
        ICD_9: Code whose relations are looked up (converted with `str`).
        word2vec_model: Embedding model passed to the embedding and window helpers.
        window_size: Size of the rolling window.
        threshold: Percentile (0-100) used as similarity cut-off.
        k: Number of best windows kept per relation.

    Returns:
        The windows with `similarity >= cutoff`, or `[]` when the code has no relations.
    """
    candidates, cutoff = _score_relation_windows(
        relation_dict, tokens, ICD_9, word2vec_model, window_size, threshold, k
    )
    return [candidate for candidate in candidates if candidate.similarity >= cutoff]


def extract_lower_than_threshold(
    se,
    relation_dict,
    note,
    tokens,
    ICD_9,
    word2vec_model,
    window_size,
    threshold,
    k,
):
    """Return the relation windows whose similarity is strictly below the percentile cut-off.

    Takes the same arguments as `extract_most_similar_part` (`se` and `note` are unused)
    and keeps the complementary set of windows: those with `similarity < cutoff`.
    Returns `[]` when the code has no relations.
    """
    candidates, cutoff = _score_relation_windows(
        relation_dict, tokens, ICD_9, word2vec_model, window_size, threshold, k
    )
    return [candidate for candidate in candidates if candidate.similarity < cutoff]


In [19]:
# Lower than threshold: collect the windows that fall below the similarity percentile.

threshold = 95
k = 1
results = []

# (relation label, relation dictionary, window size) for every relation type that is extracted.
relation_settings = [
    ("Finding_site", finding_site_dict, 7),
    ("Due_to", due_to_dict, 9),
    ("Associated_morphology", associated_morphology_dict, 7),
    ("Description", description_dict, 10),
]

for code, subject_id, hadm_id, note, token in zip(ICD_9_extracted, id_extracted, hadm_id_extracted, original_note_list, notes_extracted):

    extracted_by_relation = [
        (
            relation_label,
            extract_lower_than_threshold(se, relation_dict, note, token, code, word2vec_model, window_size, threshold, k),
        )
        for relation_label, relation_dict, window_size in relation_settings
    ]

    for relation_label, best_substrings in extracted_by_relation:
        for best_substring in best_substrings:
            # Only the first element of the returned triple (the converted string) is kept.
            converted_string = se.convert_to_original_substring(note, best_substring.best_substring)[0]
            results.append((subject_id, hadm_id, code, relation_label, best_substring.best_substring, best_substring.similarity, best_substring.relation, converted_string))

In [None]:
# At or above threshold: collect the windows that reach the similarity percentile.
# WARNING: this cell rebinds `results`. When the notebook is run top to bottom it replaces the
# lower-than-threshold output of the previous cell, which the cells below then turn into a
# DataFrame and save. Run only one of the two extraction cells before continuing.

threshold = 95
k = 1
results = []

# (relation label, relation dictionary, window size) for every relation type that is extracted.
relation_settings = [
    ("Finding_site", finding_site_dict, 7),
    ("Due_to", due_to_dict, 9),
    ("Associated_morphology", associated_morphology_dict, 7),
    ("Description", description_dict, 10),
]

for code, subject_id, hadm_id, note, token in zip(ICD_9_extracted, id_extracted, hadm_id_extracted, original_note_list, notes_extracted):

    extracted_by_relation = [
        (
            relation_label,
            extract_most_similar_part(se, relation_dict, note, token, code, word2vec_model, window_size, threshold, k),
        )
        for relation_label, relation_dict, window_size in relation_settings
    ]

    for relation_label, best_substrings in extracted_by_relation:
        for best_substring in best_substrings:
            # Only the first element of the returned triple (the converted string) is kept.
            converted_string = se.convert_to_original_substring(note, best_substring.best_substring)[0]
            results.append((subject_id, hadm_id, code, relation_label, best_substring.best_substring, best_substring.similarity, best_substring.relation, converted_string))

In [20]:
# Keep a copy of the extraction output, because a later cell rebinds `results` to a DataFrame.
back_result = results.copy()

In [21]:
# Restore `results` from the backup (lets the DataFrame cell below be re-run).
results = back_result.copy()

In [22]:
# Create the dataframe with all the extracted relations

# Output column -> position of its value inside every result tuple.
column_positions = [
    ('subject_ID', 0),
    ('hadm_id', 1),
    ('icd_9', 2),
    ('relation_type', 3),
    ('relation', 6),
    ('similarity', 5),
    ('extracted_substring', 4),
    ('converted_string', 7),
]

list_tuples = [
    tuple(item[position] for column, position in column_positions)
    for item in results
]

# `results` changes from the list of tuples to the DataFrame built from it.
results = pd.DataFrame(list_tuples, columns=[column for column, position in column_positions])

In [23]:
# Preview the first rows of the extracted relations.
results.head(11)

Unnamed: 0,subject_ID,hadm_id,icd_9,relation_type,relation,similarity,extracted_substring,converted_string
0,249,116935,49322,Finding_site,airway structure,0.688396,"[failure, post, upper, respiratory, infection,...",
1,249,116935,49322,Associated_morphology,chronic inflammatory morphology,0.788005,"[infection, reactive, airways, disease, chroni...",
2,249,116935,42731,Finding_site,cardiac conducting system structure,0.787525,"[for, cardiac, catherterization, and, found, h...",for cardiac\ncatherterization and found to hav...
3,249,116935,41401,Associated_morphology,atherosclerosis,0.681517,"[diagnosis, native, three, vessel, coronary, a...",
4,249,116935,41401,Associated_morphology,arteriolosclerosis,0.652033,"[renal, failure, hypercoagulability, neck, hem...",renal failure\nhypercoagulability\nneck hemato...
5,249,116935,2449,Finding_site,endocrine gonad,0.634076,"[hypothyroidism, levoxyl, continued, thyroid, ...",
6,249,149546,56985,Finding_site,abdominal vascular structure,0.788083,"[vessels, are, unremarkable, the, vertebral, a...",vessels are unremarkable.\nThe vertebral\nand ...
7,249,149546,56985,Finding_site,intestinal structure,0.6808,"[the, junction, the, and, segments, this, repr...",the\njunction of the M1 and M2 segments. This ...
8,249,149546,56985,Finding_site,vascular structure,0.713587,"[the, and, segments, this, represents, thrombu...",the M1 and M2 segments. This represents a thro...
9,249,149546,56985,Finding_site,gastrointestinal tract structure,0.744394,"[the, setting, acute, gastrointestinal, bleedi...",


In [24]:
# Persist the extracted relations without the DataFrame index.
results.to_csv(path_or_buf="../data/validation/result_sentence_extraction_lowe_than_threshold.csv", index=False)

# Results

In [3]:
# Semicolon-separated validated table; note that `df` no longer refers to the notes table after this cell.
df = pd.read_csv(filepath_or_buffer="../data/validation/conteggio.csv", delimiter=";")

In [4]:
# Preview the validated rows; the `Validazione` column holds the TP/TN/FP/FN label.
df.head()

Unnamed: 0,subject_ID,hadm_id,icd_9,relation_type,relation,similarity,extracted_substring,converted_string,Validazione
0,249,116935,49322,Finding_site,airway structure,0.688396,['failure' 'post' 'upper' 'respiratory' 'infec...,,FN
1,249,116935,49322,Associated_morphology,chronic inflammatory morphology,0.788004,['infection' 'reactive' 'airways' 'disease' 'c...,,TN
2,249,116935,42731,Finding_site,cardiac conducting system structure,0.787525,['for' 'cardiac' 'catherterization' 'and' 'fou...,for cardiac\ncatherterization and found to hav...,TN
3,249,116935,41401,Associated_morphology,atherosclerosis,0.681517,['diagnosis' 'native' 'three' 'vessel' 'corona...,,TN
4,249,116935,41401,Associated_morphology,arteriolosclerosis,0.652033,['renal' 'failure' 'hypercoagulability' 'neck'...,renal failure\nhypercoagulability\nneck hemato...,TN


In [43]:
# Number of Finding_site rows validated as TN.
((df.Validazione == "TN") & (df.relation_type == "Finding_site")).sum()

87

In [44]:
# Number of Finding_site rows validated as TP.
((df.Validazione == "TP") & (df.relation_type == "Finding_site")).sum()

4

In [45]:
# Number of Finding_site rows validated as FP.
((df.Validazione == "FP") & (df.relation_type == "Finding_site")).sum()

38

In [46]:
# Number of Finding_site rows validated as FN.
((df.Validazione == "FN") & (df.relation_type == "Finding_site")).sum()

4

In [47]:
# Number of Associated_morphology rows validated as TN.
((df.Validazione == "TN") & (df.relation_type == "Associated_morphology")).sum()

16

In [48]:
# Number of Associated_morphology rows validated as TP.
((df.Validazione == "TP") & (df.relation_type == "Associated_morphology")).sum()

2

In [49]:
# Number of Associated_morphology rows validated as FP.
((df.Validazione == "FP") & (df.relation_type == "Associated_morphology")).sum()

12

In [50]:
# Number of Associated_morphology rows validated as FN.
((df.Validazione == "FN") & (df.relation_type == "Associated_morphology")).sum()

5

In [51]:
# Number of Description rows validated as TN.
((df.Validazione == "TN") & (df.relation_type == "Description")).sum()

9

In [52]:
# Number of Description rows validated as TP.
((df.Validazione == "TP") & (df.relation_type == "Description")).sum()

47

In [53]:
# Number of Description rows validated as FN.
((df.Validazione == "FN") & (df.relation_type == "Description")).sum()

14

In [54]:
# Number of Description rows validated as FP.
((df.Validazione == "FP") & (df.relation_type == "Description")).sum()

10

In [55]:
# Number of Due_to rows validated as TN.
((df.Validazione == "TN") & (df.relation_type == "Due_to")).sum()

9

In [56]:
# Number of Due_to rows validated as TP.
((df.Validazione == "TP") & (df.relation_type == "Due_to")).sum()

1

In [57]:
# Number of Due_to rows validated as FN.
((df.Validazione == "FN") & (df.relation_type == "Due_to")).sum()

1

In [58]:
# Number of Due_to rows validated as FP.
((df.Validazione == "FP") & (df.relation_type == "Due_to")).sum()

6