In [5]:
import pandas as pd
import numpy as np
from collections import OrderedDict

# Display settings: never truncate long cell contents (full sentences stay readable)
# and show up to 200 rows before pandas abbreviates a displayed frame.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 200)

In [7]:
# Load the subtitle substitution results (';'-separated CSV), report the number of rows,
# and preview the first 100 rows.
subtitles = pd.read_csv('../LexSimp_BERT_MLM/output/subtitles/sw_source_subtitles_substitution_clinicalbert_disch.csv', sep=';')
print(len(subtitles))
subtitles.head(100)

6116


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
0,House,56,That could manifest with hypo-cardio.,hypo-cardio,[],,,"['infection', 'anxiety', 'age', 'MR', 'stress', 'pneumonia', 'exercise', 'fatigue', 'nausea', 'demand']",infection,12
1,House,180,And we can't go there because of the encephalitis.,encephalitis,"['brain inflammation', 'rasmussen encephalitis']",brain inflammation,11.0,"['infection', 'pneumonia', 'seizure', 'fever', 'bleeding', 'confusion', 'disease', 'bleed', 'stroke', 'liver']",infection,33
2,House,346,We'll skin test for allergies.,allergies,"['allergy', 'hypersensitivity', 'hypersensitivity symptom']",allergy,100.0,"['flu', 'infection', 'fever', 'cancer', 'age', 'pets', 'cough', 'pneumonia', 'pain', 'yeast']",cancer,46
3,House,415,Would've been easier to deal with a tumor.,tumor,['tumor'],tumor,100.0,"['cancer', 'mass', 'pneumonia', 'stroke', 'stomach', 'virus', 'seizure', 'liver', 'kidney', 'disease']",cancer,18
4,House,472,Melancholy without hope.,Melancholy,['sadness'],sadness,15.0,"['Death', 'Life', 'Living', 'Happiness', 'Lives', 'Live', 'Suicide', 'Sad', 'immortality', 'Magic']",Death,40
5,House,503,How do you solve a problem like dermatitis?,dermatitis,['skin inflammation'],skin inflammation,11.0,"['cancer', 'depression', 'skin', 'diabetes', 'pain', 'infection', 'pneumonia', 'anxiety', 'malaria', 'tuberculosis']",depression,46
6,House,520,"Sister, you're having an asthma attack.",asthma,"['asthma', 'bronchitic asthma']",asthma,100.0,"['anxiety', 'heart', 'eye', 'acid', 'insulin', 'arm', 'panic', 'MI', 'stomach', 'infection']",anxiety,31
7,House,542,You diagnosed the patient with allergies and prescribed antihistamine.,allergies,"['allergy', 'hypersensitivity', 'hypersensitivity symptom']",allergy,100.0,"['depression', 'confusion', 'fever', 'pain', 'pneumonia', 'diabetes', 'cough', 'symptoms', 'medications', 'anxiety']",depression,29
8,House,568,That could manifest with tachycardia.,tachycardia,"['rapid heartbeat', 'rapid and irregular heartbeat']",rapid heartbeat,31.0,"['nausea', 'anxiety', 'fever', 'agitation', 'cough', 'fatigue', 'activity', 'sleep', 'rate', 'CP']",anxiety,22
9,House,569,There's no history of fever.,fever,"['fever', 'hyperthermic', 'feverish', 'fever symptoms']",fever,100.0,"['cough', 'seizure', 'trauma', 'infection', 'pneumonia', 'pain', 'falls', 'bleeding', 'travel', 'headache']",infection,36


In [37]:
def insert(source_string, source_term, substitution_term=None):
    """Mark the first occurrence of ``source_term`` in ``source_string`` with square brackets.

    Parameters
    ----------
    source_string : str
        Sentence that contains the term.
    source_term : str
        Term to locate; the match is exact and case-sensitive, and only the
        first occurrence is affected.
    substitution_term : str, optional
        When given, this text replaces the term inside the brackets. When
        ``None`` (default), the original term itself is bracketed.

    Returns
    -------
    str
        ``source_string`` with the first occurrence of ``source_term`` replaced
        by ``"[" + term + "]"``.

    Raises
    ------
    ValueError
        If ``source_term`` does not occur in ``source_string`` (raised by
        ``str.index``).
    """
    start_insert = source_string.index(source_term)
    end_insert = start_insert + len(source_term)

    # Identity check: `== None` can be overridden by the operand's __eq__,
    # whereas `is None` only matches the actual default.
    bracketed_term = source_term if substitution_term is None else substitution_term

    return source_string[:start_insert] + "[" + bracketed_term + "]" + source_string[end_insert:]

def create_evaluation_table(substitution_df, number_of_samples, random_state=None):
    """Build a shuffled annotation table comparing CHV and MLM substitutions.

    Rows of ``substitution_df`` are kept only when both ``source_term`` and
    ``CHV_substitution`` are a single whitespace-delimited word and
    ``CHV_sim_score`` is below 85. ``number_of_samples`` of those rows are drawn
    at random; each sampled sentence contributes one "CHV" row and one "MLM" row
    in which the affected term is wrapped in square brackets. The rating columns
    (``grammar_original``, ``grammar_simplified``, ``meaning_preservation``,
    ``simplicity``) are created empty (``None``).

    Side effects: prints the size of the filtered pool and the lower-cased
    source-term counts of the sample, and ``display``s the sampled rows.

    Parameters
    ----------
    substitution_df : pandas.DataFrame
        Must provide the columns ``origin``, ``sent_id``, ``sentence``,
        ``source_term``, ``CHV_substitution``, ``CHV_sim_score`` and
        ``MLM_substitution``.
    number_of_samples : int
        Number of sentences to sample from the filtered pool.
    random_state : optional
        Forwarded to both ``DataFrame.sample`` calls (row sampling and final
        shuffle). Pass e.g. an int to make the table reproducible; the default
        ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        Shuffled table with a fresh 0..n-1 index and up to
        ``2 * number_of_samples`` rows.

    Raises
    ------
    ValueError
        From ``DataFrame.sample`` when ``number_of_samples`` exceeds the number
        of rows that pass the filter.
    """
    # Sentences with a single-word source term and a single-word CHV substitution term ...
    single_word_source = substitution_df.source_term.apply(lambda x: len(x.split())) == 1
    # x == x is False only for NaN, so rows without a CHV substitution map to -1 and are dropped.
    single_word_chv = substitution_df.CHV_substitution.apply(
        lambda x: len(str(x).split()) if x == x else -1) == 1
    sw_source_and_sub = substitution_df.loc[single_word_source & single_word_chv]
    # ... with a CHV similarity score < 85.
    sw_source_and_sub = sw_source_and_sub.loc[sw_source_and_sub.CHV_sim_score < 85]
    print("len(sw_source_and_sub): ", len(sw_source_and_sub))

    # From the filtered sentences, take a random sample of n sentences.
    random_subset = sw_source_and_sub.sample(n=number_of_samples, random_state=random_state)
    source_term_counts = random_subset.source_term.str.lower().value_counts()
    print("Unique source terms: ", len(source_term_counts))
    print(source_term_counts)
    display(random_subset)

    # (method label, column holding that method's substitution term)
    methods = (("CHV", "CHV_substitution"), ("MLM", "MLM_substitution"))
    records_by_method = {method: [] for method, _ in methods}

    for _, row in random_subset.iterrows():
        # The term must occur verbatim (case-sensitive) in the sentence to be bracketed;
        # rows where it does not are skipped.
        if row["source_term"] not in row["sentence"]:
            continue

        original_sentence = insert(source_string=row["sentence"], source_term=row["source_term"])

        for method, substitution_column in methods:
            simplified_sentence = insert(source_string=row["sentence"],
                                         source_term=row["source_term"],
                                         substitution_term=row[substitution_column])
            records_by_method[method].append(OrderedDict({"origin": row["origin"],
                                                          "sent_id": row["sent_id"],
                                                          "method": method,
                                                          "original_sentence": original_sentence,
                                                          "simplified_sentence": simplified_sentence,
                                                          "grammar_original": None,
                                                          "grammar_simplified": None,
                                                          "meaning_preservation": None,
                                                          "simplicity": None
                                                          }))

    # One frame per method, concatenated in the order CHV, MLM.
    evaluation_table = pd.concat([pd.DataFrame(records_by_method[method]) for method, _ in methods])

    # Shuffle the rows (sample returns a new frame) and reset the index.
    evaluation_table = evaluation_table.sample(frac=1, random_state=random_state).reset_index(drop=True)

    return evaluation_table


In [211]:
# First subtitle evaluation set: sample 25 filtered sentences and build the shuffled
# table with up to two rows (CHV + MLM) per sentence.
# Note: the sampling is unseeded, so re-running this cell produces a different table.
subtitles_evaluation_table = create_evaluation_table(subtitles, 25)
print(len(subtitles_evaluation_table))
subtitles_evaluation_table

len(sw_source_and_sub):  158
Unique source terms:  20
dysosmia         2
autistic         2
ganglioma        2
oncology         2
tendinitis       2
depressed        1
mini-seizures    1
hemorrhage       1
hyperthermia     1
insomnia         1
encephalocele    1
listeria         1
pre-eclampsia    1
shigellosis      1
lacerations      1
comatose         1
opacity          1
haemorrhage      1
psych            1
ap               1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
4748,House,153593,"Malaria is the simplest, cheapest method of delivering hyperthermia.",hyperthermia,"['fever', 'hyperthermic', 'feverish']",fever,35.0,"['heat', 'treatment', 'therapy', 'temperature', 'comfort', 'radiation', 'temperatures', 'blood', 'medication', 'infection']",heat,38
3814,House,118395,"You're gonna blow up your career, then six months from now, when you've moved on from Amber to burnt sienna, you're gonna be stuck on a mobile oncology truck in Pewaukee, Wisconsin.",oncology,['tumor'],tumor,17.0,"['cancer', 'tumor', 'medicine', 'therapy', 'toxicity', 'treatment', 'imaging', 'surgery', 'staging', 'diagnosis']",cancer,31
3891,House,120842,Johnny was a non-verbal autistic.,autistic,['autism'],autism,83.0,"['infant', 'man', 'male', 'historian', 'teacher', 'child', 'expressive', 'hearing', 'female', 'interpreter']",male,20
4549,House,145282,The oncology department is subsidized by drug companies.,oncology,['tumor'],tumor,17.0,"['surgery', 'emergency', 'pharmacy', 'insurance', 'transplant', 'diabetes', 'liver', 'ID', 'radiation', 'medicine']",surgery,14
2714,House,78797,To Reduce The Encephalocele Cyst,Encephalocele,"['encephalocele', 'cranioschisis']",cranioschisis,33.0,"['residual', 'large', 'brain', 'pseudo', 'liver', 'frontal', 'retention', 'small', 'cerebral', 'kidney']",brain,12
5395,House,179161,"He had dysosmia for a couple of months, never even came here to get it treated.",dysosmia,"['cacosmia', 'smell disorders', 'anosmia', 'loss of smell']",cacosmia,62.0,"['diabetes', 'nausea', 'headache', 'pain', 'cough', 'weakness', 'anxiety', 'depression', 'CP', 'stopped']",diabetes,43
3223,House,97457,Certainly sounds like a psych case.,psych,"['psychological', 'mental illness', 'mental dysfunction']",psychological,71.0,"['psychiatric', 'stress', 'suicide', 'depression', 'panic', 'medical', 'viral', 'cardiac', 'anxiety', 'prison']",medical,20
3074,House,90735,"Patient's pregnant, could be pre-eclampsia.",pre-eclampsia,"['preeclampsia', 'toxemia', 'eph gestosis']",preeclampsia,72.0,"['normal', 'induced', 'treated', 'chronic', 'reactive', 'resolved', 'controlled', 'bleeding', 'progressive', 'corrected']",normal,21
3826,House,118594,We need a piece of the ganglioma.,ganglioma,"['ganglioneuroma', 'ganglioglioma']",ganglioneuroma,78.0,"['tumor', 'liver', 'brain', 'stomach', 'mass', 'tongue', 'skin', 'bone', 'scalp', 'skull']",tumor,14
5046,House,168230,Opacity in the left temporal lobe.,Opacity,"['opaque', 'opacity', 'increased density', 'corneal opacity', 'corneal clouding']",opaque,67.0,"['Blood', 'Air', 'Mass', 'consolidation', 'Gas', 'mass', 'Focus', 'fluid', 'Small', 'pneumonia']",Blood,22


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,House,35422,CHV,"Because she took such an interest, she discovered a tiny nodule... which in reality signified nothing but gave us no choice... but to put a person with [tendinitis] through an expensive and painful test.","Because she took such an interest, she discovered a tiny nodule... which in reality signified nothing but gave us no choice... but to put a person with [tendonitis] through an expensive and painful test.",,,,
1,House,27260,MLM,Maybe it's [shigellosis].,Maybe it's [pneumonia].,,,,
2,House,118395,CHV,"You're gonna blow up your career, then six months from now, when you've moved on from Amber to burnt sienna, you're gonna be stuck on a mobile [oncology] truck in Pewaukee, Wisconsin.","You're gonna blow up your career, then six months from now, when you've moved on from Amber to burnt sienna, you're gonna be stuck on a mobile [tumor] truck in Pewaukee, Wisconsin.",,,,
3,House,43907,CHV,Could be [listeria].,Could be [listeriosis].,,,,
4,House,43907,MLM,Could be [listeria].,Could be [infection].,,,,
5,House,118594,MLM,We need a piece of the [ganglioma].,We need a piece of the [tumor].,,,,
6,House,176664,MLM,How long have you been [depressed]?,How long have you been [confused]?,,,,
7,House,78797,CHV,To Reduce The [Encephalocele] Cyst,To Reduce The [cranioschisis] Cyst,,,,
8,House,118594,CHV,We need a piece of the [ganglioma].,We need a piece of the [ganglioneuroma].,,,,
9,House,78797,MLM,To Reduce The [Encephalocele] Cyst,To Reduce The [brain] Cyst,,,,


In [204]:
# Second, independently drawn subtitle evaluation set (same call, new random sample of 25).
# Note: the sampling is unseeded, so re-running this cell produces a different table.
subtitles_evaluation_table_V2 = create_evaluation_table(subtitles, 25)
print(len(subtitles_evaluation_table_V2))
subtitles_evaluation_table_V2

len(sw_source_and_sub):  158
Unique source terms:  21
ganglioma                4
tendinitis               2
vertigo                  1
epileptic                1
immuno-compromised       1
wolff-parkinson-white    1
hypogonadism             1
hemorrhage               1
hyperthermia             1
hemorrhages              1
oncology                 1
swollen                  1
rhabdo                   1
autistic                 1
syncope                  1
shigellosis              1
comatose                 1
depigmentation           1
psych                    1
laceration               1
nocardia                 1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
5934,House,198373,I haven't read any journal articles proving that veterans have a higher incidence of depigmentation.,depigmentation,['hypopigmentation'],hypopigmentation,70.0,"['depression', 'bleeding', 'confusion', 'trauma', 'infection', 'injury', 'infections', 'pneumonia', 'death', 'rejection']",infection,40
4935,House,161152,Recent developed loud-noise-induced vertigo.,vertigo,"['dizziness', 'positional vertigo', 'postural vertigo', 'paroxysmal vertigo', 'labrynthitis', 'peripheral vertigo']",dizziness,17.0,"['headache', 'confusion', 'weakness', 'nausea', 'seizure', 'cough', 'pneumonia', 'speech', 'stroke', 'CP']",headache,14
3861,House,119339,He had hemorrhages in his fingers.,hemorrhages,['bleeding'],bleeding,14.0,"['blood', 'swelling', 'bleeding', 'pain', 'bruises', 'tingling', 'pulses', 'weakness', 'trauma', 'lesions']",blood,14
3827,House,118595,"Then you're talking about another patient, yours doesn't have a ganglioma.",ganglioma,"['ganglioneuroma', 'ganglioglioma']",ganglioneuroma,78.0,"['tumor', 'mass', 'seizure', 'stroke', 'cancer', 'bleed', 'headache', 'brain', 'nerve', 'kidney']",tumor,14
4748,House,153593,"Malaria is the simplest, cheapest method of delivering hyperthermia.",hyperthermia,"['fever', 'hyperthermic', 'feverish']",fever,35.0,"['heat', 'treatment', 'therapy', 'temperature', 'comfort', 'radiation', 'temperatures', 'blood', 'medication', 'infection']",heat,38
3467,House,105350,"Nocardia fits just as well, but there's no way we can tell, since we can't take her temperature.",Nocardia,['nocardiosis'],nocardiosis,78.0,"['anxiety', 'Rhythm', 'Face', 'sleep', 'Pain', 'Panic', 'Speech', 'Sleep', 'MS', 'Mom']",anxiety,27
5846,House,195050,Made a laceration in the left ventricle about the size of the tip of my index finger.,laceration,['tear'],tear,44.0,"['hole', 'scar', 'bullet', 'fracture', 'wound', 'defect', 'cut', 'depression', 'mass', 'tear']",wound,0
3783,House,117715,"Could be a ganglioma, which is why chaseshouldn't have ignored it.",ganglioma,"['ganglioneuroma', 'ganglioglioma']",ganglioneuroma,78.0,"['tumor', 'mass', 'pneumonia', 'bleed', 'stroke', 'cancer', 'seizure', 'virus', 'tear', 'scar']",pneumonia,33
1282,House,35855,"Because she took such an interest, she discovered a tiny nodule, which in reality signified nothing, but gave us no choice but to put a person with tendinitis through an expensive and painful test.",tendinitis,['tendonitis'],tendonitis,83.0,"['pain', 'tingling', 'discomfort', 'interest', 'anxiety', 'weakness', 'nausea', 'cancer', 'swelling', 'concern']",pain,40
5513,House,183598,What looks like Wolff-Parkinson-White but isn't?,Wolff-Parkinson-White,['wpw'],wpw,17.0,"['white', 'black', 'blood', 'yellow', 'green', 'red', 'urine', 'brown', 'fish', 'food']",white,32


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,House,35855,CHV,"Because she took such an interest, she discovered a tiny nodule, which in reality signified nothing, but gave us no choice but to put a person with [tendinitis] through an expensive and painful test.","Because she took such an interest, she discovered a tiny nodule, which in reality signified nothing, but gave us no choice but to put a person with [tendonitis] through an expensive and painful test.",,,,
1,House,41745,MLM,A sex fiend with a [swollen] tongue.,A sex fiend with a [dry] tongue.,,,,
2,House,161152,MLM,Recent developed loud-noise-induced [vertigo].,Recent developed loud-noise-induced [headache].,,,,
3,House,117715,MLM,"Could be a [ganglioma], which is why chaseshouldn't have ignored it.","Could be a [pneumonia], which is why chaseshouldn't have ignored it.",,,,
4,House,153593,CHV,"Malaria is the simplest, cheapest method of delivering [hyperthermia].","Malaria is the simplest, cheapest method of delivering [fever].",,,,
5,House,118574,CHV,"Could be a [ganglioma], which is why Chase shouldn't have ignored it.","Could be a [ganglioneuroma], which is why Chase shouldn't have ignored it.",,,,
6,House,177549,CHV,The damsel in distress is an [epileptic].,The damsel in distress is an [epilepsy].,,,,
7,House,198373,CHV,I haven't read any journal articles proving that veterans have a higher incidence of [depigmentation].,I haven't read any journal articles proving that veterans have a higher incidence of [hypopigmentation].,,,,
8,House,56905,MLM,Severely [autistic].,Severely [depressed].,,,,
9,House,118574,MLM,"Could be a [ganglioma], which is why Chase shouldn't have ignored it.","Could be a [tumor], which is why Chase shouldn't have ignored it.",,,,


In [293]:
# Third, independently drawn subtitle evaluation set (same call, new random sample of 25).
# Note: the sampling is unseeded, so re-running this cell produces a different table.
subtitles_evaluation_table_V3 = create_evaluation_table(subtitles, 25)
print(len(subtitles_evaluation_table_V3))
subtitles_evaluation_table_V3

len(sw_source_and_sub):  158
Unique source terms:  22
hemorrhage      3
tendinitis      2
vertigo         1
depressive      1
rhabdo          1
hemorrhaging    1
sickness        1
melancholy      1
cancerous       1
ap              1
dysosmia        1
syncope         1
lacerations     1
ganglioma       1
comatose        1
vision          1
mania           1
sciatica        1
psych           1
autistic        1
oncology        1
swollen         1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
3112,House,92340,"He couldn't participate in contact sports, because of the risk of hemorrhage.",hemorrhage,['bleeding'],bleeding,14.0,"['bleeding', 'stroke', 'injury', 'infection', 'bleed', 'falls', 'fall', 'falling', 'trauma', 'seizure']",infection,13
4557,House,145658,Foreman knows it's rhabdo.,rhabdo,['rhabdomyolysis'],rhabdomyolysis,63.0,"['blood', 'heart', 'normal', 'motor', 'pain', 'chronic', 'black', 'cold', 'sleep', 'fine']",blood,36
3803,House,118182,Yours doesn't have a ganglioma.,ganglioma,"['ganglioneuroma', 'ganglioglioma']",ganglioneuroma,78.0,"['tumor', 'mass', 'bleed', 'stroke', 'seizure', 'cancer', 'headache', 'murmur', 'kidney', 'fracture']",tumor,14
4175,House,130368,Sometimes this test causes vertigo,vertigo,"['dizziness', 'positional vertigo', 'postural vertigo', 'paroxysmal vertigo', 'labrynthitis', 'peripheral vertigo']",dizziness,17.0,"['nausea', 'confusion', 'headache', 'weakness', 'tingling', 'fatigue', 'anxiety', 'pain', 'falls', 'swelling']",nausea,15
5910,House,197503,"If we do that and he starts hemorrhaging, we won't be able to stop the bleeding.",hemorrhaging,['bleeding'],bleeding,14.0,"['bleeding', 'coughing', 'pumping', 'drinking', 'moving', 'leaking', 'feeding', 'eating', 'recovering', 'spinning']",bleeding,14
3732,House,115811,Is she a psych patient?,psych,"['psychological', 'mental illness', 'mental dysfunction']",psychological,71.0,"['psychiatric', 'suicide', 'depression', 'depressed', 'medical', 'homeless', 'trauma', 'surgical', 'psychiatrist', 'hospital']",psychiatric,71
49,House,1858,We still have the problem of explaining how a white chick from Jersey... who's never traveled south of D.C. has African sleeping sickness.,sickness,"['illness', 'altitude sickness']",illness,29.0,"['disease', 'illness', 'syndrome', 'symptoms', 'spells', 'fever', 'problems', 'disorder', 'discomfort', 'pattern']",disease,20
5680,House,190443,"I got sciatica, like my dad.",sciatica,"['sciatica', 'ischias']",ischias,57.0,"['cold', 'CP', 'diabetes', 'nausea', 'pain', 'depression', 'weakness', 'anxiety', 'headache', 'back']",diabetes,43
1269,House,35412,She doesn't have tendinitis?,tendinitis,['tendonitis'],tendonitis,83.0,"['pain', 'weakness', 'swelling', 'symptoms', 'CP', 'MS', 'surgery', 'fever', 'pneumonia', 'infection']",pain,40
4,House,472,Melancholy without hope.,Melancholy,['sadness'],sadness,15.0,"['Death', 'Life', 'Living', 'Happiness', 'Lives', 'Live', 'Suicide', 'Sad', 'immortality', 'Magic']",Death,40


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,House,472,CHV,[Melancholy] without hope.,[sadness] without hope.,,,,
1,House,143353,MLM,"So unless he's dead, [comatose], or insane,","So unless he's dead, [confused], or insane,",,,,
2,House,37007,MLM,"plenty of bipolars are manic in the daytime, [depressive] at night.","plenty of bipolars are manic in the daytime, [negative] at night.",,,,
3,House,101821,CHV,Could a leaky space heater cause [vision] problems,Could a leaky space heater cause [hemeralopia] problems,,,,
4,House,46989,MLM,Half the doctors who specialize in [oncology] turn into burnt out cases.,Half the doctors who specialize in [medicine] turn into burnt out cases.,,,,
5,House,56942,CHV,You know what it's going to be like trying to put an [autistic] kid into a nuclear scanner?,You know what it's going to be like trying to put an [autism] kid into a nuclear scanner?,,,,
6,House,37007,CHV,"plenty of bipolars are manic in the daytime, [depressive] at night.","plenty of bipolars are manic in the daytime, [neurosis] at night.",,,,
7,House,35412,MLM,She doesn't have [tendinitis]?,She doesn't have [pain]?,,,,
8,House,118182,CHV,Yours doesn't have a [ganglioma].,Yours doesn't have a [ganglioneuroma].,,,,
9,House,27880,MLM,"The kid was just taking his [AP] calculus exam, when all of a sudden, he got nauseous and disoriented.","The kid was just taking his [initial] calculus exam, when all of a sudden, he got nauseous and disoriented.",,,,


In [221]:
# Load the Wikipedia substitution results (';'-separated CSV), report the number of rows,
# and display the frame.
wikipedia = pd.read_csv('../LexSimp_BERT_MLM/output/wikipedia/sw_source_wikipedia_substitution_clinicalbert_disch.csv', sep=';')
print(len(wikipedia))
wikipedia

960


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
0,wiki_fully,6,"In 1920, the Bucks had a daughter, Carol, afflicted with phenylketonuria.",phenylketonuria,"['phenylketonuria (PKU)', 'dhpr deficiency', 'hyperphenylalaninemia']",phenylketonuria (PKU),88.0,"['depression', 'diabetes', 'pneumonia', 'HIV', 'cocaine', 'seizure', 'anxiety', 'cancer', 'alcohol', 'marijuana']",depression,18
1,wiki_fully,11,"Unable to function, the muscles weaken and exhibit atrophy.",atrophy,"['atrophy', 'atrophic']",atrophy,100.0,"['wasting', 'weakness', 'fatigue', 'swelling', 'relaxation', 'fusion', 'age', 'confusion', 'inflammation', 'tingling']",weakness,18
2,wiki_fully,12,"About 75% of people contracting the disease experience ""limb onset"" ALS, i.e., first symptoms in the arms or legs.",ALS,"[""Lou Gehrig's disease (ALS)""]",Lou Gehrig's disease (ALS),17.0,"['symptoms', 'pains', 'syndrome', 'strokes', 'diseases', 'reactions', 'pain', 'disease', 'weakness', 'attacks']",symptoms,0
3,wiki_fully,16,North America and Australia have been free of FMD for many years.,FMD,['fibromuscular dysplasia'],fibromuscular dysplasia,15.0,"['lesions', 'pain', 'disease', 'falls', 'bleeding', 'symptoms', 'infection', 'cancer', 'stool', 'seizure']",disease,22
4,wiki_fully,19,"There is an increased risk of STI transmission if the receiving partner has wounds on his genitals, or if the giving partner has wounds or open sores on or in his or her mouth, or bleeding gums.",STI,['sexually transmitted diseases (STDs)'],sexually transmitted diseases (STDs),21.0,"['sexual', 'HIV', 'direct', 'vertical', 'sexually', 'oral', 'onward', 'horizontal', 'disease', 'virus']",sexual,22
...,...,...,...,...,...,...,...,...,...,...
955,wiki_auto,3645,Women are twice as likely to suffer from specific phobias as men.,phobias,"['phobia', 'claustrophobia', 'school phobia']",phobia,100.0,"['depression', 'symptoms', 'features', 'anxiety', 'stereotypes', 'behaviors', 'fears', 'thoughts', 'reactions', 'infections']",symptoms,31
956,wiki_auto,3752,"When the cancer has extended beyond the uterine tissue, more advanced treatments including combinations of chemotherapy, radiation therapy, or surgery may be required.",cancer,"['cancer', 'primary cancer', 'spindle cell carcinoma', 'carcinoma', 'anaplastic carcinoma', 'carcinomatosis', 'sarcomatoid carcinoma', 'undifferentiated carcinoma']",cancer,100.0,"['tumor', 'disease', 'mass', 'infection', 'bleeding', 'primary', 'inflammation', 'radiation', 'pain', 'invasion']",disease,17
957,wiki_auto,3766,In an open pneumothorax there is a passage from the external environment into the pleural space through the chest wall.,pneumothorax,"['free air in the chest outside the lung', 'catamenial pneumothorax']",free air in the chest outside the lung,21.0,"['abdomen', 'chest', 'stomach', 'lung', 'fracture', 'wound', 'heart', 'view', 'bladder', 'loop']",abdomen,11
958,wiki_auto,3795,Some cancer cells also have abnormal numbers of chromosomes.,cancer,"['cancer', 'primary cancer', 'spindle cell carcinoma', 'carcinoma', 'anaplastic carcinoma', 'carcinomatosis', 'sarcomatoid carcinoma', 'undifferentiated carcinoma']",cancer,100.0,"['normal', 'tumor', 'abnormal', 'inflammatory', 'blood', 'lung', 'plasma', 'stem', 'white', 'brain']",tumor,18


In [222]:
# First Wikipedia evaluation set: sample 25 filtered sentences and build the shuffled
# table with up to two rows (CHV + MLM) per sentence.
# Note: the sampling is unseeded, so re-running this cell produces a different table.
wikipedia_evaluation_table = create_evaluation_table(wikipedia, 25)
print(len(wikipedia_evaluation_table))
wikipedia_evaluation_table

len(sw_source_and_sub):  38
Unique source terms:  17
influenza           7
hypertrophy         2
malignancy          2
epileptics          1
hemorrhage          1
hyperthermia        1
neoplasms           1
hemorrhages         1
pigmentation        1
poliomyelitis       1
vertigo             1
convulsions         1
depressive          1
apoplexy            1
stillborn           1
autistic            1
epileptic-shocks    1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
172,wiki_fully,1286,"If the body is unable to maintain a normal temperature and it increases significantly above normal, a condition known as hyperthermia occurs.",hyperthermia,"['fever', 'hyperthermic', 'feverish']",fever,35.0,"['anxiety', 'pneumonia', 'infection', 'seizure', 'starvation', 'nausea', 'depression', 'confusion', 'fatigue', 'diabetes']",pneumonia,48
333,wiki_partially,104,Four years later William died in the 1918 influenza pandemic.,influenza,['flu'],flu,50.0,"['flu', 'virus', 'viral', 'respiratory', 'white', 'lung', 'pulmonary', 'newborn', 'unknown', 'bacterial']",virus,31
671,wiki_partially,3052,"In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological neoplasms.",neoplasms,['tumor'],tumor,15.0,"['tumors', 'disorders', 'diseases', 'lesions', 'cancer', 'disease', 'tumor', 'conditions', 'entities', 'masses']",tumors,15
44,wiki_fully,299,"Hypertrophy (from Greek ὑπέρ ""excess"" + τροφή ""nourishment"") is the increase in the volume of an organ or tissue due to the enlargement of its component cells.",Hypertrophy,['enlargement'],enlargement,24.0,"['*', 'relaxation', 'inflammation', 'Additionally', 'compensation', 'suppression', 'Background', 'Secondary', 'strengthening', 'Depression']",inflammation,11
423,wiki_partially,860,"In March 1918, some of the first recorded American cases of what came to be the worldwide influenza epidemic, also known as ""Spanish Flu"", were reported at Camp Funston.",influenza,['flu'],flu,50.0,"['flu', 'viral', 'virus', 'malaria', 'HIV', 'respiratory', 'bacterial', 'white', 'systemic', 'human']",virus,31
338,wiki_partially,120,"However, most studies have found no evidence of impairment in autistic individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.",autistic,['autism'],autism,83.0,"['normal', 'understanding', 'affecting', 'social', 'typical', 'individual', 'depressed', 'complex', 'older', 'anxious']",normal,17
242,wiki_fully,1756,He died at the Castle of Moncalieri having suffered an attack of apoplexy.,apoplexy,"['stroke', 'acute stroke']",stroke,29.0,"['depression', 'cancer', 'bleeding', 'stroke', 'suicide', 'pneumonia', 'anxiety', 'MI', 'diabetes', 'death']",depression,27
573,wiki_partially,2156,Blond hair is the result of having little pigmentation in the hair strand.,pigmentation,['hypopigmentation'],hypopigmentation,78.0,"['hair', 'blood', 'color', 'skin', 'drainage', 'bleeding', 'growth', 'breakdown', 'swelling', 'sensation']",skin,36
443,wiki_partially,1098,"Since antiviral drugs are effective in treating influenza if given early (see treatment section, below), it can be important to identify cases early.",influenza,['flu'],flu,50.0,"['infection', 'virus', 'tuberculosis', 'flu', 'infections', 'pneumonia', 'viruses', 'HIV', 'infants', 'malaria']",virus,31
450,wiki_partially,1140,There she began suffering from severe epileptic-shocks.,epileptic-shocks,"['epilepsy', 'physiological shock', 'shock syndrome']",epilepsy,61.0,"['agitation', 'depression', 'anxiety', 'confusion', 'fatigue', 'nausea', 'weakness', 'seizure', 'headache', 'pain']",depression,36


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,wiki_partially,2156,CHV,Blond hair is the result of having little [pigmentation] in the hair strand.,Blond hair is the result of having little [hypopigmentation] in the hair strand.,,,,
1,wiki_partially,3052,MLM,"In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [neoplasms].","In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [tumors].",,,,
2,wiki_partially,1140,CHV,There she began suffering from severe [epileptic-shocks].,There she began suffering from severe [epilepsy].,,,,
3,wiki_partially,1106,MLM,"These tests may be especially useful during the [influenza] season (prevalence = 25%) but in the absence of a local outbreak, or peri-influenza season (prevalence = 10%).","These tests may be especially useful during the [virus] season (prevalence = 25%) but in the absence of a local outbreak, or peri-influenza season (prevalence = 10%).",,,,
4,wiki_partially,1098,MLM,"Since antiviral drugs are effective in treating [influenza] if given early (see treatment section, below), it can be important to identify cases early.","Since antiviral drugs are effective in treating [virus] if given early (see treatment section, below), it can be important to identify cases early.",,,,
5,wiki_fully,1286,CHV,"If the body is unable to maintain a normal temperature and it increases significantly above normal, a condition known as [hyperthermia] occurs.","If the body is unable to maintain a normal temperature and it increases significantly above normal, a condition known as [fever] occurs.",,,,
6,wiki_partially,120,MLM,"However, most studies have found no evidence of impairment in [autistic] individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.","However, most studies have found no evidence of impairment in [normal] individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.",,,,
7,wiki_partially,3052,CHV,"In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [neoplasms].","In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [tumor].",,,,
8,wiki_partially,877,MLM,There are three types of [vertigo].,There are three types of [depression].,,,,
9,wiki_fully,299,MLM,"[Hypertrophy] (from Greek ὑπέρ ""excess"" + τροφή ""nourishment"") is the increase in the volume of an organ or tissue due to the enlargement of its component cells.","[inflammation] (from Greek ὑπέρ ""excess"" + τροφή ""nourishment"") is the increase in the volume of an organ or tissue due to the enlargement of its component cells.",,,,


In [229]:
# Build the second version (V2) of the Wikipedia evaluation table.
# NOTE(review): the second argument (25) presumably sets how many sentences are
# drawn per method, which would explain the 50 rows reported below -- confirm
# against the definition of create_evaluation_table.
wikipedia_evaluation_table_V2 = create_evaluation_table(wikipedia, 25)

# Report the row count, then render the table as the cell output.
print(wikipedia_evaluation_table_V2.shape[0])
wikipedia_evaluation_table_V2

len(sw_source_and_sub):  38
Unique source terms:  18
influenza           7
stillborn           2
vertigo             1
epileptics          1
hemorrhage          1
hyperthermia        1
neoplasms           1
hemorrhages         1
convulsions         1
hypertrophy         1
hemorrhaged         1
epileptic           1
pigmentation        1
malignancy          1
depressive          1
apoplexy            1
lameness            1
epileptic-shocks    1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
702,wiki_auto,342,About one-third of unintentional weight loss cases are secondary to malignancy.,malignancy,"['cancer', 'primary cancer']",cancer,33.0,"['cancer', 'pneumonia', 'infection', 'diabetes', 'depression', 'alcohol', 'anxiety', 'radiation', 'surgery', 'trauma']",cancer,33
671,wiki_partially,3052,"In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological neoplasms.",neoplasms,['tumor'],tumor,15.0,"['tumors', 'disorders', 'diseases', 'lesions', 'cancer', 'disease', 'tumor', 'conditions', 'entities', 'masses']",tumors,15
446,wiki_partially,1107,The type A viruses are the most virulent human pathogens among the three influenza types and cause the most severe disease.,influenza,['flu'],flu,50.0,"['virus', 'viral', 'bacterial', 'organism', 'blood', 'cell', 'vaccine', 'tree', 'lung', 'respiratory']",virus,31
530,wiki_partially,1823,"On 27 December, Ian Curtis suffered his first recognisable epileptic episode.",epileptic,"['epilepsy', 'aura']",epilepsy,80.0,"['seizure', 'cardiac', 'rhythmic', 'motor', 'sensory', 'complex', 'sharp', 'sleep', 'withdrawal', 'periodic']",cardiac,14
615,wiki_partially,2511,"When occurring, auras allow epileptics time to prevent injury to themselves and/or others.",epileptics,"['epilepsy', 'aura']",epilepsy,80.0,"['sleeping', 'seizure', 'recovery', 'people', 'additional', 'wake', 'withdrawal', 'caretaker', 'extinction', 'defenders']",seizure,31
424,wiki_partially,877,There are three types of vertigo.,vertigo,"['dizziness', 'positional vertigo', 'postural vertigo', 'paroxysmal vertigo', 'labrynthitis', 'peripheral vertigo']",dizziness,17.0,"['headache', 'depression', 'confusion', 'nausea', 'anxiety', 'neglect', 'falls', 'seizure', 'weakness', 'pain']",depression,29
423,wiki_partially,860,"In March 1918, some of the first recorded American cases of what came to be the worldwide influenza epidemic, also known as ""Spanish Flu"", were reported at Camp Funston.",influenza,['flu'],flu,50.0,"['flu', 'viral', 'virus', 'malaria', 'HIV', 'respiratory', 'bacterial', 'white', 'systemic', 'human']",virus,31
91,wiki_fully,648,"Anastasia's older sister, Maria, reportedly hemorrhaged in December 1914 during an operation to remove her tonsils, according to her paternal aunt Grand Duchess Olga Alexandrovna of Russia, who was interviewed later in her life.",hemorrhaged,['bleeding'],bleeding,14.0,"['died', 'stabbed', 'fell', 'presented', 'fractured', 'deceased', 'passed', 'injected', 'bleeding', 'cut']",bleeding,14
766,wiki_auto,1136,"Often, individuals have had one or more major depressive episodes.",depressive,['neurosis'],neurosis,43.0,"['anxiety', 'agitation', 'psychiatric', 'stress', 'panic', 'withdrawal', 'sleep', 'anxious', 'sleeping', 'paranoid']",anxiety,14
125,wiki_fully,950,"Further variation exists; thus, specific influenza strain isolates are identified by a standard nomenclature specifying virus type, geographical location where first isolated, sequential number of isolation, year of isolation, and HA and NA subtype.",influenza,['flu'],flu,50.0,"['respiratory', 'viral', 'bacterial', 'virus', 'vaccine', 'flu', 'field', 'blood', 'human', 'lung']",virus,31


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,wiki_partially,860,MLM,"In March 1918, some of the first recorded American cases of what came to be the worldwide [influenza] epidemic, also known as ""Spanish Flu"", were reported at Camp Funston.","In March 1918, some of the first recorded American cases of what came to be the worldwide [virus] epidemic, also known as ""Spanish Flu"", were reported at Camp Funston.",,,,
1,wiki_auto,1136,MLM,"Often, individuals have had one or more major [depressive] episodes.","Often, individuals have had one or more major [anxiety] episodes.",,,,
2,wiki_partially,1107,CHV,The type A viruses are the most virulent human pathogens among the three [influenza] types and cause the most severe disease.,The type A viruses are the most virulent human pathogens among the three [flu] types and cause the most severe disease.,,,,
3,wiki_partially,1098,MLM,"Since antiviral drugs are effective in treating [influenza] if given early (see treatment section, below), it can be important to identify cases early.","Since antiviral drugs are effective in treating [virus] if given early (see treatment section, below), it can be important to identify cases early.",,,,
4,wiki_partially,1140,MLM,There she began suffering from severe [epileptic-shocks].,There she began suffering from severe [depression].,,,,
5,wiki_partially,1140,CHV,There she began suffering from severe [epileptic-shocks].,There she began suffering from severe [epilepsy].,,,,
6,wiki_partially,2511,CHV,"When occurring, auras allow [epileptics] time to prevent injury to themselves and/or others.","When occurring, auras allow [epilepsy] time to prevent injury to themselves and/or others.",,,,
7,wiki_auto,342,CHV,About one-third of unintentional weight loss cases are secondary to [malignancy].,About one-third of unintentional weight loss cases are secondary to [cancer].,,,,
8,wiki_partially,1794,CHV,"One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent [lameness].","One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent [claudication].",,,,
9,wiki_auto,3064,CHV,"Approximately half are [stillborn], and an additional one-third die within 24 hours.","Approximately half are [stillbirth], and an additional one-third die within 24 hours.",,,,


In [342]:
# Build the third version (V3) of the Wikipedia evaluation table; this is the
# version that gets merged with the subtitle table for the final export.
# NOTE(review): the same call with the same arguments yields different rows
# from one execution to the next, so create_evaluation_table presumably samples
# randomly -- confirm, and consider seeding it for reproducibility.
wikipedia_evaluation_table_V3 = create_evaluation_table(wikipedia, 25)

# Report the row count, then render the table as the cell output.
print(wikipedia_evaluation_table_V3.shape[0])
wikipedia_evaluation_table_V3

len(sw_source_and_sub):  38
Unique source terms:  19
influenza           5
poliomyelitis       2
stillborn           2
epileptics          1
hemorrhage          1
hyperthermia        1
hemorrhages         1
neoplasms           1
vertigo             1
pigmentation        1
lameness            1
epileptic           1
convulsions         1
malignancy          1
depressive          1
epileptic-shocks    1
autistic            1
hemorrhaged         1
hypertrophy         1
Name: source_term, dtype: int64


Unnamed: 0,origin,sent_id,sentence,source_term,CHV_selection,CHV_substitution,CHV_sim_score,MLM_selection,MLM_substitution,MLM_sim_score
573,wiki_partially,2156,Blond hair is the result of having little pigmentation in the hair strand.,pigmentation,['hypopigmentation'],hypopigmentation,78.0,"['hair', 'blood', 'color', 'skin', 'drainage', 'bleeding', 'growth', 'breakdown', 'swelling', 'sensation']",skin,36
160,wiki_fully,1194,"Prevention of human-to-human transmission Influenza spreads between humans when infected people cough or sneeze, then other people breathe in the virus or touch something with the virus on it and then touch their own face.",Influenza,['flu'],flu,50.0,"['virus', 'pneumonia', 'infection', 'HIV', 'syndrome', 'viral', 'cough', 'flu', 'lung', 'disease']",virus,31
91,wiki_fully,648,"Anastasia's older sister, Maria, reportedly hemorrhaged in December 1914 during an operation to remove her tonsils, according to her paternal aunt Grand Duchess Olga Alexandrovna of Russia, who was interviewed later in her life.",hemorrhaged,['bleeding'],bleeding,14.0,"['died', 'stabbed', 'fell', 'presented', 'fractured', 'deceased', 'passed', 'injected', 'bleeding', 'cut']",bleeding,14
526,wiki_partially,1794,"One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent lameness.",lameness,['claudication'],claudication,36.0,"['weakness', 'fatigue', 'confusion', 'swelling', 'collapse', 'failure', 'bleeding', 'neglect', 'falls', 'damage']",weakness,25
895,wiki_auto,3064,"Approximately half are stillborn, and an additional one-third die within 24 hours.",stillborn,"['stillbirth', 'stillborn']",stillbirth,74.0,"['alive', 'dying', 'dead', 'viable', 'deceased', 'born', 'die', 'unconscious', 'killed', 'hanging']",dead,0
338,wiki_partially,120,"However, most studies have found no evidence of impairment in autistic individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.",autistic,['autism'],autism,83.0,"['normal', 'understanding', 'affecting', 'social', 'typical', 'individual', 'depressed', 'complex', 'older', 'anxious']",normal,17
446,wiki_partially,1107,The type A viruses are the most virulent human pathogens among the three influenza types and cause the most severe disease.,influenza,['flu'],flu,50.0,"['virus', 'viral', 'bacterial', 'organism', 'blood', 'cell', 'vaccine', 'tree', 'lung', 'respiratory']",virus,31
450,wiki_partially,1140,There she began suffering from severe epileptic-shocks.,epileptic-shocks,"['epilepsy', 'physiological shock', 'shock syndrome']",epilepsy,61.0,"['agitation', 'depression', 'anxiety', 'confusion', 'fatigue', 'nausea', 'weakness', 'seizure', 'headache', 'pain']",depression,36
615,wiki_partially,2511,"When occurring, auras allow epileptics time to prevent injury to themselves and/or others.",epileptics,"['epilepsy', 'aura']",epilepsy,80.0,"['sleeping', 'seizure', 'recovery', 'people', 'additional', 'wake', 'withdrawal', 'caretaker', 'extinction', 'defenders']",seizure,31
670,wiki_partially,3048,Poliovirus can affect the spinal cord and cause poliomyelitis.,poliomyelitis,"['polio', 'polioencephalitis']",polio,71.0,"['pneumonia', 'inflammation', 'infection', 'pain', 'fever', 'weakness', 'swelling', 'compression', 'fracture', 'confusion']",pneumonia,33


50


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,wiki_partially,1098,MLM,"Since antiviral drugs are effective in treating [influenza] if given early (see treatment section, below), it can be important to identify cases early.","Since antiviral drugs are effective in treating [virus] if given early (see treatment section, below), it can be important to identify cases early.",,,,
1,wiki_partially,1107,MLM,The type A viruses are the most virulent human pathogens among the three [influenza] types and cause the most severe disease.,The type A viruses are the most virulent human pathogens among the three [virus] types and cause the most severe disease.,,,,
2,wiki_partially,3052,CHV,"In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [neoplasms].","In turn, it is part of the even broader group of diseases affecting the blood, bone marrow, and lymphoid system, which are all known as hematological [tumor].",,,,
3,wiki_partially,2156,CHV,Blond hair is the result of having little [pigmentation] in the hair strand.,Blond hair is the result of having little [hypopigmentation] in the hair strand.,,,,
4,wiki_auto,3064,CHV,"Approximately half are [stillborn], and an additional one-third die within 24 hours.","Approximately half are [stillbirth], and an additional one-third die within 24 hours.",,,,
5,wiki_partially,1382,CHV,"Dilation and curettage (D&C), the second most common method of surgical abortion, is a standard gynecological procedure performed for a variety of reasons, including examination of the uterine lining for possible [malignancy], investigation of abnormal bleeding, and abortion.","Dilation and curettage (D&C), the second most common method of surgical abortion, is a standard gynecological procedure performed for a variety of reasons, including examination of the uterine lining for possible [cancer], investigation of abnormal bleeding, and abortion.",,,,
6,wiki_partially,1794,MLM,"One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent [lameness].","One of his arms and both feet became enlarged and at some point during his childhood he fell and damaged his hip, resulting in permanent [weakness].",,,,
7,wiki_partially,1140,MLM,There she began suffering from severe [epileptic-shocks].,There she began suffering from severe [depression].,,,,
8,wiki_auto,2470,CHV,"These types of large [hemorrhages] tend to happen more than once, often during sleep.","These types of large [bleeding] tend to happen more than once, often during sleep.",,,,
9,wiki_auto,1756,CHV,"In Australia any [stillborn] weighing more than 400 grams, or more than 20 weeks in gestation, must have its birth registered.","In Australia any [stillbirth] weighing more than 400 grams, or more than 20 weeks in gestation, must have its birth registered.",,,,


In [343]:
# Build the final evaluation table in one idempotent chain:
#   1. stack the Wikipedia and subtitle evaluation tables,
#   2. shuffle the rows so the two sources are interleaved,
#   3. replace the indices carried over from the source tables with a clean
#      0..n-1 range.
# `sample` returns a new frame (nothing is shuffled in place). The fixed
# `random_state` makes the row order identical on every Restart & Run All, so
# the exported file can be regenerated exactly.
final_evaluation_table = (
    pd.concat([wikipedia_evaluation_table_V3, subtitles_evaluation_table_V3])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

print(len(final_evaluation_table))
final_evaluation_table

100


Unnamed: 0,origin,sent_id,method,original_sentence,simplified_sentence,grammar_original,grammar_simplified,meaning_preservation,simplicity
0,wiki_partially,1107,MLM,The type A viruses are the most virulent human pathogens among the three [influenza] types and cause the most severe disease.,The type A viruses are the most virulent human pathogens among the three [virus] types and cause the most severe disease.,,,,
1,House,190443,MLM,"I got [sciatica], like my dad.","I got [diabetes], like my dad.",,,,
2,House,115811,CHV,Is she a [psych] patient?,Is she a [psychological] patient?,,,,
3,House,68019,MLM,"Human Behavior finals will also get into clinical presentations of [mania], and we'll touch on Stanislas Dehaene's PET scans.","Human Behavior finals will also get into clinical presentations of [depression], and we'll touch on Stanislas Dehaene's PET scans.",,,,
4,wiki_partially,675,MLM,High doses may lead to [convulsions].,High doses may lead to [seizure].,,,,
5,wiki_auto,2470,MLM,"These types of large [hemorrhages] tend to happen more than once, often during sleep.","These types of large [bleeding] tend to happen more than once, often during sleep.",,,,
6,wiki_partially,120,CHV,"However, most studies have found no evidence of impairment in [autistic] individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.","However, most studies have found no evidence of impairment in [autism] individuals' ability to understand other people's basic intentions or goals; instead, data suggests that impairments are found in understanding more complex social emotions or in considering others' viewpoints.",,,,
7,wiki_partially,2511,MLM,"When occurring, auras allow [epileptics] time to prevent injury to themselves and/or others.","When occurring, auras allow [seizure] time to prevent injury to themselves and/or others.",,,,
8,House,472,CHV,[Melancholy] without hope.,[sadness] without hope.,,,,
9,House,56942,CHV,You know what it's going to be like trying to put an [autistic] kid into a nuclear scanner?,You know what it's going to be like trying to put an [autism] kid into a nuclear scanner?,,,,


In [344]:
# Export the table for upload to MTurk. MTurk requires a comma-separated CSV,
# so the separator is spelled out explicitly; the positional index is not
# written to the file.
final_evaluation_table.to_csv(
    'evaluation_V3.csv',
    sep=',',
    index=False,
)