In [1]:
import os
from os.path import dirname, realpath
import sys
import subprocess
import re
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from pycorenlp import StanfordCoreNLP
from  nltk.parse  import CoreNLPParser

# realpath('') resolves to the current working directory, so this puts the
# *parent* of the working directory at the front of sys.path -- presumably the
# folder that holds helper_functions.py (verify against the repository layout).
sys.path.insert(0, dirname(realpath('')))
from helper_functions import synparse, tregex_tsurgeon

In [2]:
# --- Configuration -----------------------------------------------------------
# Replace with standardized filenames when pipeline for previous task is ready
filenames = ['DrugVisData.csv']

data_dir = '../data/'
out_dir = '../data/output/'
# tregex_dir = '../stanford-tregex-2018-02-27/'

# negated term list (use the human annotated version)
# The file is tab-separated with a header row; the 'ITEM' column is read here
# and the 'TYPE' column is looked up later when a term matches.
neg_list = pd.read_csv(data_dir + 'neg_list_complete.txt', sep='\t', header=0)
neg = neg_list['ITEM'].values

# Trigger strings used for plain substring matching against tokenized sentences.
# First every term padded with a space on both sides, then every term padded on
# the right only.
# NOTE(review): the right-padded-only variants have no leading space, so they
# also match when the term is merely the tail of a longer token (e.g. 'no ' is a
# substring of 'amino '). Confirm this is intended before relying on Is_Negated.
neg_term = [' ' + item + ' ' for item in neg]
neg_term.extend(item + ' ' for item in neg)

# `stopwords` is first the nltk corpus reader (imported in the first cell) and
# is rebound here to the plain list of English stop words. The isinstance guard
# makes this cell safe to re-run: without it, a second execution would call
# `.words` on the list and raise AttributeError.
if not isinstance(stopwords, list):
    stopwords = stopwords.words('english')

# Tokens that trigger clean-up steps on the extracted negated part:
# constituency/POS labels, and contrastive conjunctions at which the
# negated part is cut off.
RM_POS = ['NN', 'NNS', 'RB', 'NP', 'ADVP', 'IN']
RM_CP = ['however', 'although', 'but']

# Both clients talk to the same CoreNLP server, which must already be running
# on localhost:9001.
nlp = StanfordCoreNLP('http://localhost:9001')
parser = CoreNLPParser(url='http://localhost:9001')

In [3]:
# tree rules
#
# `trts` maps a rule name to a `(tregex_pattern, tsurgeon_operations)` pair.
# Rule names are either negation types (looked up from the 'TYPE' column of the
# negation list) or fallback rules requested explicitly by the processing loop.
# Patterns that contain a backslash are written as raw strings so that Python
# does not treat `\.` / `\,` as (invalid) escape sequences.

trts = {}

# no "jvd|murmurs|deficits" not work, pleural -> vbz?
# if np with top node=S???
# trts['NP'] = ('NP=target << DT=neg <<, /no|without/ !> NP >> TOP=t >> S=s',
#               'excise s target,delete neg')
trts['NP'] = (
    'NP=target << DT=neg <<, /no|without/ !> NP >> TOP=t',
    'delete neg',
)
# if np with top node=NP
trts['NP-nS'] = (
    'NP=target <<, /DT|NN|RB/=neg <<, /no|without/ !> NP >> TOP=t',
    'delete neg',
)

# denies -> mis pos to nns
trts['NP-denies'] = (
    'NP=target <<, /denies|deny|denied/=neg >> TOP=t',
    'delete neg',
)

# vp only
trts['VP-A'] = (
    'VP=target << /VBZ|VBD|VB/=neg >> TOP=t',
    'delete neg',
)
trts['VP-CC'] = (
    'VP=target <<, /VBZ|VBD|VB/=neg < CC >> TOP=t',
    'delete neg',
)

# vp only, 'resolved', add that neg1 part to prevent jvd -> VP, rashes -> VP error pos tagging
# trts['VP-P'] = ('NP=target <<, DT=neg1 <<, /no|negative|not/ $ VP=neg2 >> TOP=t >> S=s',
#                 'delete neg1')
# (VP-P and ADJP-P currently use exactly the same pattern and operation.)
trts['VP-P'] = (
    'VP=vp <<- /free|negative|absent|ruled|out|doubtful|unlikely|excluded|resolved|given/=neg $ NP=head >> TOP=t >> S=s',
    'excise s head',
)
# this is post, ... is negative
# trts['ADJP-P'] = ('VP=vp < ADJP <<- /negative/=neg $ NP=target >> TOP=t >> S=s',
#                   'delete vp,excise s target')
trts['ADJP-P'] = (
    'VP=vp <<- /free|negative|absent|ruled|out|doubtful|unlikely|excluded|resolved|given/=neg $ NP=head >> TOP=t >> S=s',
    'excise s head',
)
# this is ant, negative for ...
# trts['ADJP-A'] = ('PP=head $ JJ=neg < NP=target >> TOP=t > ADJP=s',
#                   'delete neg')
trts['ADJP-A'] = (
    'PP=head $ /JJ|ADJP|NP/=neg <- NP=target >> TOP=t >> /S|NP|ADJP/=s',
    'excise s target',
)

# not
# trts['ADVP-P'] = ('VP=target <<, /VB*|MD/ $ RB=neg >> TOP=t >> S=s',
#                   'excise s target')
trts['ADVP-P'] = (
    'VP=head $ RB=neg <<, /VB*|MD/=be >> TOP=t >> S=s',
    'delete head,delete neg',
)

# trts['ADVP-A'] = ('VP=target <<, /VB*|MD/ $ RB=neg >> TOP=t >> S=s',
#                   'excise s target')
trts['ADVP-A'] = (
    'VP=head $ RB=neg <<, /VB*|MD/ >> TOP=t >> S=s',
    'excise s head',
)
trts['ADVP-A2'] = (
    'VP=head << RB=neg <<, /VB*|MD/ << /ADJP|VP/=target >> TOP=t >> S=s',
    'excise s target',
)
# remove sbar
trts['ADVP-sbar'] = (
    'PP=head <<, /of|without/=neg > NP $ NP < NP=target >> TOP=t >> NP=st << SBAR=sbar',
    'excise st target,delete sbar',
)
trts['ADVP-advp'] = (
    'ADVP=advp',
    'delete advp',
)
trts['forced-sbar'] = (
    'SBAR=sbar',
    'delete sbar',
)

# remove RB
trts['ADVP-RB'] = (
    'TOP=target <<, RB=neg',
    'delete neg',
)

# sob become this, so need to be after np and vp
# trts['PP'] = ('PP=head <<, /of|without/=neg > NP $ NP < NP=target >> TOP=t >> NP=s',
#               'excise s target')
trts['PP'] = (
    'PP=head <<, IN=neg1 < NP=target >> TOP=t >> /S|NP|ADJP/=s $ /JJ|NP/=neg2',
    'excise s target',
)
trts['PP-2'] = (
    'PP=head << IN=neg <<, /of|without/ >> TOP=t',
    'delete neg',
)

# Sentences with a contrastive clause ("... but ...", "although ...").
trts['NP-CC'] = (
    r'S=s < NP =head<< PP=target << DT=neg <<, /no|without/ < CC=but << but < S=rm < /\.|\,/=punct << SBAR=sbar !> NP > TOP=t',
    'delete neg,delete sbar,delete punct,delete but,delete rm',
)
trts['NP-although'] = (
    r'S=s < NP =head<< PP=target << DT=neg <<, /no|without/ << /although|but/ < /\.|\,/=punct << SBAR=sbar !> NP > TOP=t',
    'delete neg,delete sbar,delete punct',
)

In [19]:
# ---------------------------------------------------------------------------
# Negation detection and negated-part extraction.
#
# For every row of every input file: split the 'sentence' text with CoreNLP,
# look for the first negation trigger in each resulting sentence, and, when one
# is found, run the matching tregex/tsurgeon rule (plus fallbacks) to extract
# the negated part. Results go into the columns Is_Negated, Negation_Term,
# Negated_Type and Negated_Part and are written to one CSV per input file.
# ---------------------------------------------------------------------------

# Hoisted lookups (the original rebuilt these lists for every matched sentence).
_NEG_ITEMS = set(neg_list['ITEM'].tolist())
_LEADING_NOISE = _NEG_ITEMS | set(stopwords)


def _split_sentences(text):
    """Return the CoreNLP sentence split of `text` as space-joined token strings.

    Sentences that consist of a single '.' token are dropped.
    """
    annotation = nlp.annotate(text, properties={
        'annotators': 'ssplit',
        'outputFormat': 'json',
        'threads': '4',
        'tokenize.options': 'normalizeParentheses=false, normalizeOtherBrackets=false'
    })
    token_lists = [[str(token['word']) for token in sent['tokens']]
                   for sent in annotation['sentences']]
    return [' '.join(tokens) for tokens in token_lists if tokens != ['.']]


def _first_negation_term(sentence):
    """Return the first entry of `neg_term` found in `sentence`, or None.

    A space is prepended to the sentence so that space-padded terms such as
    ' no ' also match at the very start of the sentence.
    """
    padded = ' ' + sentence
    return next((term for term in neg_term if term in padded), None)


def _extract_negated_part(sentence, neg_type):
    """Parse `sentence` and return its negated part, or None if `neg_type` is null.

    The sentence is always parsed with `synparse` (as in the original code, even
    when no rule type is available). The rule named by `neg_type` is applied
    first; fallback rules run while the output still equals the flattened text
    of the negated subtree, followed by token-level clean-up.
    """
    # presumably synparse also writes the parse tree to data_dir + 'tmp_neg_tree',
    # which tregex_tsurgeon reads below -- TODO confirm in helper_functions.
    tree_file = data_dir + 'tmp_neg_tree'

    # constituency tree parsing
    _, _, neg_part = synparse(sentence, data_dir, neg_list, parser)
    # Strip bracket labels and closing parentheses from the bracketed tree,
    # leaving the plain token text. The fallbacks compare rule output to this.
    s = re.sub(r'\([A-Z]*\$? |\(-[A-Z]+- |\)|\)|\(, |\(. ', '', str(neg_part[0]))

    if pd.isnull(neg_type):
        return None

    # run tregex/tsurgeon based on the selected neg type
    # NOTE(review): the recorded output of this cell prints values such as
    # ">> b'not recommended ...\r\n'", i.e. ts_out looks like the str() of a
    # bytes object. If so, the `s == ts_out`, first-token and length checks
    # below can never behave as intended -- confirm in tregex_tsurgeon.
    ts_out, tree = tregex_tsurgeon(tree_file, neg_type, trts)

    # deal with corner cases
    if neg_type == 'NP' and ('that' in ts_out):
        print('--- NP with that')
        ts_out, tree = tregex_tsurgeon(tree_file, 'NP-denies', trts)
    if neg_type == 'NP' and s == ts_out:
        print('--- NP without S node')
        ts_out, tree = tregex_tsurgeon(tree_file, 'NP-nS', trts)

    if neg_type == 'PP' and any(tok in _NEG_ITEMS for tok in ts_out.split()):
        print('--- NP without S node')
        ts_out, tree = tregex_tsurgeon(tree_file, 'NP-nS', trts)

    if neg_type == 'VP-A' and s == ts_out:
        print('--- VP-A remove denies')
        ts_out, tree = tregex_tsurgeon(tree_file, 'NP-denies', trts)

    # ADVP-A fallbacks: each one runs only if the output is still unchanged.
    if neg_type == 'ADVP-A' and s == ts_out:
        print('--- ADVP-A type 2')
        ts_out, tree = tregex_tsurgeon(tree_file, 'ADVP-A2', trts)
    if neg_type == 'ADVP-A' and s == ts_out:
        print('--- ADVP-A remove SBAR')
        ts_out, tree = tregex_tsurgeon(tree_file, 'ADVP-sbar', trts)
    if neg_type == 'ADVP-A' and s == ts_out:  # no longer
        print('--- ADVP-A remove ADVP')
        ts_out, tree = tregex_tsurgeon(tree_file, 'ADVP-advp', trts)
    if neg_type == 'ADVP-A' and s == ts_out:
        print('--- ADVP-A remove RB')
        ts_out, tree = tregex_tsurgeon(tree_file, 'ADVP-RB', trts)

    if 'SBAR' in tree:
        print('--- forced remove SBAR')
        ts_out, tree = tregex_tsurgeon(tree_file, 'forced-sbar', trts)

    # If any token is one of the labels in RM_POS, drop the first token.
    if any(tok in RM_POS for tok in ts_out.split()):
        print('--- remove POS')
        ts_out = ' '.join(ts_out.split()[1:])

    # Cut the negated part at the earliest contrastive conjunction. The original
    # used a bare `except` and kept the position of whichever marker came last
    # in RM_CP, which could leave an earlier marker in the output.
    tokens = ts_out.split()
    cp_positions = [pos for pos, tok in enumerate(tokens) if tok in RM_CP]
    if cp_positions:
        print('--- remove CP')
        ts_out = ' '.join(tokens[:cp_positions[0]])

    # Guard against an empty result: the original indexed split()[0]
    # unconditionally and raised IndexError when earlier steps removed everything.
    tokens = ts_out.split()
    if tokens and tokens[0] in _LEADING_NOISE:
        print('--- remove first token f if f in negated list or stopword list')
        ts_out = ' '.join(tokens[1:])

    if neg_type == 'VP-A' and len(ts_out) < 2:
        print('--- VP-A CC')
        # NOTE(review): every other call reads 'tmp_neg_tree'; 'ntree_tmp' looks
        # like a stale file name. Left unchanged -- confirm which file is meant.
        ts_out, tree = tregex_tsurgeon(data_dir + 'ntree_tmp', 'VP-CC', trts)

    return ts_out


for filename in filenames:
    data = pd.read_csv(data_dir + filename, index_col=False)

    # Work on a copy so the frame read from disk is not modified through an alias.
    output_data = data.copy()

    print(datetime.datetime.now())

    try:
        for i in range(len(data)):
            # Per-row defaults, set once per row. The original reset these
            # fields for every non-matching trigger term, which erased the
            # result of an earlier negated sentence in the same row and left
            # rows without any usable sentence unset.
            output_data.loc[i, 'Is_Negated'] = 0
            output_data.loc[i, 'Negation_Term'] = ''
            output_data.loc[i, 'Negated_Type'] = ''
            output_data.loc[i, 'Negated_Part'] = ''

            text = data.loc[i, 'sentence']
            if pd.isnull(text):
                continue

            for sentence in _split_sentences(text):
                # tag negated or affirmed based on string matching --- negation term list
                term = _first_negation_term(sentence)
                if term is None:
                    continue

                # If several sentences of one row are negated, the last one
                # determines the stored values (as in the original code).
                neg_type = neg_list.loc[neg_list.ITEM == term.strip(), 'TYPE'].values[0]
                output_data.loc[i, 'Is_Negated'] = 1
                output_data.loc[i, 'Negation_Term'] = term
                output_data.loc[i, 'Negated_Type'] = neg_type

                negated_part = _extract_negated_part(sentence, neg_type)
                if negated_part is None:
                    output_data.loc[i, 'Negated_Part'] = ''
                else:
                    output_data.loc[i, 'Negated_Part'] = str(negated_part)
                    print('>> ' + negated_part)
    finally:
        # Save whatever has been processed even if the run is interrupted (for
        # example by a CoreNLP read timeout). Rows not reached keep empty values
        # in the four result columns, so they stay distinguishable.
        print(datetime.datetime.now())

        # remove file extension from file name and save results in an output file
        output_data.to_csv(out_dir + os.path.splitext(filename)[0] + '_Negated_Output_Parsed.csv', index=False)

2020-04-07 15:09:54.776459
>> b'not recommended , because of the wide range of severe side effects associated with their use\r\n'
>> b'not always recorded ; 6/29 dogs ( 21 % ) did not receive regular treatment , 7/29 were treated regularly with isoxazoline systemic products ( 24 % ) , 2/29 with fipronil-based spot-ons ( 7 % ) and 2/29 ( 7 % ) with collars ( one flumethrin-based and one unrecorded brand ) .\r\n'
--- forced remove SBAR
>> b'no evidence supporting its use , which could actually be associated with concerning gastrointestinal dysbiosis\r\n'
>> b'not treated with terbinafine .\r\n'
>> b'Patients were excluded\r\n'
>> b'no licensed vaccines or alternative therapeutic options are available to prevent disease or treat patients\r\n'
>> b'no licensed vaccines or alternative therapeutic options are available to prevent disease or treat patients\r\n'
>> b'no licensed vaccines or alternative therapeutic options are available to prevent disease or treat patients\r\n'
>> b'Total RNA w

>> b'not just the opiate system ( 347 )\r\n'
--- remove CP
>> b'not only for achieving optimal benefi t
--- remove CP
>> b'not only for achieving optimal benefi t
>> b"noff et al ) 914 Cold boxes-making of , 300 Colitis , ulcerative -- cyclosporin in ( Gupta et al ) ( C ) 1277 ; seasonality of ( Myszor and Calam ) ( C ) 522 , ( Don and Goldacre ) ( C ) 1156 ; water-ski spill and ( Bundgaard and Jarnum ) ( C ) 1157 College of Health-alternative medicine guide , 1484 Colon cancer-and breast cancer ( Levine and Witte ) ( C ) 222 , ( Wilhams ) ( C ) ( Frohne and Pfander ) ( Hannmgton-Kiff ) ( R ) 556 Government statement , 1228 ; overheating and ( Stanton ) 1199 , ( Cohen ) ( C ) 1403 ; in SE Scotland ( Bain and Bartholomew ) ( C ) 1402 ; ventilatory dysfunction and ( E ) 558 Cotinine-urine levels in babies , maternal passive smoking and ( Woodward et al ) ( C ) 935 Co-trimoxazole-toxicity of ( Jick et al ) ( C ) 631 , ( Lennon ) ( C ) 1152 , Shann ) ( C ) 1477 Cotsides-use of ( E ) 383 C-

--- forced remove SBAR
>> b'Any calf diagnosed and previously treated with antibiotics or flunixin meglumine for BRD or any other disease was excluded\r\n'
--- forced remove SBAR
>> b'not induce oocysts shedding in cats recently or chronically experimentally infected with T gondii\r\n'
--- forced remove SBAR
>> b'not induce oocysts shedding in cats recently or chronically experimentally infected with T gondii\r\n'
--- remove CP
>> b'not only for the tumors to be visualized by PET ,
--- remove CP
>> b'not only for the tumors to be visualized by PET ,
>> b'not be used to treat CMV infections\r\n'
>> b'not meet the criteria for immunoprophylaxis\r\n'
--- forced remove SBAR
--- remove CP
>> b'Hepatitis has been reported from use of unprocessed refrigerated and frozen bone allografts ,
>> b'no source was identified ?\r\n'
>> b'not rec ~ mrnended .\r\n'
--- forced remove SBAR
>> b'~ ~ Children who are found to be infected with Mycobacterium tuberculosis should be excluded\r\n'
>> b'no source

--- forced remove SBAR
>> b'The long term estrogen deprived human breast cancer cell line MCF-7:5 C (NP-TMP -LSB- 9 , 12 -RSB- was cloned from parental MCF-7 cells following long term ( > (NP-TMP 12 months ) culture in estrogen-free\r\n'
>> b'not use antiviral drugs to treat flu patients prior to the 2009/A/H1N1 pandemic and were generally unfamiliar with these drugs ( Tamiflu and Relenza )\r\n'
>> b'not been shown to decrease the mortality rate in Turkey\r\n'
>> b'not treated with ribavirin died ( 58.3 % ) ; this difference was statistically significant\r\n'
>> b'not been done\r\n'
>> b'not been done\r\n'
>> b'not been done\r\n'
>> b'absence of the anti-hCD26 antibody\r\n'
>> b'not the equivalent S region of BatCoV ( C ) An antibody blocking assay using HKU4 pseudoviruses treated with 10 mg/ml trypsin and an anti-hCD26 antibody\r\n'
>> b'without a cytotoxic alkylating agent such as chlorambucil or cyclophosphamide ( Table 10 -3 ) .\r\n'
>> b'without a cytotoxic alkylating agent such a

>> b'without EDTA and were resuspended to final concentration of 1 \xc3\x97 10 5 cells/ml\r\n'
>> b'not recovered the righting reflex within 60 minutes after surgery , or became apnoeic postoperatively , were administered naloxone or atipamezole , depending on the group assignment\r\n'
>> b'not recovered the righting reflex within 60 minutes after surgery , or became apnoeic postoperatively , were administered naloxone or atipamezole , depending on the group assignment\r\n'
>> b'not have an impact on pain scoring (NP-TMP 3 hours after surgery .\r\n'
>> b'not have an impact on pain scoring (NP-TMP 3 hours after surgery .\r\n'
>> b'with no apparent effect on fecal viral shedding\r\n'
--- NP with that
--- forced remove SBAR
>> b'no FDA-licensed vaccines against arenaviruses and the only available antiviral therapy is limited to the use of ribavirin that is partially effective\r\n'
>> b'not been used since July 2005 to minimize drug toxicity during HDCT .\r\n'
>> b'Infants with inherited m

>> b'no proven effective therapy for unclassified interstitial pneumonia with fibrosis , recommended treatments include supportive care and immunosuppressive therapies such as corticosteroids , cyclophosphamide , and vincristine\r\n'
>> b'no proven effective therapy for unclassified interstitial pneumonia with fibrosis , recommended treatments include supportive care and immunosuppressive therapies such as corticosteroids , cyclophosphamide , and vincristine\r\n'
>> b'no proven effective therapy for unclassified interstitial pneumonia with fibrosis , recommended treatments include supportive care and immunosuppressive therapies such as corticosteroids , cyclophosphamide , and vincristine\r\n'
>> b'no proven effective therapy for unclassified interstitial pneumonia with fibrosis , recommended treatments include supportive care and immunosuppressive therapies such as corticosteroids , cyclophosphamide , and vincristine\r\n'
--- forced remove SBAR
>> b'not significantly reduce the inciden

>> b'neither sensitive for BRAF V600 mutations nor specific for BRAF V600E mutations , confounding accurate outcome evaluations and preventing its usefulness in selecting patient for Tafinlar therapy\r\n'
>> b'not been extensively evaluated\r\n'
>> b'no therapy\r\n'
>> b'no evidence of heart failure\r\n'
--- forced remove SBAR
>> b"not confer the (`` `` cardioprotection ('' '' of betablockers should the arrhythmia represent early DCM\r\n"
--- forced remove SBAR
>> b'neither atenolol nor diltiazem should be administered to cats with overt CHF , as such therapy has been shown to be either detrimental or not beneficial to short-term outcome .\r\n'
--- forced remove SBAR
>> b'neither atenolol nor diltiazem should be administered to cats with overt CHF , as such therapy has been shown to be either detrimental or not beneficial to short-term outcome .\r\n'
--- forced remove SBAR
>> b'neither atenolol nor diltiazem should be administered to cats with overt CHF , as such therapy has been shown

>> b'without ribavirin ( 25 \xce\xbcg/mL ) and OMT ( 50 \xce\xbcg/mL ) , and simultaneously treated with or without TLR3 activator ( polyI : C , 100 ng/mL ) , TLR4 activator LPS-B5 ( 1 \xce\xbcg/mL ) , TLR7/8 activator R-848 ( 10 \xce\xbcg/mL ) , PI-3K / Akt activator ( IGF-1 , 100 ng/mL ) , ERK activator ( EGF , 100 ng/mL ) , p38/JNK activator ( Anisomycin , 10 \xce\xbcM ) , and NF-\xce\xbaB activator ( PMA , 1 \xce\xbcg/mL )\r\n'
>> b'without ribavirin ( 25 \xce\xbcg/mL ) and OMT ( 50 \xce\xbcg/mL ) , and simultaneously treated with or without TLR3 activator ( polyI : C , 100 ng/mL ) , TLR4 activator LPS-B5 ( 1 \xce\xbcg/mL ) , TLR7/8 activator R-848 ( 10 \xce\xbcg/mL ) , PI-3K / Akt activator ( IGF-1 , 100 ng/mL ) , ERK activator ( EGF , 100 ng/mL ) , p38/JNK activator ( Anisomycin , 10 \xce\xbcM ) , and NF-\xce\xbaB activator ( PMA , 1 \xce\xbcg/mL )\r\n'
--- forced remove SBAR
>> b'not treated with any drugs ; in the positive control ( PC ) and OMT-treated groups , MDCK cells were

>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'without the use of the active technologies or enhancers described in the following paragraphs\r\n'
>> b'not treated with trypsin ( Fig\r\n'
--- forced remove SBAR
>> b'not responding to oral or inhaled therapy or when drug delivery by these routes is not feasible\r\n'
--- NP with that
--- forced remove SBAR
>> b'without obvious effects on IgG levels ) suggests that antibody-independent functions of B cells likely contribute to the observed therapeutic effect .\r\n'
--- f

>> b'no therapy with isavuconazole or voriconazole is the preferred firstline treatment and therapeutic drug monitoring should be utilized to ensure adequacy of dosing -LSB- 55 -RSB-\r\n'
>> b'no therapy with isavuconazole or voriconazole is the preferred firstline treatment and therapeutic drug monitoring should be utilized to ensure adequacy of dosing -LSB- 55 -RSB-\r\n'
>> b'no therapy with isavuconazole or voriconazole is the preferred firstline treatment and therapeutic drug monitoring should be utilized to ensure adequacy of dosing -LSB- 55 -RSB-\r\n'
--- forced remove SBAR
>> b'not replicated when a randomized controlled trial design as applied -LSB- 82 -RSB-\r\n'
--- forced remove SBAR
>> b'without taking bacterial culture results into account\r\n'
--- remove CP
>> b'Multiagent chemotherapy ( prednisone , vincristine , cyclophosphamide ) has been used to treat colonic lymphoma ,
--- remove CP
>> b'Multiagent chemotherapy ( prednisone , vincristine , cyclophosphamide ) has been 

--- NP with that
>> b"without differential cell count , influenza antigen or culture , diagnostic tests for respiratory organisms other than influenza ( respiratory syncytial virus , adenovirus , legionella ) , streptococcal throat screen , sputum culture or Gram stain , Gram stain for other respiratory specimens , blood cultures ) ; d ) requests for diagnostic imaging ( chest X-ray , chest computerized tomography , any respiratory sinus imaging ) ; and e ) new prescriptions ( none similar in the last 90 days ) , selected from the VA national formulary and grouped into the following parameters by expert consensus : cough remedies ( from VA national formulary ( VANF ) drug classes RE-200 , -301 , -302 , -502 , -503 , -507 , -508 , -513 , -516 , or codeine ) , (`` `` other cold remedies ('' '' ( from VANF CN-900 , MS-102 , NT-100 , -200 , -400 , -900 , RE-99 , -501 ) ; antiemetics ( from VANF GA-700 ) , antidiarrheals ( from VANF GA-400 ) , influenza-targeting antivirals ( neuraminidase 

--- NP with that
>> b"without differential cell count , influenza antigen or culture , diagnostic tests for respiratory organisms other than influenza ( respiratory syncytial virus , adenovirus , legionella ) , streptococcal throat screen , sputum culture or Gram stain , Gram stain for other respiratory specimens , blood cultures ) ; d ) requests for diagnostic imaging ( chest X-ray , chest computerized tomography , any respiratory sinus imaging ) ; and e ) new prescriptions ( none similar in the last 90 days ) , selected from the VA national formulary and grouped into the following parameters by expert consensus : cough remedies ( from VA national formulary ( VANF ) drug classes RE-200 , -301 , -302 , -502 , -503 , -507 , -508 , -513 , -516 , or codeine ) , (`` `` other cold remedies ('' '' ( from VANF CN-900 , MS-102 , NT-100 , -200 , -400 , -900 , RE-99 , -501 ) ; antiemetics ( from VANF GA-700 ) , antidiarrheals ( from VANF GA-400 ) , influenza-targeting antivirals ( neuraminidase 

>> b'not well tested .\r\n'
>> b'not well tested .\r\n'
>> b'not well tested .\r\n'
>> b'not responsive to metronidazole , prednisolone and dietary therapy , cytotoxic drugs may be considered\r\n'
>> b'not responsive to metronidazole , prednisolone and dietary therapy , cytotoxic drugs may be considered\r\n'
--- forced remove SBAR
>> b'note that the first-generation imidizoquinoline imiquimod is licensed as an antiviral as well as an anticancer topical therapy ( Aldara \xc2\xae ) and not a vaccine adjuvant\r\n'
--- forced remove SBAR
>> b'not the prolyl oligopeptidase ( POP ) inhibitor z-prolyl prolinal abolished the circulating levels of the peptide ( 27 )\r\n'
>> b'In ribosome profiling , often referred to as RiboSeq , cells are lysed under conditions optimised to minimise further ribosome movement ( addition of translation inhibitors , rapid free\r\n'
>> b'not changed by quercitrin , isoquercitrin and gallic acid ( Fig\r\n'
--- forced remove SBAR
>> b"without tears ('' '' arrived wh

>> b'without focal segmental/lobar infiltrates\r\n'
--- forced remove SBAR
>> b'no trehalose was treated with 2,3,4,6-tetra-O-acetyl-a-D-mannopyranosyl isothiocyanate , octavalent thiourea-bridged mannose cluster 84 was obtained after deprotection ( Figure 38 )\r\n'
>> b'without the indicated stimulants\r\n'
>> b'Briefly , MDCK cell monolayer cultured in 96-well plate was treated with ten-fold serially diluted BAL fluid ( 100 \xce\xbcl ) diluted in serum free\r\n'
>> b'Briefly , MDCK cell monolayer cultured in 96-well plate was treated with ten-fold serially diluted BAL fluid ( 100 \xce\xbcl ) diluted in serum free\r\n'
>> b'not received antibiotics in the previous 4 to 6 weeks include the following choices ( Table 4 ) : amoxicillin/clavulanate , amoxicillin ( 1.5 to 3.5 g/day ) , cefpo - doxime proxetil , or cefuroxime axetil\r\n'
>> b'not received antibiotics in the previous 4 to 6 weeks include the following choices : amoxicillin/clavulanate , amoxicillin ( 3 to 3.5 g/day ) , cefpod

>> b'rather than cefuroxime 19 in patients at risk of DRSP\r\n'
>> b'rather than cefuroxime 19 in patients at risk of DRSP\r\n'
>> b'rather than cefuroxime 19 in patients at risk of DRSP\r\n'
>> b'rather than cefuroxime 19 in patients at risk of DRSP\r\n'
>> b'rather than cefuroxime 19 in patients at risk of DRSP\r\n'
--- forced remove SBAR
>> b'notherapy , and combination therapy should be with cefotaxime or ceftriaxone plus a macrolide ( azithromycin or erythromycin ) if pseudomonal risks are not present\r\n'
--- forced remove SBAR
>> b'notherapy , and combination therapy should be with cefotaxime or ceftriaxone plus a macrolide ( azithromycin or erythromycin ) if pseudomonal risks are not present\r\n'
--- forced remove SBAR
>> b'notherapy , and combination therapy should be with cefotaxime or ceftriaxone plus a macrolide ( azithromycin or erythromycin ) if pseudomonal risks are not present\r\n'
--- forced remove SBAR
>> b'no risks of DRSP , therapy should be with an oral macrolide s

ReadTimeout: HTTPConnectionPool(host='localhost', port=9001): Read timed out. (read timeout=60)

In [20]:
# Write the current contents of `output_data` to disk. `filename` is the loop
# variable left over from the processing cell; its last four characters
# (the '.csv' extension) are dropped before the suffix is appended.
output_data.to_csv(
    ''.join([out_dir, filename[:-4], '_Negated_Output_Parsed.csv']),
    index=False,
)

In [None]:
# Count, per drug, how many rows were flagged as negated (Is_Negated == 1),
# then show the 20 drugs with the highest counts.
is_negated_row = output_data.Is_Negated == 1
negated_drug_column = output_data.loc[is_negated_row, 'drug']
negated_counts_by_drug = negated_drug_column.groupby(output_data['drug']).value_counts()
# groupby + value_counts yields a two-level (drug, drug) index; keep one level.
negated_drug_mentions = negated_counts_by_drug.droplevel(level=0)

print('Top 20 most negated drugs:\n')
print(negated_drug_mentions.nlargest(20))

In [None]:
# Count, per drug, how many rows were flagged as not negated (Is_Negated == 0),
# then show the 20 drugs with the highest counts.
is_asserted_row = output_data.Is_Negated == 0
asserted_drug_column = output_data.loc[is_asserted_row, 'drug']
asserted_counts_by_drug = asserted_drug_column.groupby(output_data['drug']).value_counts()
# groupby + value_counts yields a two-level (drug, drug) index; keep one level.
asserted_drug_mentions = asserted_counts_by_drug.droplevel(level=0)

print('Top 20 most asserted drugs:\n')
print(asserted_drug_mentions.nlargest(20))

#### Pros:
- quick and easy
- curated list of trigger terms for the medical context, which includes words like "ineffective" that are not recognized
  as negation by standard dependency parsing libraries such as scispacy
- could potentially capture cases such as
    (a) sentences containing "but"
    (b) comparison of drugs
    (c) when there is a negation word which is unrelated to the drug, e.g. "The drug is recommended in cases where the patient is not suffering from a prior chronic disease"

#### Cons:
- does not capture cases such as 
    (d) when negation is understood only with prior medical knowledge
    (e) double negation, e.g. "not ineffective" or "not useless"
- false negatives when the extracted negated part does not contain the drug name