In [None]:
# import json file argument data set

import json
import pandas as pd
 
# Integer ids for the sentence labels.
# 'NEITHER' is the label the corpus actually uses for non-argumentative
# sentences; 'NONE' is kept as an alias with the same id so existing
# lookups keep working.
label2id = {
    'NONE': 0,
    'NEITHER': 0,
    'EVIDENCE': 1,
    'CLAIM': 2}
 
def load_corpus(path, label_mapping=None):
    """Load the argument corpus from a JSON file into a DataFrame.

    The JSON document is expected to be an object keyed by document id,
    where each value holds a 'sentences' list and a parallel 'labels' list.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON corpus file.
    label_mapping : dict, optional
        Maps upper-cased label names to replacement values (e.g. integer
        ids). When it is not a dict, labels are returned as upper-cased
        strings.

    Returns
    -------
    pandas.DataFrame
        One row per document with columns 'document', 'sentences' (list of
        sentences) and 'labels' (list of labels).

    Raises
    ------
    KeyError
        If `label_mapping` is a dict and a label is missing from it.
    """
    # Decode explicitly as UTF-8: without it open() falls back to the
    # platform locale, which can garble non-ASCII text such as Greek letters.
    with open(path, encoding='utf-8') as fp:
        corpus = json.load(fp)

    use_mapping = isinstance(label_mapping, dict)
    documents, texts, labels = [], [], []
    for doc_id, entry in corpus.items():
        label_names = [str(label).upper() for label in entry['labels']]
        documents.append(doc_id)
        texts.append(entry['sentences'])
        if use_mapping:
            labels.append([label_mapping[name] for name in label_names])
        else:
            labels.append(label_names)

    data = pd.DataFrame(
        list(zip(documents, texts, labels)),
        columns=['document', 'sentences', 'labels'])

    return data
 
# Labels are kept as upper-cased strings here; pass a dict as
# `label_mapping` to load_corpus to get mapped values instead.
data = load_corpus('dataset_aueb_argument_v1.json')
print(f'Dataset length: {len(data)} abstracts')
# Fixed seed so the displayed sample is the same on every run.
data.sample(20, random_state=42)


Dataset length: 916 abstracts


Unnamed: 0,document,sentences,labels
812,doi: 10.3389/fphys.2018.01306,[Modulation of Cardiac Alternans by Altered Sa...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
568,doi: 10.1098/rstb.2016.0138,[Evolution of phenotypic plasticity in extreme...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
276,doi: 10.1021/acs.jctc.6b00979,"[Rapid, Accurate, Precise, and Reliable Relati...","[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
28,doi: 10.1002/anie.201705721,[Human versus Robots in the Discovery and Crys...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
835,doi: 10.3390/jcm9030814,[Assessing the Implementation of Pharmacogenom...,"[NEITHER, EVIDENCE, CLAIM, NEITHER, NEITHER, N..."
363,doi: 10.1038/nchem.2577,[Total synthesis and structure–activity relati...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
348,doi: 10.1029/2019ms001791,"[Evaluation of CNRM Earth System Model, CNRM‐E...","[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
617,doi: 10.1126/science.aav2211,[Organic synthesis in a modular robotic system...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
853,doi: 10.3791/56889,[Application of RNAi and Heat-shock-induced Tr...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
758,doi: 10.1371/journal.pone.0226697,[Neurofilaments in blood is a new promising pr...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."


In [None]:
# One row per sentence: explode the per-abstract sentence lists and keep the
# originating row position of `data` in a 'doc_id' column.

sentencesargument = (
    data['sentences']
    .explode()
    .rename('sentence')
    .rename_axis('doc_id')
    .reset_index()
)
sentencesargument

Unnamed: 0,doc_id,sentence
0,0,Concordance Between Different Amyloid Immunoas...
1,0,Importance Visual assessment of amyloid positr...
2,0,Several immunoassays have been developed to me...
3,0,The agreement between CSF Aβ42 measures from d...
4,0,Objective To determine the concordance between...
...,...,...
9380,915,"Instead, SBPs sample a range of conformations ..."
9381,915,Certain non-transported ligands leave the stru...
9382,915,"Intriguingly, in some cases, similar SBP confo..."
9383,915,"In this case, the inability for transport aris..."


In [None]:
# One row per sentence label: explode the per-abstract label lists and keep
# the originating row position of `data` in a 'doc_id' column.
# The label column deliberately keeps the name 'labels' because later cells
# select and group by dataargument['labels'].

labelsargument = data['labels'].explode().reset_index().rename(
    columns={'index': 'doc_id'})
labelsargument

Unnamed: 0,doc_id,labels
0,0,NEITHER
1,0,NEITHER
2,0,NEITHER
3,0,NEITHER
4,0,NEITHER
...,...,...
9380,915,NEITHER
9381,915,NEITHER
9382,915,NEITHER
9383,915,NEITHER


In [None]:
# Same content as sentencesargument, with the columns in the order
# sentence, doc_id.
s = sentencesargument['sentence']
s1 = sentencesargument['doc_id']
sentencesargnew = pd.DataFrame({'sentence': s, 'doc_id': s1})
sentencesargnew

Unnamed: 0,sentence,doc_id
0,Concordance Between Different Amyloid Immunoas...,0
1,Importance Visual assessment of amyloid positr...,0
2,Several immunoassays have been developed to me...,0
3,The agreement between CSF Aβ42 measures from d...,0
4,Objective To determine the concordance between...,0
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",915
9381,Certain non-transported ligands leave the stru...,915
9382,"Intriguingly, in some cases, similar SBP confo...",915
9383,"In this case, the inability for transport aris...",915


In [None]:
# Put every sentence next to its label by joining the two exploded frames on
# their row index. Both inputs carry a doc_id column, so the result holds
# doc_id_x (from the sentences) and doc_id_y (from the labels).
dataargument = sentencesargument.merge(
    labelsargument, left_index=True, right_index=True)
print(dataargument.shape)
dataargument.head(5)

(9385, 4)


Unnamed: 0,doc_id_x,sentence,doc_id_y,labels
0,0,Concordance Between Different Amyloid Immunoas...,0,NEITHER
1,0,Importance Visual assessment of amyloid positr...,0,NEITHER
2,0,Several immunoassays have been developed to me...,0,NEITHER
3,0,The agreement between CSF Aβ42 measures from d...,0,NEITHER
4,0,Objective To determine the concordance between...,0,NEITHER


In [None]:
# Number of sentences per gold label (missing labels get their own group).
dataargument.groupby("labels", dropna=False).count()

Unnamed: 0_level_0,doc_id_x,sentence,doc_id_y
labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CLAIM,954,954,954
EVIDENCE,1578,1578,1578
NEITHER,6853,6853,6853


In [None]:
# Keep only the rows of dataargument whose gold label is "EVIDENCE".
# .copy() makes `evidence` an independent frame, so frames derived from it
# can be modified without pandas treating them as slices of dataargument.
evidence = dataargument.loc[dataargument['labels'] == 'EVIDENCE'].copy()
evidence

Unnamed: 0,doc_id_x,sentence,doc_id_y,labels
8,0,Main Outcomes and Measures The concordance of ...,0,EVIDENCE
9,0,"Results Of 262 participants (mean [SD] age, 70...",0,EVIDENCE
10,0,The mass spectrometry–derived Aβ42 values show...,0,EVIDENCE
11,0,The signal in the classic Aβ42-INNOTEST assay ...,0,EVIDENCE
12,0,"However, the classic Aβ42-INNOTEST assay showe...",0,EVIDENCE
...,...,...,...,...
9302,907,Our results demonstrate that the generalized P...,907,EVIDENCE
9304,907,"Spatially, our estimations suggest a higher pr...",907,EVIDENCE
9340,910,These show that higher values of P* generally ...,910,EVIDENCE
9364,913,Our study generalizes a previous one by Fukush...,913,EVIDENCE


In [None]:
# Take the sentence column of `evidence` and wrap it in a one-column
# DataFrame named "e".
evidence_s = evidence.loc[:, 'sentence']
e = evidence_s.to_frame()
e

Unnamed: 0,sentence
8,Main Outcomes and Measures The concordance of ...
9,"Results Of 262 participants (mean [SD] age, 70..."
10,The mass spectrometry–derived Aβ42 values show...
11,The signal in the classic Aβ42-INNOTEST assay ...
12,"However, the classic Aβ42-INNOTEST assay showe..."
...,...
9302,Our results demonstrate that the generalized P...
9304,"Spatially, our estimations suggest a higher pr..."
9340,These show that higher values of P* generally ...
9364,Our study generalizes a previous one by Fukush...


In [None]:
# Split every evidence sentence into a list of tokens on single spaces.
# The result is rebuilt from evidence_s (the untouched sentence strings)
# instead of from `e` itself, so re-running this cell gives the same frame
# rather than splitting already-split lists into NaN.
e = evidence_s.str.split(" ").to_frame()
e

Unnamed: 0,sentence
8,"[Main, Outcomes, and, Measures, The, concordan..."
9,"[Results, Of, 262, participants, (mean, [SD], ..."
10,"[The, mass, spectrometry–derived, Aβ42, values..."
11,"[The, signal, in, the, classic, Aβ42-INNOTEST,..."
12,"[However,, the, classic, Aβ42-INNOTEST, assay,..."
...,...
9302,"[Our, results, demonstrate, that, the, general..."
9304,"[Spatially,, our, estimations, suggest, a, hig..."
9340,"[These, show, that, higher, values, of, P*, ge..."
9364,"[Our, study, generalizes, a, previous, one, by..."


In [None]:
# Put each token of the evidence sentences on its own row so that keywords
# revealing the evidence label can be counted.
# Note: the 'doc_id' column holds the row index of the sentence in `e`.
evidence_words = (
    e['sentence']
    .explode()
    .rename('evidence_words')
    .rename_axis('doc_id')
    .reset_index()
)
evidence_words

Unnamed: 0,doc_id,evidence_words
0,8,Main
1,8,Outcomes
2,8,and
3,8,Measures
4,8,The
...,...,...
39270,9373,crucial
39271,9373,GLP-1/Notch
39272,9373,target
39273,9373,facilitating


In [None]:
# Normalise every token to lower case so counting is case-insensitive.
lowered_tokens = evidence_words['evidence_words'].str.lower()
evidence_words['evidence_words'] = lowered_tokens
evidence_words

Unnamed: 0,doc_id,evidence_words,count
0,8,main,2
1,8,outcomes,2
2,8,and,1442
3,8,measures,2
4,8,the,242
...,...,...,...
39270,9373,crucial,5
39271,9373,glp-1/notch,2
39272,9373,target,6
39273,9373,facilitating,2


In [None]:
# Remove the punctuation characters observed attached to tokens.
# str.translate deletes them literally in a single pass; several of these
# characters are regex metacharacters, so this avoids depending on the
# `regex` default of Series.str.replace.
b = "!,().[]:"
evidence_words['evidence_words'] = evidence_words['evidence_words'].str.translate(
    str.maketrans('', '', b))

In [None]:
# Attach to every row the number of rows that carry the same word.
rows_by_word = evidence_words.groupby('evidence_words')['evidence_words']
evidence_words['count'] = rows_by_word.transform('count')
evidence_words

Unnamed: 0,doc_id,evidence_words,count
0,8,main,7
1,8,outcomes,3
2,8,and,1445
3,8,measures,9
4,8,the,2052
...,...,...,...
39270,9373,crucial,5
39271,9373,glp-1/notch,2
39272,9373,target,8
39273,9373,facilitating,2


In [None]:
# One row per distinct word (the first occurrence is the one kept) -> evunique.
is_repeat = evidence_words.duplicated(subset='evidence_words')
evunique = evidence_words[~is_repeat]
evunique

Unnamed: 0,doc_id,evidence_words,count
0,8,main,7
1,8,outcomes,3
2,8,and,1445
3,8,measures,9
4,8,the,2052
...,...,...,...
39253,9373,silenced,1
39256,9373,polycomb,1
39260,9373,prc2,1
39266,9373,demethylase,1


In [None]:
# Order the distinct words from most to least frequent -> ev1.
ev1 = evunique.sort_values('count', ascending=False, na_position='first')
ev1.head(50)

Unnamed: 0,doc_id,evidence_words,count
4,8,the,2052
2,8,and,1445
6,8,of,1422
69,11,in,1041
123,13,to,704
140,14,a,578
15,8,with,529
306,36,that,366
614,63,for,358
254,28,is,286


In [None]:
# Check which variants of the word "evidence" occur: keep the words that
# start with "eviden" (case-insensitive).
import re
starts_with_eviden = ev1['evidence_words'].str.contains(
    r'^eviden', flags=re.IGNORECASE)
evid = ev1[starts_with_eviden]
evid

Unnamed: 0,doc_id,evidence_words,count
1337,271,evidence,29
350,37,evidenced,2
36850,8164,evident,1


In [None]:
# The one hundred most frequent words in the evidence sentences.
listofevidence = ev1['evidence_words'].iloc[:100].tolist()
listofevidence

['the',
 'and',
 'of',
 'in',
 'to',
 'a',
 'with',
 'that',
 'for',
 'is',
 'we',
 'by',
 'was',
 'were',
 'as',
 'from',
 'at',
 'on',
 'are',
 'results',
 'an',
 'between',
 'this',
 'not',
 'than',
 'or',
 'higher',
 'patients',
 'be',
 'increased',
 'more',
 'associated',
 'these',
 'model',
 'but',
 'also',
 'show',
 'compared',
 'which',
 'p',
 'over',
 'both',
 'when',
 'can',
 'during',
 'using',
 'showed',
 'levels',
 'our',
 'all',
 'analysis',
 '=',
 'high',
 'climate',
 'different',
 'found',
 '95%',
 'significantly',
 'ci',
 'observed',
 'significant',
 'models',
 'mean',
 'lower',
 'reduced',
 'changes',
 '1',
 'decreased',
 'after',
 'had',
 'csf',
 'have',
 'other',
 'only',
 'while',
 'under',
 'most',
 'increase',
 'baseline',
 'data',
 'within',
 'effect',
 'zikv',
 'temperature',
 'time',
 'their',
 'global',
 'disease',
 'change',
 'well',
 'respectively',
 '2',
 'expression',
 'cells',
 'controls',
 'find',
 '<',
 'such',
 'emissions',
 'study']

In [None]:
# Hand-picked stop-word list: words that are excluded from the keyword
# candidates further down.
cleanevidence = [
    'the', 'and', 'of', 'in', 'to', 'a', 'with', 'that', 'for', 'is',
    'we', 'by', 'was', 'were', 'as', 'from', 'at', 'on', 'are', 'an',
    'between', 'this', 'not', 'than', 'or', 'be', 'more', 'these', 'but',
    'also', 'which', 'over', 'both', 'when', 'can', 'our', 'all', 'after',
    'had', 'csf', 'have', 'other', 'only', 'while', 'under', 'most',
    'their',
]

In [None]:
# Keep only the words that occur more than twice (count > 2).
evtry = ev1.loc[ev1['count'].gt(2)]
evtry

Unnamed: 0,doc_id,evidence_words,count
4,8,the,2052
2,8,and,1445
6,8,of,1422
69,11,in,1041
123,13,to,704
...,...,...,...
422,45,forecasts,3
7980,1838,prevalent,3
30223,6418,lps,3
421,45,weather,3


In [None]:
# Drop every word that is on the stop-word list cleanevidence.
on_stop_list = evtry['evidence_words'].isin(cleanevidence)
evtry = evtry[~on_stop_list]
evtry

Unnamed: 0,doc_id,evidence_words,count
20,9,results,156
45,10,higher,91
175,26,patients,90
216,28,increased,88
492,48,associated,83
...,...,...,...
422,45,forecasts,3
7980,1838,prevalent,3
30223,6418,lps,3
421,45,weather,3


In [None]:
# Keep only the words that occur more than 30 times.
evtest1 = evtry.loc[evtry['count'].gt(30)]
evtest1

Unnamed: 0,doc_id,evidence_words,count
20,9,results,156
45,10,higher,91
175,26,patients,90
216,28,increased,88
492,48,associated,83
...,...,...,...
904,105,sensitivity,33
165,25,plasma,32
1065,162,energy,31
1587,418,vs,31


In [None]:
# Candidate keywords as a plain Python list, most frequent first.
ev = list(evtest1['evidence_words'])
ev

['results',
 'higher',
 'patients',
 'increased',
 'associated',
 'model',
 'show',
 'compared',
 'p',
 'during',
 'using',
 'showed',
 'levels',
 'analysis',
 '=',
 'high',
 'climate',
 'different',
 'found',
 '95%',
 'significantly',
 'ci',
 'observed',
 'significant',
 'models',
 'mean',
 'lower',
 'reduced',
 'changes',
 '1',
 'decreased',
 'increase',
 'baseline',
 'data',
 'within',
 'effect',
 'zikv',
 'temperature',
 'time',
 'global',
 'disease',
 'change',
 'well',
 'respectively',
 '2',
 'expression',
 'cells',
 'controls',
 'find',
 '<',
 'such',
 'emissions',
 'study',
 'due',
 'differences',
 'it',
 'two',
 'identified',
 'total',
 'years',
 'however',
 'precipitation',
 'infection',
 'effects',
 'low',
 'sensitivity',
 'plasma',
 'energy',
 'vs',
 'no']

In [None]:
# Keyword list used to flag evidence sentences (written out by hand).
ev = [
    'results', 'higher', 'patients', 'increased', 'associated', 'model',
    'show', 'compared', 'p', 'during', 'using', 'showed', 'levels',
    'analysis', '=', 'high', 'climate', 'different', 'found', '95%',
    'significantly', 'ci', 'observed', 'significant', 'models', 'mean',
    'lower', 'reduced', 'changes', '1', 'decreased', 'increase', 'baseline',
    'data', 'within', 'effect', 'zikv', 'temperature', 'time', 'global',
    'disease', 'change', 'well', 'respectively', '2', 'expression', 'cells',
    'controls', 'find', '<', 'such', 'emissions', 'study', 'due',
    'differences', 'it', 'two', 'identified', 'total', 'years', 'however',
    'precipitation', 'infection', 'effects', 'low', 'sensitivity', 'plasma',
    'energy', 'vs', 'no',
]

In [None]:
# Pull the sentence column of dataargument out as a Series named test1.
test1 = dataargument.loc[:, 'sentence']

In [None]:
# Wrap test1 in a one-column DataFrame; `t` and `test2` are two names for
# that same frame.
t = test1.to_frame()
test2 = t
# Add a LABEL column in which every sentence starts out as 'NONE'.
test2['LABEL'] = 'NONE'
test2

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,NONE
4,Objective To determine the concordance between...,NONE
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# wordsevidence_df is a second name for the very same DataFrame object as
# test2 (no copy is made): anything written to the LABEL column through one
# name is visible through the other.
wordsevidence_df=test2
wordsevidence_df

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,NONE
4,Objective To determine the concordance between...,NONE
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# Label a sentence 'evidence' when at least one of its tokens is a keyword
# from `ev`.
# The keywords were collected from lower-cased tokens with the characters
# "!,().[]:" removed, so each sentence token gets the same normalisation
# before the lookup; otherwise "Results" or "(mean" would never match.
evidence_vocab = set(ev)
punct_table = str.maketrans('', '', "!,().[]:")


def _mentions_evidence_word(sentence):
    """Return True if any normalised whitespace token of `sentence` is in `ev`."""
    return any(
        token.lower().translate(punct_table) in evidence_vocab
        for token in str(sentence).split())


keyword_hits = wordsevidence_df['sentence'].map(_mentions_evidence_word)
# A single .loc assignment writes into the frame itself; chained indexing
# (df['LABEL'][i] = ...) may write to a temporary copy instead.
wordsevidence_df.loc[keyword_hits, 'LABEL'] = 'evidence'

In [None]:
# Inspect the first 40 sentences together with the label set by the keyword rule.
wordsevidence_df.head(40)

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,evidence
4,Objective To determine the concordance between...,evidence
5,"Design, Setting, and Participants The study in...",evidence
6,Levels of CSF Aβ42 were analyzed using the cla...,evidence
7,Concentrations of CSF Aβ were assessed using a...,evidence
8,Main Outcomes and Measures The concordance of ...,evidence
9,"Results Of 262 participants (mean [SD] age, 70...",NONE


In [None]:
# Number of sentences the keyword rule labelled 'evidence' versus those left as 'NONE'.
wordsevidence_df['LABEL'].value_counts()

evidence    5521
NONE        3864
Name: LABEL, dtype: int64

In [None]:
# Among the gold EVIDENCE sentences, how many did the keyword rule flag?
# The join is computed here because cross2 is only assigned in the cell that
# follows, so referring to it at this point fails on a fresh kernel.
keyword_vs_gold = pd.merge(test2, evidence, left_index=True, right_index=True)
keyword_vs_gold['LABEL'].value_counts()

evidence    1191
NONE         387
Name: LABEL, dtype: int64

In [None]:
# Join the rule-based labels (test2) with the gold EVIDENCE rows on the row
# index, so only sentences whose gold label is EVIDENCE remain.
cross2 = test2.merge(evidence, left_index=True, right_index=True)


print(cross2.shape)
cross2.head(50)

(1578, 6)


Unnamed: 0,sentence_x,LABEL,doc_id_x,sentence_y,doc_id_y,labels
8,Main Outcomes and Measures The concordance of ...,evidence,0,Main Outcomes and Measures The concordance of ...,0,EVIDENCE
9,"Results Of 262 participants (mean [SD] age, 70...",NONE,0,"Results Of 262 participants (mean [SD] age, 70...",0,EVIDENCE
10,The mass spectrometry–derived Aβ42 values show...,evidence,0,The mass spectrometry–derived Aβ42 values show...,0,EVIDENCE
11,The signal in the classic Aβ42-INNOTEST assay ...,NONE,0,The signal in the classic Aβ42-INNOTEST assay ...,0,EVIDENCE
12,"However, the classic Aβ42-INNOTEST assay showe...",evidence,0,"However, the classic Aβ42-INNOTEST assay showe...",0,EVIDENCE
13,The accuracies of the newer assays improved si...,evidence,0,The accuracies of the newer assays improved si...,0,EVIDENCE
14,A combination of the Aβ42:Aβ40 ratio and T-tau...,evidence,0,A combination of the Aβ42:Aβ40 ratio and T-tau...,0,EVIDENCE
25,Main Outcomes and Measures Plasma neurofilamen...,NONE,1,Main Outcomes and Measures Plasma neurofilamen...,1,EVIDENCE
26,Results A total of 30 patients were enrolled (...,evidence,1,Results A total of 30 patients were enrolled (...,1,EVIDENCE
27,"The mean (SD) age was 69.1 (7.0) years, and 18...",evidence,1,"The mean (SD) age was 69.1 (7.0) years, and 18...",1,EVIDENCE


In [None]:
## Symbol-based evidence detection

In [None]:
# Characters whose presence in a sentence is treated as a sign of evidence.
# NOTE(review): "Ξ²", "Ξ" and "²" look like a mis-decoded "β" - confirm
# against the raw corpus encoding.
sym = [
    "%", "*", "<", ">", "+", "β",
    "=", "±", "‰", "Ξ²", "Ξ", "²",
]

In [None]:
# Prepare the frame for the symbol rule: same sentences, every LABEL reset
# to 'NONE'.
# assign() returns a new DataFrame, so the reset does not overwrite the
# keyword-based labels stored in test2 / wordsevidence_df (both names refer
# to one object).
symbols_df = test2.assign(LABEL='NONE')
symbols_df

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,NONE
4,Objective To determine the concordance between...,NONE
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# Label a sentence 'evidence' when any single character of its text is one
# of the entries in `sym`.
symbol_chars = set(sym)
has_symbol = symbols_df['sentence'].map(
    lambda text: any(ch in symbol_chars for ch in str(text)))
# A single .loc assignment writes into the frame itself; chained indexing
# (df['LABEL'][i] = ...) may write to a temporary copy instead.
symbols_df.loc[has_symbol, 'LABEL'] = 'evidence'

In [None]:
# Show the sentences together with the label set by the symbol rule.
symbols_df

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,evidence
3,The agreement between CSF Aβ42 measures from d...,evidence
4,Objective To determine the concordance between...,evidence
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# Count how many sentences the symbol rule labelled 'evidence' and how many stayed 'NONE'.
symbols_df['LABEL'].value_counts()


NONE        8464
evidence     921
Name: LABEL, dtype: int64

In [None]:
# Join the symbol-rule labels with the gold EVIDENCE rows on the row index,
# so only sentences whose gold label is EVIDENCE remain.
cross2 = symbols_df.merge(evidence, left_index=True, right_index=True)


print(cross2.shape)
cross2.head(50)

(1578, 6)


Unnamed: 0,sentence_x,LABEL,doc_id_x,sentence_y,doc_id_y,labels
8,Main Outcomes and Measures The concordance of ...,evidence,0,Main Outcomes and Measures The concordance of ...,0,EVIDENCE
9,"Results Of 262 participants (mean [SD] age, 70...",evidence,0,"Results Of 262 participants (mean [SD] age, 70...",0,EVIDENCE
10,The mass spectrometry–derived Aβ42 values show...,evidence,0,The mass spectrometry–derived Aβ42 values show...,0,EVIDENCE
11,The signal in the classic Aβ42-INNOTEST assay ...,evidence,0,The signal in the classic Aβ42-INNOTEST assay ...,0,EVIDENCE
12,"However, the classic Aβ42-INNOTEST assay showe...",evidence,0,"However, the classic Aβ42-INNOTEST assay showe...",0,EVIDENCE
13,The accuracies of the newer assays improved si...,evidence,0,The accuracies of the newer assays improved si...,0,EVIDENCE
14,A combination of the Aβ42:Aβ40 ratio and T-tau...,evidence,0,A combination of the Aβ42:Aβ40 ratio and T-tau...,0,EVIDENCE
25,Main Outcomes and Measures Plasma neurofilamen...,NONE,1,Main Outcomes and Measures Plasma neurofilamen...,1,EVIDENCE
26,Results A total of 30 patients were enrolled (...,NONE,1,Results A total of 30 patients were enrolled (...,1,EVIDENCE
27,"The mean (SD) age was 69.1 (7.0) years, and 18...",evidence,1,"The mean (SD) age was 69.1 (7.0) years, and 18...",1,EVIDENCE


In [None]:
# cross2 is already a DataFrame (it is the result of a merge), so it is
# displayed as-is without re-wrapping it in pd.DataFrame.
cross2

Unnamed: 0,sentence_x,LABEL,doc_id_x,sentence_y,doc_id_y,labels
8,Main Outcomes and Measures The concordance of ...,evidence,0,Main Outcomes and Measures The concordance of ...,0,EVIDENCE
9,"Results Of 262 participants (mean [SD] age, 70...",evidence,0,"Results Of 262 participants (mean [SD] age, 70...",0,EVIDENCE
10,The mass spectrometry–derived Aβ42 values show...,evidence,0,The mass spectrometry–derived Aβ42 values show...,0,EVIDENCE
11,The signal in the classic Aβ42-INNOTEST assay ...,evidence,0,The signal in the classic Aβ42-INNOTEST assay ...,0,EVIDENCE
12,"However, the classic Aβ42-INNOTEST assay showe...",evidence,0,"However, the classic Aβ42-INNOTEST assay showe...",0,EVIDENCE
...,...,...,...,...,...,...
9302,Our results demonstrate that the generalized P...,NONE,907,Our results demonstrate that the generalized P...,907,EVIDENCE
9304,"Spatially, our estimations suggest a higher pr...",NONE,907,"Spatially, our estimations suggest a higher pr...",907,EVIDENCE
9340,These show that higher values of P* generally ...,evidence,910,These show that higher values of P* generally ...,910,EVIDENCE
9364,Our study generalizes a previous one by Fukush...,NONE,913,Our study generalizes a previous one by Fukush...,913,EVIDENCE


In [None]:
# Among the gold EVIDENCE sentences in cross2, count how many the
# symbol rule labelled 'evidence' and how many it left as 'NONE'.
cross2['LABEL'].value_counts()

NONE        1173
evidence     405
Name: LABEL, dtype: int64