In [None]:
import json
import pandas as pd
 
# Integer ids for the sentence labels.
# The corpus spells the "no argument" class as 'NEITHER' (see the label
# counts printed further down), so it shares id 0 with the older 'NONE'
# spelling; without this entry load_corpus(..., label_mapping=label2id)
# fails with KeyError: 'NEITHER'.
label2id = {
    'NONE': 0,
    'NEITHER': 0,
    'EVIDENCE': 1,
    'CLAIM': 2}
 
def load_corpus(path, label_mapping=None):
    """Load an annotated corpus from a JSON file into a per-document DataFrame.

    The file must contain a JSON object keyed by document identifier; every
    value must provide a ``'sentences'`` list and a parallel ``'labels'`` list.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON corpus file.
    label_mapping : dict, optional
        When a dict is given, every label is upper-cased and replaced by
        ``label_mapping[label]``. Otherwise the upper-cased label strings are
        kept as they are.

    Returns
    -------
    pandas.DataFrame
        One row per document with the columns ``'document'``, ``'sentences'``
        and ``'labels'`` (the last two hold lists).

    Raises
    ------
    KeyError
        If ``label_mapping`` is a dict that lacks one of the corpus labels.
    ValueError
        If a document has a different number of sentences and labels.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which would garble non-ASCII sentence text on some systems.
    with open(path, encoding='utf-8') as fp:
        corpus = json.load(fp)

    use_mapping = isinstance(label_mapping, dict)
    records = []
    for doc_id, entry in corpus.items():
        sentences = entry['sentences']
        label_names = [str(label).upper() for label in entry['labels']]
        # Sentences and labels are paired by position further down the
        # pipeline, so a length mismatch would silently shift the labels.
        if len(sentences) != len(label_names):
            raise ValueError(
                f'Document {doc_id!r} has {len(sentences)} sentences '
                f'but {len(label_names)} labels')
        if use_mapping:
            doc_labels = [label_mapping[name] for name in label_names]
        else:
            doc_labels = label_names
        records.append((doc_id, sentences, doc_labels))

    return pd.DataFrame(
        records, columns=['document', 'sentences', 'labels'])
 
# Load the corpus. label_mapping is left unset, so the labels stay as
# upper-cased strings rather than integer ids.
data = load_corpus('dataset_aueb_argument_v1.json')
print(f'Dataset length: {len(data)} abstracts')
# Preview a random sample of abstracts. The seed is fixed so the same rows
# are shown on every run, and the sample size is capped so a corpus with
# fewer than 20 abstracts does not raise.
data.sample(min(20, len(data)), random_state=42)

Dataset length: 916 abstracts


Unnamed: 0,document,sentences,labels
121,doi: 10.1007/s13361-019-02261-z,[Mapping Unsaturation in Human Plasma Lipids b...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
19,doi: 10.1002/ange.201700730,"[Synthesis of Dibenzo[hi,st\n]ovalene and Its ...","[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
719,doi: 10.1186/s41747-019-0100-y,[Quantification of liver fibrosis: extracellul...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
237,doi: 10.1016/j.redox.2019.101123,[Impact of inhibition of the autophagy-lysosom...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
294,doi: 10.1021/acscentsci.8b00176,[Designing Algorithms To Aid Discovery by Chem...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
589,doi: 10.1111/bcpt.12938,[Effect of Polymorphisms on the Pharmacokineti...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
197,doi: 10.1016/j.ijplas.2017.07.007,[Material length scale of strain gradient plas...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
158,doi: 10.1016/j.coph.2016.07.003,[Integrating structural and mutagenesis data t...,"[NEITHER, NEITHER, NEITHER, EVIDENCE, EVIDENCE..."
700,doi: 10.1186/s13073-017-0502-5,[Genetic variation in human drug-related genes...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."
447,doi: 10.1038/s41598-019-55454-7,[Using mechanistic models for the clinical int...,"[NEITHER, NEITHER, NEITHER, NEITHER, NEITHER, ..."


In [None]:
# Flatten the per-document sentence lists into one row per sentence.
# The row index of `data` is carried along and exposed as `doc_id`.
sentencesargument = (
    data['sentences']
    .explode()
    .reset_index()
    .rename(columns={'sentences': 'sentence', 'index': 'doc_id'})
)
sentencesargument

Unnamed: 0,doc_id,sentence
0,0,Concordance Between Different Amyloid Immunoas...
1,0,Importance Visual assessment of amyloid positr...
2,0,Several immunoassays have been developed to me...
3,0,The agreement between CSF Aβ42 measures from d...
4,0,Objective To determine the concordance between...
...,...,...
9380,915,"Instead, SBPs sample a range of conformations ..."
9381,915,Certain non-transported ligands leave the stru...
9382,915,"Intriguingly, in some cases, similar SBP confo..."
9383,915,"In this case, the inability for transport aris..."


In [None]:
# Flatten the per-document label lists into one row per label.
# The row index of `data` is carried along and exposed as `doc_id`.
labelsargument = (
    data['labels']
    .explode()
    .reset_index()
    .rename(columns={'labels': 'label', 'index': 'doc_id'})
)
labelsargument

Unnamed: 0,doc_id,label
0,0,NEITHER
1,0,NEITHER
2,0,NEITHER
3,0,NEITHER
4,0,NEITHER
...,...,...
9380,915,NEITHER
9381,915,NEITHER
9382,915,NEITHER
9383,915,NEITHER


In [None]:
# Same content as sentencesargument, with the column order reversed
# (sentence first, doc_id second).
s, s1 = sentencesargument['sentence'], sentencesargument['doc_id']
sentencesargnew = pd.concat((s, s1), axis='columns')
sentencesargnew

Unnamed: 0,sentence,doc_id
0,Concordance Between Different Amyloid Immunoas...,0
1,Importance Visual assessment of amyloid positr...,0
2,Several immunoassays have been developed to me...,0
3,The agreement between CSF Aβ42 measures from d...,0
4,Objective To determine the concordance between...,0
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",915
9381,Certain non-transported ligands leave the stru...,915
9382,"Intriguingly, in some cases, similar SBP confo...",915
9383,"In this case, the inability for transport aris...",915


In [None]:
# Pair every sentence with its label by joining the two frames on their
# shared row index. Both frames carry a doc_id column, so the result holds
# doc_id_x (from sentencesargument) and doc_id_y (from labelsargument).
dataargument = sentencesargument.merge(
    labelsargument, left_index=True, right_index=True)
print(dataargument.shape)
dataargument.head(5)

(9385, 4)


Unnamed: 0,doc_id_x,sentence,doc_id_y,label
0,0,Concordance Between Different Amyloid Immunoas...,0,NEITHER
1,0,Importance Visual assessment of amyloid positr...,0,NEITHER
2,0,Several immunoassays have been developed to me...,0,NEITHER
3,0,The agreement between CSF Aβ42 measures from d...,0,NEITHER
4,0,Objective To determine the concordance between...,0,NEITHER


In [None]:
# Number of sentences per label; rows with a missing label are kept as
# their own group.
dataargument.groupby(["label"], dropna=False).count()

Unnamed: 0_level_0,doc_id_x,sentence,doc_id_y
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CLAIM,954,954,954
EVIDENCE,1578,1578,1578
NEITHER,6853,6853,6853


In [None]:
# Keep only the rows of dataargument whose label is "CLAIM"; call the
# resulting DataFrame `claim`.
claim = dataargument.loc[dataargument['label'].eq('CLAIM')]
claim

Unnamed: 0,doc_id_x,sentence,doc_id_y,label
15,0,Conclusions and Relevance Concentrations of CS...,0,CLAIM
16,0,These findings suggest the benefit of implemen...,0,CLAIM
30,1,The increases in both neurofilament light and ...,1,CLAIM
31,1,Further investigations will be required to stu...,1,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM
...,...,...,...,...
9306,907,Drought hazard probability maps can contribute...,907,CLAIM
9341,910,The methodology proposed in this analysis prov...,910,CLAIM
9366,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,914,"These findings have wide implications, ranging...",914,CLAIM


In [None]:
# Take the sentence text of the claim rows (claim_s) and wrap it in a
# one-column DataFrame named "c".
claim_s = claim['sentence']
c = claim_s.to_frame()
c


Unnamed: 0,sentence
15,Conclusions and Relevance Concentrations of CS...
16,These findings suggest the benefit of implemen...
30,The increases in both neurofilament light and ...
31,Further investigations will be required to stu...
32,These preliminary findings demand that we ques...
...,...
9306,Drought hazard probability maps can contribute...
9341,The methodology proposed in this analysis prov...
9366,Our preliminary results on coarse lattices sho...
9374,"These findings have wide implications, ranging..."


In [None]:
# Tokenise in place: every sentence string in "c" becomes the list of its
# space-separated pieces.
c["sentence"] = c["sentence"].str.split(pat=" ")
c

Unnamed: 0,sentence
15,"[Conclusions, and, Relevance, Concentrations, ..."
16,"[These, findings, suggest, the, benefit, of, i..."
30,"[The, increases, in, both, neurofilament, ligh..."
31,"[Further, investigations, will, be, required, ..."
32,"[These, preliminary, findings, demand, that, w..."
...,...
9306,"[Drought, hazard, probability, maps, can, cont..."
9341,"[The, methodology, proposed, in, this, analysi..."
9366,"[Our, preliminary, results, on, coarse, lattic..."
9374,"[These, findings, have, wide, implications,, r..."


In [None]:
# Put every word of every claim sentence on its own row (explode) so that
# keywords revealing the claim label can be detected.
# Note: the column called 'doc_id' here holds the index "c" inherited from
# dataargument, i.e. a sentence row index, not a document number.
claim_words = (
    c['sentence']
    .explode()
    .reset_index()
    .rename(columns={'sentence': 'claim_words', 'index': 'doc_id'})
)
claim_words


Unnamed: 0,doc_id,claim_words
0,15,Conclusions
1,15,and
2,15,Relevance
3,15,Concentrations
4,15,of
...,...,...
24639,9384,conformational
24640,9384,dynamics
24641,9384,and
24642,9384,substrate


In [None]:
# Lower-case every token so that counting does not distinguish case.
claim_words = claim_words.assign(
    claim_words=claim_words['claim_words'].str.lower())
claim_words

Unnamed: 0,doc_id,claim_words
0,15,conclusions
1,15,and
2,15,relevance
3,15,concentrations
4,15,of
...,...,...
24639,9384,conformational
24640,9384,dynamics
24641,9384,and
24642,9384,substrate


In [None]:
# Remove punctuation characters from every token.
# regex=False makes each character a literal: several of them ("(", ")",
# "[", "]", ".") are regex metacharacters, and the default of `regex`
# differs between pandas versions.
b = "!,().-[]:"
for char in b:
    claim_words['claim_words'] = claim_words['claim_words'].str.replace(
        char, "", regex=False)

In [None]:
# Attach to every row the number of times its word occurs in claim_words.
claim_words = claim_words.assign(
    count=claim_words.groupby('claim_words')['claim_words'].transform('count'))
claim_words

Unnamed: 0,doc_id,claim_words,count
0,15,conclusions,54
1,15,and,822
2,15,relevance,9
3,15,concentrations,9
4,15,of,1037
...,...,...,...
24639,9384,conformational,2
24640,9384,dynamics,14
24641,9384,and,822
24642,9384,substrate,4


In [None]:
# One row per distinct word (the first occurrence is the one kept);
# the result is named argunique.
argunique = claim_words.drop_duplicates(subset=['claim_words'], keep='first')
argunique

Unnamed: 0,doc_id,claim_words,count
0,15,conclusions,54
1,15,and,822
2,15,relevance,9
3,15,concentrations,9
4,15,of,1037
...,...,...,...
24613,9366,thermalization,1
24619,9374,ranging,1
24627,9374,notch,1
24636,9384,ligandsbp,1


In [None]:
# Order the distinct words from most to least frequent (missing counts, if
# any, go first); the result is named arg1.
arg1 = argunique.sort_values('count', ascending=False, na_position='first')
arg1

Unnamed: 0,doc_id,claim_words,count
9,15,the,1319
4,15,of,1037
1,15,and,822
65,16,in,683
104,31,to,548
...,...,...,...
2439,960,340,1
10936,3788,ph,1
10940,3788,saturation,1
10943,3788,biogenic,1


In [None]:
# Keep only the words that appear more than twice (count > 2).
argtry = arg1.loc[arg1['count'].gt(2)]
argtry

Unnamed: 0,doc_id,claim_words,count
9,15,the,1319
4,15,of,1037
1,15,and,822
65,16,in,683
104,31,to,548
...,...,...,...
2616,1023,advanced,3
2677,1034,perturbations,3
14973,5217,adaptive,3
1319,480,epidemiological,3


In [None]:
import re

# Frequent words that start with "conclu" (conclusion, conclude, ...).
# str.match anchors the pattern at the start of each word.
starts_with_conclu = argtry['claim_words'].str.match(r'conclu', flags=re.IGNORECASE)
conclu = argtry[starts_with_conclu]
conclu

Unnamed: 0,doc_id,claim_words,count
0,15,conclusions,54
261,93,conclusion,49
2964,1125,conclude,16
1649,682,conclusions/interpretation,3


In [None]:
# Frequent words that start with "resul" (result, results, ...).
# str.match anchors the pattern at the start of each word.
starts_with_resul = argtry['claim_words'].str.match(r'resul', flags=re.IGNORECASE)
resul = argtry[starts_with_resul]
resul

Unnamed: 0,doc_id,claim_words,count
516,173,results,93
1545,640,result,12
4151,1559,resulting,9


In [None]:
# Inspect the 50 most frequent words.
argtry.head(n=50)

Unnamed: 0,doc_id,claim_words,count
9,15,the,1319
4,15,of,1037
1,15,and,822
65,16,in,683
104,31,to,548
60,16,a,413
84,30,that,323
236,68,for,308
21,15,with,259
268,93,is,238


In [None]:
# The first 100 words of argtry (sorted by descending count) as a plain list.
listofwords = argtry['claim_words'].iloc[:100].tolist()
listofwords

['the',
 'of',
 'and',
 'in',
 'to',
 'a',
 'that',
 'for',
 'with',
 'is',
 'be',
 'this',
 'we',
 'by',
 'as',
 'are',
 'our',
 'on',
 'these',
 'an',
 'can',
 'from',
 'results',
 'may',
 'at',
 'which',
 'or',
 'not',
 'data',
 'climate',
 'between',
 'study',
 'model',
 'conclusions',
 'changes',
 'conclusion',
 'have',
 'associated',
 'clinical',
 'could',
 'models',
 'show',
 'used',
 'it',
 'but',
 'future',
 'new',
 'more',
 'analysis',
 'potential',
 'studies',
 'suggest',
 'findings',
 'risk',
 'such',
 'both',
 'using',
 'high',
 'global',
 'patients',
 'different',
 'will',
 'change',
 'into',
 'understanding',
 'disease',
 'other',
 'zikv',
 'increase',
 'human',
 'approach',
 'role',
 'further',
 'important',
 'variability',
 'provide',
 'their',
 'higher',
 'were',
 'genetic',
 'when',
 'has',
 'also',
 'should',
 'however',
 'csf',
 'during',
 'treatment',
 'than',
 'over',
 'increased',
 'its',
 'response',
 'current',
 'how',
 'well',
 'demonstrate',
 'use',
 'possib

In [None]:
# Function words (articles, prepositions, auxiliaries, ...) to be removed
# from the keyword candidates.
cleanlist = (
    "the of and in to a that for with is be this we by as are our on these an can from "
    "may at which or not have it but will other their were than over its was such both when "
    "has also how well while"
).split()

In [None]:
# Drop the function words listed in cleanlist from the candidates.
is_function_word = argtry['claim_words'].isin(cleanlist)
argtry = argtry.loc[~is_function_word]
argtry

Unnamed: 0,doc_id,claim_words,count
516,173,results,93
1265,463,data,61
195,56,climate,59
173,49,between,59
105,31,study,55
...,...,...,...
2616,1023,advanced,3
2677,1034,perturbations,3
14973,5217,adaptive,3
1319,480,epidemiological,3


In [None]:
# Candidate keywords: remaining words that occur more than 30 times.
test1 = argtry.loc[argtry['count'].gt(30)]
test1


Unnamed: 0,doc_id,claim_words,count
516,173,results,93
1265,463,data,61
195,56,climate,59
173,49,between,59
105,31,study,55
259,80,model,54
0,15,conclusions,54
131,32,changes,50
261,93,conclusion,49
91,30,associated,48


In [None]:
# The candidate keywords as a plain Python list.
arg = test1['claim_words'].to_list()
arg

['results',
 'data',
 'climate',
 'between',
 'study',
 'model',
 'conclusions',
 'changes',
 'conclusion',
 'associated',
 'clinical',
 'could',
 'models',
 'show',
 'used',
 'future',
 'new',
 'more',
 'analysis',
 'potential',
 'studies',
 'suggest',
 'findings',
 'risk',
 'using',
 'high',
 'global',
 'patients',
 'different',
 'change',
 'into',
 'understanding',
 'disease',
 'zikv',
 'increase',
 'human',
 'approach',
 'role',
 'further']

In [None]:
# Final keyword list used to flag claim sentences.
# The first group is a hand-picked subset of the frequent words listed by
# the previous cell; the last two groups are the "resul*" and "conclu*"
# variants found above. A few words occur twice, which does not matter for
# the membership test that consumes this list.
arg = [
    'results', 'data', 'climate', 'study', 'model',
    'conclusions', 'changes', 'conclusion', 'associated', 'clinical',
    'models', 'show', 'future', 'analysis', 'potential',
    'studies', 'suggest', 'findings', 'risk', 'using',
    'high', 'global', 'patients', 'different', 'change',
    'into', 'understanding', 'disease',
    'results', 'result', 'resulting',
    'conclusions', 'conclusion', 'conclude', 'conclusions/interpretation',
]

In [None]:
# Take the sentence column of dataargument as a Series named test1.
# Note: this rebinds test1, which earlier held the frequent-word DataFrame.
test1 = dataargument.loc[:, 'sentence']

In [None]:
# Wrap the sentence Series in a pandas DataFrame. `t` and `test2` are two
# names for the same DataFrame object (no copy is made).
test2 = t = pd.DataFrame(test1)
# Add a LABEL column and give every sentence the default value 'NONE'.
test2['LABEL'] = 'NONE'
test2

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,NONE
4,Objective To determine the concordance between...,NONE
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# Prepare the claim DataFrame.
# claim_df is a second name for the same object as test2 (plain assignment,
# no copy), so the labels written through claim_df by the keyword loop below
# are also visible through test2 when it is merged with CLAIM later on.
claim_df=test2
claim_df

Unnamed: 0,sentence,LABEL
0,Concordance Between Different Amyloid Immunoas...,NONE
1,Importance Visual assessment of amyloid positr...,NONE
2,Several immunoassays have been developed to me...,NONE
3,The agreement between CSF Aβ42 measures from d...,NONE
4,Objective To determine the concordance between...,NONE
...,...,...
9380,"Instead, SBPs sample a range of conformations ...",NONE
9381,Certain non-transported ligands leave the stru...,NONE
9382,"Intriguingly, in some cases, similar SBP confo...",NONE
9383,"In this case, the inability for transport aris...",NONE


In [None]:
# Label a sentence 'claim' when at least one of its whitespace-separated
# tokens is literally contained in the keyword list `arg`.
# NOTE(review): the comparison is case- and punctuation-sensitive, whereas
# the keywords were derived from lower-cased, punctuation-stripped tokens, so
# e.g. "Results" or "findings," do not match - confirm this is intended.
keywords = frozenset(arg)
has_keyword = test1.loc[claim_df.index].map(
    lambda sentence: not keywords.isdisjoint(str.split(sentence))
).astype(bool)
# One .loc assignment replaces the chained claim_df['LABEL'][ind] = 'claim',
# which raises SettingWithCopyWarning and silently does nothing when pandas
# Copy-on-Write is enabled.
claim_df.loc[has_keyword, 'LABEL'] = 'claim'

In [None]:
# Count how many sentences the keyword loop labelled 'claim' versus 'NONE'.
claim_df['LABEL'].value_counts(sort=True)

NONE     5388
claim    3997
Name: LABEL, dtype: int64

In [None]:
# Keep the rows whose gold label is 'CLAIM' and assign them to a new
# variable named CLAIM.
CLAIM = dataargument.loc[dataargument['label'].eq('CLAIM')]
CLAIM

Unnamed: 0,doc_id_x,sentence,doc_id_y,label
15,0,Conclusions and Relevance Concentrations of CS...,0,CLAIM
16,0,These findings suggest the benefit of implemen...,0,CLAIM
30,1,The increases in both neurofilament light and ...,1,CLAIM
31,1,Further investigations will be required to stu...,1,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM
...,...,...,...,...
9306,907,Drought hazard probability maps can contribute...,907,CLAIM
9341,910,The methodology proposed in this analysis prov...,910,CLAIM
9366,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,914,"These findings have wide implications, ranging...",914,CLAIM


In [None]:
# Join the keyword-based labels (test2) with the gold CLAIM rows on the
# shared row index, so that each gold claim sits next to the label the
# heuristic gave it. The default inner join keeps only rows present in both.
cross = test2.merge(CLAIM, left_index=True, right_index=True)

print(cross.shape)
cross.head(50)

(954, 6)


Unnamed: 0,sentence_x,LABEL,doc_id_x,sentence_y,doc_id_y,label
15,Conclusions and Relevance Concentrations of CS...,claim,0,Conclusions and Relevance Concentrations of CS...,0,CLAIM
16,These findings suggest the benefit of implemen...,claim,0,These findings suggest the benefit of implemen...,0,CLAIM
30,The increases in both neurofilament light and ...,claim,1,The increases in both neurofilament light and ...,1,CLAIM
31,Further investigations will be required to stu...,claim,1,Further investigations will be required to stu...,1,CLAIM
32,These preliminary findings demand that we ques...,claim,1,These preliminary findings demand that we ques...,1,CLAIM
38,This work suggests that faults must be critica...,NONE,2,This work suggests that faults must be critica...,2,CLAIM
49,The partitioning of τtot between the two schem...,NONE,3,The partitioning of τtot between the two schem...,3,CLAIM
56,This suggests that typical circulation biases ...,claim,4,This suggests that typical circulation biases ...,4,CLAIM
68,The observed reduction in Vp during the earthq...,NONE,5,The observed reduction in Vp during the earthq...,5,CLAIM
80,Observations of air mass transformations inclu...,claim,6,Observations of air mass transformations inclu...,6,CLAIM


In [None]:
# Size of the comparison table (rows, columns).
cross.shape

(954, 6)

In [None]:
# Re-wrap the merge result in a DataFrame and show it.
cross = pd.DataFrame(data=cross)
cross

Unnamed: 0,sentence_x,LABEL,doc_id_x,sentence_y,doc_id_y,label
15,Conclusions and Relevance Concentrations of CS...,claim,0,Conclusions and Relevance Concentrations of CS...,0,CLAIM
16,These findings suggest the benefit of implemen...,claim,0,These findings suggest the benefit of implemen...,0,CLAIM
30,The increases in both neurofilament light and ...,claim,1,The increases in both neurofilament light and ...,1,CLAIM
31,Further investigations will be required to stu...,claim,1,Further investigations will be required to stu...,1,CLAIM
32,These preliminary findings demand that we ques...,claim,1,These preliminary findings demand that we ques...,1,CLAIM
...,...,...,...,...,...,...
9306,Drought hazard probability maps can contribute...,claim,907,Drought hazard probability maps can contribute...,907,CLAIM
9341,The methodology proposed in this analysis prov...,claim,910,The methodology proposed in this analysis prov...,910,CLAIM
9366,Our preliminary results on coarse lattices sho...,claim,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,"These findings have wide implications, ranging...",claim,914,"These findings have wide implications, ranging...",914,CLAIM


In [None]:
# Labels the keyword loop assigned to the gold claim sentences.
cross['LABEL'].value_counts(sort=True)

claim    595
NONE     359
Name: LABEL, dtype: int64

In [None]:
# Labels the professor assigned to the same sentences (the gold standard).
cross['label'].value_counts(sort=True)

CLAIM    954
Name: label, dtype: int64

In [None]:
# Another method to detect the claim label: take the LAST sentence of every
# document. keep='last' retains, for each doc_id_y, only its final row.
datatest = dataargument.drop_duplicates(subset=['doc_id_y'], keep='last')
datatest

Unnamed: 0,doc_id_x,sentence,doc_id_y,label
16,0,These findings suggest the benefit of implemen...,0,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM
38,2,This work suggests that faults must be critica...,2,CLAIM
49,3,The partitioning of τtot between the two schem...,3,CLAIM
56,4,This suggests that typical circulation biases ...,4,CLAIM
...,...,...,...,...
9351,911,"For this time period and this scenario, only 2...",911,NEITHER
9360,912,The mean timescales and length scales for rean...,912,NEITHER
9366,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,914,"These findings have wide implications, ranging...",914,CLAIM


In [None]:
# Gold label of each document's last sentence: shows how well the
# "last sentence is the claim" rule performs.
datatest.groupby(["label"], dropna=False).count()

Unnamed: 0_level_0,doc_id_x,sentence,doc_id_y
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CLAIM,473,473,473
EVIDENCE,50,50,50
NEITHER,393,393,393


In [None]:
# Join the last-sentence rows with the gold CLAIM rows on the shared row
# index; the default inner join keeps only rows present in both frames.
crossb = datatest.merge(CLAIM, left_index=True, right_index=True)

print(crossb.shape)
crossb.head(50)

(473, 8)


Unnamed: 0,doc_id_x_x,sentence_x,doc_id_y_x,label_x,doc_id_x_y,sentence_y,doc_id_y_y,label_y
16,0,These findings suggest the benefit of implemen...,0,CLAIM,0,These findings suggest the benefit of implemen...,0,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM,1,These preliminary findings demand that we ques...,1,CLAIM
38,2,This work suggests that faults must be critica...,2,CLAIM,2,This work suggests that faults must be critica...,2,CLAIM
49,3,The partitioning of τtot between the two schem...,3,CLAIM,3,The partitioning of τtot between the two schem...,3,CLAIM
56,4,This suggests that typical circulation biases ...,4,CLAIM,4,This suggests that typical circulation biases ...,4,CLAIM
68,5,The observed reduction in Vp during the earthq...,5,CLAIM,5,The observed reduction in Vp during the earthq...,5,CLAIM
94,7,This implies that efforts to further improve t...,7,CLAIM,7,This implies that efforts to further improve t...,7,CLAIM
110,11,"High fatigue resistance, bistability, and dras...",11,CLAIM,11,"High fatigue resistance, bistability, and dras...",11,CLAIM
163,18,"Finally, the polyoxometalate material from the...",18,CLAIM,18,"Finally, the polyoxometalate material from the...",18,CLAIM
188,22,"Furthermore, the transformation of template cl...",22,CLAIM,22,"Furthermore, the transformation of template cl...",22,CLAIM


In [None]:
# Re-wrap the merge result in a DataFrame and show it.
crossb = pd.DataFrame(data=crossb)
crossb

Unnamed: 0,doc_id_x_x,sentence_x,doc_id_y_x,label_x,doc_id_x_y,sentence_y,doc_id_y_y,label_y
16,0,These findings suggest the benefit of implemen...,0,CLAIM,0,These findings suggest the benefit of implemen...,0,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM,1,These preliminary findings demand that we ques...,1,CLAIM
38,2,This work suggests that faults must be critica...,2,CLAIM,2,This work suggests that faults must be critica...,2,CLAIM
49,3,The partitioning of τtot between the two schem...,3,CLAIM,3,The partitioning of τtot between the two schem...,3,CLAIM
56,4,This suggests that typical circulation biases ...,4,CLAIM,4,This suggests that typical circulation biases ...,4,CLAIM
...,...,...,...,...,...,...,...,...
9306,907,Drought hazard probability maps can contribute...,907,CLAIM,907,Drought hazard probability maps can contribute...,907,CLAIM
9341,910,The methodology proposed in this analysis prov...,910,CLAIM,910,The methodology proposed in this analysis prov...,910,CLAIM
9366,913,Our preliminary results on coarse lattices sho...,913,CLAIM,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,914,"These findings have wide implications, ranging...",914,CLAIM,914,"These findings have wide implications, ranging...",914,CLAIM


In [None]:
# Last-sentence method: count the labels of the rows that matched a gold claim.
crossb['label_x'].value_counts(sort=True)

CLAIM    473
Name: label_x, dtype: int64

In [None]:
# Gold label distribution over the last sentence of every document.
datatest['label'].value_counts(sort=True)

CLAIM       473
NEITHER     393
EVIDENCE     50
Name: label, dtype: int64

In [None]:
# Display the gold CLAIM rows (the professor's annotations) for comparison
# with the counts produced by the two heuristics above.
CLAIM

Unnamed: 0,doc_id_x,sentence,doc_id_y,label
15,0,Conclusions and Relevance Concentrations of CS...,0,CLAIM
16,0,These findings suggest the benefit of implemen...,0,CLAIM
30,1,The increases in both neurofilament light and ...,1,CLAIM
31,1,Further investigations will be required to stu...,1,CLAIM
32,1,These preliminary findings demand that we ques...,1,CLAIM
...,...,...,...,...
9306,907,Drought hazard probability maps can contribute...,907,CLAIM
9341,910,The methodology proposed in this analysis prov...,910,CLAIM
9366,913,Our preliminary results on coarse lattices sho...,913,CLAIM
9374,914,"These findings have wide implications, ranging...",914,CLAIM
