In [1]:
import sys
sys.path.append('../..')

import os
import filecmp
from term_lists import SubstringTagger
from converters.label_studio import export_texts

### I. Test basic functionality

In [2]:
# Export one text with its layer (passed directly, not wrapped in lists),
# using 'label' as the labelling attribute, and compare with the stored target.
rules = {'WHO': {'label': 'ORG'}, 'Switzerland': {'label': 'LOC'}, 'Ghebreyesus': {'label': 'PER'}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-1.json'
try:
    export_texts(output_file, text, layer, 'label')
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-1.json', shallow=False), "Basic functionality fails"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

In [3]:
# Same input as the single-text case, but passed as one-element lists.
# The result is compared with the same target file, so both call forms
# are expected to produce identical output.
rules = {'WHO': {'label': 'ORG'}, 'Switzerland': {'label': 'LOC'}, 'Ghebreyesus': {'label': 'PER'}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-1.json'
try:
    export_texts(output_file, [text], [layer], 'label')
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-1.json', shallow=False), "Basic functionality fails"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

In [4]:
# The labelling attribute is named 'ner' instead of 'label'; the attribute
# name is passed to export_texts as the fourth argument.
rules = {'WHO': {'ner': 'ORG'}, 'Switzerland': {'ner': 'LOC'}, 'Ghebreyesus': {'ner': 'PER'}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-2.json'
try:
    export_texts(output_file, [text], [layer], 'ner')
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-2.json', shallow=False), "Basic functionality fails"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

In [5]:
# Export two texts at once (the same text and layer repeated twice)
# and compare with the stored target.
rules = {'WHO': {'ner': 'ORG'}, 'Switzerland': {'ner': 'LOC'}, 'Ghebreyesus': {'ner': 'PER'}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-3.json'
try:
    export_texts(output_file, [text, text], [layer, layer], 'ner')
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-3.json', shallow=False), "Basic functionality fails"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

### II. Test export of additional fields

In [6]:
# Every rule carries an extra 'score' attribute, which is requested in the
# export through other_attributes.
rules = {
    'WHO': {'ner': 'ORG', 'score': 0.8},
    'Switzerland': {'ner': 'LOC', 'score': 0.9},
    'Ghebreyesus': {'ner': 'PER', 'score': 0.4}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-4.json'
try:
    export_texts(output_file, [text], [layer], 'ner', other_attributes=['score'])
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-4.json', shallow=False), "Score as additional attribute does not work"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

In [7]:
# Only the 'WHO' rule defines 'subtype'; the other two rules omit it, so the
# export is exercised with an attribute that is not set for every rule.
rules = {
    'WHO': {'ner': 'ORG', 'score': 0.8, 'subtype': 'PER/ORG'},
    'Switzerland': {'ner': 'LOC', 'score': 0.9},
    'Ghebreyesus': {'ner': 'PER', 'score': 0.4}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-5.json'
try:
    export_texts(output_file, [text], [layer], 'ner', other_attributes=['score', 'subtype'])
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-5.json', shallow=False), "Optional additional attribute does not work"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)

### III. Test Label Studio attributes

In [8]:
# Override the text_name and labelset_name keyword arguments of export_texts
# ('event' and 'class') and compare with the stored target.
rules = {'WHO': {'ner': 'ORG'}, 'Switzerland': {'ner': 'LOC'}, 'Ghebreyesus': {'ner': 'PER'}}
text = 'The headquarters of WHO is located in Switzerland and Ghebreyesus is the head of it.'
tagger = SubstringTagger(rules)
layer = tagger(text)
output_file = 'test-6.json'
try:
    export_texts(output_file, [text], [layer], 'ner', text_name='event', labelset_name='class')
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat signatures
    assert filecmp.cmp(output_file, 'targets/test-6.json', shallow=False), "Renaming Label Studio attributes fails"
finally:
    # Remove the temporary export even when the export or the assertion fails
    if os.path.exists(output_file):
        os.remove(output_file)