In [67]:
import json
import requests
import time
from xml.dom import minidom
import xml.etree.ElementTree as ET
import pandas as pd

# Base URL of the CLARIN-PL NLP REST service; the tagger helpers below append
# "/process" to it to build the request URL.
clarinpl_url = "http://ws.clarin-pl.eu/nlprest2/base"
# Value sent as the 'user' field of every CLARIN-PL request payload.
user_mail = "demo2019@nlpday.pl"

In [24]:
def morphoDita_tagger(text, display=False, timeout=60):
    """Tag ``text`` with the CLARIN-PL ``morphoDita`` pipeline.

    Posts a JSON payload (``text``, ``lpmn`` and ``user``) to
    ``clarinpl_url + "/process"`` and returns the response body.

    Args:
        text: The text to tag.
        display: When true, print the raw response body before returning it.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            Pass ``None`` to wait indefinitely, which was the old behaviour.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = clarinpl_url + "/process"
    lpmn = "morphoDita"

    payload = {'text': text, 'lpmn': lpmn, 'user': user_mail}
    headers = {'content-type': 'application/json'}

    # Without a timeout a stalled server would block the notebook forever.
    r = requests.post(url, data=json.dumps(payload), headers=headers,
                      timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # XML parser downstream.
    r.raise_for_status()
    ccl = r.content.decode('utf-8')

    if display:
        print(ccl)

    return ccl
        
        
def wcrft2_tagger(text, display=False, timeout=60):
    """Tag ``text`` with the CLARIN-PL ``wcrft2`` pipeline.

    Posts a JSON payload (``text``, ``lpmn`` and ``user``) to
    ``clarinpl_url + "/process"`` and returns the response body.

    Args:
        text: The text to tag.
        display: When true, print the raw response body before returning it.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            Pass ``None`` to wait indefinitely, which was the old behaviour.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = clarinpl_url + "/process"
    lpmn = "wcrft2"

    payload = {'text': text, 'lpmn': lpmn, 'user': user_mail}
    headers = {'content-type': 'application/json'}

    # Without a timeout a stalled server would block the notebook forever.
    r = requests.post(url, data=json.dumps(payload), headers=headers,
                      timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # XML parser downstream.
    r.raise_for_status()
    ccl = r.content.decode('utf-8')

    if display:
        print(ccl)

    return ccl


def krnnt_tagger(text, display=False, timeout=60):
    """Tag ``text`` with a KRNNT server listening on ``127.0.0.1:9003``.

    The text is sent as the UTF-8 encoded request body of a plain POST.

    Args:
        text: The text to tag.
        display: When true, print the raw response body before returning it.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            Pass ``None`` to wait indefinitely, which was the old behaviour.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems (e.g. the local
            server is not running) or timeout.
    """
    url = "http://127.0.0.1:9003/"

    # Without a timeout a stalled server would block the notebook forever.
    r = requests.post(url, data=text.encode('utf-8'), timeout=timeout)
    # Surface HTTP errors here rather than as an empty parse result later.
    r.raise_for_status()
    ccl = r.content.decode('utf-8')

    if display:
        print(ccl)

    return ccl

In [63]:
def ccl_to_bases_tags(ccl):
    """Extract a ``{base: ctag}`` mapping from a CCL XML document.

    For every ``<tok>`` element the interpretation is taken from the
    ``<lex>`` child marked ``disamb="1"`` when there is one, otherwise from
    the first ``<lex>`` child. Tokens that lack a ``<lex>``, ``<base>`` or
    ``<ctag>`` element are skipped instead of raising ``AttributeError``.

    Because the result is a dict keyed by base form, tokens sharing the same
    base overwrite each other (the last one wins).

    Args:
        ccl: The CCL document as an XML string.

    Returns:
        dict mapping each token's base form to its tag, in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If ``ccl`` is not well-formed XML.
    """
    tree = ET.fromstring(ccl)
    bases_tags = {}

    for tok in tree.iter('tok'):
        # Prefer the interpretation chosen by the tagger; fall back to the
        # first one when nothing is flagged. Elements must be compared with
        # ``is None`` because childless elements are falsy.
        lex = tok.find("./lex[@disamb='1']")
        if lex is None:
            lex = tok.find('./lex')
        if lex is None:
            continue

        base = lex.find('base')
        ctag = lex.find('ctag')
        if base is None or ctag is None:
            continue

        bases_tags[base.text] = ctag.text

    return bases_tags


def ccl_to_bases_tags_krnnt(ccl):
    """Build a ``{base: tag}`` mapping from tab-separated KRNNT output.

    Only lines ending with ``disamb`` are considered. Each such line is split
    on tabs; the second field becomes the key and the third the value. Later
    lines with the same key overwrite earlier ones.
    """
    disamb_rows = (
        row.split('\t')
        for row in ccl.splitlines()
        if row.endswith('disamb')
    )
    return {fields[1]: fields[2] for fields in disamb_rows}

In [71]:
# When True, each tagger prints its raw response before it is parsed.
# NOTE(review): this module-level name shadows IPython's `display` helper in
# the notebook namespace; kept as-is so existing references keep working.
display = False

text = "Przykładowe zdanie to jest."
#text = "A mogę, bo moim zdaniem jest do niczego. I to delikatnie mówiąc... Rzecz gustu :slight_smile: Ja sobie z przyjemnością obejrzałam wczoraj \"Live and Let Die\". Nareszcie bez Connery'ego!!! Pasuje. Najbardziej ze wszystkich. Może trochę za ładny, ale grą pasuje. IMHO. A Connery to nie święta krowa :wink:"

# One entry per tagger: (label used in the timing line, banner, function that
# calls the tagger, function that parses its output into {base: tag}).
tagger_runs = [
    ('morphoDita', '========== morphoDita ==========', morphoDita_tagger, ccl_to_bases_tags),
    ('wcrft2', '============ wcrft2 ============', wcrft2_tagger, ccl_to_bases_tags),
    ('krnnt', '============ krnnt =============', krnnt_tagger, ccl_to_bases_tags_krnnt),
]

tagger_frames = {}

for tagger_name, banner, run_tagger, parse_output in tagger_runs:
    print(banner)
    # perf_counter is monotonic, so the interval cannot be skewed by system
    # clock adjustments the way time.time() differences can.
    start_timer = time.perf_counter()

    ccl = run_tagger(text, display=display)

    bases_tags = parse_output(ccl)
    # Materialise the items view: a plain list of (base, tag) pairs is
    # accepted by every pandas version.
    tagger_frames[tagger_name] = pd.DataFrame(list(bases_tags.items()), columns=['base', 'tag'])

    print(bases_tags)

    exec_time = time.perf_counter() - start_timer
    print('%s exec time: %.4fs' % (tagger_name, exec_time))

# Keep the per-tagger frame names that the following cells display.
morphoDita_df = tagger_frames['morphoDita']
wcrft2_df = tagger_frames['wcrft2']
krnnt_df = tagger_frames['krnnt']

{'przykładowy': 'adj:sg:nom:n:pos', 'zdanie': 'subst:sg:nom:n', 'to': 'pred', 'być': 'fin:sg:ter:imperf', '.': 'interp'}
morphoDita exec time: 1.4980s
{'przykładowy': 'adj:sg:nom:n:pos', 'zdanie': 'subst:sg:nom:n', 'to': 'conj', 'być': 'fin:sg:ter:imperf', '.': 'interp'}
wcrft2 exec time: 1.4893s
{'przykładowy': 'adj:sg:nom:n:pos', 'zdanie': 'subst:sg:nom:n', 'to': 'pred', 'być': 'fin:sg:ter:imperf', '.': 'interp'}
krnnt exec time: 0.0168s


In [72]:
morphoDita_df

Unnamed: 0,base,tag
0,przykładowy,adj:sg:nom:n:pos
1,zdanie,subst:sg:nom:n
2,to,pred
3,być,fin:sg:ter:imperf
4,.,interp


In [73]:
wcrft2_df

Unnamed: 0,base,tag
0,przykładowy,adj:sg:nom:n:pos
1,zdanie,subst:sg:nom:n
2,to,conj
3,być,fin:sg:ter:imperf
4,.,interp


In [74]:
krnnt_df

Unnamed: 0,base,tag
0,przykładowy,adj:sg:nom:n:pos
1,zdanie,subst:sg:nom:n
2,to,pred
3,być,fin:sg:ter:imperf
4,.,interp
