In [81]:
import requests
import pyconll
from sklearn import metrics
from tqdm import tqdm

In [27]:
def askLamma(prompt, api_url="http://127.0.0.1:8042/llama/", max_gen_len=500, timeout=None):
    """Send one prompt to the Llama HTTP endpoint and return the first line of the reply.

    Args:
        prompt: Full prompt text; it is sent as the single element of ``prompts``.
        api_url: Endpoint that receives the POST request.
        max_gen_len: Value forwarded in the ``max_gen_len`` field of the payload.
        timeout: Seconds to wait for the server, forwarded to ``requests.post``;
            ``None`` waits indefinitely (the previous behaviour).

    Returns:
        The generated text up to, but not including, its first newline. When
        the generation contains no newline the whole text is returned.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    payload = {"prompts": [prompt], "max_gen_len": max_gen_len}
    response = requests.post(api_url, json=payload, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    predicted_text = response.json()['responses'][0]['generation']
    # str.find() returns -1 when there is no newline, and slicing with [:-1]
    # would silently drop the last character; partition() has no such trap.
    first_line, _, _ = predicted_text.partition('\n')
    return first_line

In [20]:
# Load the test split and keep only its first 1020 sentences.
frases = pyconll.load_from_file('data/porttinari-base/Porttinari-base_test.conllu')
# Every sentence becomes a list with one [form, xpos, upos, deprel] entry per
# token. The entries stay plain lists so later cells can index them.
reviews = [
    [[token.form, token.xpos, token.upos, token.deprel] for token in sent]
    for sent in frases[:1020]
]

In [24]:
# Build the few-shot prefix: the instruction followed by the first 10 sentences,
# each rendered as an "Entrada:" line (raw forms) and a "Saída:" line (form/UPOS).
promptStart = "Atuando como linguista, faça a análise morfossintática das frases seguindo a anotação UD (Universal Dependencies) como nos exemplos abaixo.\n\n"
for review in reviews[:10]:
    formas = [token[0] for token in review]
    # Tokens without a UPOS value are shown with the literal label 'None'.
    # The value is computed locally so `reviews` itself is left unmodified.
    etiquetas = [token[2] or 'None' for token in review]
    entrada = ("Entrada: " + ' '.join(formas)).strip()
    # The label is spelled "Saída: " (with the accent) so the examples use the
    # same cue that the evaluation cells append after the sentence to tag.
    pares = [forma + '/' + etiqueta for forma, etiqueta in zip(formas, etiquetas)]
    saida = ("Saída: " + ' '.join(pares)).strip()
    promptStart += entrada + "\n" + saida + "\n\n"
#print(promptStart)

In [25]:
# Size of the few-shot prefix in characters (not model tokens).
len(promptStart)

3034

In [73]:
# Few-shot UPOS tagging of a small slice of the test sentences.
# `gold` and `pred` end up as flat, token-aligned label lists for the report.
MAX_TRIES = 16  # same cap as before: give up after the 16th unusable answer
gold = []
pred = []
for review in reviews[20:24]:
    # Tokens without a UPOS value are scored under the literal label 'None'.
    # Computed locally so `reviews` is not modified by running this cell.
    goldSent = [token[2] or 'None' for token in review]
    gold += goldSent
    entrada = ("Entrada: " + ' '.join(token[0] for token in review)).strip()
    prompt = promptStart + entrada + "\nSaída: "
    tagsSent = None
    # A bounded loop: every failed attempt counts, including answers that
    # cannot be parsed at all, so a persistently malformed reply can no
    # longer keep the cell spinning forever.
    for tries in range(1, MAX_TRIES + 1):
        answer = askLamma(prompt)
        try:
            # rsplit keeps forms that contain '/' intact and takes the last
            # field as the tag; an item without any '/' raises IndexError.
            # split() (no argument) tolerates repeated or trailing spaces.
            candidate = [item.rsplit('/', 1)[1] for item in answer.split()]
        except IndexError:
            continue
        # Only answers with exactly one tag per gold token can be scored.
        if len(candidate) == len(goldSent):
            tagsSent = candidate
            break
    if tagsSent is None:
        print('error')
        # Fallback so gold and pred stay aligned token by token.
        tagsSent = ['None'] * len(goldSent)
    pred += tagsSent


Você/PRON ,/PUNCT por/ADP exemplo/NOUN ,/PUNCT você/PRON brilha/VERB um/NUM pouco/ADJ ./PUNCT
A/DET gente/PRON vai/VERB comprar/VERB pão/NOUN e/CCONJ fica/VERB ouvindo/VERB muita/ADJ coisinha/NOUN ./PUNCT
Nos/DET Em/ADP os/DET anos/NOUN 1990/NUM ,/PUNCT tínhamos/VERB uma/DET perspectiva/NOUN de/ADP um/DET mundo/NOUN sem/ADV fronteiras/ADJ ,/PUNCT algo/ADJ que/PRON nos/PRON em/ADP os/DET anos/NOUN 1980/NUM ,/PUNCT era/AUX inimaginável/ADJ ./PUNCT
Nos/PRON Em/ADP os/DET anos/NOUN 1990/NUM ,/PUNCT tínhamos/VERB uma/DET perspectiva/NOUN de/ADP um/DET mundo/NOUN sem/ADJ fronteiras/NOUN ,/PUNCT algo/ADJ que/SCONJ nos/PRON em/ADP os/DET anos/NOUN 1980/NUM ,/PUNCT era/AUX inimaginável/ADJ ./PUNCT
Nos/DET Em/ADP os/DET anos/NUM 1990/NUM ,/PUNCT tínhamos/VERB uma/DET perspectiva/NOUN de/ADP um/DET mundo/NOUN sem/ADV fronteiras/ADJ ,/PUNCT algo/ADJ que/PRON nos/PRON em/ADP os/DET anos/NUM 1980/NUM ,/PUNCT era/AUX inimaginável/ADJ ./PUNCT
Nos/PRON Em/ADP os/DET anos/NOUN 1990/NUM ,/PUNCT tínhamos

In [79]:
# Both lists must have the same length for the per-token comparison below.
print(len(gold),len(pred))
# Labels with no predicted (or no gold) samples have undefined scores;
# zero_division=0 reports them as 0.0, the same numbers as the default,
# without emitting a warning for each affected metric.
print(metrics.classification_report(gold, pred, zero_division=0))

57 57
              precision    recall  f1-score   support

         ADJ       0.60      0.75      0.67         4
         ADP       0.71      0.83      0.77         6
         ADV       0.00      0.00      0.00         1
         AUX       0.67      0.67      0.67         3
       CCONJ       1.00      1.00      1.00         1
         DET       0.80      0.57      0.67         7
        NOUN       0.89      0.73      0.80        11
         NUM       0.33      0.50      0.40         2
        None       1.00      0.33      0.50         3
        PRON       0.50      0.80      0.62         5
       PUNCT       1.00      1.00      1.00         8
        VERB       0.86      1.00      0.92         6

    accuracy                           0.75        57
   macro avg       0.70      0.68      0.67        57
weighted avg       0.78      0.75      0.75        57



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [83]:
# Few-shot UPOS tagging with a progress bar and retry bookkeeping.
# `totalTries` records the attempts used per sentence; `errors` counts the
# sentences for which no usable answer was obtained.
MAX_TRIES = 16  # same cap as before: give up after the 16th unusable answer
gold = []
pred = []
totalTries = []
errors = 0
for review in tqdm(reviews[20:24]):
    # Tokens without a UPOS value are scored under the literal label 'None'.
    # Computed locally so `reviews` is not modified by running this cell.
    goldSent = [token[2] or 'None' for token in review]
    gold += goldSent
    entrada = ("Entrada: " + ' '.join(token[0] for token in review)).strip()
    prompt = promptStart + entrada + "\nSaída: "
    tagsSent = None
    # A bounded loop: every failed attempt counts, including answers that
    # cannot be parsed at all, so a persistently malformed reply can no
    # longer keep the cell spinning forever.
    for tries in range(1, MAX_TRIES + 1):
        answer = askLamma(prompt)
        try:
            # rsplit keeps forms that contain '/' intact and takes the last
            # field as the tag; an item without any '/' raises IndexError.
            # split() (no argument) tolerates repeated or trailing spaces.
            candidate = [item.rsplit('/', 1)[1] for item in answer.split()]
        except IndexError:
            continue
        # Only answers with exactly one tag per gold token can be scored.
        if len(candidate) == len(goldSent):
            tagsSent = candidate
            break
    if tagsSent is None:
        errors += 1
        print('error')
        # Fallback so gold and pred stay aligned token by token.
        tagsSent = ['None'] * len(goldSent)
    totalTries.append(tries)
    pred += tagsSent
print('gold',len(gold),'pred',len(pred),'errors',errors,totalTries)
print(metrics.classification_report(gold, pred))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [01:14<00:00, 18.73s/it]

gold 57 pred 57 errors 0 [4, 1, 1, 1]
              precision    recall  f1-score   support

         ADJ       1.00      0.75      0.86         4
         ADP       0.67      0.67      0.67         6
         ADV       0.50      1.00      0.67         1
         AUX       1.00      0.67      0.80         3
       CCONJ       1.00      1.00      1.00         1
         DET       0.83      0.71      0.77         7
        NOUN       0.73      0.73      0.73        11
         NUM       0.33      0.50      0.40         2
        None       1.00      0.33      0.50         3
        PRON       0.57      0.80      0.67         5
       PUNCT       1.00      1.00      1.00         8
        VERB       0.86      1.00      0.92         6

    accuracy                           0.77        57
   macro avg       0.79      0.76      0.75        57
weighted avg       0.81      0.77      0.77        57






In [78]:
# Scratch cell: as written, this prints a single empty line.
print()

57


              precision    recall  f1-score   support

         ADJ       0.60      0.75      0.67         4
         ADP       0.71      0.83      0.77         6
         ADV       0.00      0.00      0.00         1
         AUX       0.67      0.67      0.67         3
       CCONJ       1.00      1.00      1.00         1
         DET       0.80      0.57      0.67         7
        NOUN       0.89      0.73      0.80        11
         NUM       0.33      0.50      0.40         2
        None       1.00      0.33      0.50         3
        PRON       0.50      0.80      0.62         5
       PUNCT       1.00      1.00      1.00         8
        VERB       0.86      1.00      0.92         6

    accuracy                           0.75        57
   macro avg       0.70      0.68      0.67        57
weighted avg       0.78      0.75      0.75        57



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [72]:
# Quick check of list repetition, the construct the evaluation loop uses to
# build its fallback tag list when no usable answer is obtained.
['None'] * 3

['None', 'None', 'None']

In [41]:
# Pull the tag (the field after the first '/') out of every space-separated
# item of the last model answer; `answer` must already exist in the kernel.
tagsSent = list(map(lambda piece: piece.split('/')[1], answer.split(' ')))

In [42]:
# Display the tag list parsed in the previous cell.
tagsSent

['DET',
 'PRON',
 'AUX',
 'VERB',
 'NOUN',
 'CCONJ',
 'VERB',
 'VERB',
 'ADV',
 'NOUN',
 'PUNCT']

In [3]:
# One-shot probe: an English example with NNP/VBZ-style tags, followed by an
# English sentence for the model to tag.
prompt = """Input: Ms. Haag plays Elianti .
Output: Ms./NNP Haag/NNP plays/VBZ Elianti/NNP ./.

Input: Mr. Bean loves giant bees.
Output: """
generation = askLamma(prompt)
print(generation)

Mr. Bean/VBD loves/VBD giant bees/NNP ./.


In [4]:
# One-shot probe: the same English example, followed by a sentence whose
# verb phrase is in Portuguese.
prompt = """Input: Ms. Haag plays Elianti .
Output: Ms./NNP Haag/NNP plays/VBZ Elianti/NNP ./.

Input: Mr. Bean adora abelhas gigantes.
Output: """
generation = askLamma(prompt)
print(generation)




In [5]:
# One-shot probe: the same English example, followed by a fully Portuguese
# sentence for the model to tag.
prompt = """Input: Ms. Haag plays Elianti .
Output: Ms./NNP Haag/NNP plays/VBZ Elianti/NNP ./.

Input: Uma bela casa amarela.
Output: """
generation = askLamma(prompt)
print(generation)

Uma/NNP bela/NNP casa/NNP amarela/NNP ./.
