In [2]:
%load_ext autoreload``
%autoreload 2

In [3]:
import os
os.chdir("../")

In [None]:
!pwd

In [4]:
import time
import random
from typing import List
import spacy
import openai
import numpy as np
import wandb
from datasets import load_dataset
from mega.data.load_datasets import load_xnli_dataset
from mega.data.data_utils import choose_few_shot_examples
from mega.prompting.instructions import INSTRUCTIONS
from mega.prompting.prompting_utils import load_prompt_template
from mega.utils.env_utils import load_openai_env_variables
# from mega.models.completion_models import get_model_pred, gpt3x_completion
from mega.models.tag_models import gpt3x_tagger
from mega.models.completion_models import gpt3x_completion
from mega.prompting.prompting_utils import construct_prompt, construct_qa_prompt, construct_tagging_prompt
from mega.data.load_datasets import load_tagging_dataset
from seqeval.metrics import f1_score
from tqdm.notebook import tqdm
from evaluate import load

# Set seed
random.seed(42)
np.random.seed(42)

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [5]:
# Make sure that {env_name}.env file is present in the envs/ directory
from dotenv import load_dotenv
env_name = "melange"
# load_env(env_name=env_name)
load_dotenv("envs/melange.env")

True

In [None]:
openai.api_version = "2023-03-15-preview"
openai.api_version

In [None]:
openai.api_base

In [8]:
model = "gpt-35-turbo"
pivot_lang = "fr"
tgt_lang = "fr"
prompt_name = "structure_prompting_chat"
few_shot_k = 8
dataset = "udpos"
# short_contexts = False
max_tokens = 100

In [9]:
config = {
    "model" : model,
    "pivot_lang": pivot_lang,
    "tgt_lang": tgt_lang,
    "prompt_name": prompt_name,
    "few_shot_k": few_shot_k,
    "dataset": dataset,
    "max_tokens": max_tokens
}

# wandb.init(project="GPT-4-eval", entity="scai-msri", config=config)

In [6]:
class SpacySentenceTokenizer:
    
    def __init__(self):
        self.nlp = spacy.load('xx_ent_wiki_sm')
        self.nlp.add_pipe("sentencizer")
        
    def __call__(self, text: str) -> List[str]:
        return list(map(lambda span: span.text, self.nlp(text).sents))


In [None]:
# def load_tagging_dataset(
#     dataset: str,
#     lang: str,
#     split: str,
#     dataset_frac: float = 1.0,
#     xtreme_dir: str = "xtreme/download",
#     delimiter: str = "_",
# ):

#     split = "dev" if split == "validation" else split

#     filename = f"{xtreme_dir}/{dataset}/{split}-{lang}.tsv"
#     inputs, labels = read_conll_data(filename)

#     dataset = Dataset.from_dict({"tokens": inputs, "tags": labels})
#     dataset = dataset.map(
#         lambda example: {
#             "tagged_tokens": [f"{token}{delimiter}{tag}"
#             for token, tag in zip(example["tokens"], example["tags"])]
#         }
#     )

#     N = len(dataset)
#     selector = np.arange(int(N * dataset_frac))
#     return dataset.select(selector)


In [14]:
train_dataset = load_tagging_dataset(dataset,
                                lang = pivot_lang,
                                split="dev")
# test_dataset = load_tagging_dataset(dataset,
#                                 lang = tgt_lang,
#                                 split="test")

Map:  46%|████▌     | 1825/3973 [00:00<00:00, 18149.41 examples/s]

Map: 100%|██████████| 3973/3973 [00:00<00:00, 16121.98 examples/s]


In [13]:
test_dataset[0]

{'tokens': ['From', 'the', 'AP', 'comes', 'this', 'story', ':'],
 'tags': ['ADP', 'DET', 'PROPN', 'VERB', 'DET', 'NOUN', 'PUNCT'],
 'tagged_tokens': ['From_ADP',
  'the_DET',
  'AP_PROPN',
  'comes_VERB',
  'this_DET',
  'story_NOUN',
  ':_PUNCT']}

In [15]:
train_examples = choose_few_shot_examples(
        train_dataset, few_shot_k, selection_criteria="random")

In [16]:
PROMPTS_DICT = {
    "structure_prompting": """C: {context}\nT: {tagged}""",
    "structure_prompting_chat": """Tag the following sentence: "{context}"\n{tagged}"""
}

In [17]:
prompt_template = PROMPTS_DICT[prompt_name]

In [18]:
# Loading instruction for the task
instruction = INSTRUCTIONS[dataset]
print(instruction)

You are an NLP assistant whose purpose is to perform Part of Speech (PoS) Tagging. PoS tagging is the process of marking up a word in a text (corpus) as corresponding to a particular part of speech, based on both its definition and its context. You will need to use the tags defined below:
    1. ADJ: adjective
    2. ADP: adposition
    3. ADV: adverb
    4. AUX: auxiliary
    5. CCONJ: coordinating-conjunction
    6. DET: determiner
    7. INTJ: interjection
    8. NOUN: noun
    9. NUM: numeral
    10. PART: particle
    11. PRON: pronoun
    12. PROPN: proper-noun
    13. PUNCT: punctuation
    14. SCONJ: subordinating-conjunction
    15. SYM: symbol
    16. VERB: verb
    17. X: other
    Do not try to answer the question! Just tag each token in the sentence.


In [19]:
train_examples

[{'tokens': ['In',
   '1599',
   ',',
   'a',
   'partnership',
   'of',
   'company',
   'members',
   'built',
   'their',
   'own',
   'theatre',
   'on',
   'the',
   'south',
   'bank',
   'of',
   'the',
   'River',
   'Thames',
   ',',
   'which',
   'they',
   'called',
   'the',
   'Globe',
   '.'],
  'tags': ['ADP',
   'NUM',
   'PUNCT',
   'DET',
   'NOUN',
   'ADP',
   'NOUN',
   'NOUN',
   'VERB',
   'DET',
   'ADJ',
   'NOUN',
   'ADP',
   'DET',
   'ADJ',
   'NOUN',
   'ADP',
   'DET',
   'PROPN',
   'PROPN',
   'PUNCT',
   'PRON',
   'PRON',
   'VERB',
   'DET',
   'NOUN',
   'PUNCT'],
  'tagged_tokens': ['In_ADP',
   '1599_NUM',
   ',_PUNCT',
   'a_DET',
   'partnership_NOUN',
   'of_ADP',
   'company_NOUN',
   'members_NOUN',
   'built_VERB',
   'their_DET',
   'own_ADJ',
   'theatre_NOUN',
   'on_ADP',
   'the_DET',
   'south_ADJ',
   'bank_NOUN',
   'of_ADP',
   'the_DET',
   'River_PROPN',
   'Thames_PROPN',
   ',_PUNCT',
   'which_PRON',
   'they_PRON',
   'called

In [20]:
valid_labels = set()
for example in train_examples:
    valid_labels.update(example["tags"])
valid_labels = list(valid_labels)
valid_labels

['AUX',
 'PROPN',
 'DET',
 'VERB',
 'ADV',
 'ADP',
 'PUNCT',
 'X',
 'ADJ',
 'PRON',
 'SCONJ',
 'NUM',
 'CCONJ',
 'NOUN',
 'PART']

In [21]:
test_example = test_dataset[186]

prompt, label = construct_tagging_prompt(
    train_examples,
    test_example,
    prompt_template=prompt_template,
    chat_prompt=True,
    instruction="Do not try to answer the question. Just tag each token in the sentence."
)
prompt

[{'role': 'system',
  'content': 'Do not try to answer the question. Just tag each token in the sentence.'},
 {'role': 'user',
  'content': 'Tag the following sentence: "In 1599 , a partnership of company members built their own theatre on the south bank of the River Thames , which they called the Globe ."'},
 {'role': 'assistant',
  'content': 'In_ADP 1599_NUM ,_PUNCT a_DET partnership_NOUN of_ADP company_NOUN members_NOUN built_VERB their_DET own_ADJ theatre_NOUN on_ADP the_DET south_ADJ bank_NOUN of_ADP the_DET River_PROPN Thames_PROPN ,_PUNCT which_PRON they_PRON called_VERB the_DET Globe_NOUN ._PUNCT'},
 {'role': 'user',
  'content': 'Tag the following sentence: "There he is , built like King Kong , as ambitious as the Empire State Building , as wide-eyed as Fay Wray , and as much a dream , an invention , as the movies and America itself ."'},
 {'role': 'assistant',
  'content': 'There_ADV he_PRON is_VERB ,_PUNCT built_VERB like_ADP King_PROPN Kong_PROPN ,_PUNCT as_ADV ambitious_A

In [None]:
test_example["tokens"]

In [None]:
preds = gpt3x_completion(
    prompt,
    model,
    test_example["tokens"],
    temperature=0,
    max_tokens=100
)

In [None]:
preds

In [None]:
" ".join(test_example["tagged_tokens"])

In [None]:
test_example = test_dataset[0]
prompt, label = construct_tagging_prompt(
    train_examples,
    test_example,
    prompt_template=prompt_template,
    chat_prompt=False,
    instruction=instruction
)
print(prompt)

In [None]:
preds = gpt3x_tagger(
    prompt,
    model,
    test_example["tokens"],
    one_shot_tag=False,
    temperature=0,
    max_tokens=5
)

In [None]:
preds

In [None]:
test_example["tags"]

In [None]:
get_model_pred(
    train_examples,
    test_example,
    prompt_template,
    verbalizer={},
    model=model,
    chat_prompt=True,
    instruction=instruction,
    one_shot_tag=True,
    max_tokens=max_tokens
    
)

In [None]:
preds = [pred if pred != "" else np.random.choice(valid_labels) for pred in preds]

In [None]:
print(preds)

In [None]:
print(f"Prediction: {preds}")
print(f"Label: {label}")

f1_score([preds], [label])

# prediction = {"prediction_text": pred, "id": test_example["id"]}
# reference = {}
# reference["answers"] = test_example["answers"]
# reference["id"] = test_example["id"]
# results = squad_metric.compute(
#             predictions=[prediction],
#             references=[reference]
#         )

In [None]:
f1_sum = 0
em_sum = 0
avg_em = 0
avg_f1 = 0

run_details = {"num_calls": 0}

pbar = tqdm(enumerate(test_dataset.select(range(1000))))

for i, test_example in pbar:    
    prompt, label = construct_tagging_prompt(
        train_examples,
        test_example,
        prompt_template=prompt_template,
        chat_prompt=True,
        instruction=instruction
    )
    preds = gpt3x_tagger(
        prompt,
        model,
        test_example["tokens"],
        one_shot_tag=True,
        temperature=0,
        max_tokens=100
    )
    preds = [pred if pred != "" else np.random.choice(valid_labels) for pred in preds]
    f1_sum += f1_score([preds], [label])
        
    avg_f1 = f1_sum / (i+1)
    
#     wandb.log({"f1": avg_f1})
#     wandb.log(run_details)
    pbar.set_description(f"f1: {avg_f1}")
#     time.sleep(1/2)