In [None]:
%cd /content/drive/My Drive/Colab Notebooks/comet_atomic2020_bart/

[Errno 2] No such file or directory: '/content/drive/My Drive/Colab Notebooks/comet_atomic2020_bart/'
/content


In [None]:
# Show the current working directory (sanity check after the %cd cell above).
!pwd

/content/drive/My Drive/Colab Notebooks/comet_atomic2020_bart


In [None]:
!pip install -r requirements.txt

In [None]:
import json
import torch
import argparse
from tqdm import tqdm
from pathlib import Path
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from utils import calculate_rouge, use_task_specific_params, calculate_bleu_score, trim_batch

In [None]:
# Mount Google Drive so that paths under /content/drive become accessible.
# NOTE(review): the notebook's first cell changes into a folder on the Drive,
# so the Drive needs to be mounted before that cell can succeed on a fresh runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in starts)


class Comet:
    """Inference wrapper around a pretrained seq2seq checkpoint and its tokenizer.

    Loads the model and tokenizer found at ``model_path``, moves the model to the
    GPU when one is available, and exposes :meth:`generate` to decode several
    candidate outputs for each text query.
    """

    def __init__(self, model_path, batch_size=1):
        """Load model and tokenizer.

        Args:
            model_path: Directory or identifier passed to ``from_pretrained``.
            batch_size: Number of queries tokenized and decoded per forward
                pass. Defaults to 1, the value that used to be hard-coded.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # NOTE(review): presumably applies the checkpoint's "summarization"
        # task-specific generation settings to the model -- confirm in utils.
        task = "summarization"
        use_task_specific_params(self.model, task)
        self.batch_size = batch_size
        self.decoder_start_token_id = None

    def generate(
        self,
        queries,
        decode_method="beam",
        num_generate=5,
    ):
        """Generate ``num_generate`` decoded outputs for every query.

        Args:
            queries: A sequence of query strings. A single string is also
                accepted and treated as a one-element list (it would otherwise
                be split into one query per character).
            decode_method: Accepted for interface compatibility but not used;
                the call always passes ``num_beams=num_generate`` and
                ``num_return_sequences=num_generate`` to ``model.generate``.
            num_generate: Number of beams and of returned sequences per query.

        Returns:
            A list with one entry per query, in input order; each entry is a
            list of ``num_generate`` decoded strings. An empty ``queries``
            yields an empty list.
        """
        # A bare string would be chunked character by character.
        if isinstance(queries, str):
            queries = [queries]
        queries = list(queries)

        decs = []
        with torch.no_grad():
            for batch in chunks(queries, self.batch_size):
                batch = self.tokenizer(
                    batch, return_tensors="pt", truncation=True, padding="max_length"
                ).to(self.device)
                # NOTE(review): trim_batch comes from utils; presumably it drops
                # the columns that contain only padding -- confirm there.
                input_ids, attention_mask = trim_batch(**batch, pad_token_id=self.tokenizer.pad_token_id)

                summaries = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    decoder_start_token_id=self.decoder_start_token_id,
                    num_beams=num_generate,
                    num_return_sequences=num_generate,
                )

                dec = self.tokenizer.batch_decode(
                    summaries, skip_special_tokens=True, clean_up_tokenization_spaces=False
                )
                # model.generate returns num_generate sequences per input, in
                # input order; regroup them so the result stays aligned with
                # `queries` whatever the batch size is.
                decs.extend(chunks(dec, num_generate))

        return decs

In [None]:
# Relation labels, kept in their original order and grouped by first letter.
# NOTE(review): presumably the relation names the checkpoint understands; the
# list itself is not referenced by the cells visible here.
all_relations = [
    "AtLocation",
    "CapableOf", "Causes", "CausesDesire", "CreatedBy",
    "DefinedAs", "DesireOf", "Desires",
    "HasA", "HasFirstSubevent", "HasLastSubevent", "HasPainCharacter",
    "HasPainIntensity", "HasPrerequisite", "HasProperty", "HasSubEvent",
    "HasSubevent", "HinderedBy",
    "InheritsFrom", "InstanceOf", "IsA",
    "LocatedNear", "LocationOfAction",
    "MadeOf", "MadeUpOf", "MotivatedByGoal",
    "NotCapableOf", "NotDesires", "NotHasA", "NotHasProperty", "NotIsA",
    "NotMadeOf",
    "ObjectUse",
    "PartOf",
    "ReceivesAction", "RelatedTo",
    "SymbolOf",
    "UsedFor",
    "isAfter", "isBefore", "isFilledBy",
    "oEffect", "oReact", "oWant",
    "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xReason", "xWant",
]

In [None]:
# Instantiate the Comet wrapper from the checkpoint directory, which is given
# relative to the current working directory.
print("model loading ...")
comet = Comet("./comet-atomic_2020_BART")
# NOTE(review): Comet.generate runs under torch.no_grad(), so this zero_grad()
# call looks unnecessary for inference -- confirm before removing.
comet.model.zero_grad()
print("model loaded")

model loading ...
model loaded


In [None]:
# Build one "<head> <relation> [GEN]" query and print the model's generations for it.
head = "PersonX eats an apple"
rel = "xNeed"
query = "{} {} [GEN]".format(head, rel)
queries = [query]
print(queries)
results = comet.generate(
    queries,
    decode_method="beam",
    num_generate=5,
)
print(results)

['PersonX eats an apple xNeed [GEN]']
[[' to buy an apple', ' to get an apple', ' to pick one up', ' to pick one', ' none']]


In [51]:
# Exploratory inputs: the bare head, the head combined with two relations, and a
# query that carries the [GEN] marker but no relation.
head = "Char1 wants to be rich"
rel1 = "xWant"  # listed in all_relations
rel2 = "xNeed"
query1 = "{} {} [GEN]".format(head, rel1)
query2 = "{} {} [GEN]".format(head, rel2)
# NOTE(review): the exact role of the trailing [GEN] marker is unconfirmed --
# check the COMET documentation.
query3 = "PersonX wants to be rich [GEN]"
queries_trial = [head, query1, query2, query3]

In [52]:
# Run the model on every trial query, five generations each.
results_trial = comet.generate(
    queries_trial,
    decode_method="beam",
    num_generate=5,
)

In [53]:
# Print each trial query, then its generations, then a blank separator line.
for i, query in enumerate(queries_trial):
    print(query, results_trial[i], "", sep="\n")

Char1 wants to be rich
[' Char1 wants to buy a house', ' Char1 wants to be rich.', ' Char1 has no money.', ' Char1 wants to buy a car', ' Char1 wants to be rich']

Char1 wants to be rich xWant [GEN]
[' to buy a house', ' to buy a car', ' to get a job', ' to work hard', ' to buy a mansion']

Char1 wants to be rich xNeed [GEN]
[' to work hard', ' none', ' to get a job', ' to have a job', ' to have money']

PersonX wants to be rich [GEN]
[' PersonX buys a new car', ' PersonX wants to be rich', ' PersonX buys a new house', ' PersonX works hard', ' PersonX spends money']



In [None]:
# Define the head explicitly: the recorded output of this experiment reads
# "PersonX wants to be rich oWant [GEN]", a head that no earlier cell assigns to
# `head`, so the cell used to depend on leftover kernel state.
head = "PersonX wants to be rich"
rel3 = "oWant"  # relation that came up in the lecture of Tuesday, March 15th
query4 = "{} {} [GEN]".format(head, rel3)
result_q4 = comet.generate([query4], decode_method="beam", num_generate=5)

In [None]:
# Show the oWant query and, on the next line, its generations.
print(query4, result_q4, sep="\n")

PersonX wants to be rich oWant [GEN]
[[' none', ' to help PersonX', ' to be rich too', ' to help him', ' to help them']]


In [50]:
# Mapping with a single 'PersonX' key and an empty list as its value.
# NOTE(review): it is not filled in by any of the cells visible here.
need_want_dict = dict(PersonX=[])

In [None]:
head = "Char1 eats an apple"
rel = "xNeed"
query = "{} {} [GEN]".format(head, rel)
# Start from a fresh list: with the append commented out, `queries` still held
# the earlier "PersonX eats an apple" query and the query built here was never
# sent to the model. Rebuilding the list also keeps the cell safe to re-run.
queries = [query]
print(queries)
results = comet.generate(queries, decode_method="beam", num_generate=5)
print(results)