In [1]:
!pip install huggingface_hub
!huggingface-cli login
!pip install wandb -qU
import wandb
wandb.login()



In [2]:
!nvidia-smi | grep MiB

| N/A   33C    P0    48W / 400W |      0MiB / 40960MiB |      0%      Default |


In [3]:
!pip install --quiet accelerate==0.23.0
!pip install --quiet peft==0.5.0
!pip install --quiet transformers
!pip install --quiet trl==0.4.7
!pip install --quiet wandb
!pip install --quiet evaluate
!pip install --quiet xformers
!pip install --quiet datasets
!pip install --quiet bitsandbytes
!pip install --quiet evaluate[rouge_score]
!pip install --quiet rouge_score

[0m

In [4]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)

In [5]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
new_model = "foobar8675/Mistral-7B-Instruct-v0.1-annotated3"
device_map = {"": 0}
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

In [6]:
# https://www.reddit.com/r/LocalLLaMA/comments/15uu6lk/how_do_you_really_really_prompt_llama_2/
def format_dolly(sample):
    instruction = f"""### You are solving the NLP problem. Annotate each word in the context as one of the
      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].
      The entity type definitions are:
      PER is a person,
      ORG is an organization,
      LOC is a location, and
      MISC is an event, nationality, or product.
      The answer should only contain the annotated Context.
      """
    context = f"### Context\n{sample['context']}" if len(sample["context"]) > 0 else None
    response = f"{sample['response']}"
    prompt = "\n\n".join([i for i in ["[INST]", instruction, context, "### Answer\n", "[/INST]", response] if i is not None])
    return prompt

def format_template(sample):
    sample['text'] = f"{format_dolly(sample)}"
    return sample

def format_prediction(sample):
    answer = sample['response']
    sample['response'] = ''
    prompt = f"{format_template(sample)['text']}"
    return prompt

tag_to_ent_dic = {
    0: '0',
    1: 'PER',
    2: 'PER',
    3: 'ORG',
    4: 'ORG',
    5: 'LOC',
    6: 'LOC',
    7: 'MISC',
    8: 'MISC',
}

def ent_tag_to_response(ent_tag, token):
    if ent_tag != '0':
        return f"<<{ent_tag}>>"
    else:
        return token

def tags_to_response(sample):
    sample["ent_tags"] = [tag_to_ent_dic[ent_tag] for ent_tag in sample['ner_tags']]
    sample["context"] = " ".join(sample['tokens'])
    sample["response"] = [ent_tag_to_response(ent_tag, token) for ent_tag, token in zip(sample['ent_tags'], sample['tokens'])]
    sample["response"] = " ".join(sample["response"])
    return sample

In [7]:
from datasets import load_dataset

dataset = load_dataset("conll2003", split="train")
dataset = dataset.shuffle(seed=42)
dataset = dataset.map(tags_to_response)
dataset = dataset.map(format_template)
dataset = dataset.train_test_split(test_size=0.05, shuffle=False)

print(dataset['test'])
print(dataset['test'][0])

train_dataset = dataset["train"]
eval_dataset = dataset["test"].select(range(100))

Dataset({
    features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'ent_tags', 'context', 'response', 'text'],
    num_rows: 703
})
{'id': '6139', 'tokens': ['BREGANCON', ',', 'France', '1996-08-25'], 'pos_tags': [22, 6, 22, 11], 'chunk_tags': [11, 0, 11, 12], 'ner_tags': [5, 0, 5, 0], 'ent_tags': ['LOC', '0', 'LOC', '0'], 'context': 'BREGANCON , France 1996-08-25', 'response': '<<LOC>> , <<LOC>> 1996-08-25', 'text': '[INST]\n\n### You are solving the NLP problem. Annotate each word in the context as one of the\n      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].\n      The entity type definitions are:\n      PER is a person,\n      ORG is an organization,\n      LOC is a location, and\n      MISC is an event, nationality, or product.\n      The answer should only contain the annotated Context.\n      \n\n### Context\nBREGANCON , France 1996-08-25\n\n### Answer\n\n\n[/INST]\n\n<<LOC>> , <<LOC>> 1996-08-25'}


In [8]:
eval_dataset

Dataset({
    features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'ent_tags', 'context', 'response', 'text'],
    num_rows: 100
})

In [9]:
tokenizer = AutoTokenizer.from_pretrained(
    new_model,
    model_max_length=2048)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        bf16 = True
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

model = AutoModelForCausalLM.from_pretrained(new_model, quantization_config=bnb_config)

Your GPU supports bfloat16: accelerate training with bf16=True


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
!nvidia-smi | grep MiB

| N/A   33C    P0    55W / 400W |   5439MiB / 40960MiB |     25%      Default |


In [12]:
%%time
index = 1
sample = eval_dataset[index]
prompt = format_prediction(sample)
print("--- prompt ---")
print(prompt)
print("--- ref ---")
ref = eval_dataset[1]['response']
print(ref)
toks = tokenizer(prompt, return_tensors="pt")
res = model.generate(
    **toks.to("cuda"),
    max_new_tokens=1000,
    do_sample=True,
    top_k=0,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    temperature=0.1,
    ).to('cuda')

result = tokenizer.batch_decode(res)[0]
result = result.split("[/INST]\n\n")[1]
result = result.replace("</s>", "")
print("--- prediction ---")
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- prompt ---
[INST]

### You are solving the NLP problem. Annotate each word in the context as one of the
      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].
      The entity type definitions are:
      PER is a person,
      ORG is an organization,
      LOC is a location, and
      MISC is an event, nationality, or product.
      The answer should only contain the annotated Context.
      

### Context
SOCCER - SMICER 'S LAST-GASP GOAL KEEPS LENS IN THE LEAD .

### Answer


[/INST]


--- ref ---
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
--- prediction ---
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
CPU times: user 2.02 s, sys: 20.6 ms, total: 2.04 s
Wall time: 2.05 s


In [13]:
%%time

import evaluate

predictions = []
references = []
rouge = evaluate.load('rouge')
for sample in eval_dataset:
    ref = sample['response']
    prompt = format_prediction(sample)

    response_toks = tokenizer(sample['context'], return_tensors="pt")
    response_toks_len = len(response_toks["input_ids"][0])

    toks = tokenizer(prompt, return_tensors="pt")
    res = model.generate(
        **toks.to("cuda"),
        max_new_tokens=min(response_toks_len*2, 1024),
        do_sample=True,
        top_k=0,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        temperature=0.1,
        ).to('cuda')

    result = tokenizer.batch_decode(res)[0]
    result = result.split("[/INST]\n\n")[1]
    result = result.replace("</s>", "")
    result = result.replace("\n\n", "")
    if "### Answer" in result:
        result = result.split("### Answer")[0]
    print(ref)
    print("----")
    print(result)
    predictions.append(result)
    references.append(ref)

results = rouge.compute(predictions=predictions,
                         references=references)
print(results)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> , <<LOC>> 1996-08-25
----
<<LOC>> , <<LOC>> 1996-08-25


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
----
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


43. <<LOC>> 7.75
----
43. <<LOC>> 7.75


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Net Income Per
----
Net Income Per


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<MISC>> and <<MISC>> league soccer
----
<<MISC>> and <<MISC>> league


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


But the <<ORG>> unit is far more profitable , he said .
----
But the <<ORG>> unit is far more profitable , he said .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


BASEBALL - <<ORG>> RALLY TO BEAT <<ORG>> .
----
BASEBALL - <<ORG>> RALLY TO BEAT <<ORG>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


A passenger train collided with a locomotive at a main railway station in <<LOC>> on Wednesday and police said around 10 people were injured .
----
A passenger train collided with a locomotive at a main railway station in <<LOC>> on Wednesday and police said around 10 people were injured .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 4 1 3 0 9 3 6
----
<<ORG>> 4 1 3 0 9 3 6


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


6-1 7-5
----
6-1 7-5


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 62 62 .500 4 1/2
----
<<ORG>> 62 62 .500 4 1/2


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


matches in the <<MISC>> soccer championship .
----
matches in the <<MISC>> soccer championship .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> failed to progress beyond the opening group phase in <<MISC>> <<MISC>> .
----
<<LOC>> failed to progress beyond the opening group phase in <<MISC>> <<MISC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 65 65 .500 10
----
<<ORG>> 65 65 .500 10


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R
----
R###


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> has invited political leaders to a meeting on Thursday to discuss the final form of the pact which both <<MISC>> and <<MISC>> nationalists oppose for different reasons .
----
<<PER>> has invited political leaders to a meeting on Thursday to discuss the final form of the pact which both <<MISC>> and <<MISC>> nationalists oppose for different reasons .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-22
----
<<LOC>> 1996-08-22


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>>
----
<<ORG>>


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


No arrests had been made , a police spokesman said .
----
No arrests had been made , a police spokesman said .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> , chief executive of the <<ORG>> <<ORG>> <<ORG>> , said : " I do n't think many firms involved in tendering for sensitive projects realise the impact environmental activity has on the cost of running a project .
----
<<PER>> <<PER>> , chief executive of the <<ORG>> <<ORG>> <<ORG>> , said : " I do n't think many firms involved in tendering for sensitive projects realise the impact environmental activity has on the cost of running a project .<<PER>> <<PER>> , chief executive of the <<ORG>> <<ORG>> <<ORG>> , said : " I do n't


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


To bat - <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>>
----
To bat - <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>> , <<PER>> <<PER>>### Context
The 22-year-old former world champion , who has been playing for the last two


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Of the 750 mid-to-large size corporations surveyed , 81 percent had undergone a major restructuring in the last three years , and more than two-thirds reported that productivity and profits were up as result .
----
Of the 750 mid-to-large size corporations surveyed , 81 percent had undergone a major restructuring in the last three years , and more than two-thirds reported that productivity and profits were up as result .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> said he did not expect <<LOC>> 's veto of the <<MISC>> <<MISC>> <<MISC>> <<MISC>> ( <<MISC>> ) to damage bilateral ties with other nations .
----
<<PER>> said he did not expect <<LOC>> 's veto of the <<MISC>> <<MISC>> <<MISC>> <<MISC>> ( <<MISC>> ) to damage bilateral ties with other nations .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> has complained of lower back pain since her trip to <<LOC>> in May , when the pain forced her to go to <<LOC>> <<LOC>> <<LOC>> for an examination .
----
<<PER>> has complained of lower back pain since her trip to <<LOC>> in May , when the pain forced her to go to <<LOC>> <<LOC>> <<LOC>> for an examination .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-28
----
<<LOC>> 1996-08-28


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Aggregate 2-2 .
----
Aggregate 2-2 .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


" The defence ministers will begin negotiations for this at the beginning of September at a <<ORG>> meeting , " he told the newspaper in an interview , excerpts of which were released ahead of publication on Sunday .
----
" The defence ministers will begin negotiations for this at the beginning of September at a <<ORG>> meeting , " he told the newspaper in an interview , excerpts of which were released ahead of publication on Sunday .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


7,000 .
----
7,000 .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The <<LOC>> <<LOC>> rugby selectors recalled fly-half <<PER>> <<PER>> on Wednesday when they announced their team for the third and final test in <<LOC>> on Saturday .
----
The <<LOC>> <<LOC>> rugby selectors recalled fly-half <<PER>> <<PER>> on Wednesday when they announced their team for the third and final test in <<LOC>> on Saturday .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 3 <<ORG>> <<ORG>> 1
----
<<ORG>> 3 <<ORG>> <<ORG>> 1


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


" Bankruptcy is always possible , particularly when you lose what we are going to lose in the first half of this year , " <<PER>> said . "
----
" Bankruptcy is always possible , particularly when you lose what we are going to lose in the first half of this year , " <<PER>> said . "


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> ( 5-2 ) won his second straight start , allowing two runs and six hits with two walks and three strikeouts over eight innings .
----
<<PER>> <<PER>> ( 5-2 ) won his second straight start , allowing two runs and six hits with two walks and three strikeouts over eight innings .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<MISC>> said its shipment of ammunition from <<LOC>> was evidence of <<MISC>> military support for <<PER>> 's government .
----
<<ORG>> said its shipment of ammunition from <<LOC>> was evidence of <<MISC>> military support for <<PER>> 's government .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The rest of the side is made up mainly of <<MISC>> <<MISC>> .
----
The rest of the side is made up mainly of <<MISC>> <<MISC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


soccer matches on Wednesday :
----
soccer matches on Wednesday :<<LOC>> <<


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Results from the <<MISC>> <<MISC>>
----
Results from the <<MISC>> <<MISC


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Expected Sale Date : 08/27/96
----
Expected Sale Date : 08/27/96


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


However , <<MISC>> stocks extended opening losses to more than one percent in morning trading , falling through the 2000 point resistance level on the main <<MISC>> index .
----
However , <<MISC>> stocks extended opening losses to more than one percent in morning trading , falling through the 2000 point resistance level on the main <<MISC>> index .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


It said <<MISC>> Sister <<PER>> <<PER>> , 52 , and <<MISC>> Brother <<PER>> <<PER>> , 28 , were held inside the compound .
----
It said <<MISC>> Sister <<PER>> <<PER>> , 52 , and <<MISC>> Brother <<PER>> <<PER>> , 28 , were held inside the compound .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Leading results and overall standings after the 161 kilometre first stage of the <<MISC>> <<MISC>> <<MISC>> <<MISC>> between <<LOC>> and <<LOC>> on Tuesday .
----
Leading results and overall standings after the 161 kilometre first stage of the <<MISC>> <<MISC>> <<MISC>> between <<LOC>> and <<LOC>> on Tuesday .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> , <<LOC>> 1996-08-30
----
<<LOC>> , <<LOC>> 1996-08-30


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> 1 2 1 11 6 11.00
----
<<PER>> <<PER>> 1 2 1 11 6 11.00


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


After <<LOC>> , he was due to go to <<LOC>> <<LOC>> .
----
After <<LOC>> , he was due to go to <<LOC>> <<LOC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


4. <<PER>> <<PER>> <<PER>> ( <<LOC>> ) <<ORG>> all same time
----
4. <<PER>> <<PER>> <<PER>> ( <<LOC>> ) <<ORG>> all same time


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


BASEBALL - <<ORG>> EDGE <<ORG>> 1-0 .
----
BASEBALL - <<ORG>> EDGE <<ORG>> 1-0 .### Context
BASEBALL - GIANTS EDGE PHILLIES 1-0


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Spin-bowling all-rounders <<PER>> <<PER>> and <<PER>> <<PER>> replace <<PER>> and <<PER>> , while <<PER>> comes in for his <<ORG>> <<ORG>> colleague <<PER>> .
----
Spin-bowling all-rounders <<PER>> <<PER>> and <<PER>> <<PER>> replace <<PER>> and <<PER>> , while <<PER>> comes in for his <<ORG>> <<ORG>> colleague <<PER>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The race continues on Friday with the 178 kilometre fifth-stage from <<LOC>> to <<LOC>> .
----
The race continues on Friday with the 178 kilometre fifth-stage from <<LOC>> to <<LOC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


At least one of the unidentified hijackers was apparently armed with grenades and TNT and threatened to blow the plane up in <<LOC>> unless it was refuelled so they could fly to <<LOC>> to claim political asylum .
----
At least one of the unidentified hijackers was apparently armed with grenades and TNT and threatened to blow the plane up in <<LOC>> unless it was refuelled so they could fly to <<LOC>> to claim political asylum .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Centre <<PER>> <<PER>> has been ruled out of <<LOC>> <<LOC>> 's team for the second test against <<LOC>> <<LOC>> in <<LOC>> on Saturday .
----
Centre <<PER>> <<PER>> has been ruled out of <<LOC>> <<LOC>> 's team for the second test against <<LOC>> <<LOC>> in <<LOC>> on Saturday .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> 3 ( <<PER>> 8th minute , <<PER>> 29th ,
----
<<ORG>> <<ORG>> 3 ( <<PER>> 8th minute , <<PER>> 29th ,### Context
Borussia Dortmund 3 ( <<PER>> 


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> came out of Wednesday 's game against <<ORG>> <<ORG>> in the ninth inning after suffering a mild hamstring strain .
----
Bonds came out of Wednesday 's game against <<ORG>> <<ORG>> in the ninth inning after suffering a mild hamstring strain .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-28
----
<<LOC>> 1996-08-28


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> , <<LOC>> 1996-08-25
----
<<LOC>> , <<LOC>> 1996-08-25


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


-- <<LOC>> newsroom ( 36 1)266 2410
----
-- <<LOC>> newsroom ( 36 1)266 2410


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In <<LOC>> : <<ORG>> <<ORG>> ( <<LOC>> <<LOC>> ) 8 <<ORG>>
----
In <<LOC>> : <<ORG>> <<ORG>> ( <<LOC>> <<LOC>> ) 8 <<ORG>>


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Qualifier vs. <<PER>> <<PER>> ( <<LOC>> )
----
Qualifier vs. <<PER>> <<PER>> ( <<LOC>> )


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> not out 0
----
<<PER>> <<PER>> not out 0


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In <<LOC>> : <<ORG>> <<ORG>> ( <<LOC>> ) 0 <<ORG>> <<ORG>>
----
In <<LOC>> : <<ORG>> <<ORG>> ( <<LOC>> ) 0 <<ORG>> <<ORG>>


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> , quoting <<PER>> press secretary <<PER>> <<PER>> , said <<PER>> and <<PER>> had discussed bilateral relations and international issues on the telephone .
----
<<ORG>> , quoting <<PER>> press secretary <<PER>> <<PER>> , said <<PER>> and <<PER>> had discussed bilateral relations and international issues on the telephone .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


this is an important issue in our relationship " , said <<PER>> , who is due to leave for <<LOC>> on Friday .
----
this is an important issue in our relationship " , said <<PER>> , who is due to leave for <<LOC>> on Friday .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


( <<LOC>> ) 71 73 72
----
( <<LOC>> ) 71 73 72


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-25
----
<<LOC>> 1996-08-25


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 44 6 53 .456 15 1/2
----
<<ORG>> 44 6 53 .456 15 1/2


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In <<LOC>> where the plant is based , 9,000 to 10,000 people gathered in the central square to express their bitterness at what <<PER>> called government indifference .
----
In <<LOC>> where the plant is based , 9,000 to 10,000 people gathered in the central square to express their bitterness at what <<PER>> called government indifference .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


APRIL 1996 <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>>
----
APRIL 1996 <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<ORG>> <<OR


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


With one out in the fifth <<PER>> <<PER>> <<PER>> and <<PER>> <<PER>> stroked back-to-back singles off <<ORG>> starter <<PER>> <<PER>> ( 7-5 ) and <<PER>> <<PER>> walked .
----
With one out in the fifth <<PER>> <<PER>> <<PER>> and <<PER>> <<PER>> stroked back-to-back singles off <<ORG>> starter <<PER>> <<PER>> ( 7-5 ) and <<PER>> <<PER>> walked .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> 785.00 785.00 6000 4710.000
----
<<ORG>> <<ORG>> 785.00 785.00 6000 4710.000


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Scorers :
----
Scorers :


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> <<ORG>> 1915.00 1915.00 2000 3830.000
----
<<ORG>> <<ORG>> <<ORG>> 1915.00 1915.00 2000 3830.000


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 3 3 0 0 4 1 9
----
<<ORG>> 3 3 0 0 4 1 9


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> help us if there is a breakdown , " he said of the machinery , loaned to the farm after the <<MISC>> government bought it earlier this year .
----
<<PER>> help us if there is a breakdown , " he said of the machinery , loaned to the farm after the <<MISC>> government bought it earlier this year .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-25
----
<<LOC>> 1996-08-25


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Leading results at an
----
Leading results at an <<MISC>> <<M


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> 21 5 1 15 304 586 11
----
<<ORG>> <<ORG>> 21 5 1 15 304 586 11


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


" I have told <<LOC>> leaders that <<MISC>> goverment has attached serious importance to the resolution of the tragic death of <<PER>> <<PER>> , " Under-Secretary of State for Foreign and <<ORG>> Affairs <<PER>> <<PER>> <<PER>> , told reporters .
----
" I have told <<LOC>> leaders that <<MISC>> goverment has attached serious importance to the resolution of the tragic death of Siraj Mia , " Under-Secretary of State for Foreign and Commonwealth Affairs <<PER>> <<PER>> , told reporters .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


But <<ORG>> shrugged off its ex-div tag to soar a further 4.10 guilders to 214.40 continuing its explosive rally sparked by the 51 percent jump in first half net profits last week , which set the market alight on Friday , sending the shares up 18.40 at 210.00 by the close .
----
But <<ORG>> shrugged off its ex-div tag to soar a further 4.10 guilders to 214.40 continuing its explosive rally sparked by the 51 percent jump in first half net profits last week , which set the market alight on Friday , sending the shares up 18.40 at 210.00 by the close .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> singer heads for <<LOC>> after illness .
----
<<ORG>> singer heads for <<LOC>> after illness .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


+1 <<PER>> <<PER>> ( <<LOC>> <<LOC>> ) through 8
----
+1 <<PER>> <<PER>> ( <<LOC>> <<LOC>> ) through 8


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


" It 's a possibility , not a probability , he may take a hostage , but we have measures in place if that is the case . "
----
" It 's a possibility , not a probability , he may take a hostage , but we have measures in place if that is the case . "


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> , <<LOC>> 1996-08-30
----
<<LOC>> , <<LOC>> 1996-08-30


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The students staged a violent nine-day demonstration at the university to demand unification with <<LOC>> <<LOC>> .
----
The students staged a violent nine-day demonstration at the university to demand unification with <<LOC>> <<LOC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> <<ORG>> <<ORG>> 3 2 0 1 4 3 6
----
<<ORG>> ( <<ORG>> ) 3 2 0 1 4 3 6


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Eldred ( 6-5 ) walked one and struck out three .
----
<<PER>> ( 6-5 ) walked one and struck out three .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> 4 1 1 2 3 6 4
----
<<ORG>> 4 1 1 2 3 6 4


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-27
----
<<LOC>> 1996-08-27


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


" They asked me what would be the first thing I would do if I were president , and I said the first thing I would do would be to resign straight away , " Archbishop <<PER>> <<PER>> said at a sermon attended by several cabinet ministers .
----
" They asked me what would be the first thing I would do if I were president , and I said the first thing I would do would be to resign straight away , " Archbishop <<PER>> <<PER>> said at a sermon attended by several cabinet ministers .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<LOC>> 1996-08-23
----
<<LOC>> 1996-08-23


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> b <<PER>> <<PER>> 65
----
<<PER>> <<PER>> b <<PER>> <<PER>> 65


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


A message from the West this week from <<LOC>> President <<PER>> <<PER>> congratulated <<LOC>> on the anniversary , promising to support market reforms and praising <<LOC>> as a " stabilising factor " in a united <<LOC>> .
----
A message from the <<LOC>> this week from <<LOC>> President <<PER>> <<PER>> congratulated <<LOC>> on the anniversary , promising to support market reforms and praising <<LOC>> as a " stabilising factor " in a united <<LOC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>> , who said local and state governments cannot fight illegal drugs alone .
----
<<PER>> <<PER>> , who said local and state governments cannot fight illegal drugs alone .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


7-6 ( 7-4 ) 6-1
----
7-6 ( 7-4 ) 6-1


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. <<PER>> <<PER>> ( <<LOC>> ) 6.72 metres
----
1. <<PER>> <<PER>> ( <<LOC>> ) 6.72 metres


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> , <<ORG>> v <<ORG>> , <<ORG>> v <<ORG>> <<ORG>> ,
----
<<ORG>> <<ORG>> , <<ORG>> v <<ORG>> , <<ORG>> v <<ORG>> <<ORG>> ,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> <<PER>>
----
<<PER>> <<PER>>


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The government-owned <<ORG>> <<ORG>> said the smugglers were caught in <<LOC>> in the eastern state of <<LOC>> , on the border with <<LOC>> , and had confessed they were on their way to " the so-called alliance forces which have been undertaking subversive operations on the eastern border " .
----
The government-owned <<ORG>> <<ORG>> <<ORG>> said the smugglers were caught in <<LOC>> in the eastern state of <<LOC>> , on the border with <<LOC>> , and had confessed they were on their way to " the so-called alliance forces which have been undertaking subversive operations on the eastern border " .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


1. <<PER>> <<PER>> ( <<LOC>> ) 3:54.57
----
1. <<PER>> <<PER>> ( <<LOC>> ) 3:54.57


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<ORG>> <<ORG>> coach <<PER>> <<PER>> said Prime Minister <<PER>> <<PER>> rang him today to offer his congratulations .
----
<<ORG>> <<ORG>> coach <<PER>> <<PER>> said Prime Minister <<PER>> <<PER>> rang him today to offer his congratulations .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<PER>> says <<LOC>> declares war on <<MISC>> .
----
<<PER>> says <<LOC>> declares war on <<MISC>> .


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<<MISC>> <<MISC>> ( <<ORG>> ) No 1663/96 of 20 August 1996 establishing the standard import values for determining the entry price of certain fruit and vegetables END OF DOCUMENT .
----
<<MISC>> <<MISC>> ( <<ORG>> ) <<MISC>> <<MISC>> of 20 August 1996 establishing the standard import values for determining the entry price of certain fruit and vegetables END OF DOCUMENT .
Cellars were flooded , trees uprooted and roofs damaged , but there were no reports of any injuries , an interior affairs ministry spokesman said .
----
Cellars were flooded , trees uprooted and roofs damaged , but there were no reports of any injuries , an interior affairs ministry spokesman said .
{'rouge1': 0.9776798264520048, 'rouge2': 0.9447418506724329, 'rougeL': 0.9778364470728279, 'rougeLsum': 0.9778432083917024}
CPU times: user 2min 59s, sys: 349 ms, total: 3min
Wall time: 3min


In [None]:
from google.colab import runtime
runtime.unassign()