In [1]:
# Install the Hugging Face Hub client and log in through its CLI.
!pip install huggingface_hub
!huggingface-cli login
# Install/upgrade Weights & Biases (-q quiet, -U upgrade) and log in to it.
!pip install wandb -qU
import wandb
wandb.login()



In [2]:
# Show only the nvidia-smi row(s) containing "MiB" (GPU memory usage) before the model is loaded.
!nvidia-smi | grep MiB

| N/A   31C    P0    43W / 400W |      0MiB / 40960MiB |      0%      Default |


In [3]:
!pip install --quiet accelerate==0.23.0
!pip install --quiet peft==0.5.0
!pip install --quiet transformers
!pip install --quiet trl==0.4.7
!pip install --quiet wandb
!pip install --quiet evaluate
!pip install --quiet xformers
!pip install --quiet datasets
!pip install --quiet bitsandbytes
!pip install --quiet evaluate[rouge_score]
!pip install --quiet rouge_score

[0m

In [4]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)

In [5]:
# Hub repository id from which both the tokenizer and the model are loaded.
new_model = "foobar8675/llama-2-7b-sentiment-annotated3"
# Device placement mapping; presumably {"": 0} means "whole model on GPU 0" — TODO confirm.
device_map = {"": 0}
# Passed to BitsAndBytesConfig as load_in_4bit.
use_4bit = True
# Name of the torch dtype used for 4-bit compute; resolved later with getattr(torch, ...).
bnb_4bit_compute_dtype = "float16"
# Passed to BitsAndBytesConfig as bnb_4bit_quant_type.
bnb_4bit_quant_type = "nf4"
# Passed to BitsAndBytesConfig as bnb_4bit_use_double_quant (nested quantization).
use_nested_quant = False

In [6]:
# https://www.reddit.com/r/LocalLLaMA/comments/15uu6lk/how_do_you_really_really_prompt_llama_2/
def format_dolly(sample):
    """Render one sample as an ``[INST] ... [/INST]`` annotation prompt.

    Args:
        sample: Mapping with a ``context`` entry (the sentence to annotate) and
            a ``response`` entry (the annotated sentence; may be empty).

    Returns:
        The prompt sections joined by blank lines. The ``### Context`` section
        is omitted when ``sample["context"]`` is empty.
    """
    # The continuation-line indentation is part of the prompt text; keep it as is.
    instruction = """### You are solving the NLP problem. Annotate each word in the context as one of the
      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].
      The entity type definitions are:
      PER is a person,
      ORG is an organization,
      LOC is a location, and
      MISC is an event, nationality, or product.
      The answer should only contain the annotated Context.
      """
    sections = ["[INST]", instruction]
    if len(sample["context"]) > 0:
        sections.append(f"### Context\n{sample['context']}")
    sections.append("### Answer\n")
    sections.append("[/INST]")
    sections.append(f"{sample['response']}")
    return "\n\n".join(sections)

def format_template(sample):
    """Store the rendered prompt on ``sample`` under the ``text`` key.

    The sample is updated in place and the same object is returned.
    """
    rendered = format_dolly(sample)
    sample['text'] = f"{rendered}"
    return sample

def format_prediction(sample):
    """Build the inference prompt for ``sample``: the full prompt with an empty answer.

    The caller's ``sample`` is left untouched. Blanking ``response`` on the
    original object would silently destroy the reference answer for any code
    that reads it afterwards, so a shallow copy is used instead.

    Args:
        sample: Mapping with at least a ``context`` entry.

    Returns:
        The prompt text, ending right after the ``[/INST]`` marker section.
    """
    blank = dict(sample)
    blank['response'] = ''
    return f"{format_template(blank)['text']}"

# Maps a numeric ``ner_tags`` id to the entity label used in the answer text.
# Id 0 maps to the string '0' (no entity); ids (1, 2) -> PER, (3, 4) -> ORG,
# (5, 6) -> LOC, (7, 8) -> MISC. Each consecutive pair presumably covers the
# B-/I- variants of one entity type — confirm against the dataset's features.
tag_to_ent_dic = {
    0: '0',
    **{
        tag_id: entity
        for pair_index, entity in enumerate(('PER', 'ORG', 'LOC', 'MISC'))
        for tag_id in (2 * pair_index + 1, 2 * pair_index + 2)
    },
}

def ent_tag_to_response(ent_tag, token):
    """Return the text emitted for one token of the annotated answer.

    Args:
        ent_tag: Entity label of the token; the string ``'0'`` means "no entity".
        token: The original token text.

    Returns:
        ``<<ent_tag>>`` for an entity token, otherwise ``token`` unchanged.
    """
    return f"<<{ent_tag}>>" if ent_tag != '0' else token

def tags_to_response(sample):
    """Derive the prompt fields of a sample from its tokens and NER tag ids.

    Adds three entries to ``sample`` (in place) and returns it:
      * ``ent_tags``: entity label per token, looked up in ``tag_to_ent_dic``;
      * ``context``: the tokens joined by single spaces;
      * ``response``: the tokens joined by single spaces, with each entity
        token replaced by its ``<<LABEL>>`` marker.
    """
    entity_labels = []
    for tag_id in sample['ner_tags']:
        entity_labels.append(tag_to_ent_dic[tag_id])
    sample["ent_tags"] = entity_labels

    sample["context"] = " ".join(sample['tokens'])

    # map() over two iterables stops at the shorter one, exactly like zip().
    annotated = map(ent_tag_to_response, sample['ent_tags'], sample['tokens'])
    sample["response"] = " ".join(annotated)
    return sample

In [7]:
from datasets import load_dataset

# Load the CoNLL-2003 training split, shuffle it with a fixed seed, derive the
# context/response/text columns, then hold out the last 5% as the test split
# (shuffle=False keeps the order produced by the seeded shuffle).
dataset = (
    load_dataset("conll2003", split="train")
    .shuffle(seed=42)
    .map(tags_to_response)
    .map(format_template)
    .train_test_split(test_size=0.05, shuffle=False)
)

train_dataset = dataset["train"]
# Only the first 100 held-out rows are used for evaluation.
eval_dataset = dataset["test"].select(range(100))

# Inspect the held-out split and its first row.
print(dataset['test'])
print(dataset['test'][0])

Dataset({
    features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'ent_tags', 'context', 'response', 'text'],
    num_rows: 703
})
{'id': '6139', 'tokens': ['BREGANCON', ',', 'France', '1996-08-25'], 'pos_tags': [22, 6, 22, 11], 'chunk_tags': [11, 0, 11, 12], 'ner_tags': [5, 0, 5, 0], 'ent_tags': ['LOC', '0', 'LOC', '0'], 'context': 'BREGANCON , France 1996-08-25', 'response': '<<LOC>> , <<LOC>> 1996-08-25', 'text': '[INST]\n\n### You are solving the NLP problem. Annotate each word in the context as one of the\n      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].\n      The entity type definitions are:\n      PER is a person,\n      ORG is an organization,\n      LOC is a location, and\n      MISC is an event, nationality, or product.\n      The answer should only contain the annotated Context.\n      \n\n### Context\nBREGANCON , France 1996-08-25\n\n### Answer\n\n\n[/INST]\n\n<<LOC>> , <<LOC>> 1996-08-25'}


In [8]:
# Load the tokenizer from the same repo id as the model, with model_max_length set to 2048.
tokenizer = AutoTokenizer.from_pretrained(
    new_model,
    model_max_length=2048)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
# Resolve the configured dtype name (e.g. "float16") to the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# bitsandbytes quantization settings applied while the checkpoint is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
# Defined up front so `bf16` exists even when the check below does not fire.
bf16 = False
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        bf16 = True
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Pass the configured device_map explicitly so model placement follows the
# config cell instead of the library's default for quantized checkpoints.
model = AutoModelForCausalLM.from_pretrained(
    new_model,
    quantization_config=bnb_config,
    device_map=device_map,
)

Your GPU supports bfloat16: accelerate training with bf16=True


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Show the nvidia-smi memory row(s) again, now that the quantized model has been loaded.
!nvidia-smi | grep MiB

| N/A   31C    P0    50W / 400W |   4655MiB / 40960MiB |      0%      Default |


In [11]:
%%time
index = 1
sample = eval_dataset[index]
prompt = format_prediction(sample)
print("--- prompt ---")
print(prompt)
print("--- ref ---")
ref = eval_dataset[1]['response']
print(ref)
toks = tokenizer(prompt, return_tensors="pt")
res = model.generate(
    **toks.to("cuda"),
    max_new_tokens=1000,
    do_sample=True,
    top_k=0,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    temperature=0.1,
    ).to('cuda')

result = tokenizer.batch_decode(res)[0]
result = result.split("[/INST]\n\n")[1]
result = result.replace("</s>", "")
print("--- prediction ---")
print(result)

--- prompt ---
[INST]

### You are solving the NLP problem. Annotate each word in the context as one of the
      following types: [<<PER>>, <<ORG>>, <<LOC>>, <<MISC>>].
      The entity type definitions are:
      PER is a person,
      ORG is an organization,
      LOC is a location, and
      MISC is an event, nationality, or product.
      The answer should only contain the annotated Context.
      

### Context
SOCCER - SMICER 'S LAST-GASP GOAL KEEPS LENS IN THE LEAD .

### Answer


[/INST]


--- ref ---
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
--- prediction ---
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .

### Answer



CPU times: user 57.4 s, sys: 108 ms, total: 57.5 s
Wall time: 58.4 s


In [12]:
%%time

# Generate an annotation for every evaluation sample and score all predictions
# against the references with ROUGE.
import evaluate

predictions = []
references = []
rouge = evaluate.load('rouge')
for sample in eval_dataset:
    # Read the reference first: format_prediction is called with this same sample.
    ref = sample['response']
    prompt = format_prediction(sample)

    # Token count of the raw context, used below to size the generation budget.
    response_toks = tokenizer(sample['context'], return_tensors="pt")
    response_toks_len = len(response_toks["input_ids"][0])

    toks = tokenizer(prompt, return_tensors="pt")
    # NOTE(review): the budget is twice the context token count (capped at 1024).
    # The logged output contains a prediction ending in "<<MIS", which looks like
    # an answer cut off by this budget — confirm and widen if so.
    # NOTE(review): do_sample=True with no seed makes the scores vary between runs.
    res = model.generate(
        **toks.to("cuda"),
        max_new_tokens=min(response_toks_len*2, 1024),
        do_sample=True,
        top_k=0,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        temperature=0.1,
        ).to('cuda')

    result = tokenizer.batch_decode(res)[0]
    # The decoded text starts with the prompt; keep only what follows "[/INST]".
    result = result.split("[/INST]\n\n")[1]
    result = result.replace("</s>", "")
    result = result.replace("\n\n", "")
    # Drop anything from a repeated "### Answer" header onwards.
    if "### Answer" in result:
        result = result.split("### Answer")[0]
    print(ref)
    print("----")
    print(result)
    predictions.append(result)
    references.append(ref)

# Corpus-level ROUGE over all collected prediction/reference pairs.
results = rouge.compute(predictions=predictions,
                         references=references)
print(results)

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

<<LOC>> , <<LOC>> 1996-08-25
----
<<LOC>> , <<LOC>> 1996-08-25
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
----
SOCCER - <<PER>> 'S LAST-GASP GOAL KEEPS <<ORG>> IN THE LEAD .
43. <<LOC>> 7.75
----
43. <<LOC>> 7.75
Net Income Per
----
Net Income Per
<<MISC>> and <<MISC>> league soccer
----
<<MISC>> and <<MISC>> <<MIS
But the <<ORG>> unit is far more profitable , he said .
----
But the <<ORG>> unit is far more profitable , he said .
BASEBALL - <<ORG>> RALLY TO BEAT <<ORG>> .
----
BASEBALL - <<ORG>> RALLY TO BEAT <<ORG>> .
A passenger train collided with a locomotive at a main railway station in <<LOC>> on Wednesday and police said around 10 people were injured .
----
A passenger train collided with a locomotive at a main railway station in <<LOC>> on Wednesday and police said around 10 people were injured .
<<ORG>> 4 1 3 0 9 3 6
----
<<ORG>> 4 1 3 0 9 3 6
6-1 7-5
----
6-1 7-5
<<ORG>> 62 62 .500 4 1/2
----
<<ORG>> 62 62 .500 4 1/2
matches in the <<MISC>> soccer champion

In [13]:
# Colab-only: presumably releases the attached runtime once the notebook is done — TODO confirm.
from google.colab import runtime
runtime.unassign()