<a href="https://colab.research.google.com/github/ohmreborn/question-generation-AIB2023/blob/main/evaluate%20/generate_from_llama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
package = """
appdirs
loralib
bitsandbytes
black
black[jupyter]
datasets
fire
peft==0.3.0
transformers==4.28.1
sentencepiece
gradio
gdown
"""
with open('requirements.txt','w') as f:
    f.write(package)
  
!pip install -r requirements.txt
!pip install accelerate==0.18.0

In [None]:
# Download the Alpaca prompt template; wget stores it as alpaca.json in the
# current directory, which is the file name the Prompter class opens.
!wget https://raw.githubusercontent.com/tloen/alpaca-lora/main/templates/alpaca.json

In [None]:
"""
A dedicated helper to manage templates and prompt building.
"""

import json
import os.path as osp
from typing import Union


class Prompter(object):
    """Build prompts from a JSON template and extract the model's response.

    The template file must provide three keys: ``prompt_input`` (format string
    with ``{instruction}`` and ``{input}``), ``prompt_no_input`` (format string
    with ``{instruction}`` only) and ``response_split`` (the marker that
    separates the prompt from the generated response).
    """

    def __init__(self, template_path: str = 'alpaca.json'):
        """Load the prompt template.

        Args:
            template_path: Path of the JSON template file. Defaults to
                ``alpaca.json`` in the current working directory.
        """
        # Explicit encoding so that loading does not depend on the platform
        # default locale.
        with open(template_path, encoding='utf-8') as fp:
            self.template = json.load(fp)

    def generate_prompt(
        self,
        instruction: str,
        input: Union[None, str] = None,
        label: Union[None, str] = None,
    ) -> str:
        """Return the full prompt for ``instruction`` and an optional ``input``.

        Args:
            instruction: Task description inserted into the template.
            input: Optional context; when truthy the ``prompt_input`` template
                is used, otherwise ``prompt_no_input``.
            label: Optional response/output text appended verbatim to the
                prompt when truthy.

        Returns:
            The formatted prompt string.
        """
        if input:
            res = self.template["prompt_input"].format(
                instruction=instruction, input=input
            )
        else:
            res = self.template["prompt_no_input"].format(
                instruction=instruction
            )
        if label:
            res = f"{res}{label}"
        return res

    def get_response(self, output: str) -> str:
        """Return the text that follows the first response marker in ``output``.

        Everything after the first occurrence of the template's
        ``response_split`` marker is kept, so a response that itself contains
        the marker text is not truncated.

        Args:
            output: Decoded model output, including the prompt.

        Returns:
            The response text with surrounding whitespace stripped.

        Raises:
            ValueError: If the marker does not occur in ``output``.
        """
        marker = self.template["response_split"]
        _, found, response = output.partition(marker)
        if not found:
            raise ValueError(
                f"response marker {marker!r} not found in model output"
            )
        return response.strip()

In [None]:
import gdown

# (Google Drive URL, local file name) pairs for the adapter files that are
# later moved into the checkpoint folder.
# Alternative file ids kept for reference:
#   adapter_config.json: https://drive.google.com/uc?export=download&id=14DeJ5Gyl02CcUFE2VSIhHYkTMneBXKQv
#   adapter_model.bin:   https://drive.google.com/uc?export=download&id=104yNcI4vE4SjGkvo2VnmnxDpZKTz0P44
downloads = [
    (
        "https://drive.google.com/uc?export=download&id=1BsT2l8e00ZZM-Q1RcVUxWimZ3_rw7dXp",
        'adapter_config.json',
    ),
    (
        'https://drive.google.com/uc?export=download&id=1ErWZE4R_0zZjydVsnAQ7apPUhM31GuO6',
        'adapter_model.bin',
    ),
]

for url, output in downloads:
    gdown.download(url, output, quiet=False)

In [None]:
import os
import shutil

# os.makedirs(..., exist_ok=True) replaces `!mkdir checkpoint`: it does not
# complain when the folder already exists and does not rely on a shell.
os.makedirs('checkpoint', exist_ok=True)

# Move the downloaded adapter files into the checkpoint folder that is passed
# to main() as lora_weights.
for filename in ('adapter_model.bin', 'adapter_config.json'):
    path = filename
    destination = os.path.join('checkpoint', filename)
    # Re-running the cell after a successful move must not fail: skip files
    # that are already in place and no longer present in the working directory.
    if not os.path.exists(path) and os.path.exists(destination):
        continue
    dest = shutil.move(path, destination)

In [None]:
import os
import sys

import torch
import transformers
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer




def main(
    load_8bit: bool = True,
    base_model: str = "decapoda-research/llama-7b-hf",
    lora_weights: str = "/content/checkpoint",
):
    """Load the tokenizer and the base LLaMA model wrapped with a PEFT adapter.

    Args:
        load_8bit: Forwarded as ``load_in_8bit`` when loading the base model.
            When False the adapted model is additionally cast with ``half()``.
        base_model: Model id or path for the tokenizer and the base model. If
            falsy, the ``BASE_MODEL`` environment variable is used instead.
        lora_weights: Location of the adapter passed to
            ``PeftModel.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is in eval mode and, on
        torch >= 2 outside Windows, wrapped by ``torch.compile``.

    Raises:
        AssertionError: If no base model is given and ``BASE_MODEL`` is unset
            or empty.
    """
    if not base_model:
        base_model = os.environ.get("BASE_MODEL", "")

    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"

    tokenizer = LlamaTokenizer.from_pretrained(base_model)

    # A per-device max_memory mapping derived from torch.cuda.mem_get_info()
    # was tried here by the author and left disabled.
    backbone = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    model = PeftModel.from_pretrained(
        backbone,
        lora_weights,
        torch_dtype=torch.float16,
    )

    # unwind broken decapoda-research config
    model.config.pad_token_id = 0  # unk
    tokenizer.pad_token_id = 0
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.
    model.eval()

    can_compile = torch.__version__ >= "2" and sys.platform != "win32"
    if can_compile:
        model = torch.compile(model)

    return model, tokenizer
# "tloen/alpaca-lora-7b"
# Load the adapted model and its tokenizer once; later cells reuse both.
model, tokenizer = main(base_model="decapoda-research/llama-7b-hf")
print(model)

In [None]:
# Device that evaluate() moves the tokenized prompt to.
device = torch.device("cuda")
device

In [None]:
# Print only the first parameter tensor of the model as a quick sanity check.
first_param = next(iter(model.parameters()), None)
if first_param is not None:
    print(first_param)

In [None]:
# evaluate(
#     instruction,
#     input=None,
#     temperature=0.5, # a lower temperature sharpens the softmax, making the model more confident https://stackoverflow.com/questions/58764619/why-should-we-use-temperature-in-softmax/63471046#63471046
#     top_p=0.90, # sample only from the most likely tokens whose cumulative probability reaches 0.90 https://www.linkedin.com/pulse/text-generation-temperature-top-p-sampling-gpt-models-selvakumar
#     top_k=10, # keep only the 10 most likely tokens; used together with top_p when both are set https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p#2-pick-from-amongst-the-top-tokens-top-k
#     repetition_penalty=2.0, # https://arxiv.org/pdf/1909.05858.pdf page 5
#     max_new_tokens=1024,
#     model=None,
#     tokenizer=None,
# ):

In [None]:
def evaluate(
    instruction,
    input=None,
    temperature=0.75,  # values below 1 sharpen the softmax (more confident model) https://stackoverflow.com/questions/58764619/why-should-we-use-temperature-in-softmax/63471046#63471046
    top_p=0.95,  # sample only from the most likely tokens whose cumulative probability reaches top_p https://www.linkedin.com/pulse/text-generation-temperature-top-p-sampling-gpt-models-selvakumar
    top_k=50,  # keep only the top_k most likely tokens; used together with top_p when both are set https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p#2-pick-from-amongst-the-top-tokens-top-k
    repetition_penalty=1.2,  # https://arxiv.org/pdf/1909.05858.pdf page 5
    max_new_tokens=1024,
    model=None,
    tokenizer=None,
    do_sample=True,
):
    """Generate the model's response for an instruction and optional input.

    The prompt is built with ``Prompter``, tokenized, moved to the notebook's
    global ``device`` and passed to ``model.generate``. The decoded output is
    reduced to the response part with ``Prompter.get_response``.

    Args:
        instruction: Task description placed in the prompt.
        input: Optional context passage placed in the prompt.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.
        repetition_penalty: Penalty applied to already generated tokens.
        max_new_tokens: Maximum number of tokens to generate.
        model: Model exposing ``generate``; required despite the None default.
        tokenizer: Matching tokenizer; required despite the None default.
        do_sample: Enable sampling. Without it ``temperature``, ``top_p`` and
            ``top_k`` are ignored and decoding is greedy; pass False for
            deterministic output.

    Returns:
        The generated response text.
    """
    prompter = Prompter()
    prompt = prompter.generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    generation_config = GenerationConfig(
        # Sampling must be switched on explicitly, otherwise the three
        # sampling parameters below have no effect.
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )

    with torch.no_grad():
        # Per-step scores are not requested: only the generated sequence is
        # used below, so collecting them would just hold extra memory.
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    return prompter.get_response(output)



In [None]:
# Instruction text passed to evaluate() together with each passage below.
instruction = "Please create an inference question in the style of TOEFL reading comprehension section. Also provide an answer in the format"

In [None]:
# Sample news passage used as a quick smoke test of evaluate().
# Named `passage` rather than `input` so the builtin input() is not shadowed
# in the notebook's global namespace.
passage = """
The Zaporizhzhia Nuclear Power Plant (NPP) has been reconnected to the Ukrainian power grid, national energy company Ukrenergo said in a statement Monday.

Earlier Monday, Ukrenergo said the plant – currently occupied by Russian forces – had been cut off from the grid after the high-voltage line that supplies it was one of those damaged by a Russian attack on the Dnipro region, to the north of Zaporizhzhia, in the early hours of the morning. 

“As a result of damage to the high-voltage line, the Zaporizhzhia nuclear power plant lost power from the power system and operated from diesel generators,” it said on Telegram. 

“This is the seventh time since the temporary occupation of the Zaporizhzhia NPP that the Russians have created a nuclear and radiation hazard in the NPP area. Ukrenergo made maximum efforts and restored power to the nuclear plant from the Ukrainian power system,” the statement said.
Military and infrastructure facilities in Dnipro were attacked by Russian missiles and drones early on Monday, according to a Telegram post by the Ukrainian air force.
"""
res = evaluate(instruction=instruction, input=passage, model=model, tokenizer=tokenizer)
print(res)

In [None]:
import pandas as pd

# Evaluation set: the first column holds the context passages fed to evaluate().
eval_csv_url = 'https://raw.githubusercontent.com/ohmreborn/question-generation-AIB2023/main/evaluate/eval.csv'
df = pd.read_csv(eval_csv_url)
df.head()

In [None]:
# Generate a question for every context in the first column of the
# evaluation frame, printing a separator line after each result.
contexts = df.iloc[:, 0]
for context in contexts:
    res = evaluate(
        instruction=instruction,
        input=context,
        model=model,
        tokenizer=tokenizer,
    )
    print(res, '-----------------------------------', sep='\n')

In [None]:
# Generate a question for the first context only.
# Named `first_context` rather than `input` so the builtin input() is not
# shadowed in the notebook's global namespace.
first_context = df.iloc[0, 0]
res = evaluate(instruction=instruction, input=first_context, model=model, tokenizer=tokenizer)
print(res)