# Env

In [None]:
import os
import argparse
import collections
from datetime import datetime
import re
import json
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.auto import tqdm
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    TrainingArguments,
)
from datasets import Dataset
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# Autograd setting: the call below passes True, so gradient tracking stays enabled
# NOTE(review): the earlier comment here said "Gradient False" — confirm which was intended
torch.set_grad_enabled(True)
# working directory that the %cd cells change into
work_dir = '/home/ubuntu/nlp-practice'

In [None]:
# Base checkpoint: https://huggingface.co/google/gemma-1.1-2b-it
MODEL_ID = 'google/gemma-1.1-2b-it'
# Name for the NSMC variant; not referenced by any other cell visible in this notebook
NSMC_MODEL_ID = 'gemma-2b-it-nsmc'

In [None]:
%cd {work_dir}
!pwd

In [None]:
from huggingface_hub import notebook_login, interpreter_login
# notebook_login()
# interpreter_login()

# 5.8. LLM Text Classification

In [None]:
%cd {work_dir}/src/llm-tc
!pwd

## Gemma Tutorial

In [None]:
# https://huggingface.co/google/gemma-1.1-2b-it
# Model id passed to from_pretrained() for both the model and the tokenizer in the next cell
MODEL_ID = 'google/gemma-1.1-2b-it'

In [None]:
# 4-bit quantization settings: NF4 quant type, float16 compute dtype
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# load the base model with the 4-bit config, letting device_map='auto' place it
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    device_map='auto',
)

# load the tokenizer that belongs to the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    add_special_tokens=True,
)

In [None]:
# text-generation pipeline over the loaded model/tokenizer, capped at 512 new tokens
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_new_tokens=512,
)
# echo the pipeline object as the cell output
pipe

In [None]:
# input message: sample review sentence that the following cells insert into the prompt
doc = """엄청나게 즐거운 시간이었습니다. 강추!!!"""

In [None]:
# Build the NSMC prompt: a single user turn rendered through the chat template
user_turn = {
    "role": "user",
    "content": "다음 문장은 영화리뷰입니다. 긍정 또는 부정으로 분류해주세요:\n\n{}".format(doc),
}
messages = [user_turn]

# tokenize=False returns text rather than token ids; the generation prompt is appended
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

In [None]:
# show the rendered chat-template prompt
print(prompt)

In [None]:
# Sample a completion for the templated prompt.
# `prompt` was rendered by apply_chat_template, so it already carries the
# template's special tokens; add_special_tokens=False keeps the tokenizer
# from prepending a second BOS token on top of them.
outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    add_special_tokens=False,
)
# echo the raw pipeline result as the cell output
outputs

In [None]:
# print the "generated_text" field of the first result
print(outputs[0]["generated_text"])

In [None]:
# skip the first len(prompt) characters so only the text after the prompt is printed
print(outputs[0]["generated_text"][len(prompt):])

In [None]:
def gen_prompt(doc, tokenizer=None):
    """Render the NSMC sentiment-classification prompt for one review.

    Args:
        doc: Review text inserted into the Korean classification instruction.
        tokenizer: Object exposing ``apply_chat_template``. When omitted, the
            tokenizer of the module-level ``pipe`` is used, which matches the
            previous single-argument behavior.

    Returns:
        The result of ``apply_chat_template`` called with ``tokenize=False``
        and ``add_generation_prompt=True`` on a single user message.
    """
    if tokenizer is None:
        # Looked up at call time, so whichever pipeline is currently bound to
        # the global name `pipe` supplies the template.
        tokenizer = pipe.tokenizer

    messages = [
        {
            "role": "user",
            "content": "다음 문장은 영화리뷰입니다. 긍정 또는 부정으로 분류해주세요:\n\n{}".format(doc),
        }
    ]
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

In [None]:
def gen_response(pipe, doc):
    """Generate the model's sentiment answer for one review.

    Args:
        pipe: Callable text-generation pipeline; it is invoked with the
            rendered prompt plus sampling keyword arguments.
        doc: Review text to classify.

    Returns:
        The generated text with the leading prompt characters removed.
    """
    prompt = gen_prompt(doc)

    outputs = pipe(
        prompt,
        do_sample=True,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        # The prompt comes from apply_chat_template and already contains the
        # template's special tokens; adding them again would duplicate BOS.
        add_special_tokens=False,
    )
    # "generated_text" starts with the prompt itself, so drop that prefix
    return outputs[0]["generated_text"][len(prompt):]

In [None]:
# Interactive loop: classify each entered sentence until an empty line is given
while True:
    doc = input('문장 > ').strip()
    if not doc:
        # blank (or whitespace-only) input ends the session
        break
    result = gen_response(pipe, doc)
    print(f'감정 > {result}\n\n')

## Gemma Fine-tuning (NSMC)

In [None]:
!sh train.sh "cchyun-llm-tc"

## Gemma Classify

In [None]:
!sh classify.sh "../../checkpoints/cchyun-llm-tc-20240417-014816/checkpoint-final"

## Gemma Infer

In [None]:
from data import gen_test_prompt

In [None]:
# base model id; in the next cell only the tokenizer is loaded from it
MODEL_ID = "google/gemma-1.1-2b-it"
# checkpoint directory that the model weights are loaded from in the next cell
model_fn = "../../checkpoints/cchyun-llm-tc-20240417-014816/checkpoint-final"

In [None]:
# 4-bit bitsandbytes settings: NF4 quant type, float16 compute dtype
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# model weights come from the checkpoint path, quantized with the config above
model = AutoModelForCausalLM.from_pretrained(
    model_fn,
    quantization_config=bnb_config,
    device_map="auto",
)

# tokenizer comes from the base model id, then is switched to right-side padding
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    add_special_tokens=True,
)
tokenizer.padding_side = 'right'

In [None]:
# text-generation pipeline for the checkpoint model; at most 5 new tokens per call
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_new_tokens=5,
)

In [None]:
def gen_response(pipe, doc):
    """Run the pipeline on the test prompt built from ``doc``.

    Args:
        pipe: Callable text-generation pipeline.
        doc: Input handed to ``gen_test_prompt``; the caller in this notebook
            passes a dict of the form ``{'document': [text]}``.

    Returns:
        The generated text with the leading prompt characters removed.
    """
    # gen_test_prompt returns an indexable result; only its first entry is used
    test_prompt = gen_test_prompt(doc)[0]

    sampling = {
        "do_sample": True,
        "temperature": 0.2,
        "top_k": 50,
        "top_p": 0.95,
    }
    # NOTE(review): if gen_test_prompt already emits special tokens (e.g. via a
    # chat template), add_special_tokens=True may duplicate BOS — confirm
    # against data.gen_test_prompt and the training setup.
    generations = pipe(test_prompt, add_special_tokens=True, **sampling)

    full_text = generations[0]["generated_text"]
    # the text begins with the prompt; return only what follows it
    return full_text[len(test_prompt):]

In [None]:
import time

# Interactive loop: answer each entered line and print how long generation took.
# An empty (or whitespace-only) line ends the session.
while True:
    doc = input('질문 > ').strip()
    if not doc:
        break
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments as with time.time()
    start = time.perf_counter()
    result = gen_response(pipe, {'document': [doc]})
    elapsed = time.perf_counter() - start
    print(elapsed)
    print(f'답변 > {result}\n\n')