In [1]:
%pip install -q transformers datasets loralib sentencepiece bitsandbytes accelerate langchain torch torchvision torchaudio peft numpy protobuf jupyter ipykernel

Note: you may need to restart the kernel to use updated packages.


In [1]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import torch

base = 'beomi/KoAlpaca-KoRWKV-1.5B'
model = AutoModelForCausalLM.from_pretrained(
    base,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto"
)

local_llm = HuggingFacePipeline(pipeline=pipeline(
    "text-generation",
    
    model=model,
    
    tokenizer=base,
    
    max_length=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.2,
    return_full_text=False,
    # eos_token_id=2,
))

llm_chain = LLMChain(
    llm=local_llm,
    prompt=PromptTemplate(input_variables=["instruction"], template="### 질문:\n{intruction}\n\n답변:")
)

conversation = ConversationChain(
    llm=local_llm,
    verbose=True,
    memory=ConversationBufferWindowMemory(k=4)
)

conversation.prompt.template = '''대화기록:
{history}
질문: {input}
답변:'''

def ask(question):
    print(llm_chain.invoke(question))

def chat(input):
    print(conversation.predict(input=input))
    


In [2]:
ask('What is the capital of England?')
ask('내 이름이 뭘까?')

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


{'intruction': 'What is the capital of England?', 'text': ' 1. 주사위를 던져서 2. 1번과 3번으로 나누어질 수 있는 2,3번과 4,5,6,7,8,9,10,11,12,13, 14,15,16,...4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10,11,12,15,16,17,18,19,20,...하는 중,...'}
{'intruction': '내 이름이 뭘까?', 'text': ' 1,2,3,4,5,6,7,8,9,10,11,12, 13,14,15,16,18,...<|url_start해피여사친한친구와<|url_end|>'}


In [3]:
chat('나의 이름은 전재형이야')

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m대화기록:

질문: 나의 이름은 전재형이야
답변:[0m

[1m> Finished chain.[0m
 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20, 21,22, 23, 24, 25, 26, 27, 28, 30,...<|url_start수분하차한 뒤에도, 내기니는 계속해...<|url_start|>http://ww.<|url_end지저분하게, 그리고,...<|url_start...<|url_start...<|url_end....이인위적, 수분하차도,...<|url_start|>http://ww.<|url_end....도면과, 그리고, 그 한조각의,...<|url_start꾸준히...<|url_end...가끔씩은,...도면과,...그거에,...부나,...그리고,...그,...그,...로,..만드는,...고,...아,...나,...가,...와,...나,...가,...가,...나,...가,...만,...나,...나,...나,...나,...가,...나,...나,...가,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...가,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...나,...가,...나,...가,...나,...나,...나,...가,...나,...나,...나,...나,...가,...가,...가,...나,...나,...나,...가,...나,...가,...가,...나,...나,...나,...나,...나,...가,...가,...나,...가,...나,...나,...가,...나,...나,...가,...나,...가,...가,...

In [None]:
chat('니 이름은 뭐니?')
chat('내 이름은 뭘까??')