- https://huggingface.co/docs/transformers/model_doc/gpt_neox_japanese
- https://tech.nri-net.com/entry/tried_langchain_to_extend_chatgpt
- https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
- https://zenn.dev/umi_mori/books/prompt-engineer/viewer/chatgpt_webapp_langchain_streamlit
- https://zenn.dev/umi_mori/books/prompt-engineer/viewer/langchain_memory
- https://ainow.ai/2022/08/30/267101/

In [None]:
!pip install transformers==4.30.2 sentencepiece==0.1.99 accelerate==0.20.3 langchain==0.0.226 openai==0.27.8 python-dotenv==1.0.0

In [None]:
!nvidia-smi

In [None]:
from dotenv import load_dotenv
import torch

from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain import PromptTemplate, LLMChain, HuggingFacePipeline
from langchain.schema import HumanMessage
from langchain.schema import AIMessage


# Model name that get_llm() routes to the OpenAI chat model instead of a
# Hugging Face pipeline.
chatgpt_id = "gpt-3.5-turbo"

# Candidate model ids (presumably English-language models, going by the list
# name). Commented-out entries are alternatives kept for reference.
en_list = [
    "bigscience/bloom-560m",
    "bigscience/bloom-1b7",
    "bigscience/bloomz-560m",
    "bigscience/bloomz-1b7",
    # "gpt2",
    "gpt2-medium",
    "gpt2-large",
    "gpt2-xl",
    # "facebook/opt-125m",
    "facebook/opt-350m",
    "facebook/opt-1.3b",
    # "cerebras/Cerebras-GPT-111M",
    "cerebras/Cerebras-GPT-256M",
    "cerebras/Cerebras-GPT-590M",
    "cerebras/Cerebras-GPT-1.3B",
    "vicgalle/gpt2-alpaca",
]

# Candidate model ids (presumably Japanese-language models, going by the list
# name). Commented-out entries are alternatives kept for reference.
ja_list = [
    # "cyberagent/open-calm-small",
    "cyberagent/open-calm-medium",
    "cyberagent/open-calm-large",
    "cyberagent/open-calm-1b",
    # "rinna/japanese-gpt2-xsmall",
    # "rinna/japanese-gpt2-small",
    "rinna/japanese-gpt2-medium",
    # "rinna/japanese-gpt-1b",
    "rinna/japanese-gpt-neox-small",
    "abeja/gpt2-large-japanese",
    "abeja/gpt-neox-japanese-2.7b",
]


def get_llm(model_id, model_kwargs, pipeline_kwargs):
    """Create the LangChain LLM wrapper for ``model_id``.

    Args:
        model_id: When equal to ``chatgpt_id`` the OpenAI chat model is
            used; any other value is handed to
            ``HuggingFacePipeline.from_model_id`` as the model id.
        model_kwargs: Forwarded as ``model_kwargs`` on the Hugging Face
            branch; not used for the OpenAI chat model.
        pipeline_kwargs: Forwarded as ``pipeline_kwargs`` on the Hugging
            Face branch; not used for the OpenAI chat model.

    Returns:
        A ``ChatOpenAI`` instance, or the object built by
        ``HuggingFacePipeline.from_model_id`` for the "text-generation" task.
    """
    if model_id == chatgpt_id:
        # Load variables from a .env file before building the client
        # (presumably the OpenAI API key lives there -- confirm).
        load_dotenv()
        return ChatOpenAI(model_name=chatgpt_id)

    # Device index 0 when CUDA is available, otherwise -1.
    device_index = 0 if torch.cuda.is_available() else -1
    return HuggingFacePipeline.from_model_id(
        model_id,
        task="text-generation",
        model_kwargs=model_kwargs,
        pipeline_kwargs=pipeline_kwargs,
        device=device_index,
        verbose=True,
    )

def get_prompt(template):
    """Return the ``PromptTemplate`` built from the ``template`` string."""
    return PromptTemplate.from_template(template)

In [None]:
# Show whether PyTorch can see a CUDA device; get_llm() uses the same check
# to choose the device index.
torch.cuda.is_available()

In [None]:
# Model to load and the prompt layout: the memory fills {chat_history} and
# the user's message fills {input}.
model_id = "bigscience/bloom-1b7"
template = '''{chat_history}
Human: {input}
AI: Let's think step by step. '''

# Settings handed to get_llm() as its model_kwargs argument.
model_kwargs = {
    "min_length": 20,
    "max_length": 100,
    "repetition_penalty": 1.01,
    "do_sample": True,
    "top_p": 0.95,
    "top_k": 50,
    "temperature": 0.1,
}

# Settings handed to get_llm() as its pipeline_kwargs argument.
pipeline_kwargs = {
    "min_new_tokens": 5,
    "max_new_tokens": 50,
}

llm = get_llm(model_id, model_kwargs, pipeline_kwargs)
prompt = get_prompt(template)

In [None]:
# Window memory (k=5) whose contents are exposed to the prompt under the
# "chat_history" key.
memory = ConversationBufferWindowMemory(
    # return_messages=True,
    memory_key="chat_history",
    k=5,
)

# Chain tying the LLM, the prompt template and the memory together.
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
    verbose=True,
)

# Two consecutive turns, run with gradient tracking disabled.
with torch.no_grad():
    chain("What is AI?")
    chain("Tell me some more details.")

# Inspect what the memory holds after the two turns.
print(memory.load_memory_variables({})["chat_history"])
print(memory.buffer)

In [None]:
# Interactive loop: keep reading lines until the user types "exit"; every
# other line is sent through the chain and the reply is printed.
for in_text in iter(input, "exit"):
    with torch.no_grad():
        output = chain.predict(input=in_text)
    print(output)