<a href="https://colab.research.google.com/github/softmurata/colab_notebooks/blob/main/llm/langchain_usage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installation

In [None]:
# Install / upgrade the notebook's dependencies (-qq silences pip output, -U upgrades).
# NOTE(review): versions are not pinned, so a fresh run may pull newer releases.
!pip install -qq -U langchain tiktoken pypdf chromadb faiss-gpu
!pip install -qq -U transformers InstructorEmbedding sentence_transformers
!pip install -qq -U accelerate bitsandbytes xformers einops

Import Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

import os
import re
import textwrap

import langchain
from langchain.llms import HuggingFacePipeline

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import LlamaTokenizer, LlamaForCausalLM, pipeline

print(langchain.__version__)

### Multi-document retriever
from langchain.vectorstores import Chroma, FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA, VectorDBQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader


from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings

Load model and create pipeline

In [3]:
# On the free Google Colab tier, probably only 'llama' will run.
class CFG:
    """Notebook-wide configuration."""
    model_name = 'llama' # wizardlm, llama, bloom, falcon

def get_model(model = CFG.model_name):
    """Load the tokenizer and 8-bit causal LM for the requested model.

    Args:
        model: Name of the model to load: 'wizardlm', 'llama', 'bloom' or
            'falcon'. Defaults to ``CFG.model_name`` (bound once, when this
            function is defined).

    Returns:
        Tuple ``(tokenizer, model, max_len, task, T)`` where ``max_len`` is
        1024, ``task`` is "text-generation" and ``T`` is the temperature
        configured for the selected model.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # name -> (checkpoint, temperature, extra tokenizer kwargs, extra model kwargs)
    supported = {
        'wizardlm': ('TheBloke/wizardLM-7B-HF', 0, {}, {}),
        'llama': ('aleksickx/llama-7b-hf', 0.1,
                  # special tokens are passed explicitly for this checkpoint
                  {'unk_token': "<unk>", 'bos_token': "<s>", 'eos_token': "</s>"},
                  {}),
        'bloom': ('bigscience/bloom-7b1', 0, {}, {}),
        'falcon': ('h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2', 0,
                   {},
                   {'trust_remote_code': True}),
    }

    # Fail fast with a clear error instead of crashing later on unbound locals.
    if model not in supported:
        raise ValueError(
            "Not implemented model (tokenizer and backbone): {!r}. "
            "Supported models: {}".format(model, ', '.join(sorted(supported)))
        )

    checkpoint, T, tokenizer_kwargs, model_kwargs = supported[model]

    print('\nDownloading model: ', model, '\n\n')

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **tokenizer_kwargs)

    # Kept in a separate local so the `model` argument (the name) is not clobbered.
    backbone = AutoModelForCausalLM.from_pretrained(checkpoint,
                                                    load_in_8bit=True,
                                                    device_map='auto',
                                                    torch_dtype=torch.float16,
                                                    low_cpu_mem_usage=True,
                                                    **model_kwargs
                                                   )
    max_len = 1024
    task = "text-generation"

    return tokenizer, backbone, max_len, task, T

In [None]:
# Load the tokenizer/model selected in CFG together with its generation settings.
tokenizer, model, max_len, task, T = get_model(CFG.model_name)

In [5]:
# Generation settings handed to the transformers pipeline.
generation_settings = dict(
    max_length=max_len,
    temperature=T,
    top_p=0.95,
    repetition_penalty=1.15,
)

# Build the text-generation pipeline from the loaded model and tokenizer.
pipe = pipeline(task=task, model=model, tokenizer=tokenizer, **generation_settings)

# Wrap the pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

Load and clean documents

In [None]:
# Create a folder named "docs" and put some papers (PDF files) in it.
loader = DirectoryLoader('/content/docs',
                         glob="./*.pdf",
                         loader_cls=PyPDFLoader,
                         show_progress=True,
                         use_multithreading=True)

documents = loader.load()

# Clean documents: turn every run of spaces, tabs and newlines into one space.
# A single regex collapses runs of any length, which a fixed chain of
# `.replace()` calls cannot guarantee for very long runs.
for doc in documents:
    doc.page_content = re.sub(r'[ \t\n]+', ' ', doc.page_content)

In [None]:
# Preview the cleaned text of the second entry in `documents` (index 1).
documents[1].page_content

Splitter

In [9]:
# Split the loaded documents into chunks (chunk_size=500, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# Number of chunks produced.
len(texts)

103

Create vector database

In [None]:
# Directory name handed to Chroma as `persist_directory`.
persist_directory = 'arxiv-vectordb-chroma'

### download embeddings model
# NOTE(review): the wrapper class is HuggingFaceInstructEmbeddings, but the model
# name is a sentence-transformers checkpoint (an instructor model is only mentioned
# in the trailing comment) — confirm this pairing is intended.
instruct_ml_name = "sentence-transformers/all-MiniLM-L6-v2"  # hkunlp/instructor-xl, cuda
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name=instruct_ml_name,
                                                      model_kwargs={"device": "cpu"})

### create embeddings and DB
# NOTE(review): collection name 'hp_books' does not match the 'arxiv' persist
# directory — presumably carried over from another notebook; confirm.
vectordb = Chroma.from_documents(documents=texts,
                                 embedding=instructor_embeddings,
                                 persist_directory=persist_directory,
                                 collection_name='hp_books')



### persist Chroma database
vectordb.persist()

QA Retriever

In [12]:
# `search_type` is an argument of `as_retriever` itself. `search_kwargs` is
# forwarded to the vector store's search call, so it should only carry search
# parameters such as `k` (number of chunks to retrieve).
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# "stuff" chain: the retrieved chunks are placed into a single prompt for the LLM.
# Source documents are returned so the answer can be attributed.
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                       chain_type="stuff",
                                       retriever=retriever,
                                       return_source_documents=True,
                                       verbose=False)

Post process output

In [13]:
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``,
    and the wrapped pieces are joined back together with newlines.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print the wrapped answer, then the source of every retrieved document.

    Reads ``llm_response['result']`` and ``llm_response['source_documents']``;
    nothing is returned.
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])
    print(answer)
    print('\n\nSources:')
    for doc in llm_response["source_documents"]:
        print(doc.metadata['source'])
def llm_ans(query):
    """Run ``query`` through ``qa_chain`` and print the answer with its sources.

    Returns whatever ``process_llm_response`` returns; that function only
    prints, so the result is ``None``.
    """
    return process_llm_response(qa_chain(query))

QA RUN

In [None]:
# Ask a question about the indexed papers; the answer and its sources are printed.
query = "Why ShapeNet is better than other AI models?"
llm_ans(query)