In [None]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch==2.0.1 --progress-bar off
!pip install -qqq transformers==4.33.2 --progress-bar off
!pip install -qqq langchain==0.0.299 --progress-bar off
!pip install -qqq chromadb==0.4.10 --progress-bar off
!pip install -qqq xformers==0.0.21 --progress-bar off
!pip install -qqq sentence_transformers==2.2.2 --progress-bar off
!pip install -qqq tokenizers==0.14.0 --progress-bar off
!pip install -qqq optimum==1.13.1 --progress-bar off
!pip install -qqq auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ --progress-bar off
!pip install -qqq unstructured==0.10.16 --progress-bar off

In [None]:
!pip install --upgrade torch transformers accelerate bitsandbytes

In [None]:
pip install --upgrade sentence-transformers huggingface-hub transformers langchain

In [None]:
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from transformers import BitsAndBytesConfig
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain import PromptTemplate
import torch


In [None]:

MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})


In [6]:
loader = CSVLoader("train.csv")
docs = loader.load()

len(docs)

19579

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(docs)
len(texts)

19611

In [None]:

embeddings = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

query_result = embeddings.embed_query(texts[0].page_content)
print(len(query_result))

In [10]:
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
# results = db.similarity_search("proof-of-work majority decision making", k=2)
# print(results[0].page_content)

In [11]:
template = """
Use the context below to answer the question. Be extremely precise and don't add any unrelated information.
Context: {context}
Question: {question}
Answer:
"""

In [12]:
prompt = PromptTemplate(template=template, input_variables=["context", "question"])


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

result = qa_chain(
    "According to the given data, tell me who said this 'It never once occurred to me that the fumbling might be a mere mistake.'."
)
print(result["result"].strip())

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


The author of the text is H.P. Lovecraft (HPL).


In [13]:
result = qa_chain(
    "According to the given data, tell me who said this 'The rigging was found to be ill fitted, and greatly strained; and on the third day of the blow, about five in the afternoon, our mizzen mast, in a heavy lurch to windward, went by the board.'"
)
print(result["result"].strip())

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


EAP (Edward Andrews Peake) is the author of the text.


In [14]:
result = qa_chain(
    "According to the given context, tell me who said this 'One fine February day, when the sun had reassumed some of its genial power, I walked in the forest with my family.'"
)
print(result["result"].strip())

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


The author of the text is MWS (Mary Shelley).
