## Load the packages

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Define the global constants

Note that this runs completely offline.
1. We use the `Alibaba-NLP/gte-large-en-v1.5` model as the embedding model from HuggingFace.
2. We use the `premai-io/prem-1B-chat` for the chat model instead of OpenAI (GPT 3.5 or GPT 4).

In [9]:
DEVICE = 'cuda'
EMBED_MODEL_NAME = "Alibaba-NLP/gte-large-en-v1.5"
CHAT_MODEL_NAME = 'premai-io/prem-1B-chat'

CHUNK_SIZE = 400
CHUNK_OVERLAP = 40

RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the users's question. If you can't find the answer in the context, just say that you don't know. Use three sentences maximum and keep the answer concise.

Context: ```
{context}
```"""

## Embed the docs

#### Load the docs

In [3]:
docs = []

for f in tqdm(list(Path('docs').iterdir())):
    if f.suffix == '.pdf':
        loader = PyPDFLoader(f)
        docs.extend(loader.load())

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:37<00:00,  1.89s/it]


#### Split the docs

In [4]:
tokenizer = AutoTokenizer.from_pretrained(CHAT_MODEL_NAME)
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
splits = text_splitter.split_documents(docs)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


#### Create the vectorstore

In [5]:
model_kwargs = {'device': DEVICE, 'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': False}
embedding_gen = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_gen)



In [10]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])


def generate_result(model, tokenizer, messages, terminators):
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Generate
    inputs = tokenizer(prompt, return_attention_mask=False, return_tensors="pt", add_special_tokens=False)
    input_ids = inputs['input_ids']
    input_ids = input_ids.to(model.device)
    res = model.generate(input_ids=input_ids, max_new_tokens=400, pad_token_id=tokenizer.pad_token_id, eos_token_id=terminators)
    generated_text = tokenizer.decode(res[0][input_ids.shape[1]:], skip_special_tokens=True).strip()
    return generated_text


def ask_question(ques, vectorstore, model, tokenizer, terminators):
    docs = vectorstore.similarity_search(ques, k=7)
    relevant_context = format_docs(docs)

    prompt = ChatPromptTemplate.from_messages(
        [("system", RAG_SYSTEM_PROMPT), ("user", "{input}")]
    )
    messages = prompt.invoke({'context': relevant_context, 'input': 'hi'}).messages
    messages = [
        {'role': 'system', 'content': messages[0].content},
        {'role': 'user', 'content': messages[1].content},
    ]
    res = generate_result(model, tokenizer, messages, terminators)
    return {'response': res, 'relevant_docs': [d.page_content for d in docs]}

## Chat

#### Load the `premai-io/prem-1B-chat` model from Huggingface.

In [7]:
# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained('premai-io/prem-1B-chat', torch_dtype=torch.bfloat16)
model = model.to(DEVICE)

# Setup terminators
terminators = [tokenizer.eos_token_id, tokenizer.encode('<|eot_id|>', add_special_tokens=False)[0]]

#### Ask the questions

In [13]:
res = ask_question('What is the head of union?', vectorstore, model, tokenizer, terminators)
res['response']

'The main establishment of a data intermediation services provider in the Union should be the place of its central administration in the Union.'

In [None]:
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

In [None]:
https://github.com/premAI-io/

In [None]:
git clone https://rohitgr7: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@github.com/premAI-io/genai-zurich-workshop.git