In [None]:
!pip install python-dotenv
!pip install -U torch transformers

DeepSeek Model

In [None]:
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load variables from a local .env file (if any) into the process
# environment, then read the Hugging Face token from it.
load_dotenv()
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Remote inference client bound to the DeepSeek-R1 model.
client = InferenceClient(
    model="deepseek-ai/DeepSeek-R1",
    token=HF_API_TOKEN,
)

# Smoke test: request a short completion and show it.
response = client.text_generation(
    "Explain what a transformer is in AI.",
    max_new_tokens=100,
)
print(response)


In [None]:
def inference(question: str, context: str, max_new_tokens: int = 250) -> str:
    """Answer ``question`` with the notebook's local chat model.

    If ``context`` is ``None`` or an empty string the model is asked the
    question directly; otherwise the prompt tells it to answer using the
    supplied context.

    This function reads three notebook-level globals: ``tokenizer``,
    ``model`` and ``device``.
    NOTE(review): none of them is created in the cells visible in this
    notebook -- confirm they are loaded before this cell is executed.

    Args:
        question: The question to answer.
        context: Retrieved text to ground the answer on, or ``None``/``""``
            for a plain question-only prompt.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to the previously hard-coded 250).

    Returns:
        The decoded text of the newly generated tokens only, with the
        prompt and special tokens removed.
    """
    # Local import: no visible notebook cell imports torch at top level.
    import torch

    if context is None or context == "":
        prompt = f"""Give a detailed answer to the following question. Question: {question}"""
    else:
        prompt = f"""Using the information contained in the context, give a detailed answer to the question.
            Context: {context}.
            Question: {question}"""

    chat = [{"role": "user", "content": prompt}]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )
    # The chat template already yields the full prompt text, so no extra
    # special tokens are added while encoding.
    inputs = tokenizer.encode(
        formatted_prompt, add_special_tokens=False, return_tensors="pt"
    ).to(device)

    # Greedy decoding (do_sample=False); gradients are not needed.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    # Drop the prompt by token count rather than by character count: the
    # decoded prompt is not guaranteed to match `formatted_prompt` exactly,
    # so slicing the decoded string could cut into the answer.
    prompt_length = inputs.shape[-1]
    generated_ids = outputs[0][prompt_length:]

    # skip_special_tokens removes the end-of-sequence marker for whichever
    # tokenizer is loaded, instead of a hard-coded "<eos>" literal.
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


# Baseline run without retrieval: an empty context makes `inference` use its
# question-only prompt.
question = "What is a transformer?"
print(inference(question=question, context=""))

Document loading and splitting

In [None]:
!pip install pypdf
!pip install tiktoken

In [None]:

import os
from pathlib import Path

# Import from langchain_community, matching the other LangChain imports in
# this notebook; the `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import PyPDFLoader

# Folder containing the source PDFs. Set the PDF_DIR environment variable to
# point somewhere else; the default keeps the original hard-coded location.
PDF_DIR = Path(os.environ.get("PDF_DIR", "/home/eversberg/Downloads"))

# File names of the documents to index (they look like arXiv IDs).
PDF_FILES = ["1706.03762.pdf", "2005.11401.pdf"]

loaders = [PyPDFLoader(str(PDF_DIR / pdf_name)) for pdf_name in PDF_FILES]

# Collect the documents returned by every loader into one flat list.
pages = []
for loader in loaders:
    pages.extend(loader.load())

In [None]:
from langchain.text_splitter import TokenTextSplitter

# Token-based splitter configured with chunk_size=128 and chunk_overlap=12.
text_splitter = TokenTextSplitter(
    chunk_size=128,
    chunk_overlap=12,
)

# Split every loaded page into chunks; `docs` feeds the vector store later.
docs = text_splitter.split_documents(pages)

In [None]:
# Inspect the text of the first chunk produced by the splitter.
print(docs[0].page_content)

Embeddings and vector store


In [None]:
!pip install -U sentence-transformers

In [None]:
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used for both the document chunks and the queries;
# model_kwargs pins it to the CPU.
encoder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
    model_kwargs={"device": "cpu"},
)


In [None]:
# Sanity check: embed a short query and the first chunk, then print the dot
# product of the two vectors.
# NOTE(review): the dot product is a cosine similarity only if the encoder
# returns unit-length vectors -- confirm for this model.
embeddings1 = encoder.embed_query("RAG")
embeddings2 = encoder.embed_query(docs[0].page_content)
print(np.dot(embeddings1, embeddings2))

In [None]:
!pip install faiss-cpu

In [None]:
# Import FAISS from langchain_community, the same package the
# DistanceStrategy import below already uses; the `langchain.vectorstores`
# path is deprecated.
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

# Embed every chunk in `docs` with `encoder` and build the vector store.
# NOTE(review): confirm that the installed FAISS wrapper honours DOT_PRODUCT;
# some versions only build an inner-product index for MAX_INNER_PRODUCT.
faiss_db = FAISS.from_documents(
    docs,
    encoder,
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
)


In [None]:
question = "What is a transformer?"

# Ask the vector store for the 5 chunks most similar to the question.
retrieved_docs = faiss_db.similarity_search(question, k=5)

# Concatenate the chunk texts, each terminated by a newline, into the context
# string that is passed to the model.
context = "".join(
    [chunk.page_content + "\n" for chunk in retrieved_docs]
)
print(context)

In [None]:
# No visible cell imports torch at notebook level, so import it here to keep
# this cell runnable on a fresh kernel.
import torch

# Release cached, unused GPU memory before generating with the retrieved
# context in the prompt.
torch.cuda.empty_cache()
print(inference(question=question, context=context))

In [None]:
# Show the metadata of every retrieved chunk so the answer can be traced back
# to the documents it was built from.
print("For this answer I used the following documents:")
for doc in retrieved_docs:
    print(doc.metadata)