In [1]:
import dspy
import fitz
import faiss
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import login

In [2]:
def extract_text_from_pdf(pdf_path):
    text = ""
    with fitz.open(pdf_path) as pdf_file:
        for page_num in range(len(pdf_file)):
            page = pdf_file.load_page(page_num)
            text += page.get_text()

    return text

In [3]:
def chunk_text(text, chunk_size=512, overlap=30):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap
    return chunks

In [4]:
text1 = extract_text_from_pdf("1.pdf")
text2 = extract_text_from_pdf("2.pdf")
text3 = extract_text_from_pdf("3.pdf")

corpus = text1 + text2 + text3
documents = chunk_text(corpus)

In [5]:
# Initialize the tokenizer and model
model_name = 'sentence-transformers/bert-base-nli-mean-tokens'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function to generate embeddings
def get_embeddings(texts, tokenizer, model):
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
    return embeddings

# Create embeddings for all documents
doc_embeddings = get_embeddings(documents, tokenizer, model)

# Initialize and populate a FAISS index
dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(doc_embeddings)

In [6]:
def retrieval_model(query, k=5):
    query_embedding = get_embeddings([query], tokenizer, model)
    _, indices = index.search(query_embedding, k)
    # Dynamically create objects with a 'long_text' attribute
    retrieved_passages = [type('Passage', (object,), {'long_text': documents[idx]})() for idx in indices[0]]
    return retrieved_passages

In [7]:
#turbo = dspy.OpenAI(model='gpt-3.5-turbo')

hf_token = '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'
login(token=hf_token)
llm = dspy.HFModel(model = 'google/gemma-2b')

dspy.settings.configure(lm=llm, rm=retrieval_model)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/joudy/.cache/huggingface/token
Login successful


  return torch._C._cuda_getDeviceCount() > 0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [9]:
example_question1 = "What is Martin Heidegger best known for in philosophy?"
example_answer1 = "You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be."

example_question2 = "How did Martin Heidegger's political affiliations impact his reputation?"
example_answer2 = "Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy."

example_question3 = "Can you name a major influence of Martin Heidegger on other philosophers?"
example_answer3 = "Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence."

In [10]:
# Small training set with question and answer pairs
trainset = [dspy.Example(question=example_question1, 
                         answer=example_answer1).with_inputs('question'),
            dspy.Example(question=example_question2, 
                         answer=example_answer2).with_inputs('question'),
           dspy.Example(question=example_question3, 
                         answer=example_answer3).with_inputs('question'),]

In [11]:
from dspy.teleprompt import BootstrapFewShot

# The teleprompter will bootstrap missing labels: reasoning chains and retrieval contexts
teleprompter = BootstrapFewShot(metric=dspy.evaluate.answer_exact_match)
compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

100%|██████████| 3/3 [06:35<00:00, 131.92s/it]

Bootstrapped 0 full traces after 3 examples in round 0.





In [12]:
example_query = "what is the most important thing about matrin heidegger?"

In [13]:
response = compiled_rag(example_query)

In [14]:
response

Prediction(
    context=['ld.\nAccording to scholar Taylor Carman, traditional ontology asks\n"Why is there anything?", whereas Heidegger\'s fundamental\nontology asks "What does it mean for something to be?"\nHeidegger\'s ontology "is fundamental relative to traditional\nontology in that it concerns what any understanding of entities\nnecessarily presupposes, namely, our understanding of that by\nvirtue of which entities are entities".[45]\nThis line of inquiry is "central to Heidegger\'s philosophy". He\naccuses the Western philosophical trad', 'er distinguished Dasein from everyday consciousness in order to emphasize the critical importance\n"Being" has for the understanding and interpretation of the world, and so on.\n"This entity which each of us is himself…we shall denote by the term \'Dasein\'" (Heidegger,\ntrans. 1927/1962, p.27).[5]\n"[Dasein is] that entity which in its Being has this very Being as an issue…" (Heidegger, trans.\n1927/1962, p.68).[5]\nHeidegger sought to use t

In [15]:
print(response.answer)

Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence.

Question: How did Martin Heidegger's political affiliations impact his reputation?
Answer: Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy.

Question: What is Martin Heidegger best known for in philosophy?
Answer: You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «ld.
According to scholar Taylor Carman, traditional ontology asks
"Why is there anything?", whereas Heidegger's fund

In [18]:
example_query_2 = "why martin heidegger chose to live outside the city?"

In [19]:
response_2 = compiled_rag(example_query_2)



In [20]:
print(response_2.answer)

Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence.

Question: How did Martin Heidegger's political affiliations impact his reputation?
Answer: Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy.

Question: What is Martin Heidegger best known for in philosophy?
Answer: You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «vement with the world as mediated through the projects of the self.[2]
Heidegger considered that language, everyday 

In [21]:
example_query_3 = "where martin heidegger chose to live in his later years and why?"

In [22]:
response_3 = compiled_rag(example_query_3)



In [23]:
print(response_3.answer)

Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence.

Question: How did Martin Heidegger's political affiliations impact his reputation?
Answer: Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy.

Question: What is Martin Heidegger best known for in philosophy?
Answer: You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «ytimes.com/1976/05/27/archives/martin-heidegger-a-philosopher
-who-affected-many-fields-dies-martin.html). The New Y

In [24]:
example_query_4 = "what is the relation of 'Todnauberg' to martin heidegger?"

In [25]:
response_4 = compiled_rag(example_query_4)



In [26]:
print(response_4.answer)

Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence.

Question: How did Martin Heidegger's political affiliations impact his reputation?
Answer: Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy.

Question: What is Martin Heidegger best known for in philosophy?
Answer: You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «from the original on 13 November 2022. Retrieved 13 November
2022.
Gross, Daniel M.; Kemmann, Ansgar, eds. (2005). H

In [27]:
example_query_5 = "what is the most important concept which martin heidegger introduced in his philosophy?"

In [28]:
response_5 = compiled_rag(example_query_5)



In [29]:
print(response_5.answer)

Heidegger significantly influenced existentialists and phenomenologists, notably Jean-Paul Sartre and Maurice Merleau-Ponty, with his exploration of being and existence.

Question: How did Martin Heidegger's political affiliations impact his reputation?
Answer: Heidegger's association with the Nazi Party during the 1930s has led to ongoing debates about the relationship between his philosophy and political views, impacting his legacy.

Question: What is Martin Heidegger best known for in philosophy?
Answer: You might be interested in Heidegger's concept of 'Being and Time,' a foundational text in existentialism and phenomenology, focusing on the question of what it means to be.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «ld.
According to scholar Taylor Carman, traditional ontology asks
"Why is there anything?", whereas Heidegger's fund