In [11]:
import os
from dotenv import load_dotenv
os.environ['ASTRA_DB_API_ENDPOINT'] =  os.getenv('ASTRA_DB_API_ENDPOINT')
os.environ['ASTRA_DB_APPLICATION_TOKEN'] = os.getenv('ASTRA_DB_APPLICATION_TOKEN')
load_dotenv()

True

# Create RAG Pipeline

### Emedding Model and Vector Store

In [16]:
from langchain_astradb import AstraDBVectorStore
from langchain_community.embeddings import OllamaEmbeddings
from langchain.vectorstores.cassandra import Cassandra

# Configure your embedding model and vectore store
embeddings = OllamaEmbeddings(model='llama3.2:1b')
vstore = AstraDBVectorStore(
    collection_name='rag_test',
    embedding=embeddings,
    token=os.getenv('ASTRA_DB_APPLICATION_TOKEN'),
    api_endpoint=os.getenv('ASTRA_DB_API_ENDPOINT')
)
print('Astra vector store configured')

Astra vector store configured


In [17]:
from datasets import load_dataset

# Load a sample dataset
philo_dataset = load_dataset('datastax/philosopher-quotes')['train']
print('An example entry : ')
philo_dataset[:16]

Downloading readme: 100%|██████████| 574/574 [00:00<?, ?B/s] 
Downloading data: 100%|██████████| 67.6k/67.6k [00:00<00:00, 110kB/s]
Generating train split: 100%|██████████| 450/450 [00:00<00:00, 3883.28 examples/s]


An example entry : 


{'author': ['aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle',
  'aristotle'],
 'quote': ["True happiness comes from gaining insight and growing into your best possible self. Otherwise all you're having is immediate gratification pleasure, which is fleeting and doesn't grow you as a person.",
  'The roots of education are bitter, but the fruit is sweet.',
  'Before you heal the body you must first heal the mind',
  'The proof that you know something is that you are able to teach it',
  'Those who are not angry at the things they should be angry at are thought to be fools, and so are those who are not angry in the right way, at the right time, or with the right persons.',
  'Whatever we learn to do, we learn by actually doing it; men come to be builders, for instance, by building, and harp players by playing the harp.

In [18]:
from langchain.schema import Document

# Contructs a set of documents form your data.Documents can be used as inputs to your vectore store/
docs = []
for entry in philo_dataset:
    metadata = {"author" : entry['author']}
    if entry['tags']:
        # Add metadata tags to the metadata dictionary
        for tags in entry['tags'].split(';'):
            metadata[tags] = 'y'
    # Create a Langchain document with the quote and metadata tags
    doc = Document(page_content=entry['quote'],metadata=metadata)
    docs.append(doc)

In [23]:
docs[:10]

[Document(metadata={'author': 'aristotle', 'knowledge': 'y'}, page_content="True happiness comes from gaining insight and growing into your best possible self. Otherwise all you're having is immediate gratification pleasure, which is fleeting and doesn't grow you as a person."),
 Document(metadata={'author': 'aristotle', 'education': 'y', 'knowledge': 'y'}, page_content='The roots of education are bitter, but the fruit is sweet.'),
 Document(metadata={'author': 'aristotle', 'ethics': 'y'}, page_content='Before you heal the body you must first heal the mind'),
 Document(metadata={'author': 'aristotle', 'education': 'y', 'knowledge': 'y'}, page_content='The proof that you know something is that you are able to teach it'),
 Document(metadata={'author': 'aristotle'}, page_content='Those who are not angry at the things they should be angry at are thought to be fools, and so are those who are not angry in the right way, at the right time, or with the right persons.'),
 Document(metadata={'au

In [34]:
# Create embedding by inserting your documents into the vectore store.
inserted_ids = vstore.add_documents(docs)
print(f'\nInserted {len(inserted_ids)} Documents.')


Inserted 450 Documents.


In [36]:
# Check your collection to verify that documents are embedded
print(vstore.astra_db.collection('rag_test').find())

{'data': {'documents': [{'_id': '880c3ab822144892b174d6dadc0a4629', 'content': 'To endure life remains, when all is said, the first duty of all living being Illusion can have no value if it makes this more difficult for us.', 'metadata': {'author': 'freud'}}, {'_id': 'b51d9d13d19a43839589b41fee608970', 'content': 'At bottom God is nothing more than an exalted father.', 'metadata': {'author': 'freud', 'religion': 'y'}}, {'_id': '8e5ac58fd1704ac6b57486ffab982294', 'content': 'All men by nature desire knowledge.', 'metadata': {'author': 'aristotle', 'knowledge': 'y', 'education': 'y'}}, {'_id': '41c20f6819fd44248dd865910e2b709b', 'content': 'Analogies, it is true, decide nothing, but they can make one feel more at home.', 'metadata': {'author': 'freud'}}, {'_id': '4b7916814a6a43cb92a4e04dbacefb94', 'content': 'By a lie, a man... annihilates his dignity as a man.', 'metadata': {'author': 'kant', 'politics': 'y', 'ethics': 'y'}}, {'_id': 'f04cb4297b5f4bc8a8c66636cb877943', 'content': 'Smoot

### Basic Retrival
Retrieve context from your vectore database, and pass it to the model with a prompt

In [48]:
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

retriever = vstore.as_retriever(search_kwargs={'k':3})

prompt_template = """
Answer the question based only on  the supplied context.
Context : {context}
Question : {question}
Your answer :
"""

prompt = ChatPromptTemplate.from_template(prompt_template)
model = ChatOllama(model="llama3.2:1b")

chain = (
    {'context' : retriever, "question"  : RunnablePassthrough()}
    |prompt
    |model
    |StrOutputParser()
)

# Invoke the chain and inspect the context
question = "In the given context, what is philosophical?"

# Retrieve the context separately for inspection
retrieved_context = retriever.get_relevant_documents(question)

# Print the retrieved context
print("Retrieved Context:")
for doc in retrieved_context:
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

# Pass the question to the chain
result = chain.invoke(question)
print("\nModel's Response:")
print(result)

Retrieved Context:
Content: Philosophy stands in need of a science which shall determine the possibility, principles, and extent of human knowledge  priori.
Metadata: {'author': 'kant', 'knowledge': 'y'}
Content: Philosophy is by its nature something esoteric, neither made for the mob nor capable of being prepared for the mob.
Metadata: {'author': 'hegel'}
Content: A moral system valid for all is basically immoral.
Metadata: {'author': 'nietzsche'}

Model's Response:
In the given context, "philosophy" refers to a broad and abstract field of study that deals with fundamental questions about existence, knowledge, values, and reality. It encompasses various disciplines such as metaphysics, epistemology, ethics, logic, and aesthetics, which explore complex ideas, concepts, and principles.

Based on the provided page content from different philosophers (Kant, Hegel, and Nietzsche), it is clear that philosophy is a subject that examines the nature of reality, knowledge, morality, and human e