# Imports, Set up Asyncio

In [1]:
import nest_asyncio
from dotenv import load_dotenv, find_dotenv
import os

# Find the .env file and load the variables it defines into the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

In [2]:
import nest_asyncio
import qdrant_client

from llama_index.core import Settings
from llama_index.core import PromptTemplate
from llama_index.llms.ollama import Ollama
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore

from IPython.display import Markdown, display

# Connecting to Qdrant
This section establishes a connection to the Qdrant vector database.

In [3]:
import qdrant_client

# Name reserved for the Qdrant collection.
# NOTE(review): the indexing cell further down passes the literal
# "document_chat" instead of this variable — confirm which name is intended.
collection_name = "chat_with_docs_v2"

# Client for the Qdrant server expected on localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Instrumentation Setup
This section sets up instrumentation for tracing and monitoring.

In [4]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

# Create the tracer provider; the recorded output shows it is also installed
# as the global OpenTelemetry default.
tracer_provider = register()

# Attach the LlamaIndex instrumentor to that provider.
llama_index_instrumentor = LlamaIndexInstrumentor()
llama_index_instrumentor.instrument(tracer_provider=tracer_provider)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



# Define the LLM, the embedding model and re-ranker


In [32]:

# Example for Groq (commented out for now)
# from llama_index.llms.groq import Groq
# llm = Groq(model="gemma2-9b-it", request_timeout=120.0)

import os
import warnings

from google.auth import default
from llama_index.llms.vertex import Vertex
from llama_index.core.settings import Settings
from llama_index.core.base.llms.types import ChatMessage

# Vertex is slated to be replaced by GoogleGenAI; hide its DeprecationWarnings
# until that migration happens. Note that this filter applies to every
# DeprecationWarning raised in the kernel, not only Vertex's.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Google Cloud default credentials (the second return value is discarded).
credentials, _ = default()

# Output-token budget for each completion. The evaluation loop recorded in this
# notebook aborted with `ResponseValidationError ... Finish reason: 2`; in the
# Vertex AI FinishReason enum 2 is MAX_TOKENS, i.e. the answer was cut off at
# the token limit. Setting the cap explicitly, above the integration's default
# (512 at the time of writing — verify against the installed version), lets the
# step-by-step answers run to completion.
LLM_MAX_OUTPUT_TOKENS = 2048

# Initialize Vertex LLM
llm = Vertex(
    model="gemini-2.0-flash",
    temperature=0.7,
    max_tokens=LLM_MAX_OUTPUT_TOKENS,
    credentials=credentials,
    project=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
    location=os.getenv("GOOGLE_CLOUD_PROJECT_REGION"),
)

# NOTE(review): workaround that pokes a private attribute to switch off the
# SDK's response validation. It depends on library internals — confirm it still
# has an effect, and prefer fixing truncation via `max_tokens` above.
llm._chat_client._response_validation = False

# Set as default LLM for LlamaIndex
Settings.llm = llm

# Smoke test: one chat round-trip to confirm credentials and model access.
response = llm.chat([
    ChatMessage(role="user", content="Hello there Gemini! How are you doing today?")
])
print(response)

# Default embedding model used by LlamaIndex for indexing and querying.
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-large-en-v1.5")
Settings.embed_model = embed_model

# Re-ranker that keeps the 2 best-scoring nodes.
rerank = SentenceTransformerRerank(model="BAAI/bge-reranker-base", top_n=2)

I0000 00:00:1747497762.391316   19758 chttp2_transport.cc:1201] ipv6:%5B::1%5D:4317: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: ping_timeout {created_time:"2025-05-17T21:32:42.388851021+05:30", http2_error:11, grpc_status:14}


assistant: Hello! I am doing well, thank you for asking. I'm ready and eager to assist you. How can I help you today?



: 

# Read the documents
This section loads documents from the specified directory.

In [6]:
from llama_index.core import SimpleDirectoryReader

# Directory holding the source essays.
input_dir_path = './paul_graham'

# Read every .txt file under the directory, including sub-directories.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".txt"],
)
docs = loader.load_data()

# Checking Loaded Documents
This section checks the type and number of loaded documents.

In [7]:
# Sanity check: show the container type and the number of loaded documents.
type(docs), len(docs)

(list, 2)

# Set up the Qdrant vector database
This section creates a vector store index backed by Qdrant.

In [8]:
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Collection that stores this notebook's document embeddings.
QDRANT_COLLECTION = "document_chat"

vector_store = QdrantVectorStore(client=client,
                                 collection_name=QDRANT_COLLECTION)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Qdrant keeps its points across kernel restarts, and each from_documents()
# call embeds and inserts the chunks again. Re-running this cell would
# therefore duplicate every chunk and let the retriever return the same
# passage several times. Reuse the collection when it already holds data.
# To force a rebuild (new documents or a new embedding model), delete the
# collection first, e.g. client.delete_collection(QDRANT_COLLECTION).
collection_is_populated = (
    client.collection_exists(QDRANT_COLLECTION)
    and client.count(collection_name=QDRANT_COLLECTION).count > 0
)

if collection_is_populated:
    # Attach to the stored vectors without re-embedding the documents.
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
else:
    # First run: chunk, embed and upload the documents.
    index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)

# Define the query engine and prompt template

In [9]:
# Answer-synthesis prompt; {context_str} and {query_str} are the placeholders
# for the retrieved context and the user query.
template = """Context information is below.
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner. Incase 
              you don't know the answer say 'I don't know!'.
              
              Query: {query_str}
              
              Answer:"""
qa_prompt_tmpl = PromptTemplate(template)

# Fetch 4 candidates per query and hand them to the re-ranker.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=4,
)

# Swap the response synthesizer's text-QA prompt for the custom one.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

I0000 00:00:1747495727.964148   19758 chttp2_transport.cc:1201] ipv6:%5B::1%5D:4317: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: ping_timeout {created_time:"2025-05-17T20:58:47.961899007+05:30", http2_error:11, grpc_status:14}


# Query the document

In [10]:
# Sample question; the literal keeps its original line breaks and indentation.
sample_question = """How did the structure of funding startups 
                                 in batches contribute to the success and 
                                 growth of the Y Combinator program and the
                                 startups involved?"""
response = query_engine.query(sample_question)

# Render the answer as Markdown in the cell output.
display(Markdown(str(response)))

Here's a breakdown of how funding startups in batches contributed to Y Combinator's success and the startups' growth:

1.  **Convenience for YC:** Batch funding allowed YC to efficiently support multiple startups simultaneously. They could implement initiatives and assistance programs for a large group, making their efforts more impactful.

2.  **Reduced Isolation for Startups:** Being part of a batch provided startups with a network of peers facing similar challenges. This fostered collaboration, knowledge sharing, and mutual support.

3.  **Community Building:** As YC grew, the alumni network became a strong community, dedicated to helping current batches and each other. This created a valuable resource for mentorship and guidance.

4.  **Internal Customer Base:** Startups within a batch often became each other's initial customers, creating a mini-economy within YC. This helped startups gain traction and early revenue.


____

# RAGAS

### Load the knowledge base

In [12]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1024 and chunk_overlap=20.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1024)

# NOTE(review): `loader` rebinds the name used earlier for the LlamaIndex
# SimpleDirectoryReader.
loader = DirectoryLoader("./paul_graham/")

# Load the files and split them into chunks in one step.
documents = loader.load_and_split(text_splitter)

In [14]:
# Inspect the first chunk (metadata and page content) as a JSON-style dict.
documents[0].to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'document', 'Document'],
 'kwargs': {'metadata': {'source': 'paul_graham/how_to_do_great_things.txt'},
  'page_content': 'How to Do Great Work\n\nJuly 2023\n\nIf you collected lists of techniques for doing great work in a lot of different fields, what would the intersection look like? I decided to find out by making it.\n\nPartly my goal was to create a guide that could be used by someone working in any field. But I was also curious about the shape of the intersection. And one thing this exercise shows is that it does have a definite shape; it\'s not just a point labelled "work hard."\n\nThe following recipe assumes you\'re very ambitious.\n\nThe first step is to decide what to work on. The work you choose needs to have three qualities: it has to be something you have a natural aptitude for, that you have a deep interest in, and that offers scope to do great work.\n\nIn practice you don\'t have to worry much about the thi

In [None]:
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

def _make_vertex_llm(model_name, temperature=0.7, max_tokens=2048):
    """Build a Vertex LLM that shares this notebook's Google Cloud settings.

    Args:
        model_name: Vertex AI model identifier, e.g. "gemini-2.0-flash".
        temperature: Sampling temperature passed to the model.
        max_tokens: Cap on generated tokens per response. Set explicitly
            because a run recorded in this notebook failed with
            "Finish reason: 2" (MAX_TOKENS), i.e. a truncated response.

    Returns:
        A configured ``Vertex`` instance using the credentials, project and
        region already resolved by the notebook.
    """
    return Vertex(
        model=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        credentials=credentials,
        project=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
        location=os.getenv("GOOGLE_CLOUD_PROJECT_REGION"),
    )


# NOTE(review): both objects are LlamaIndex `Vertex` LLMs, while the cells that
# consume them call `TestsetGenerator.from_langchain` and `ragas.evaluate`
# together with LangChain embeddings — confirm the installed RAGAS version
# accepts LlamaIndex LLMs there, or wrap/replace them accordingly.
generator_llm = _make_vertex_llm("gemini-2.0-flash")

critic_llm = _make_vertex_llm("gemini-2.5-flash-preview-04-17")

# Embedding model served by a local Ollama instance.
ollama_emb = OllamaEmbeddings(
    model="nomic-embed-text",
)

Note: You might not be able to run the code below on your local machine. If a progress bar appears, your setup is correct. In the meantime, you can use the CSV file shared below, which was generated from my own run.

In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# NOTE(review): `from_langchain` is given the LLM objects built in the previous
# cell, which come from the LlamaIndex `Vertex` class — confirm compatibility.
generator = TestsetGenerator.from_langchain(
    embeddings=ollama_emb,
    critic_llm=critic_llm,
    generator_llm=generator_llm,
)

# Mix of question types: half simple, a quarter reasoning, a quarter multi-context.
distribution = {
    simple: 0.5,
    reasoning: 0.25,
    multi_context: 0.25,
}

# Generate 10 test samples from the chunked documents without raising on failures.
testset = generator.generate_with_langchain_docs(
    documents,
    distributions=distribution,
    test_size=10,
    raise_exceptions=False,
)

## Evaluate the RAG pipeline

In [27]:
import pandas as pd

# Read the pre-generated test set, then drop every row with a missing value.
raw_test_df = pd.read_csv('eval/test_data_paul_graham.csv')
test_df = raw_test_df.dropna()
test_df

Unnamed: 0.1,Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,0,How did the shift to publishing on the web cha...,"[""Wow, I thought, there's an audience. If I wr...",The shift to publishing on the web changed the...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
1,1,"How does criticizing a project as a ""toy"" rese...","[""[9] You can't usually get paid for doing exa...",Criticizing a project as a 'toy' is similar to...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
2,2,How did the structure of funding startups in b...,['The deal for startups was based on a combina...,Funding startups in batches allowed for conven...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
3,3,How can exploring different topics help in gen...,"[""Talking or writing about the things you're i...",Exploring different topics can help in generat...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
4,4,How does focusing consistently on something yo...,"[""The way to beat it is to stop occasionally a...",Great work happens by focusing consistently on...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
5,5,What are the benefits of starting with somethi...,['Don\'t try to cram too much new stuff into a...,Starting with something small and evolving it ...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
6,6,How does being earnest impact the process of d...,"[""There may be some jobs where it's an advanta...",Being earnest is crucial for the process of di...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
7,7,What were the initial perceptions of online es...,"[""Wow, I thought, there's an audience. If I wr...","Socially, during the early days of online cont...",simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
8,8,What is the importance of avoiding affectation...,"[""One way to aim high is to try to make someth...",Avoiding affectation and focusing on earnestne...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
9,9,How can being promiscuously curious and starti...,"[""The best questions grow in the answering. Yo...",Being promiscuously curious and starting lots ...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True


In [28]:
def generate_response(query_engine, question):
    """Query the engine once and package the result for the evaluation dataset.

    Args:
        query_engine: Object exposing ``query(question)``; the result must
            provide ``response`` and ``source_nodes``.
        question: The question to ask.

    Returns:
        A dict with ``"answer"`` (the result's ``response`` attribute) and
        ``"contexts"`` (the content of each source node, in order).
    """
    result = query_engine.query(question)
    answer_text = result.response

    # Collect the text of every retrieved node that backed the answer.
    retrieved_texts = []
    for scored_node in result.source_nodes:
        retrieved_texts.append(scored_node.node.get_content())

    return dict(answer=answer_text, contexts=retrieved_texts)

In [29]:
from datasets import Dataset
from tqdm.auto import tqdm

# Questions from the test set, in row order.
test_questions = test_df["question"].values

# Run every question through the RAG pipeline (tqdm shows progress).
responses = [
    generate_response(query_engine, test_question)
    for test_question in tqdm(test_questions)
]

# Column-oriented layout: one list per field, aligned by question.
dataset_dict = dict(
    question=test_questions,
    answer=[item["answer"] for item in responses],
    contexts=[item["contexts"] for item in responses],
    ground_truth=test_df["ground_truth"].values.tolist(),
)

ragas_eval_dataset = Dataset.from_dict(dataset_dict)

  0%|          | 0/47 [00:00<?, ?it/s]

I0000 00:00:1747497441.604074   19758 chttp2_transport.cc:1201] ipv6:%5B::1%5D:4317: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: ping_timeout {grpc_status:14, http2_error:11, created_time:"2025-05-17T21:27:21.603485805+05:30"}


ResponseValidationError: The model response did not complete successfully.
Finish reason: 2.
Finish message: .
Safety ratings: [].
To protect the integrity of the chat session, the request and response were not added to chat history.
To skip the response validation, specify `model.start_chat(response_validation=False)`.
Note that letting blocked or otherwise incomplete responses into chat history might lead to future interactions being blocked by the service.

In [30]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
)

In [31]:
# Metrics passed to the evaluation.
metrics = [
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
]

# Requires `ragas_eval_dataset` from the dataset-building cell; the NameError
# recorded below this cell appears when that cell did not finish.
# NOTE(review): `critic_llm` is a LlamaIndex `Vertex` object — confirm that
# `ragas.evaluate` accepts it directly.
evaluation_result = evaluate(
    dataset=ragas_eval_dataset,
    metrics=metrics,
    llm=critic_llm,
    embeddings=ollama_emb,
)

NameError: name 'ragas_eval_dataset' is not defined