In [1]:
import qdrant_client

from llama_index.core import Settings
from llama_index.core import PromptTemplate
from llama_index.llms.ollama import Ollama
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore

from IPython.display import Markdown, display

1. Set up asyncio

In [3]:
import nest_asyncio
# Apply the nest_asyncio patch up front, before any later cell makes async calls.
# NOTE(review): presumably required because the notebook kernel already runs an event loop — confirm.
nest_asyncio.apply()

2. Define the LLM, the embedding model and re-ranker

In [None]:
# Chat model served through Ollama; request_timeout is presumably in seconds.
llm = Ollama(
    model="llama3.2:1b",
    request_timeout=180.0,
)

# Embedding model used when documents and queries are vectorised.
embed_model = FastEmbedEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)

# Re-ranker applied to retrieved nodes, configured with top_n=2.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

In [5]:
# Register the embedding model and the LLM created above on the global llama_index Settings object.
Settings.embed_model = embed_model
Settings.llm = llm

3. Read the documents

In [6]:
# Folder that holds the source essays.
input_dir_path = './docs/paul_graham'

# Reader limited to ".txt" files, with recursive=True for nested folders.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path, required_exts=[".txt"], recursive=True
)

# Load everything the reader finds.
docs = loader.load_data()

4. Set up the Qdrant vector database

In [35]:
# Connect to a Qdrant instance at localhost:6333.
client = qdrant_client.QdrantClient(
    host="localhost",
    port=6333,
    timeout=60,
)

# Vector store bound to the "document_chat" collection.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="document_chat",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the loaded documents.
# NOTE(review): re-running this cell presumably re-embeds and inserts the documents again — confirm.
index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
)

KeyboardInterrupt: 

5. Define the query engine and prompt template

In [8]:
# Query engine over the index: similarity_top_k=4, with the re-ranker as a post-processor.
query_engine = index.as_query_engine(
    similarity_top_k=4,
    node_postprocessors=[rerank],
)

# QA prompt; {context_str} and {query_str} are the template's placeholders.
template = """Context information is below.
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner. Incase 
              you don't know the answer say 'I don't know!'.
              
              Query: {query_str}
              
              Answer:"""

qa_prompt_tmpl = PromptTemplate(template)

# Replace the response synthesizer's text-QA prompt with the custom template.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

6. Query the document

In [9]:
# Ask one sample question through the query engine built above.
response = query_engine.query("""How did the structure of funding startups 
                                 in batches contribute to the success and 
                                 growth of the Y Combinator program and the
                                 startups involved?""")
                                 
# Render the answer text as Markdown in the cell output.
display(Markdown(str(response)))

The structure of funding startups in batches contributed significantly to the success and growth of the Y Combinator (YC) program. Here's a step-by-step breakdown:

1. **Isolation problem**: Founders often faced isolation, which is a major challenge when trying to build and grow a startup. Funding companies like YC allowed founders to connect with peers who understood their problems and were facing similar challenges.

2. **Convenience for the founders**: By funding startups in batches, YC made it easier for them to work on multiple projects simultaneously. This allowed founders to scale more efficiently, which in turn led to better outcomes.

3. **Experienced investors and experts**: The batch-funded model enabled YC to attract experienced investors and experts who were familiar with the startup space. These mentors helped founders refine their ideas and increase their chances of success.

4. **Tight community and networking opportunities**: The Y Combinator's headquarters in Cambridge provided a unique opportunity for founders to connect with each other, share experiences, and learn from one another. This fostered a tight-knit community that supported each other's growth.

5. **Scaling the fundraising process**: By funding startups in batches, YC simplified the fundraising process, reducing the time it took to find new investors. This allowed the program to scale more efficiently, which contributed to its growth and success.

6. **Improved deal flow**: The batch-funded model enabled YC to identify deals that might not have been viable as individual investments. By focusing on larger, more promising companies, the program could increase its chances of making successful investments.

7. **Data-driven decision-making**: The data generated by the batch-funded model helped YC make informed decisions about which startups to fund and which investors to pursue. This data-driven approach enabled the program to refine its investment strategy over time.

8. **Increased accessibility for underrepresented groups**: By funding companies from diverse backgrounds, YC expanded its reach beyond traditional tech hubs like Silicon Valley. This diversity also led to a more inclusive community of founders, entrepreneurs, and investors.

9. **Better customer relationships**: The batch-funded model enabled YC to establish strong relationships with the startups they funded. These connections helped the program deliver value to the customers, ultimately leading to increased adoption and revenue growth.

10. **Enhanced reputation and credibility**: By demonstrating its ability to successfully fund and grow companies in batches, YC reinforced its reputation as a premier startup accelerator. This credibility helped attract more investors, mentors, and students to the program.

In summary, the structure of funding startups in batches was instrumental in contributing to the success and growth of the Y Combinator program and the startups involved.

In [11]:
# Import DirectoryLoader from langchain_community (already used elsewhere in this
# notebook) instead of the deprecated `langchain.document_loaders` path.
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Loader over the same essay folder as the llama_index reader.
loader = DirectoryLoader("./docs/paul_graham/")

# Splitter configured with chunk_size=1024 and chunk_overlap=20.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=20)

# Load the files and split them in one call; the chunks feed test-set generation.
documents = loader.load_and_split(text_splitter)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [12]:
# Inspect the first split chunk to check that loading and splitting worked.
documents[0].to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'document', 'Document'],
 'kwargs': {'page_content': 'What I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district\'s 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain\'s lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescen

Create a RAGAS test set for evaluation

In [13]:
# Aliased so this cell does not rebind the name `Ollama`, which the first cell
# imports from llama_index; a plain import would make re-running the llama_index
# LLM cell construct the LangChain class instead.
from langchain_community.llms import Ollama as LangchainOllama
from langchain_community.embeddings import OllamaEmbeddings

# LLM that writes the test questions, and LLM that critiques them.
generator_llm = LangchainOllama(model="phi3:3.8b")
critic_llm = LangchainOllama(model="llama3.2:1b")

# Embedding model handed to the test-set generator.
ollama_emb = OllamaEmbeddings(
    model="nomic-embed-text",
)

In [14]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# Test-set generator wired to the LangChain LLMs and embeddings defined earlier.
generator = TestsetGenerator.from_langchain(
    generator_llm=generator_llm, critic_llm=critic_llm, embeddings=ollama_emb
)

# Weight given to each evolution type.
distribution = {
    simple: 0.5,
    reasoning: 0.25,
    multi_context: 0.25,
}

# Generate the test set from the split documents (test_size=10).
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions=distribution,
    raise_exceptions=False,
)

embedding nodes:   0%|          | 0/360 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [16]:
import pandas as pd

In [None]:
# Convert the generated test set to a DataFrame, drop incomplete rows, and save
# it to CSV so later cells can reload it without regenerating.
# Fixes two misspelled method names (`t0_pandas`, `to_scv`) that raised AttributeError.
test_df = testset.to_pandas().dropna()
test_df.to_csv('test_data_paul_graham.csv')

In [25]:
# Reload the saved test set; the first CSV column is used as the index and
# rows with missing values are dropped.
test_df = (
    pd.read_csv('test_data_paul_graham.csv', index_col=0)
    .dropna()
)


In [26]:
# Preview the first rows of the reloaded test set.
test_df.head()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,How did the shift to publishing on the web cha...,"[""Wow, I thought, there's an audience. If I wr...",The shift to publishing on the web changed the...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
1,"How does criticizing a project as a ""toy"" rese...","[""[9] You can't usually get paid for doing exa...",Criticizing a project as a 'toy' is similar to...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
2,How did the structure of funding startups in b...,['The deal for startups was based on a combina...,Funding startups in batches allowed for conven...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
3,How can exploring different topics help in gen...,"[""Talking or writing about the things you're i...",Exploring different topics can help in generat...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
4,How does focusing consistently on something yo...,"[""The way to beat it is to stop occasionally a...",Great work happens by focusing consistently on...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True


In [27]:
# (rows, columns) of the test set after dropping incomplete rows.
test_df.shape

(47, 6)

In [28]:
# Count missing values per column.
test_df.isna().sum()

question          0
contexts          0
ground_truth      0
evolution_type    0
metadata          0
episode_done      0
dtype: int64

Evaluate the RAG pipeline

In [29]:
def generate_response(query_engine, question):
    """Run one question through the query engine and package the result.

    Args:
        query_engine: Object exposing ``query(question)``. The value it
            returns must have a ``response`` attribute and a ``source_nodes``
            iterable whose items expose ``node.get_content()``.
        question: The query forwarded unchanged to ``query_engine.query``.

    Returns:
        dict: ``{"answer": <result.response>, "contexts": [<content of each
        source node>, ...]}``, with contexts in ``source_nodes`` order.
    """
    result = query_engine.query(question)
    answer_text = result.response
    retrieved_texts = []
    for scored_node in result.source_nodes:
        retrieved_texts.append(scored_node.node.get_content())
    return {"answer": answer_text, "contexts": retrieved_texts}

In [31]:
# Pull the question column out of the test set for the evaluation run.
test_questions = test_df["question"].values

In [34]:
# Try the pipeline on a single question before looping over the full set.
generate_response(query_engine, test_questions[0]) 

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpx/_transports/default.py", line 72, in map_httpcore_exceptions
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpx/_transports/default.py", line 236, in handle_request
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection_pool.py", line 256, in handle_request
    raise exc from None
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection_pool.py", line 236, in handle_request
    response = connection.handle_request(
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection.py", line 103, in handle_request
    return self._connection.handle_request(request)
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/http11.py", line 136,

In [30]:
from datasets import Dataset
from tqdm.auto import tqdm

# Questions to run through the RAG pipeline.
test_questions = test_df["question"].values

# One {"answer", "contexts"} dict per question, with a progress bar.
responses = [
    generate_response(query_engine, question)
    for question in tqdm(test_questions)
]

# Column-oriented layout consumed by Dataset.from_dict.
dataset_dict = {
    "question": test_questions,
    "answer": [item["answer"] for item in responses],
    "contexts": [item["contexts"] for item in responses],
    "ground_truth": test_df["ground_truth"].values.tolist(),
}

ragas_eval_dataset = Dataset.from_dict(dataset_dict)

  0%|          | 0/47 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpx/_transports/default.py", line 72, in map_httpcore_exceptions
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpx/_transports/default.py", line 236, in handle_request
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection_pool.py", line 256, in handle_request
    raise exc from None
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection_pool.py", line 236, in handle_request
    response = connection.handle_request(
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/connection.py", line 103, in handle_request
    return self._connection.handle_request(request)
  File "/Users/annaryzhokhina/PycharmProjects/RAG/venv/lib/python3.9/site-packages/httpcore/_sync/http11.py", line 136,

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
)

In [None]:
# Metrics computed for every row of the evaluation dataset.
metrics = [
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
]

# Judge LLM and embedding model passed to the evaluator.
critic_llm = Ollama(model="llama3.2:1b")
ollama_emb = OllamaEmbeddings(model="nomic-embed-text")

# Score the collected answers and contexts.
evaluation_result = evaluate(
    dataset=ragas_eval_dataset,
    metrics=metrics,
    llm=critic_llm,
    embeddings=ollama_emb,
)

In [None]:
# Put the scores held by the evaluation result into a DataFrame for inspection.
eval_scores_df = pd.DataFrame(evaluation_result.scores)