In [76]:
import os
import re

import openai
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Weaviate

# Take the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be written into (and committed with) the notebook. The original
# placeholder is kept as the fallback when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-key-here")

In [77]:
# Read the essay from a file in the current working directory.
loader = TextLoader('paul_graham.txt')
documents = loader.load()

# Configure the splitter: chunk_size=1000 with chunk_overlap=50 between
# neighbouring chunks (units are presumably characters, going by the
# splitter's name - confirm against the LangChain docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split the loaded documents into chunks; `texts` is what gets indexed
# in the Weaviate cell further down.
texts = text_splitter.split_documents(documents)

In [78]:
# Embedding backend (model "all-MiniLM-L6-v2"); it is passed to
# Weaviate.from_documents when the vector store is built.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [79]:
# Placeholder endpoint - replace with the URL of your Weaviate instance.
weaviate_url = 'your-weaviate-url'
# Build the vector store from the chunks and the embedding backend.
# NOTE(review): by_text=False presumably makes lookups go through the embedding
# vector instead of Weaviate's own text search - confirm against the LangChain docs.
db = Weaviate.from_documents(texts, embeddings, weaviate_url=weaviate_url, by_text=False)
# Ask for the "certainty" field with every hit; vector_search reads it back
# from metadata['_additional']['certainty'].
retriever = db.as_retriever(search_kwargs={"additional": ["certainty"]})

In [85]:

def generate_queries_chatgpt(original_query, num_queries=4):
    """Ask the chat model for several search queries related to ``original_query``.

    Args:
        original_query: The user's question; interpolated into the prompt.
        num_queries: How many queries the prompt asks for. The reply is not
            truncated or padded to this number - it is only a request to the
            model. Defaults to 4, which reproduces the original prompt text.

    Returns:
        list[str]: One entry per non-blank line of the model's reply, with
        surrounding whitespace and a leading list marker ("1.", "2)", "-",
        "*") removed. May be empty if the reply contains no text.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates multiple search queries based on a single input query."},
            {"role": "user", "content": f"Generate multiple search queries related to: {original_query}"},
            {"role": "user", "content": f"OUTPUT ({num_queries} queries):"}
        ]
    )

    reply_text = response.choices[0]["message"]["content"]

    generated_queries = []
    for line in reply_text.splitlines():
        # The model usually answers with a numbered or bulleted list. The
        # marker carries no meaning and would otherwise be embedded together
        # with the query text during vector search.
        cleaned = re.sub(r"^\s*(?:\d+[.)]|[-*])\s+", "", line).strip()
        # Blank separator lines must not be searched as empty queries.
        if cleaned:
            generated_queries.append(cleaned)
    return generated_queries

def vector_search(query):
    """Run ``query`` through the module-level ``retriever`` and score each hit.

    Args:
        query: Search string handed to ``retriever.get_relevant_documents``.

    Returns:
        dict: Maps each hit's ``page_content`` to the value stored under
        ``metadata['_additional']['certainty']``, in retrieval order. When two
        hits share the same text, the later hit's certainty wins.
    """
    hits = retriever.get_relevant_documents(query)
    return {
        hit.page_content: hit.metadata['_additional']['certainty']
        for hit in hits
    }



def reciprocal_rank_fusion(search_results_dict, k=60, verbose=True):
    """Fuse several per-query rankings into one ranking with reciprocal rank fusion.

    For every query, its documents are ordered by descending score and each
    document receives ``1 / (rank + k)``, where ``rank`` is the 0-based
    position in that ordering (so the top document of a query contributes
    ``1 / k``). Contributions are summed across queries.

    Args:
        search_results_dict: Maps each query to a ``{document: score}`` dict.
        k: Constant added to the rank in the denominator.
        verbose: When true (the default, matching the original behaviour),
            print every score update and the final ranking. These messages
            contain the full document text, so pass ``False`` to keep
            notebook output readable.

    Returns:
        dict: ``{document: fused_score}`` ordered by descending fused score.
    """
    fused_scores = {}

    for query, doc_scores in search_results_dict.items():
        ranked_docs = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)
        # Only the position within the query's ranking matters here; the raw
        # retrieval score is used solely for ordering.
        for rank, (doc, _) in enumerate(ranked_docs):
            previous_score = fused_scores.get(doc, 0)
            fused_scores[doc] = previous_score + 1 / (rank + k)
            if verbose:
                print(f"Updating score for {doc} from {previous_score} to {fused_scores[doc]} based on rank {rank} in query '{query}'")

    reranked_results = dict(sorted(fused_scores.items(), key=lambda item: item[1], reverse=True))
    if verbose:
        print("Final reranked results:", reranked_results)
    return reranked_results


def generate_output(original_query, reranked_results):
    """Answer ``original_query`` with the chat model, grounded in the fused documents.

    Args:
        original_query: The user's question, placed after the context in the prompt.
        reranked_results: Mapping whose keys are document texts; the keys are
            joined with newlines, in iteration order, to form the context.

    Returns:
        str: The content of the first returned choice, stripped of
        surrounding whitespace.
    """
    context = '\n'.join(reranked_results.keys())

    system_prompt = (
        "You are a helpful assistant that answers user's questions based on the context provided.\n"
        "Do not make up an answer if you do not know it, stay within the bounds of the context provided, "
        "if you don't know the answer, say that you don't have enough information on the topic!"
    )
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"CONTEXT: {context}\nQUERY: {original_query}"},
        {"role": "user", "content": "ANSWER:"},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    return completion.choices[0]["message"]["content"].strip()

In [88]:
# End-to-end run: expand the question into several queries, retrieve for each,
# fuse the rankings, then answer the original question.
original_query = "How did the author come up with the name Ycombinator?"
generated_queries = generate_queries_chatgpt(original_query)

# Map every generated query to its {chunk text: certainty} hits.
all_results = {}
for query in generated_queries:
    search_results = vector_search(query)
    all_results[query] = search_results

# Fuse the per-query rankings (this call prints its progress), then let the
# chat model answer from the fused chunks.
reranked_result = reciprocal_rank_fusion(all_results)
final_output = generate_output(original_query, reranked_result)

Updating score for [13] Y Combinator was not the original name. At first we were called Cambridge Seed. But we didn't want a regional name, in case someone copied us in Silicon Valley, so we renamed ourselves after one of the coolest tricks in the lambda calculus, the Y combinator.

I picked orange as our color partly because it's the warmest, and partly because no VC used it. In 2005 all the VCs used staid colors like maroon, navy blue, and forest green, because they were trying to appeal to LPs, not founders. The YC logo itself is an inside joke: the Viaweb logo had been a white V on a red circle, so I made the YC logo a white Y on an orange square.

[14] YC did become a fund for a couple years starting in 2009, because it was getting so big I could no longer afford to fund it personally. But after Heroku got bought we had enough money to go back to being self-funded. from 0 to 0.016666666666666666 based on rank 0 in query '1. What is the origin of the name "Y Combinator"?'
Updating 

In [94]:
# Bare expression so the notebook displays the generated answer.
final_output

'The author came up with the name "Y Combinator" after one of the tricks in the lambda calculus called the Y combinator. They chose this name because they wanted something cool and unrelated to a specific region, in case someone copied their idea in Silicon Valley.'