# Compare embeddings performance

We use different approaches to create embeddings from the same texts and compare their performance.

## Configuration:

Please select the model you want to use for the transformations.

In [None]:
# Provider selection.
# NOTE(review): only "openai" is wired up in the visible notebook cells (the
# chat model is always ChatOpenAI and the embeddings cell only handles
# "openai"); "hf" is accepted here but has no implementation yet.
llm_source = "openai"  # openai or hf for huggingface
embedding_source = "openai"  # openai or hf for huggingface

# Chat model used for the document transformations. Keep exactly one line
# active; the alternative is commented out so it no longer looks like a live
# setting that is silently overwritten by the next assignment.
# llm_model = "gpt-3.5-turbo"
llm_model = "gpt-4-1106-preview"
temperature = 0

embeddings_model = "text-embedding-ada-002"

# Root directory that is searched recursively for markdown files.
markdown_documents_path = "C:\\Dev\\tt\\tt-readme"

# Cache switches: True loads the pickled result from ./cache instead of
# recomputing it.
use_cached_documents = True
use_cached_transforms = True
# True (re)creates the vector-store collections from the documents.
reindex_documents = True

## Test different approaches of indexing

This will
- create three questions for each document,
- create a short explanation of what each document answers, and
- summarize each document

## Load and split markdown contents of the TT Readme


In [None]:
# Load the markdown files and split them by header, unless the cached
# documents are going to be used instead.
if use_cached_documents:
    print("Skipping loading documents from markdown files")
else:
    from langchain.document_loaders import DirectoryLoader, TextLoader
    from langchain.text_splitter import MarkdownHeaderTextSplitter

    # Load every markdown file below the configured directory as plain text.
    readme_documents = DirectoryLoader(
        markdown_documents_path,
        glob="**/*.md",
        loader_cls=TextLoader,
    ).load()

    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
    ]

    splitter = MarkdownHeaderTextSplitter(headers_to_split_on)

    split_documents = []
    for doc in readme_documents:
        result = splitter.split_text(doc.page_content)

        # Normalise to a list so a single result and a list of results share
        # one code path.
        chunks = result if isinstance(result, list) else [result]
        for chunk in chunks:
            # Carry the metadata of the source file over to every chunk.
            chunk.metadata.update(doc.metadata)
        split_documents.extend(chunks)

    # For brevity, reduce amount of entries to a few only
    # split_documents = split_documents[50:60]

    # Give every chunk a 1-based index (used later to look results up again)
    # and remember its untransformed text.
    for index, doc in enumerate(split_documents, start=1):
        doc.metadata["index"] = index
        doc.metadata["original_content"] = doc.page_content

### Persist the data to files or load cached files

In [None]:
import os
import pickle

# Either restore the split documents from the pickle cache or write the
# freshly split documents to it.
if use_cached_documents:
    print("Loading documents from file")
    # NOTE: unpickling can execute arbitrary code; only load cache files that
    # were produced by this notebook.
    with open("cache/split_documents.pickle", "rb") as f:
        split_documents = pickle.load(f)
else:
    print("Writing documents to file")
    # The cache directory may not exist on a fresh checkout.
    os.makedirs("cache", exist_ok=True)
    with open("cache/split_documents.pickle", "wb") as f:
        pickle.dump(split_documents, f)

## Massage content into new embedding documents

In [None]:
from langchain.chat_models.openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Chat model shared by all transformation chains, configured from the
# settings at the top of the notebook.
llm = ChatOpenAI(
    model=llm_model,
    temperature=temperature,
)


def build_chain(prompt):
    """Return an LLMChain that runs *prompt* on the shared ``llm``.

    *prompt* is used as the template text of a PromptTemplate whose only
    input variable is ``input``.
    """
    prompt_template = PromptTemplate(template=prompt, input_variables=["input"])
    return LLMChain(prompt=prompt_template, llm=llm)

# One chain per transformation; the prompts are German because the generated
# texts are requested in German.

# Asks for three different questions that the text answers.
question_chain = build_chain(
    "Formuliere drei verschiedene deutsche Fragen, die der folgende Text beantwortet: {input}"
)

# Asks for a two-to-three sentence explanation of what the text answers.
answer_chain = build_chain(
    "Erkläre in zwei bis drei deutschen Sätzen, was der folgende Text beantwortet: {input}"
)

# Asks for a short summary of the text.
summarize_chain = build_chain(
    "Erstelle eine kurze deutsche Zusammenfassung des folgenden Textes: {input}"
)

In [None]:
import copy

def transform_documents(chain, file):
    """Return a transformed copy of ``split_documents``, honouring the cache switch.

    When ``use_cached_transforms`` is true, the result is loaded from the
    pickle cache. Otherwise every document of a deep copy of
    ``split_documents`` gets its ``page_content`` replaced by the output of
    ``chain.run`` and the result is written to the cache.

    Args:
        chain: Object whose ``run(text)`` returns the new page content.
        file: Short label of the transformation; used in the cache file name
            and in the progress output.

    Returns:
        The cached object, or the list of transformed documents. Each
        transformed document keeps its untransformed text in
        ``metadata["original_content"]``.
    """
    import os

    # Single source of truth for the cache location of this model/label pair.
    cache_path = f"cache/{llm_model}_{file}_documents.pickle"

    if use_cached_transforms:
        print(f"Loading cached file {file}")
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that were produced by this notebook.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    # Create the target directory *before* running the chain, so a missing
    # directory cannot discard the results of all the LLM calls at the end.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)

    result = copy.deepcopy(split_documents)
    for doc in result:
        print(f"Transforming {file} document {doc.metadata['index']} with model {llm_model}")
        # Strings are immutable, so no copy is needed to preserve the original.
        doc.metadata["original_content"] = doc.page_content
        doc.page_content = chain.run(doc.page_content)

    print(f"Writing {file} documents from model {llm_model} to file")
    with open(cache_path, "wb") as f:
        pickle.dump(result, f)
    return result

# Build (or load from cache) the three transformed variants of the documents.
question_documents = transform_documents(
    chain=question_chain,
    file="questions",
)
answer_documents = transform_documents(
    chain=answer_chain,
    file="answers",
)
summary_documents = transform_documents(
    chain=summarize_chain,
    file="summaries",
)

## Prepare Embeddings model

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model used for indexing and for querying.
embeddings = None

if embedding_source == "openai":
    embeddings = OpenAIEmbeddings(model=embeddings_model)
else:
    # Fail here with a clear message instead of passing ``None`` on to the
    # vector store, where the error would be much harder to interpret.
    raise ValueError(
        f"Unsupported embedding_source {embedding_source!r}; only 'openai' is implemented"
    )

## Prepare store

In [None]:
from langchain.vectorstores import Qdrant

def store(documents, collection_name, url="http://localhost:6333"):
    """Embed *documents* and write them to a Qdrant collection.

    The collection is created with ``force_recreate=True``.

    Args:
        documents: Documents to embed with the module-level ``embeddings``.
        collection_name: Name of the target collection.
        url: Address of the Qdrant server; defaults to the local instance
            that was previously hard-coded.
    """
    Qdrant.from_documents(
        documents,
        url=url,
        embedding=embeddings,
        collection_name=collection_name,
        force_recreate=True,
    )

# One collection per document variant. The name combines the embeddings model,
# the chat model and a one-letter suffix:
# p = untransformed, q = questions, a = answers, s = summaries.
collections = [
    f"{embeddings_model}-{llm_model}-{suffix}" for suffix in ("p", "q", "a", "s")
]
pure_collection, question_collection, answer_collection, summary_collection = collections

## Create embeddings and store them in different collections

In [None]:
# Index every document variant in its own collection, if reindexing is enabled.
if reindex_documents:
    store(documents=split_documents, collection_name=pure_collection)
    store(documents=question_documents, collection_name=question_collection)
    store(documents=answer_documents, collection_name=answer_collection)
    store(documents=summary_documents, collection_name=summary_collection)

## Search with a query in the different indexes

In [None]:
# German test queries; each one is searched in every collection further down.
queries = [
    # "What do I do if I missed my last train?"
    "Was mache ich, wenn ich meinen letzten Zug verpasst habe?",
    # "After how many years can I renew my notebook?"
    "Nach wie vielen Jahren kann ich mein Notebook erneuern?",
    # "What is MITOD?"
    "Was ist MITOD?",
]

In [None]:
from qdrant_client import QdrantClient

# Client for the local Qdrant instance, shared by all searches.
client = QdrantClient("http://localhost:6333")

def search(collection, query):
    """Return ``(document, relevance score)`` pairs for *query* in *collection*.

    Uses the public ``similarity_search_with_relevance_scores`` method rather
    than its underscore-prefixed counterpart, so the call does not depend on
    a private part of the vector-store API.

    Args:
        collection: Name of the Qdrant collection to search.
        query: Query text; it is embedded with the module-level ``embeddings``.
    """
    vector_store = Qdrant(client, collection, embeddings)
    return vector_store.similarity_search_with_relevance_scores(query)

collections = [
    pure_collection,
    question_collection,
    answer_collection,
    summary_collection,
]

# First row is the header: a label column followed by one column per query.
result_table = [["Collection", *queries]]

# One table row per collection, one cell per query. A cell lists the hits as
# "<document index> - <score>", one hit per line.
for collection in collections:
    row = [collection]
    for query in queries:
        print(f"Searching {collection} for {query}")
        hits = search(collection, query)
        cell = "\n".join(
            f"{document.metadata['index']} - {score}" for document, score in hits
        )
        row.append(cell)
    result_table.append(row)

In [None]:
from tabulate import tabulate

# Render the collected rows as a grid; the first row supplies the headers.
print(
    tabulate(
        result_table,
        headers="firstrow",
        tablefmt="grid",
    )
)

# To check a result, put the index in the following cell and run it

In [None]:
found_index = 31


def _find_by_index(documents, index):
    """Return the first document whose metadata["index"] equals *index*, or None."""
    return next((doc for doc in documents if doc.metadata["index"] == index), None)


def _print_variant(label, documents):
    """Print the ``found_index`` document of *documents*, prefixed with *label*.

    Each variant is looked up independently, so a miss is reported as a miss
    instead of showing the document found in a previous collection.
    """
    match = _find_by_index(documents, found_index)
    if match is None:
        print(f"{label}: no document with index {found_index}\n\n")
    else:
        print(f"{label}: {match.page_content}\n\n")


# Untransformed document: content first, then its metadata.
found_document = _find_by_index(split_documents, found_index)
if found_document is None:
    print(f"No document with index {found_index}\n\n")
else:
    print(f'{found_document.page_content}\n\n')
    print(f'{found_document.metadata}\n\n')

# Transformed variants of the same document.
_print_variant("Questions", question_documents)
_print_variant("Answers", answer_documents)
_print_variant("Summary", summary_documents)


# RAG (Teaser)

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

# Document: pick the chunk that serves as context for the answer.
found_index = 31
found_document = next(
    (doc for doc in split_documents if doc.metadata["index"] == found_index),
    None,
)
if found_document is None:
    # Stop with a clear message instead of failing on ``None.page_content``.
    raise LookupError(f"No document with index {found_index} in split_documents")

query = "Nach wie vielen Jahren kann ich mein Notebook erneuern?"

# Prompt: context and question are template variables. They are filled in at
# invoke time, so curly braces inside the document text are never parsed as
# template placeholders.
template = """Beantworte die Frage nur aufgrund der folgenenden Informationen:
{context}

Frage: {question}
"""

# RAG chain
chain = (
    ChatPromptTemplate.from_template(template)
    | ChatOpenAI(model_name = 'gpt-4-1106-preview')
    | StrOutputParser()
)

# Last expression of the cell, so the notebook displays the answer.
chain.invoke({"context": found_document.page_content, "question": query})