In [8]:
## Data Ingestion
from langchain_community.document_loaders import TextLoader

# The source text contains non-ASCII punctuation (em dashes, curly
# apostrophes), so pin the encoding explicitly instead of depending on the
# platform's default text encoding.
loader = TextLoader('test.txt', encoding='utf-8')
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'test.txt'}, page_content='In the ever-evolving field of machine learning, comparing model performance is more than just examining metrics — it’s about confidently determining whether one model truly outperforms another. The paired permutation test emerges as an essential tool in these scenarios, offering a rigorous method to assess whether observed differences in performance are statistically significant. Unlike traditional statistical tests, the paired permutation test is robust to data irregularities and designed to handle related samples, ensuring that your conclusions about model superiority are both reliable and sound.\n\nWhat is a Permutation Test in Machine Learning?\nA permutation test, also known as a randomization test, is a non-parametric statistical significance test. In machine learning, it’s often used to determine whether the difference in model performance (like accuracy, AUC, etc.) between two models or datasets is statistically significa

In [9]:
# Split the loaded document into overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,  # neighbouring chunks share some text
)
# NOTE: despite the singular name, `document` holds the list of chunks.
document = text_splitter.split_documents(documents=text_documents)
document

[Document(metadata={'source': 'test.txt'}, page_content='In the ever-evolving field of machine learning, comparing model performance is more than just examining metrics — it’s about confidently determining whether one model truly outperforms another. The paired permutation test emerges as an essential tool in these scenarios, offering a rigorous method to assess whether observed differences in performance are statistically significant. Unlike traditional statistical tests, the paired permutation test is robust to data irregularities and designed to handle related samples, ensuring that your conclusions about model superiority are both reliable and sound.'),
 Document(metadata={'source': 'test.txt'}, page_content='What is a Permutation Test in Machine Learning?\nA permutation test, also known as a randomization test, is a non-parametric statistical significance test. In machine learning, it’s often used to determine whether the difference in model performance (like accuracy, AUC, etc.) 

In [10]:
# Vector Embeddings and Vector store
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

In [11]:
# Embed every chunk and index it in a Chroma vector store.
#
# Each chunk gets a deterministic id ("<source>-<position>") so that running
# this cell again on a live kernel overwrites the same entries rather than
# appending duplicate copies under freshly generated random ids.
# NOTE(review): this relies on the Chroma wrapper upserting on existing ids
# and on the in-memory collection being shared within the kernel process —
# confirm for the installed langchain/chromadb versions. If the chunk count
# shrinks between runs, entries with higher positions are not removed.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}-{position}"
    for position, chunk in enumerate(document)
]
db = Chroma.from_documents(document, OllamaEmbeddings(), ids=chunk_ids)

In [6]:
## Vector database
# Query the store directly as a sanity check before wiring it into a chain.
# k=4 is the value this call was already using implicitly (the store warns
# and lowers it when fewer chunks are indexed).
query = "What is paired permutation"
result = db.similarity_search(query=query, k=4)
result

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(metadata={'source': 'test.txt'}, page_content='In the ever-evolving field of machine learning, comparing model performance is more than just examining metrics — it’s about confidently determining whether one model truly outperforms another. The paired permutation test emerges as an essential tool in these scenarios, offering a rigorous method to assess whether observed differences in performance are statistically significant. Unlike traditional statistical tests, the paired permutation test is robust to data irregularities and designed to handle related samples, ensuring that your conclusions about model superiority are both reliable and sound.'),
 Document(metadata={'source': 'test.txt'}, page_content='What is a Permutation Test in Machine Learning?\nA permutation test, also known as a randomization test, is a non-parametric statistical significance test. In machine learning, it’s often used to determine whether the difference in model performance (like accuracy, AUC, etc.) 

In [7]:
# Show only the text of the first document returned by the similarity search
# (raises IndexError if `result` is empty).
result[0].page_content

'In the ever-evolving field of machine learning, comparing model performance is more than just examining metrics — it’s about confidently determining whether one model truly outperforms another. The paired permutation test emerges as an essential tool in these scenarios, offering a rigorous method to assess whether observed differences in performance are statistically significant. Unlike traditional statistical tests, the paired permutation test is robust to data irregularities and designed to handle related samples, ensuring that your conclusions about model superiority are both reliable and sound.'

In [2]:
from langchain_community.llms import Ollama

# LLM handle for the "llama2" model exposed through Ollama; used as the
# generator in the document chain.
llm = Ollama(model="llama2")

In [3]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt with two placeholders: {context} receives the retrieved text and
# {input} receives the user's question.
prompt = ChatPromptTemplate.from_template(
    template="""
    Answer the following question based only on the provided context.
    Think step by step before providing a detailed answer.
    I will tip you $1000 if the user finds the answer helpful.
    <context>
    {context}
    </context>
    Question: {input}"""
)

In [5]:
# Chain Introduction
from langchain.chains.combine_documents import create_stuff_documents_chain

In [6]:
# Chain that passes the supplied documents to the LLM through `prompt`.
# NOTE(review): presumably the documents fill the prompt's {context} slot
# via the helper's default variable name — confirm against the library docs.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [12]:
# Expose the vector store through the retriever interface so it can feed a
# chain. "similarity" is spelled out here; NOTE(review): it is understood to
# be the library default already — confirm for the installed version.
retriever = db.as_retriever(search_type="similarity")
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x11ba643a0>)

In [13]:
# Retriever and chain
from langchain.chains import create_retrieval_chain

# Compose the two stages: `retriever` fetches documents for the question and
# `document_chain` turns them into an answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [15]:
# Ask the question through the full retrieval chain. This call waits on the
# LLM, so it can take a while.
response = retrieval_chain.invoke({"input": "what is paired permutation?"})

# End the cell with the generated answer so the notebook actually shows its
# final result instead of only storing it in `response`.
# NOTE(review): the "answer" key follows the create_retrieval_chain
# documentation — confirm for the installed version.
response["answer"]

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2

KeyboardInterrupt

