In [1]:
import os
from ollama import chat
from ollama import ChatResponse
import json
from dotenv import load_dotenv

MODEL = "llama3.2"

In [2]:
from langchain_ollama import OllamaEmbeddings

embedding = OllamaEmbeddings(model="llama3.2")

In [3]:
from langchain_ollama import OllamaLLM
from langchain_core.output_parsers import StrOutputParser

# Establish model and parser
model = OllamaLLM(model=MODEL)
parser = StrOutputParser()

chain = model | parser
chain.invoke("Tell me a bit about Retrieval Augmented Generation (RAG) in less than 100 words.")

'Retrieval Augmented Generation (RAG) is a deep learning technique that combines the strengths of retrieval-based models and text generation algorithms. RAG works by first retrieving relevant documents from an external knowledge base or database to inform the generation process. The retrieved information is then used as input to a language model, which generates new text based on the retrieved context. This approach has shown promising results in tasks such as question answering, text summarization, and conversational AI, where contextual understanding is crucial. RAG offers improved performance over traditional generation-based approaches.'

#### PDF

In [4]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("pdf_lists/model_evaluation_selection_ML.pdf")
pages = loader.load()

In [5]:
len(pages)

49

In [6]:
### Choose a page
page = pages[3]

In [7]:
### View the content of the first chapter
print(page.page_content[0:1000])

1 Introduction: Essential Model Evaluation Terms and Techniques
Machine learning has become a central part of our life – as consumers, customers, and hopefully
as researchers and practitioners. Whether we are applying predictive modeling techniques to our
research or business problems, I believe we have one thing in common: We want to make "good"
predictions. Fitting a model to our training data is one thing, but how do we know that it generalizes
well to unseen data? How do we know that it does not simply memorize the data we fed it and fails to
make good predictions on future samples, samples that it has not seen before? And how do we select
a good model in the ﬁrst place? Maybe a different learning algorithm could be better-suited for the
problem at hand?
Model evaluation is certainly not just the end point of our machine learning pipeline. Before we
handle any data, we want to plan ahead and use techniques that are suited for our purposes. In this
article, we will go over a selecti

In [8]:
page.metadata

{'source': 'pdf_lists/model_evaluation_selection_ML.pdf',
 'page': 3,
 'page_label': '4'}

#### URLs 

In [9]:
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://github.com/ayoub-berdeddouch/mlops-journey/blob/main/monitoring-05.md")

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [10]:
docs = loader.load()
print(docs[0].page_content[:1000])














































































mlops-journey/monitoring-05.md at main · ayoub-berdeddouch/mlops-journey · GitHub














































Skip to content













Navigation Menu

Toggle navigation




 













            Sign in
          








        Product
        













GitHub Copilot
        Write better code with AI
      







GitHub Advanced Security
        Find and fix vulnerabilities
      







Actions
        Automate any workflow
      







Codespaces
        Instant dev environments
      







Issues
        Plan and track work
      







Code Review
        Manage code changes
      







Discussions
        Collaborate outside of code
      







Code Search
        Find more, search less
      






Explore



      All features

    



      Documentation

    





      GitHub Skills

    





      Blog

    










        Solutions
        






By c

#### Document Splitting

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

chunk_size =26
chunk_overlap = 4

In [10]:
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

c_splitter = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

#### Splitting

In [14]:
some_text = """When writing documents, writers will use document structure to group content. \
        This can convey to the reader, which idea's are related. For example, closely related ideas \
        are in sentances. Similar ideas are in paragraphs. Paragraphs form a document. \n\n  \
        Paragraphs are often delimited with a carriage return or two carriage returns. \
        Carriage returns are the "backslash n" you see embedded in this string. \
        Sentences have a period at the end, but also, have a space.\
        and words are separated by space."""

In [15]:
c_splitter = CharacterTextSplitter(
    chunk_size=450,
    chunk_overlap=0,
    separator = ' '
)
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=450,
    chunk_overlap=0, 
    separators=["\n\n", "\n", " ", ""]
)

In [16]:
c_splitter.split_text(some_text)

['When writing documents, writers will use document structure to group content. This can convey to the reader, which idea\'s are related. For example, closely related ideas are in sentances. Similar ideas are in paragraphs. Paragraphs form a document. \n\n Paragraphs are often delimited with a carriage return or two carriage returns. Carriage returns are the "backslash n" you see embedded in this string. Sentences have a period at the end, but also,',
 'have a space. and words are separated by space.']

In [17]:
r_splitter.split_text(some_text)

["When writing documents, writers will use document structure to group content.         This can convey to the reader, which idea's are related. For example, closely related ideas         are in sentances. Similar ideas are in paragraphs. Paragraphs form a document.",
 'Paragraphs are often delimited with a carriage return or two carriage returns.         Carriage returns are the "backslash n" you see embedded in this string.         Sentences have a period at the end, but also, have a space.        and words are separated by space.']

Let's reduce the chunk size a bit and add a period to our separators:

In [18]:
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    separators=["\n\n", "\n", "\. ", " ", ""]
)
r_splitter.split_text(some_text)

["When writing documents, writers will use document structure to group content.         This can convey to the reader, which idea's are related. For",
 'example, closely related ideas         are in sentances. Similar ideas are in paragraphs. Paragraphs form a document.',
 'Paragraphs are often delimited with a carriage return or two carriage returns.         Carriage returns are the "backslash n" you see',
 'embedded in this string.         Sentences have a period at the end, but also, have a space.        and words are separated by space.']

In [19]:
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)
r_splitter.split_text(some_text)

["When writing documents, writers will use document structure to group content.         This can convey to the reader, which idea's are related. For",
 'example, closely related ideas         are in sentances. Similar ideas are in paragraphs. Paragraphs form a document.',
 'Paragraphs are often delimited with a carriage return or two carriage returns.         Carriage returns are the "backslash n" you see',
 'embedded in this string.         Sentences have a period at the end, but also, have a space.        and words are separated by space.']

In [20]:
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len
)

In [26]:
docs = text_splitter.split_documents(pages)

In [27]:
docs

[Document(metadata={'source': 'pdf_lists/model_evaluation_selection_ML.pdf', 'page': 0, 'page_label': '1'}, page_content='Model Evaluation, Model Selection, and Algorithm\nSelection in Machine Learning\nSebastian Raschka\nUniversity of Wisconsin–Madison\nDepartment of Statistics\nNovember 2018\nsraschka@wisc.edu\nAbstract\nThe correct use of model evaluation, model selection, and algorithm selection\ntechniques is vital in academic machine learning research as well as in many\nindustrial settings. This article reviews different techniques that can be used for\neach of these three subtasks and discusses the main advantages and disadvantages\nof each technique with references to theoretical and empirical studies. Further,\nrecommendations are given to encourage best yet feasible practices in research and\napplications of machine learning. Common methods such as the holdout method\nfor model evaluation and selection are covered, which are not recommended\nwhen working with small datasets.

In [30]:
from langchain.text_splitter import TokenTextSplitter

text_splitter = TokenTextSplitter(chunk_size=10, chunk_overlap=0)
docs = text_splitter.split_documents(pages)

In [35]:
docs[420]

Document(metadata={'source': 'pdf_lists/model_evaluation_selection_ML.pdf', 'page': 5, 'page_label': '6'}, page_content=' can have different meanings: A hypothesis could be the')

In [37]:
pages[5].metadata

{'source': 'pdf_lists/model_evaluation_selection_ML.pdf',
 'page': 5,
 'page_label': '6'}

In [38]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

In [39]:
splits = text_splitter.split_documents(pages)

In [40]:
len(splits)

115

#### Embedding and Vector Store

In [42]:
from langchain.vectorstores import Chroma

persist_directory = 'chroma_db'

In [43]:
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)

In [44]:
print(vectordb._collection.count())

118


In [47]:
question = "main advantages and disadvantages"
docs = vectordb.similarity_search(question, k=3)

In [48]:
len(docs)

3

In [46]:
docs[10].page_content

' the main advantages and disadvantages\nof each technique with'

In [49]:
vectordb.persist()

  vectordb.persist()


In [50]:
question = "what methods used in estimating the performance of machine learning?"
docs = vectordb.similarity_search(question, k=5)

In [52]:
docs[0]

Document(metadata={'page': 10, 'page_label': '11', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}, page_content='labeled data for model evaluation. Using the holdout method, we split our dataset into two parts: A\ntraining and a test set. First, we provide the training data to a supervised learning algorithm. The\nlearning algorithm builds a model from the training set of labeled observations. Then, we evaluate the\npredictive performance of the model on an independent test set that shall represent new, unseen data.\nAlso, we brieﬂy introduced the normal approximation, which requires us to make certain assumptions\nthat allow us to compute conﬁdence intervals for modeling the uncertainty of our performance\nestimate based on a single test set, which we have to take with a grain of salt.\nThis section introduces some of the advanced techniques for model evaluation. We will start by\ndiscussing techniques for estimating the uncertainty of our estimated model performance as well

In [53]:
docs[1]

Document(metadata={'page': 24, 'page_label': '25', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}, page_content='validation as a crossing over of training and validation stages in successive rounds. Here, the main\nidea behind cross-validation is that each sample in our dataset has the opportunity of being tested.\nk-fold cross-validation is a special case of cross-validation where we iterate over a dataset setktimes.\nIn each round, we split the dataset into kparts: one part is used for validation, and the remaining\nk−1 parts are merged into a training subset for model evaluation as shown in Figure 13 , which\nillustrates the process of 5-fold cross-validation.\n1st\n2nd\n3rd\n4th\n5th\nK Iterations (K-Folds)\nValidation  \nFold\nTraining  \nFold\nLearning  \nAlgorithm\n Hyperparameter  \nValues\nModel\nTraining Fold Data\nTraining Fold Labels\nPrediction\nPerformance\nModel\nValidation  \nFold Data\nValidation  \nFold Labels\nPerformance\nPerformance\nPerformance\nPerforma

#### Similarity Search

In [12]:
from langchain.vectorstores import Chroma

persist_directory = 'chroma_db'
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)

  vectordb = Chroma(


In [13]:
print(vectordb._collection.count())

118


In [14]:
texts = [
    """The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).""",
    """A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.""",
    """A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.""",
]

In [15]:
smalldb = Chroma.from_texts(texts, embedding=embedding)

In [16]:
question = "Tell me about all-white mushrooms with large fruiting bodies"

smalldb.similarity_search(question, k=2)

[Document(metadata={}, page_content='The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).'),
 Document(metadata={}, page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.')]

In [17]:
smalldb.max_marginal_relevance_search(question,k=2, fetch_k=3)

[Document(metadata={}, page_content='The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).'),
 Document(metadata={}, page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.')]

#### Addressing Diversity: Maximum marginal relevance

In [18]:
question = "what are reasons that three-way holdout is preferred over k-fold cross validation?"
docs_ss = vectordb.similarity_search(question,k=3)

In [19]:
docs_ss[0].page_content[:100]

'1. We want to estimate the generalization accuracy, the predictive performance of a model on\nfuture '

In [20]:
docs_ss[1].page_content[:100]

'labeled data for model evaluation. Using the holdout method, we split our dataset into two parts: A\n'

In [21]:
### Comparison when MMR is applied
docs_mmr = vectordb.max_marginal_relevance_search(question, k=3)

In [22]:
docs_ss[0].page_content[:100]

'1. We want to estimate the generalization accuracy, the predictive performance of a model on\nfuture '

In [23]:
docs_ss[1].page_content[:100]

'labeled data for model evaluation. Using the holdout method, we split our dataset into two parts: A\n'

#### Addressing Specificity: working with metadata

In [24]:
question = "what is the drawback of decreasing the size of the test set?"

In [25]:
docs = vectordb.similarity_search(
    question,
    k=3,
    filter={'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}
)

In [26]:
for d in docs:
    print(d.metadata)

{'page': 10, 'page_label': '11', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}
{'page': 22, 'page_label': '23', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}
{'page': 15, 'page_label': '16', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}


#### Addressing Specificity: working with metadata using self-query retriever

In [33]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

metadata_field_info = [
    AttributeInfo(
        name="source",
        description="The source of the chunk is from, should be `pdf_lists/model_evaluation_selection_ML.pdf`",
        type="string",
    ),
    AttributeInfo(
        name="page",
        description="The page from the PDF",
        type="integer",
    ),
]

In [37]:
from lark import Lark

import lark
print(lark.__version__)

1.2.2


In [None]:
document_content_description = "Paper PDF"
llm = model
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    verbose=True
)

#### Additional Tricks : compression

In [39]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [40]:
def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))


In [42]:
# Wrap our vectorstore
compressor = LLMChainExtractor.from_llm(llm)

In [43]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever()
)

In [46]:
question = "what is the drawback of decreasing the size of the test set?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

  compressed_docs = compression_retriever.get_relevant_documents(question)


Document 1:

The extracted relevant part of the context is:

"We will start by discussing techniques for estimating the uncertainty of our estimated model performance as well as the model’s variance and stability. And after getting these basics under our belt, we will look at cross-validation techniques for model selection in the next article in this series."

However, upon closer inspection, this part doesn't seem to directly address the question about decreasing the size of the test set.

A more relevant extracted part is:

"Also, we brieﬂy introduced the normal approximation, which requires us to make certain assumptions that allow us to compute conﬁdence intervals for modeling the uncertainty of our performance estimate based on a single test set, which we have to take with a grain of salt."

This part does mention the use of a single test set, but it doesn't specifically discuss decreasing its size.
----------------------------------------------------------------------------------

#### Other Retrievers

In [47]:
from langchain.retrievers import SVMRetriever
from langchain.retrievers import TFIDFRetriever
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [48]:
# Load PDF
loader = PyPDFLoader("pdf_lists/model_evaluation_selection_ML.pdf")
pages = loader.load()
all_page_text=[p.page_content for p in pages]
joined_page_text=" ".join(all_page_text)

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500,chunk_overlap = 150)
splits = text_splitter.split_text(joined_page_text)

In [50]:
# Retrieve
svm_retriever = SVMRetriever.from_texts(splits,embedding)
tfidf_retriever = TFIDFRetriever.from_texts(splits)

In [54]:
question = "what is the drawback of decreasing the size of the test set?"
docs_svm=svm_retriever.get_relevant_documents(question)
docs_svm[0]

Document(metadata={}, page_content='performances (samples may overlap between these training subsets). Looking at the plot above, we\ncan see two distinct trends. First, the resubstitution accuracy (training set) declines as the number of\ntraining samples grows. Second, we observe an improving generalization accuracy (test set) with\nan increasing training set size. These trends can likely be attributed to a reduction in overﬁtting. If\n4https://github.com/rasbt/model-eval-article-supplementary/blob/master/code/resampling-and-kfold.ipynb\n5http://yann.lecun.com/exdb/mnist\n12 Low Variance\n(Precise)\nHigh Variance\n(Not Precise)\nLow Bias\n(Accurate)\nHigh Bias\n(Not Accurate)\nFigure 3: Illustration of bias and variance.\nFigure 4: Learning curves of softmax classiﬁers ﬁt to MNIST.\nthe training set is small, the algorithm is more likely picking up noise in the training set so that the\nmodel fails to generalize well to data that it has not seen before. This observation also explains

In [52]:
docs_tfidf=tfidf_retriever.get_relevant_documents(question)
docs_tfidf[0]

Document(metadata={}, page_content='ask whether it is a good idea to decrease the size of the test set. Decreasing the size of the test set\nbrings up another problem: It may result in a substantial variance of a model’s performance estimate.\nThe reason is that it depends on which instances end up in training set, and which particular instances\n13 end up in test set. Keeping in mind that each time we resample a dataset, we alter the statistics of the\ndistribution of the sample. Most supervised learning algorithms for classiﬁcation and regression as\nwell as the performance estimates operate under the assumption that a dataset is representative of the\npopulation that this dataset sample has been drawn from. As discussed in Section 1.4, stratiﬁcation\nhelps with keeping the sample proportions intact upon splitting a dataset. However, the change in the\nunderlying sample statistics along the features axes is still a problem that becomes more pronounced\nif we work with small datasets,

#### Question Answering

In [55]:
from langchain.chains import RetrievalQA

In [59]:
for d in docs:
    print(d.metadata)

{'page': 10, 'page_label': '11', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}
{'page': 22, 'page_label': '23', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}
{'page': 15, 'page_label': '16', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}


In [57]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever()
)

In [60]:
result = qa_chain({"query": question})

  result = qa_chain({"query": question})


In [61]:
result["result"]

'According to the text, the drawback of decreasing the size of the test set is that it can lead to a higher pessimistic bias and increased variance in the model. The text states: "the smaller the dataset, the higher the pessimistic bias and the variance – the sensitivity of a model towards the data is partitioned."'

#### With Prompting

In [62]:
from langchain.prompts import PromptTemplate

# Build prompt
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [63]:
# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [65]:
question = "What are some main highlights of resampling that the author covered?"

result = qa_chain({"query": question})
result["result"]

"I don't know, but it appears that the text discusses different methods for evaluating and comparing machine learning algorithms, including resampling techniques such as the three-way holdout method and McNemar's test. \n\nIt also mentions the importance of considering the sample size and potential biases in model performance. The author seems to be discussing how to estimate generalization accuracy, increase predictive performance, and identify the best-suited algorithm for a problem.\n\nThanks for asking!"

In [67]:
result["source_documents"][2]

Document(metadata={'page': 3, 'page_label': '4', 'source': 'pdf_lists/model_evaluation_selection_ML.pdf'}, page_content='More often than not, we want to compare different algorithms to each other, oftentimes in terms of\npredictive and computational performance. Let us summarize the main points why we evaluate the\npredictive performance of a model:\n1. We want to estimate the generalization performance, the predictive performance of our\nmodel on future (unseen) data.\n2. We want to increase the predictive performance by tweaking the learning algorithm and\nselecting the best performing model from a given hypothesis space.\n3. We want to identify the machine learning algorithm that is best-suited for the problem at\nhand; thus, we want to compare different algorithms, selecting the best-performing one as\nwell as the best performing model from the algorithm’s hypothesis space.\nAlthough these three sub-tasks listed above have all in common that we want to estimate the\nperformance of 

#### Retrieval QA with Map-Reduce

In [68]:
### With mapreduce method
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    chain_type="map_reduce"
)

In [70]:
result = qa_chain_mr({"query": question})
result["result"]

  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (1651 > 1024). Running this sequence through the model will result in indexing errors


"Based on the provided content, I don't know any information about resampling or its main highlights from the text."

In [71]:
### With refine method
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    chain_type="refine"
)

In [72]:
result = qa_chain_mr({"query": question})
result["result"]

"Based on the provided context about machine learning model selection, resampling, and hyperparameter tuning, I can refine the original answer.\n\nThe author of the original response did not provide any relevant information on resampling in the context of machine learning or model selection. However, considering the new context:\n\nResampling techniques are widely used in machine learning to evaluate the performance of models and algorithms. Some common resampling techniques include:\n\n1. **K-Fold Cross-Validation**: This technique involves dividing the available data into k folds, training a model on k-1 folds, and evaluating its performance on the remaining fold. The process is repeated k times, with each fold serving as the test set once.\n2. **Leave-One-Out Cross-Validation (LOOCV)**: Similar to K-Fold CV, but instead of dividing the data into k folds, only one data point is left out at a time for testing, while the remaining points are used for training and validation.\n3. **Stra

#### Chatting

In [74]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(),
    memory=memory
)

In [76]:
question = "How repeated holdout validation provides a better estimate of model's performance compared to a single train/test split method?"
result = qa({"question": question})

In [77]:
result['answer']

"I don't know how repeated holdout validation provides a better estimate of a model's performance compared to a single train/test split method. The text does not provide an explanation or analysis of this comparison, and I couldn't find any information that directly addresses this question in the provided context."

In [78]:
def load_db(file, chain_type, k):
    # load documents
    loader = PyPDFLoader(file)
    documents = loader.load()
    # split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)
    # define embedding
    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=model, 
        chain_type=chain_type, 
        retriever=retriever, 
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa 

In [79]:
import panel as pn
import param

class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])
    
    def __init__(self,  **params):
        super(cbfs, self).__init__( **params)
        self.panels = []
        self.loaded_file = "pdf_lists/model_evaluation_selection_ML.pdf"
        self.qa = load_db(self.loaded_file,"stuff", 4)
    
    def call_load_db(self, count):
        if count == 0 or file_input.value is None:  # init or no file specified :
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style="outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style="solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer'] 
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)

    @param.depends('db_query ', )
    def get_lquest(self):
        if not self.db_query :
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query )
        )

    @param.depends('db_response', )
    def get_sources(self):
        if not self.db_response:
            return 
        rlist=[pn.Row(pn.pane.Markdown(f"Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history') 
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown(f"Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self,count=0):
        self.chat_history = []
        return 


ModuleNotFoundError: No module named 'panel'

In [None]:
cb = cbfs()

file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')

bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp) 

jpg_pane = pn.pane.Image( './img/convchain.jpg')

tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400))
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)
dashboard