### 1. Dataset Preparation

In [1]:
import pandas as pd
import json

def convert_squad_to_separate_csv(json_path, context_csv="context_data.csv", qa_csv="qa_data.csv"):
    with open(json_path, "r") as f:
        squad_data = json.load(f)

    context_data = []
    qa_data = []
    context_id_map = {}
    context_id = 0

    for article in squad_data['data']:
        for paragraph in article['paragraphs']:
            context = paragraph['context']
            
            # Check if context already exists, if so, use the existing context_id
            if context not in context_id_map:
                context_id_map[context] = context_id
                context_data.append([context_id, context])
                context_id += 1

            # For each QA pair, create a row linking to the context_id
            for qa in paragraph['qas']:
                question = qa['question']
                answers = [ans['text'] for ans in qa['answers']]
                combined_answer = " ".join(answers)  # Combine all answers into one string
                # Use the context_id that corresponds to the current context
                current_context_id = context_id_map[context]
                for answer in answers:
                    qa_data.append([current_context_id, question, answer])

    # Save context data to CSV
    context_df = pd.DataFrame(context_data, columns=["context_id", "context"])
    context_df.to_csv(context_csv, index=False)
    print(f"Context CSV file saved as {context_csv}")

    # Save QA data to CSV
    qa_df = pd.DataFrame(qa_data, columns=["context_id", "question", "answer"])
    qa_df.to_csv(qa_csv, index=False)
    print(f"QA CSV file saved as {qa_csv}")

# Example usage:
convert_squad_to_separate_csv("train-v1.1.json", "context_data.csv", "qa_data.csv")


Context CSV file saved as context_data.csv
QA CSV file saved as qa_data.csv


Create VectorDB - 10s

In [2]:
import pandas as pd
from langchain_ollama.chat_models import ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama.chat_models import ChatOllama
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document

# TODO: test multi chunking size, and present how chunking effect the data returned
def create_vector_db_from_contexts(context_csv="context_data.csv") -> Chroma:
    """
    Create a vector database from the context data in the CSV.

    Args:
        context_csv (str): Path to the CSV file containing the context data.

    Returns:
        Chroma: A vector store containing the embedded contexts.
    """
    # Load the context data
    context_df = pd.read_csv(context_csv)
    
    # Extract the contexts
    contexts = context_df["context"].tolist()[:20]
    ids = context_df["context_id"].tolist()[:20]
    
    # Initialize the embedding model
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    
    # Convert contexts and metadata into LangChain Document objects
    documents = [
        Document(page_content=context, metadata={"context_id": context_id})
        for context, context_id in zip(contexts, ids)
    ]
    
    # Embed the contexts
    vector_db = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        collection_name="context_collection",
        persist_directory="./SQuaAD"
    )
    
    print(f"Vector DB created with {len(contexts)} contexts")
    
    return vector_db

# Example usage:
vector_db = create_vector_db_from_contexts("context_data.csv")

Vector DB created with 20 contexts


### 2. LLM Chain creation with invoke

original code that works

In [3]:
from langchain.chains import ConversationalRetrievalChain

# Initialize LLM
llm = ChatOllama(model="llama3.2")
# Query prompt template
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Set up retriever
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(), 
    llm,
    prompt=QUERY_PROMPT
)

In [4]:
def process_questions(questions: list, vector_db: Chroma, selected_model: str) -> str:
    """
    Process a user question using the vector database and selected language model.

    Args:
        question (list): The user's question.
        vector_db (Chroma): The vector database containing document embeddings.
        selected_model (str): The name of the selected language model.

    Returns:
        str: The generated response to the user's question.
    """
    # RAG prompt template
    template = """With very brief and concise answer,Answer the question based ONLY on the following context:
    {context}
    Question: {question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    # Create chain
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # More optimized
    # Create the retrieval chain
    # chain = ConversationalRetrievalChain.from_llm(
    #     llm,
    #     retriever,
    #     verbose=True,
    #     return_source_documents=True
    # )
    
    # Lists to store answers and sources
    answers = []
    sources = []
    retrieved_documents = []
    # Loop through each question and generate a response
    for question in questions:
        
        # response = chain({"question": question, "chat_history": []})
        response = chain.invoke(question)
        r= retriever.get_relevant_documents(question)
        # response = chain.invoke({"context": retriever, "question": question})
        
        answers.append(response)
        
        # answers.append(response["response"])
        retrieved_documents.append(r)
        # sources.append(response["source_documents"])
        
        
        
    return answers, retrieved_documents

example 1: Get Retrieved document

In [40]:
# TODO: To be tried later
def retrieve_and_combine_documents(inputs):
    retriever = inputs["context"]
    question = inputs["question"]
    retrieved_docs = retriever.get_relevant_documents(question)
    return {"retrieved_docs": retrieved_docs, "question": question}

# Chain definition
def chain(inputs):
    # Step 1: Retrieve and combine documents
    step1_output = retrieve_and_combine_documents(inputs)

    # Step 2: Format with prompt
    prompt_input = prompt.format_prompt(step1_output)
    
    # Step 3: Pass through the LLM
    llm_response = llm.invoke(prompt_input.to_string())
    
    # Step 4: Structure final output
    return {"response": llm_response, "retrieved_docs": step1_output["retrieved_docs"]}

# Invocation
response = chain({"context": retriever, "question": question})
print("LLM Response:", response["response"])
print("Retrieved Documents:", response["retrieved_docs"])


Example 2: get retrievals and generation

In [50]:
from langchain.schema.runnable import Runnable
from langchain_core.runnables import RunnablePassthrough

class RetrieveAndCombineDocuments(Runnable):
    """Runnable to fetch documents and combine them with inputs."""
    def invoke(self, inputs,run_manager=None):
        retriever = inputs["context"]
        question = inputs["question"]
        retrieved_docs = retriever.get_relevant_documents(question)
        return {"retrieved_docs": retrieved_docs, "question": question}

In [None]:
def retrieve_and_combine_documents(inputs):
    retriever = inputs["context"]
    question = inputs["question"]
    retrieved_docs = retriever.get_relevant_documents(question)
    return {"retrieved_docs": retrieved_docs, "question": question}

# Chain definition
def chain_(inputs):
    # RAG prompt template
    template = """With very brief and concise answer,Answer the question based ONLY on the following context:
    {context}
    Question: {question}
    """

    prompt = ChatPromptTemplate.from_template(template)
    # Step 1: Retrieve and combine documents
    step1_output = retrieve_and_combine_documents(inputs)

    # Step 2: Format with prompt
    prompt_input = prompt.format_prompt(step1_output)
    
    # Step 3: Pass through the LLM
    llm_response = llm.invoke(prompt_input.to_string())
    
    # Step 4: Structure final output
    return {"response": llm_response, "retrieved_docs": step1_output["retrieved_docs"]}


TypeError: BaseChatPromptTemplate.format_prompt() takes 1 positional argument but 2 were given

### Load data for Inference

In [30]:
import pandas as pd

def load_qa_from_csv(csv_file_path: str):
    """
    Loads questions and answers from the provided CSV file.
    
    Args:
        csv_file_path (str): Path to the CSV file containing the questions and answers.
    
    Returns:
        tuple: Two lists, one with the questions and the other with the answers.
    """
    # Load the CSV into a pandas DataFrame
    df = pd.read_csv(csv_file_path)
    
    # Extract questions and answers into lists
    questions = df['question'].tolist()
    answers = df['answer'].tolist()
    gt_context_id = df['context_id'].tolist()
    return questions, answers, gt_context_id


In [31]:
# Load questions and answers from CSV
questions, GT_answers, gt_context_id = load_qa_from_csv('qa_data.csv')
GT_answers = GT_answers[:20]
questions = questions[:20]
gt_context_id = gt_context_id[:20]

#### Infere and Eval Retrievers

In [19]:
def retrieve_with_ids(vector_db, querys, k=5):
    """
    Retrieve documents from the vector database along with their IDs.

    Args:
        vector_db (Chroma): The vector database.
        query (str): The query to search for.
        k (int): Number of top results to retrieve.

    Returns:
        list: List of retrieved document metadata (including IDs).
    """
    retriever = vector_db.as_retriever(search_kwargs={"k": k})
    results = {}
    ids = []
    for query in querys:
        result = retriever.get_relevant_documents(query)
        print(result)
        results[query] = [{"id": doc.metadata["context_id"], "content": doc.page_content} for doc in result]
        ids.append([doc.metadata["context_id"] for doc in result])
    return results, ids


#### Inference

Inference by original code

In [22]:
retrieved_docs,ids = retrieve_with_ids(vector_db, querys=questions)

[Document(metadata={'context_id': 0}, page_content='Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.'), Document(metadata={'context_id': 2}, page_content='The university is the major seat of the Congregation of Holy Cross (albeit not its official headquarters, which are in Rome). Its main seminary, Moreau Seminary, is located on the campus across St.

In [13]:
answers, sources = process_questions(questions, vector_db, selected_model="llama3.2")

# Now, `model_responses` will hold the generated responses that you can use for evaluation.


Inference by second code

In [86]:
for question in questions:
    # Invocation
    response = chain_({"context": retriever, "question": question})
    print("LLM Response:", response["response"])
    print("Retrieved Documents:", response["retrieved_docs"])


TypeError: BaseChatPromptTemplate.format_prompt() takes 1 positional argument but 2 were given

In [23]:
ids

[[0, 2, 10, 17, 7],
 [7, 17, 0, 4, 15],
 [0, 17, 7, 2, 14],
 [17, 0, 2, 14, 8],
 [7, 0, 17, 2, 9],
 [1, 15, 19, 4, 14],
 [1, 15, 9, 8, 14],
 [1, 15, 4, 8, 9],
 [1, 15, 8, 4, 16],
 [1, 19, 15, 4, 14],
 [2, 17, 6, 0, 7],
 [2, 17, 0, 6, 13],
 [17, 4, 9, 7, 19],
 [16, 17, 15, 2, 6],
 [2, 11, 12, 10, 6],
 [3, 8, 9, 5, 4],
 [3, 19, 4, 8, 14],
 [3, 19, 4, 8, 14],
 [3, 16, 14, 4, 8],
 [3, 19, 4, 8, 5]]

In [14]:
answers

['Saint Bernadette Soubirous.',
 'A copper statue of Christ with arms upraised facing the Main Building, with the legend "Venite Ad Me Omnes" (Come to Me All).',
 'The Basilica of the Sacred Heart is next to the Main Building.',
 'The Grotto at Notre Dame is a Marian place of prayer and reflection that is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858.',
 'A golden statue of the Virgin Mary.',
 'The Scholastic magazine began publishing in September 1876 as a one-page journal.',
 'The Juggler is released twice a year.',
 'The Observer.',
 'Three newspapers: The Observer, Common Sense, and Irish Rover.',
 'In 1987.',
 'The official headquarters of the Congregation of the Holy Cross are in Rome.',
 'Moreau Seminary.',
 "The text does not explicitly state which structure is the oldest. However, it mentions the Basilica of the Sacred Heart, which was built in 1873. This is likely one of the older structures on campus

In [15]:
sources

[[Document(metadata={'context_id': 0}, page_content='Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.'),
  Document(metadata={'context_id': 14}, page_content='As of 2012[update] research continued in many fields. The university president, John Jenkins, described his hope that Notre Dame would become "one of the pre–eminent research institutions in th

In [18]:
GT_answers

['Saint Bernadette Soubirous',
 'a copper statue of Christ',
 'the Main Building',
 'a Marian place of prayer and reflection',
 'a golden statue of the Virgin Mary',
 'September 1876',
 'twice',
 'The Observer',
 'three',
 '1987',
 'Rome',
 'Moreau Seminary',
 'Old College',
 'Retired priests and brothers',
 'Buechner Prize for Preaching',
 'eight',
 '1920',
 'the College of Science',
 'five',
 'the 1870s']

## Evaluation

In [33]:
def calculate_retrieval_metrics(gt_ids, retrieved_ids_list, k=5):
    """
    Calculate retrieval metrics.

    Args:
        gt_ids (list): List of ground truth document IDs.
        retrieved_ids_list (list): List of lists containing retrieved document IDs for each query.
        k (int): Number of top documents to consider.

    Returns:
        dict: Metrics like Recall@k and Precision@k.
    """
    recall_k = []
    precision_k = []

    for gt, retrieved in zip(gt_ids, retrieved_ids_list):
        # Ensure ground-truth IDs are iterable
        if not isinstance(gt, (list, set)):
            gt = [gt]
        relevant_retrieved = set(gt).intersection(set(retrieved[:k]))
        recall_k.append(len(relevant_retrieved) / len(gt) if gt else 0)
        precision_k.append(len(relevant_retrieved) / k)

    return {
        "Recall@k": sum(recall_k) / len(recall_k),
        "Precision@k": sum(precision_k) / len(precision_k),
    }


In [35]:
print(gt_context_id)
print(ids)
calculate_retrieval_metrics(gt_context_id,ids)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
[[0, 2, 10, 17, 7], [7, 17, 0, 4, 15], [0, 17, 7, 2, 14], [17, 0, 2, 14, 8], [7, 0, 17, 2, 9], [1, 15, 19, 4, 14], [1, 15, 9, 8, 14], [1, 15, 4, 8, 9], [1, 15, 8, 4, 16], [1, 19, 15, 4, 14], [2, 17, 6, 0, 7], [2, 17, 0, 6, 13], [17, 4, 9, 7, 19], [16, 17, 15, 2, 6], [2, 11, 12, 10, 6], [3, 8, 9, 5, 4], [3, 19, 4, 8, 14], [3, 19, 4, 8, 14], [3, 16, 14, 4, 8], [3, 19, 4, 8, 5]]


{'Recall@k': 0.95, 'Precision@k': 0.19000000000000006}

In [33]:
from langchain.evaluation.qa import QAEvalChain
# from langchain.evaluation.retrieval import RetrievalEvaluator

def evaluate_generated_answers(questions, answers, ground_truths):
    # Create the eval chain
    eval_chain = QAEvalChain.from_llm(llm)
    
    # Prepare the examples list in the expected format
    examples = [
        {"question": q, "answer": gt} for q, gt in zip(questions, ground_truths)
    ]
    
    # Prepare the predictions list in the expected format
    predictions = [
        {"prediction": ans} for ans in answers
    ]
    
    # Perform evaluation with custom keys
    graded_outputs = eval_chain.evaluate(
        examples, 
        predictions, 
        question_key="question", 
        answer_key="answer", 
        prediction_key="prediction"
    )
    return graded_outputs

print(type(GT_answers))
evaluate_generated_answers(questions, answers,GT_answers)

<class 'list'>


[{'results': 'Since both the student answer and true answer match exactly, I would grade it as:\n\nGRADE: CORRECT'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'INCORRECT'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'INCORRECT\n\nThe grading decision is INCORRECT because, although the student\'s answer and true answer contain similar information, there is no difference in the factual accuracy. The true answer also includes "at Notre Dame", which isn\'t present in the student\'s answer.'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'INCORRECT'},
 {'results': 'Based on the student\'s answer, "The Observer", and the true answer, also "The Observer", I would score it as:\n\nGRADE: CORRECT'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'INCORRECT'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'GRADE: CORRECT'},
 {'results': 'INCORRECT'},
 {'results': 'GRADE: INCORRECT'},
 {'results': "Since both the student's answer and the true answer contain identical information, with no conflic

In [None]:
# Evaluate generated answers
answer_metrics = evaluate_generated_answers(questions, answers, GT_answers)
print("Answer Metrics:", answer_metrics)

#### My judge
1. in example (2), 

In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score
from collections import defaultdict

def calculate_retrieval_metrics(retrieved_docs: list, relevant_docs: list, k: int):
    """
    Calculate retrieval metrics: Recall@k, Precision@k, MRR, and nDCG.
    
    Args:
        retrieved_docs (list): List of retrieved documents' ids or contents.
        relevant_docs (list): List of relevant documents' ids or contents.
        k (int): The number of top documents to consider for evaluation.
    
    Returns:
        dict: A dictionary with retrieval metrics.
    """
    # Ensure k is within bounds
    k = min(k, len(retrieved_docs))
    
    # Initialize results
    precision_at_k = 0
    recall_at_k = 0
    mrr = 0
    dcg = 0
    idcg = 0
    
    # Precision and Recall at k
    retrieved_set = set(retrieved_docs[:k])
    relevant_set = set(relevant_docs)
    
    precision_at_k = len(retrieved_set & relevant_set) / k
    recall_at_k = len(retrieved_set & relevant_set) / len(relevant_set)
    
    # Mean Reciprocal Rank (MRR)
    for idx, doc in enumerate(retrieved_docs[:k]):
        if doc in relevant_docs:
            mrr = 1 / (idx + 1)  # Rank starts at 1
    
    # nDCG
    for idx, doc in enumerate(retrieved_docs[:k]):
        if doc in relevant_docs:
            dcg += 1 / np.log2(idx + 2)  # Log base 2
    idcg = sum(1 / np.log2(i + 2) for i in range(min(k, len(relevant_docs))))
    nDCG = dcg / idcg if idcg > 0 else 0
    
    return {
        'Precision@k': precision_at_k,
        'Recall@k': recall_at_k,
        'MRR': mrr,
        'nDCG': nDCG
    }

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

def calculate_generation_metrics(generated_responses: list, ground_truth: list):
    """
    Calculate BLEU and ROUGE scores for evaluating the generation quality.
    
    Args:
        generated_responses (list): List of model-generated answers.
        ground_truth (list): List of ground truth answers (correct answers).
    
    Returns:
        dict: A dictionary with BLEU and ROUGE metrics.
    """
    # BLEU score calculation (simple case)
    bleu_scores = [sentence_bleu([gt.split()], gen.split()) for gen, gt in zip(generated_responses, ground_truth)]
    
    # ROUGE score calculation (using rouge_score)
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge_scores = [rouge.score(gt, gen) for gen, gt in zip(generated_responses, ground_truth)]
    
    # Extract ROUGE-1 and ROUGE-L F1 scores
    rouge1_f1 = np.mean([score['rouge1'].f1 for score in rouge_scores])
    rouge2_f1 = np.mean([score['rouge2'].f1 for score in rouge_scores])
    rougel_f1 = np.mean([score['rougeL'].f1 for score in rouge_scores])
    
    # Calculate BLEU mean
    bleu_mean = np.mean(bleu_scores)
    
    return {
        'BLEU': bleu_mean,
        'ROUGE-1 F1': rouge1_f1,
        'ROUGE-2 F1': rouge2_f1,
        'ROUGE-L F1': rougel_f1
    }


In [None]:
import torch
import math
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def calculate_perplexity(generated_responses: list, model_name='gpt2'):
    """
    Calculate the perplexity of the generated responses using a language model.
    
    Args:
        generated_responses (list): List of model-generated answers.
        model_name (str): The name of the pretrained language model to use (default: GPT-2).
    
    Returns:
        float: The average perplexity of the responses.
    """
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()
    
    total_perplexity = 0
    for response in generated_responses:
        inputs = tokenizer(response, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs['input_ids'])
        log_likelihood = outputs.loss.item()
        perplexity = math.exp(log_likelihood)
        total_perplexity += perplexity
    
    return total_perplexity / len(generated_responses)


In [None]:
# Assuming `retrieved_docs`, `relevant_docs`, `generated_responses`, and `ground_truth` are available:

# Retrieval Metrics
retrieval_metrics = calculate_retrieval_metrics(retrieved_docs, relevant_docs, k=5)

# Generation Metrics
generation_metrics = calculate_generation_metrics(generated_responses, ground_truth)

# Perplexity
perplexity = calculate_perplexity(generated_responses)

# Print results
print("Retrieval Metrics:", retrieval_metrics)
print("Generation Metrics:", generation_metrics)
print("Perplexity:", perplexity)


#### Running with history

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import LLMChain
from langchain.chat_models import ChatOllama

def process_question_with_history(question: str, vector_db: Chroma, conversation_history: list, selected_model: str) -> str:
    """
    Process a user question considering the conversation history using the vector database and selected language model.

    Args:
        question (str): The user's question.
        vector_db (Chroma): The vector database containing document embeddings.
        conversation_history (list): The conversation history (previous questions and answers).
        selected_model (str): The name of the selected language model.

    Returns:
        str: The generated response to the user's question.
    """
    logger.info(f"Processing question: {question} using model: {selected_model}")
    
    # Initialize LLM
    llm = ChatOllama(model=selected_model)

    # Prepare history context: concatenate previous question-answer pairs
    history_context = "\n".join([f"Q: {entry['question']}\nA: {entry['answer']}" for entry in conversation_history])

    # Create the combined prompt: current question + history context
    conversation_prompt = f"""
    You are an AI assistant that helps with answering questions. Below is the conversation history:
    {history_context}
    Now answer the following question based on the context:
    Q: {question}
    A:
    """

    # Query prompt template for the conversation
    prompt = PromptTemplate(input_variables=["question", "history"], template=conversation_prompt)

    # Set up retriever (with vector database and history context as input)
    retriever = vector_db.as_retriever()

    # Create the chain: combining retrieval and LLM
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        verbose=True,
        return_source_documents=True
    )

    # Get the response using the conversational chain
    response = chain.run(question=question, history=conversation_history)
    
    # Update conversation history with the new Q&A pair
    conversation_history.append({"question": question, "answer": response})

    return response


In [None]:
conversation_history = []

user_question = "What is the capital of France?"
model_answer = process_question_with_history(user_question, vector_db, conversation_history, selected_model="your-model")

# Print or display the answer in your app
print(model_answer)

Challenges:

    Context Length: If the history becomes too long, you might hit the model's context length limit (e.g., token limits). In such cases, you can truncate the history or select the most recent exchanges to keep the context manageable.
    Scaling: For large-scale conversations, you might need more sophisticated memory management (e.g., storing only relevant parts of the history or summarizing older parts of the conversation).