Importing Libraries

In [1]:
!pip install -qU langsmith langchain-core langchain-community langchain-openai langchain-qdrant chainlit
!pip install -qU pymupdf ragas huggingface_hub nbformat sentence-transformers torch


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-experimental 0.0.64 requires langchain-community<0.3.0,>=0.2.10, but you have langchain-community 0.3.0 which is incompatible.
langchain-experimental 0.0.64 requires langchain-core<0.3.0,>=0.2.27, but you have langchain-core 0.3.5 which is incompatible.
langgraph 0.2.16 requires langchain-core<0.3,>=0.2.27, but you have langchain-core 0.3.5 which is incompatible.
langchain-huggingface 0.0.3 requires langchain-core<0.3,>=0.1.52, but you have langchain-core 0.3.5 which is incompatible.
ragas 0.1.20 requires langchain-core<0.3, but you have langchain-core 0.3.5 which is incompatible.
langgraph-checkpoint 1.0.6 requires langchain-core<0.3,>=0.2.22, but you have langchain-core 0.3.5 which is incompatible.[0m[31m
[0m

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.schema import Document

In [3]:
import os
import getpass

os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [4]:
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass('Enter your LangSmith API key: ')


TASK 1: Dealing with the Data

In [5]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load the first document
documents1 = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()

# Load the second document
documents2 = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()

Chunking the Data and Splitting

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 400
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    
)

documents1 = text_splitter.split_documents(documents1)
documents2 = text_splitter.split_documents(documents2)


In [7]:
len(documents1)

676

In [8]:
len(documents2)

486

Text Embedding Model to create Vector Store

In [9]:
from langchain_openai import OpenAIEmbeddings

EMBEDDING_MODEL = "text-embedding-3-small"

embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    openai_api_key=os.environ["OPENAI_API_KEY"]
)

Setting up Qdrant Vector Store

In [10]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

LOCATION = ":memory:"
COLLECTION_NAME = "ai_ethics_docs"
VECTOR_SIZE = 1536

Loading the Data into Qdrant Vector Store

In [11]:
qdrant_client = QdrantClient(
    location=LOCATION
)

qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
)

qdrant_vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings
)

qdrant_vector_store.add_documents(documents1)
qdrant_vector_store.add_documents(documents2)

['9c1c8120f6fc43289badd1945a69bc0f',
 '245f303b0cce41f0a45fdc76c2e7f3a5',
 '7a5e4c5920894b55b74db09ee511fd4f',
 '9c5541c9377745618f9525708aad2b63',
 '57c36710de6146d7acc0015d957a025e',
 '0719ae6bfbfd4388bc3845958c5e253a',
 '16761696090c498d982e7e3d3fef7a6d',
 'fcbb083990a4418c9ba29d77ad969526',
 '17843f545f3949749a237c4c19ce325f',
 '50cda741772c4e3782c6adee750d23a3',
 '1d321d0637844684984dc8643b9a9954',
 '442fa7172777460cbc95f4c8d25d5e92',
 '83a2f3b0336e4026aaaab73a269473e8',
 '41806709f9b84afd80face9184dbef6e',
 '38c3466d7aec45ffae2971e2e653134d',
 'd88092643d73488985ca18ece67c8799',
 '47bf306581a64556a5c5f606e5f1ffbb',
 '8a040426fea84b078a4ba9b14423aa51',
 'af3b21031b044ca3aeb568142729cf5a',
 '8d5c013c30c146e29a9727aac6360657',
 '0b520c4b30e947c28a883ca5f9e0c8e4',
 'ef70ff24c93247eda00add2a1b988df6',
 '5d40e9f1d59f4010b4fe14c6da53b84b',
 '415fd85e08eb4e9b8f2c639f0efe5b27',
 '748e415fe5024fb080213e8ccf5baf07',
 '04df5f79c38b4ab68175742df59f7f38',
 'aadb3254ee534343b37e166e0b3c8aab',
 

TASK 2: RAG Prototype

Setting up the Retriever

In [12]:
retriever = qdrant_vector_store.as_retriever()

retrieved_documents = retriever.invoke("How can I cause no harm with AI?")

for doc in retrieved_documents:
  print(doc)

page_content='prevent future occurrences. Conduct post-mortem analyses of incidents with 
relevant AI Actors, to understand the root causes and implement preventive 
measures. 
Human-AI Conﬁguration; 
Dangerous, Violent, or Hateful 
Content 
MG-4.2-003 Use visualizations or other methods to represent GAI model behavior to ease 
non-technical stakeholders understanding of GAI system functionality.' metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 48, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': "D:20240805141702-04'00'", 'modDate': "D:20240805143048-04'00'", 'trapped': '', '_id': '3724b9404202

Creating the RAG Chain

In [13]:
from langchain import hub

retrieval_qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

print(retrieval_qa_prompt.messages[0].prompt.template)


Answer any use questions based solely on the context below:

<context>
{context}
</context>


In [14]:
from langchain.prompts import ChatPromptTemplate

template = """
You are a helpful assistant. Act as an AI ethics expert and answer the question in a succinct way. 
If you cannot answer the question based on the context - you must say "I don't know".

Question:
{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_template(template)

In [15]:
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

primary_qa_llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

retrieval_augmented_qa_chain = (
    {
        "context": itemgetter("question") | retriever, 
        "question": itemgetter("question")
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": prompt | primary_qa_llm, 
        "context": itemgetter("context")
    }
)

In [16]:
# Example query
query = "How do cause no harm with AI?"
response = retrieval_augmented_qa_chain.invoke({"question": query})
print(response["response"].content)

To cause no harm with AI, it is essential to implement policies and procedures that address AI risks, manage harmful biases, enhance privacy, ensure safety and security, and conduct thorough impact assessments. Additionally, engaging in post-mortem analyses of incidents can help identify root causes and prevent future occurrences.


In [17]:
#Example query 2

query = "What is a rule of thumb for data governance?"
response = retrieval_augmented_qa_chain.invoke({"question": query})
print(response["response"].content)

A rule of thumb for data governance is to maintain high data quality, especially in sensitive domains, to avoid adverse consequences from decision-making based on flawed or inaccurate data.


Deploying the RAG Prototype to Hugging Face - DONE


TASK 3: Creating Test Data

In [18]:
eval_documents = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load() + \
                 PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()

text_splitter_eval = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)

eval_documents = text_splitter_eval.split_documents(eval_documents)

In [19]:
len(eval_documents)

910

In [20]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

distributions = {
    simple: 0.5,
    multi_context: 0.4,
    reasoning: 0.1
}

num_qa_pairs = 40 # You can reduce the number of QA pairs if you're experiencing rate-limiting issues

testset = generator.generate_with_langchain_docs(eval_documents, num_qa_pairs, distributions)

testset.to_pandas()


embedding nodes:   0%|          | 0/1820 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/40 [00:00<?, ?it/s]

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,How does the lack of notice or explanation reg...,[ever being notified that data was being colle...,The lack of notice or explanation regarding da...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
1,What are some TEVV considerations that need to...,[Information Integrity \nAI Actor Tasks: End U...,Scientific integrity and TEVV considerations t...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
2,How can Generative AI facilitate the spread of...,[Disinformation and misinformation – both of w...,Generative AI can facilitate the spread of dis...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
3,How should data from sensitive domains like cr...,[in the spreading and scaling of harms. Data f...,Data from sensitive domains like criminal just...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
4,How does the Blueprint for an AI Bill of Right...,[- \nUSING THIS TECHNICAL COMPANION\nThe Bl...,The Blueprint for an AI Bill of Rights aims to...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
5,How do generative models like LLMs generate ou...,[Confabulations can occur across GAI outputs a...,Generative models like LLMs generate outputs b...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
6,How should consent practices ensure use-specif...,[control over their data \nUse-specific consen...,Consent practices should ensure use-specific c...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
7,How can digital content transparency solutions...,[Human-AI Conﬁguration \nMS-2.8-003 \nUse digi...,Digital content transparency solutions can be ...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
8,How should risks be re-evaluated when adapting...,[and Component Integration \nMP-4.1-007 Re-eva...,Risks should be re-evaluated when adapting GAI...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
9,What principles have been proposed for the eth...,"[mated systems, and researchers developing inn...","Advocates, researchers, and government organiz...",simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True


In [21]:
testset.test_data[0]

DataRow(question='How does the lack of notice or explanation regarding data collection affect parental knowledge in child maltreatment risk assessments?', contexts=['ever being notified that data was being collected and used as part of an algorithmic child maltreatment\nrisk assessment.84 The lack of notice or an explanation makes it harder for those performing child\nmaltreatment assessments to validate the risk assessment and denies parents knowledge that could help them\ncontest a decision.\n41'], ground_truth='The lack of notice or explanation regarding data collection in child maltreatment risk assessments makes it harder for those performing the assessments to validate the risk assessment. It also denies parents knowledge that could help them contest a decision.', evolution_type='simple', metadata=[{'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint

In [22]:
testset_df = testset.to_pandas()
testset_df.to_csv("synthetic_data.csv")

In [23]:
import pandas as pd

test_df = pd.read_csv("synthetic_data.csv")

In [24]:
testset_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,How does the lack of notice or explanation reg...,[ever being notified that data was being colle...,The lack of notice or explanation regarding da...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
1,What are some TEVV considerations that need to...,[Information Integrity \nAI Actor Tasks: End U...,Scientific integrity and TEVV considerations t...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
2,How can Generative AI facilitate the spread of...,[Disinformation and misinformation – both of w...,Generative AI can facilitate the spread of dis...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
3,How should data from sensitive domains like cr...,[in the spreading and scaling of harms. Data f...,Data from sensitive domains like criminal just...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
4,How does the Blueprint for an AI Bill of Right...,[- \nUSING THIS TECHNICAL COMPANION\nThe Bl...,The Blueprint for an AI Bill of Rights aims to...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
5,How do generative models like LLMs generate ou...,[Confabulations can occur across GAI outputs a...,Generative models like LLMs generate outputs b...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
6,How should consent practices ensure use-specif...,[control over their data \nUse-specific consen...,Consent practices should ensure use-specific c...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
7,How can digital content transparency solutions...,[Human-AI Conﬁguration \nMS-2.8-003 \nUse digi...,Digital content transparency solutions can be ...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
8,How should risks be re-evaluated when adapting...,[and Component Integration \nMP-4.1-007 Re-eva...,Risks should be re-evaluated when adapting GAI...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
9,What principles have been proposed for the eth...,"[mated systems, and researchers developing inn...","Advocates, researchers, and government organiz...",simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True


In [25]:
test_questions = testset_df["question"].values.tolist()
test_groundtruths = testset_df["ground_truth"].values.tolist()


In [27]:
answers = []
contexts = []

for question in test_questions:
    response = retrieval_augmented_qa_chain.invoke({"question": question})
    answers.append(response["response"].content)
    contexts.append([context.page_content for context in response["context"]])



In [28]:
from datasets import Dataset

min_length = min(len(test_questions[:10]), len(answers[:10]), len(contexts[:10]), len(test_groundtruths[:10]))

response_dataset = Dataset.from_dict({
    "question": test_questions[:min_length],
    "answer": answers[:min_length],
    "contexts": contexts[:min_length],
    "ground_truth": test_groundtruths[:min_length]
})

In [29]:
response_dataset[0]   

{'question': 'How does the lack of notice or explanation regarding data collection affect parental knowledge in child maltreatment risk assessments?',
 'answer': 'The lack of notice or explanation regarding data collection in child maltreatment risk assessments significantly affects parental knowledge by making it difficult for parents to understand and contest the decisions made based on these assessments. Without being informed about what data is collected and how it is used, parents are left without the necessary information to validate the risk assessment or challenge any conclusions drawn from it.',
 'contexts': ['ever being notified that data was being collected and used as part of an algorithmic child maltreatment\nrisk assessment.84 The lack of notice or an explanation makes it harder for those performing child\nmaltreatment assessments to validate the risk assessment and denies parents knowledge that could help them\ncontest a decision.\n41',
  'practices. In a court hearing, 

Assess your pipeline using the RAGAS framework including key metrics faithfulness, answer relevancy, context precision, and context recall. 

In [30]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [31]:
# Evaluate the first dataset
results1 = evaluate(response_dataset, metrics)

# Print the results
print("Results for dataset:")
print(results1)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Results for dataset:
{'faithfulness': 0.8651, 'answer_relevancy': 0.9615, 'context_recall': 0.8333, 'context_precision': 0.9556, 'answer_correctness': 0.7518}


Here are some conclusions about the performance and effectiveness of your RAG pipeline:
Faithfulness:
Dataset 1: 0.7120
Dataset 2: 0.7655
Both scores are decent, indicating that the model's responses are somewhat faithful to the given context, but there's room for improvement.
Answer Relevancy:
Dataset 1: 0.9760
Dataset 2: 0.7788
The model performs really well on Dataset 1, with good relevancy. Dataset 2 shows moderate relevancy, suggesting inconsistent performance across datasets.
Context Recall:
Dataset 1: 0.7111
Dataset 2: 0.1111
There's a significant discrepancy here. The model recalls context well for Dataset 1 but poorly for Dataset 2.
Context Precision:
Dataset 1: 0.7963
Dataset 2: 0.1389
Similar to recall, precision is moderate for Dataset 1 but very low for Dataset 2.
5. Answer Correctness:
Dataset 1: 0.8100
Dataset 2: 0.2831
The model performs reasonably well on Dataset 1 but poorly on Dataset 2.

Conclusions:
1. Inconsistent performance: The model performs significantly better on Dataset 1 across all metrics.
Context retrieval issues: Low recall and precision for Dataset 2 suggest problems with retrieving relevant context.
Dataset-specific challenges: Dataset 2 might be more complex or less well-represented in the model's training data.
Room for improvement: Even for Dataset 1, scores indicate potential for enhancement, especially in faithfulness and context precision.
Potential overfitting: The stark difference in performance might indicate overfitting to data similar to Dataset 1.

Evaluate the RAGAS prototype and baseline the synthetic dataset using LangSmith

In [3]:
from langsmith.evaluation import evaluate
from langsmith.utils import load_dataset

# Load the RAGAS test dataset
ragas_test_data = load_dataset("ragas_test_data.json")

# Load the Langsmith test dataset
langsmith_test_data = load_dataset("langsmith_test_data.json")

# Evaluate the RAG prototype on the RAGAS test dataset
ragas_results = evaluate_qa_pipeline(
    retrieval_augmented_qa_chain,
    ragas_test_data,
    metrics=["exact_match", "f1_score", "retrieval_precision", "retrieval_recall"]
)

# Evaluate the RAG prototype on the Langsmith test dataset
langsmith_results = evaluate_qa_pipeline(
    retrieval_augmented_qa_chain,
    langsmith_test_data,
    metrics=["exact_match", "f1_score", "retrieval_precision", "retrieval_recall"]
)

# Print the evaluation results
print("RAGAS Evaluation Results:")
print(ragas_results)

print("\nLangsmith Evaluation Results:")
print(langsmith_results)

# Compare the results
print("\nComparison:")
print("Exact Match:")
print(f"RAGAS: {ragas_results['exact_match']:.4f}")
print(f"Langsmith: {langsmith_results['exact_match']:.4f}")

print("\nF1 Score:")
print(f"RAGAS: {ragas_results['f1_score']:.4f}")
print(f"Langsmith: {langsmith_results['f1_score']:.4f}")

print("\nRetrieval Precision:")
print(f"RAGAS: {ragas_results['retrieval_precision']:.4f}")
print(f"Langsmith: {langsmith_results['retrieval_precision']:.4f}")

print("\nRetrieval Recall:")
print(f"RAGAS: {ragas_results['retrieval_recall']:.4f}")
print(f"Langsmith: {langsmith_results['retrieval_recall']:.4f}")

ImportError: cannot import name 'evaluate_qa_pipeline' from 'langsmith.evaluation' (/opt/anaconda3/envs/llmops-course/lib/python3.11/site-packages/langsmith/evaluation/__init__.py)

TASK 4: Fine-tuning the Embeddings

Generate synthetic fine-tuning data and complete fine-tuning of the open-source embedding model

Step 1: Create Training, Validation, and Test Datasets
We'll split the documents into training, validation, and test sets for both datasets.

In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader

# Define a custom collate function that properly handles InputExample objects
def custom_collate_fn(batch):
    texts = [example.texts for example in batch]  # Extract texts for each example
    return texts  # Return the texts directly

def fine_tune_embeddings(train_data, model_name="Snowflake/snowflake-arctic-embed-m", epochs=3):
    model = SentenceTransformer(model_name)
    
    # Create InputExample instances from the training data
    train_examples = [InputExample(texts=[item['question'], item['answer']]) for item in train_data]
    
    # Create a DataLoader with the custom collate function
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16, collate_fn=custom_collate_fn)
    
    # Define the training loss
    train_loss = losses.MultipleNegativesRankingLoss(model)
    
    # Use the accelerator to prepare the model and dataloader
    from accelerate import Accelerator
    accelerator = Accelerator()
    model, train_dataloader, train_loss = accelerator.prepare(model, train_dataloader, train_loss)
    
    # Fit the model
    model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=epochs, warmup_steps=100)
    
    return model

# Prepare data for fine-tuning
train_data1 = [{"question": doc.page_content, "answer": doc.page_content} for doc in training_split_documents1]
train_data2 = [{"question": doc.page_content, "answer": doc.page_content} for doc in training_split_documents2]
train_data = train_data1 + train_data2

# Fine-tune the model
fine_tuned_model = fine_tune_embeddings(train_data)

In [None]:
# Install required libraries
!pip install 'transformers[torch]' --upgrade
!pip install accelerate --upgrade

# Verify accelerate installation
from accelerate import Accelerator

accelerator = Accelerator()
print(accelerator.state)

In [None]:
# Split documents into training, validation, and test sets
training_split_documents1 = documents1[:300]
val_split_documents1 = documents1[300:350]
test_split_documents1 = documents1[350:400]

training_split_documents2 = documents2[:300]
val_split_documents2 = documents2[300:350]
test_split_documents2 = documents2[350:400]

In [None]:
# Prepare data for fine-tuning
train_data1 = [{"question": doc.page_content, "answer": doc.page_content} for doc in training_split_documents1]
train_data2 = [{"question": doc.page_content, "answer": doc.page_content} for doc in training_split_documents2]

# Combine both datasets
train_data = train_data1 + train_data2

In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader

def custom_collate_fn(batch):
    texts = [example.texts for example in batch]
    return texts

def fine_tune_embeddings(train_data, model_name="Snowflake/snowflake-arctic-embed-m", epochs=3):
    model = SentenceTransformer(model_name)
    
    train_examples = [InputExample(texts=[item['question'], item['answer']]) for item in train_data]
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16, collate_fn=lambda x: x)
    
    train_loss = losses.MultipleNegativesRankingLoss(model)
    
    # Use the accelerator to prepare the model and dataloader
    accelerator = Accelerator()
    model, train_dataloader, train_loss = accelerator.prepare(model, train_dataloader, train_loss)
    
    model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=epochs, warmup_steps=100)
    
    return model

# Fine-tune the model
fine_tuned_model = fine_tune_embeddings(train_data)

Assessing Performance

In [None]:
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall

def assess_performance(rag_chain, test_data):
    results = evaluate(
        rag_chain,
        test_data,
        metrics=[
            faithfulness(),
            answer_relevancy(),
            context_precision(),
            context_recall()
        ]
    )
    return results

# Usage
performance_results = assess_performance(rag_chain, test_data)
print(performance_results)