In [None]:
from langsmith import evaluate, Client
from difflib import SequenceMatcher
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os
from dotenv import load_dotenv
load_dotenv()
# LangSmith client, created after load_dotenv() has run.
# NOTE(review): presumably it takes its API key/endpoint from the environment
# variables loaded from .env - confirm against your LangSmith setup.
client = Client()
dataset_name = "Rag data"

# Single source of truth for the Ollama server URL (shared by the embedding
# model and the LLM). Set OLLAMA_BASE_URL in the environment/.env to override.
ollama_base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")

# Initialize Ollama embeddings
embedding_model = OllamaEmbeddings(
    model="mxbai-embed-large",
    base_url=ollama_base_url,
)

# Initialize Ollama LLM
llm = OllamaLLM(
    model="llama3.1",
    base_url=ollama_base_url,
)

# Path to your vectorstore. The default is the original machine-specific
# location; set RAG_VECTORSTORE_PATH in the environment/.env to run elsewhere
# without editing the code.
vectorstore_path = os.environ.get(
    "RAG_VECTORSTORE_PATH",
    "C:\\Users\\PC\\Desktop\\rag for interview\\pdf_vectorstore",
)

# Cache for the loaded FAISS index; filled on the first retrieval.
_vectorstore_cache = None


def _get_vectorstore():
    """Return the FAISS index at ``vectorstore_path``, loading it only once."""
    global _vectorstore_cache
    if _vectorstore_cache is None:
        # SECURITY: allow_dangerous_deserialization=True permits pickle-based
        # loading of the stored index. Only point vectorstore_path at an index
        # you created yourself or otherwise trust.
        _vectorstore_cache = FAISS.load_local(
            vectorstore_path,
            embedding_model,
            allow_dangerous_deserialization=True,
        )
    return _vectorstore_cache


def retrieve_documents(question, k=4):
    """
    Retrieve relevant documents from the FAISS index based on the question

    The index is read from disk on the first call only and reused afterwards,
    so evaluating many questions does not reload it every time. Changes made
    to the index on disk after that first call are not picked up.

    Args:
        question: The query to search for in the vector store
        k: Number of documents to request from the similarity search
           (defaults to 4, the value previously hard-coded)

    Returns:
        List of documents relevant to the question
    """
    return _get_vectorstore().similarity_search(question, k=k)

def generate_response(question, documents):
    """
    Answer the question with the LLM, using the retrieved documents as context

    Args:
        question: The query to answer
        documents: The retrieved documents; each one's ``page_content`` is
            placed into the prompt

    Returns:
        The ``"text"`` entry of the chain's result
    """
    # Prompt wording and whitespace are kept exactly as-is: they are part of
    # what the model receives.
    prompt_template = """
    You are a helpful assistant. Use the following context to answer the question.
    If you don't know the answer based on the context, just say you don't know.
    
    Context:
    {context}
    
    Question: {question}
    
    Answer:
    """

    # Document bodies separated by a blank line form the context block
    joined_context = "\n\n".join(doc.page_content for doc in documents)

    # Build the chain from the prompt and the module-level LLM
    qa_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"],
        ),
    )

    result = qa_chain.invoke({"context": joined_context, "question": question})
    return result["text"]

def langsmith_rag(question):
    """
    Run the full RAG pipeline for one question: retrieve, then generate

    Args:
        question: The query to answer

    Returns:
        Dictionary holding the generated answer under the "output" key
    """
    retrieved = retrieve_documents(question)
    return {"output": generate_response(question, retrieved)}

def _parse_similarity_score(text):
    """
    Extract the integer 0-10 score from the judge model's raw reply

    Accepts a bare number ("9"), a labelled number ("Score: 9"), and replies
    with trailing text ("9/10"). A number that follows a "score" label wins;
    otherwise the first integer in the reply is used.

    Args:
        text: Raw text returned by the judge model

    Returns:
        The score as an int between 0 and 10 inclusive

    Raises:
        ValueError: If the reply has no integer or the value is outside 0-10
    """
    import re  # local import keeps this block self-contained

    found = re.search(r"score\s*[:=]?\s*(-?\d+)", text, flags=re.IGNORECASE)
    if found is None:
        found = re.search(r"(-?\d+)", text)
    if found is None:
        raise ValueError(f"Judge reply contains no numeric score: {text!r}")

    score = int(found.group(1))
    if not 0 <= score <= 10:
        raise ValueError(f"Judge score {score} is outside the 0-10 range: {text!r}")
    return score


def similarity_score(reference_outputs: dict, outputs: dict) -> dict:
    """
    LLM-as-judge evaluator: rate how close the prediction is to the reference

    Args:
        reference_outputs: Dataset reference; its "output" entry is the
            expected answer
        outputs: Target function result; its "output" entry is the generated
            answer

    Returns:
        Dictionary with "key" set to "similarity" and "score" set to an int
        from 0 to 10

    Raises:
        KeyError: If either dictionary has no "output" entry
        ValueError: If the judge's reply cannot be read as a 0-10 score
    """
    reference = reference_outputs["output"]
    prediction = outputs["output"]
    prompt_template = """
    You are an AI judge evaluating two responses to the same question. Your task is to rate how similar the two answers are in meaning, even if the wording differs. The rating should reflect how well the answers convey the same idea or concept.

Consider factors such as:

Whether the core message or intent of both answers is aligned.

How closely the answers express the same information, even if they use different words or phrasing.

Differences in tone, detail, and structure should not heavily influence the meaning similarity.

Please assign a score from 0 to 10, where:

0 means the answers are completely different in meaning.

10 means the answers are identical in meaning.
ONLY RESPOND WITH THE SCORE, DO NOT ADD ANY OTHER TEXT. Even if the 2 answers are exact match, do not add any other text.
for example
reference output: hello this is karim
output: hello this is karim
Score: 10
do not add 'score' only respond with a number 

reference output: {reference}

output : {output}

Score:
    """
    
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["reference", "output"]
    )
    
    # Create an LLMChain
    chain = LLMChain(llm=llm, prompt=prompt)
    
    # Run the chain
    response = chain.invoke({"reference": reference, "output": prediction})
    
    # The model does not always reply with a bare number, so parse tolerantly
    # instead of calling int() on the raw text.
    return {"key": "similarity", "score": _parse_similarity_score(response["text"])}

   

def target_function(inputs: dict):
    """Pass the example's "input" entry to the RAG pipeline and return its result."""
    question = inputs["input"]
    return langsmith_rag(question)

# Run the evaluation of target_function over the dataset, scored by
# similarity_score. There is no `if __name__ == "__main__"` guard, so this
# executes every time the cell/module runs.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[similarity_score],
    experiment_prefix="rag data triall1",
)


View the evaluation results for experiment: 'rag data triall1-3eb8b421' at:
https://eu.smith.langchain.com/o/12537708-62c5-486d-9e38-b7853d327780/datasets/89ac1e0b-7261-4a89-936c-023a64cd06bd/compare?selectedSessions=7bbed207-53d3-468f-baaf-4316d941ffa6




0it [00:00, ?it/s]

Unnamed: 0,inputs.input,outputs.output,error,reference.output,feedback.similarity,execution_time,example_id,id
0,Who were the final project presentations showc...,The final week project presentations were show...,,The project was showcased to IBM’s General Man...,9,5.049451,0eff0c50-51c0-4ab4-8ad9-8adcae45cfbd,00c4d827-5a8c-4d5b-b8ff-deeaf2cf32a9
1,How did you solve the problem of intent recogn...,I used a router chain to solve the issue of in...,,Intent recognition was solved by using a route...,8,1.40754,25e70f13-204b-4eb0-b634-e7572acbb388,d35f6e5f-fdbb-400e-9c71-b84c856f7e44
2,What was the main objective of the project?,The primary objectives of the project were:\n\...,,The main objective was to enhance the travel e...,8,2.232957,26f56aa7-49d9-4566-9424-56bb82bbb587,5aa59397-3bbf-48b8-9639-b458f9fcd1cf
3,What challenges did you face with LLMs?,"According to the context, I faced two major ch...",,"One major challenge was LLM hallucination, whe...",8,2.287549,36ef975b-e8f7-49d5-950e-4735cf3449d8,00956ddc-9338-4ba8-9bf4-f150fe8440ae
4,How did your university studies differ from re...,"Based on the context provided, it seems that m...",,University focused on theoretical foundations ...,8,2.908452,4b8eb673-2819-4d7a-9d7f-30c4f2452e0c,f2231ffb-4b5c-48f2-bec6-727c2970e7d8
5,How was user session memory handled?,A buffer memory system was implemented to hand...,,"A buffer memory approach was used, storing onl...",9,1.437968,77766f65-6957-4c64-9152-33eab3b3393b,1e70d633-11b5-444d-8a3f-a3eec940099c
6,What skills did you gain during the project?,I gained extensive hands-on experience and dev...,,"Skills gained included LangChain, LLMs, RAG me...",8,1.612615,92bb5916-0f7a-4b54-b163-5fe943461ef2,a273fc10-5db2-4daa-b43d-a30a0d46ed80
7,What is the trip planning module?,The Trip Planning Module is a feature of the c...,,"The trip planning module suggests itineraries,...",9,1.770625,a98a2d47-9fbb-478b-9447-ac6a6106c5c5,c454b9b6-c80e-43b1-b953-20cf0803b2f2
8,Which technologies did the team use for cloud ...,The team used Google Cloud for training prompt...,,The team utilized Google Cloud for training pr...,8,1.819446,e47aaa14-59ff-4f18-9c73-94ce1d3de881,b02bddfc-2f19-49b3-b22f-e54adfecc056
9,What are the key features of the chatbot?,"Based on the context, the key features of the ...",,"Key features included multilingual support, tr...",6,4.336784,f7cf87d4-ffb3-467d-9e73-bcb88d57d59b,2a5507bf-6bdd-4271-83d5-37aa4281cecd
