### Evaluator

#### Example of evaluating a document based on the chunking techniques (RecursiveCharacterSplit, FormRecognizer, TikToken) and using different chunkSize and Overlap during the process.

In [1]:
import os  
import json  
import openai
from Utilities.envVars import *

# Set OpenAI API key and endpoint
openai.api_type = "azure"
openai.api_version = OpenAiVersion
openai_api_key = OpenAiKey
assert openai_api_key, "ERROR: Azure OpenAI Key is missing"
openai.api_key = openai_api_key
openAiEndPoint = f"https://{OpenAiService}.openai.azure.com"
assert openAiEndPoint, "ERROR: Azure OpenAI Endpoint is missing"
assert "openai.azure.com" in openAiEndPoint.lower(), "ERROR: Azure OpenAI Endpoint should be in the form: \n\n\t<your unique endpoint identifier>.openai.azure.com"
openai.api_base = openAiEndPoint
davincimodel = OpenAiDavinci


In [2]:
# Import required libraries
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from Utilities.cogSearchVsRetriever import CognitiveSearchVsRetriever
from langchain.docstore.document import Document
from langchain.llms import Replicate
from langchain.chains import RetrievalQA
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.base import BaseCallbackManager
from langchain.document_loaders import PDFMinerLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from Utilities.evaluator import createEvaluatorDataSearchIndex, indexEvaluatorDataSections, createEvaluatorDocumentSearchIndex, indexDocs
from Utilities.evaluator import createEvaluatorQaSearchIndex, searchEvaluatorQaData, searchEvaluatorDocument, searchEvaluatorDocumentIndexedData
from Utilities.evaluator import searchEvaluatorRunIndex, createEvaluatorRunIndex, getEvaluatorResult
from Utilities.evaluator import createEvaluatorResultIndex, searchEvaluatorRunIdIndex
from IPython.display import display, HTML
import uuid
import random
from langchain.chains import QAGenerationChain
import itertools
import json
from json import JSONDecodeError
import time
from langchain.evaluation.qa import QAEvalChain
import pandas as pd

In [3]:
# Parameters
embeddingModelType = "azureopenai"
temperature = 0
tokenLength = 1000
fileName = "Fabric Get Started.pdf"
regenerateQa = False
reEvaluate = False
topK = 3
totalQuestions = 5
retrieverType = "SimilaritySearch"
promptStyle = "Descriptive"

In [4]:
# Constant Variables
evaluatorDocumentIndex = "evaluatordocument"
evaluatorDataIndexName = "evaluatordata"
evaluatorQaDataIndexName = "evaluatorqadata"
evaluatorRunIndexName = "evaluatorrun"
evaluatorRunResultIndexName = "evaluatorrunresult"

In [5]:
if (embeddingModelType == 'azureopenai'):
        openai.api_type = "azure"
        openai.api_key = OpenAiKey
        openai.api_version = OpenAiVersion
        openai.api_base = f"https://{OpenAiService}.openai.azure.com"

        llm = AzureChatOpenAI(
                openai_api_base=openai.api_base,
                openai_api_version=OpenAiVersion,
                deployment_name=OpenAiChat,
                temperature=temperature,
                openai_api_key=OpenAiKey,
                openai_api_type="azure",
                max_tokens=tokenLength)
        embeddings = OpenAIEmbeddings(deployment=OpenAiEmbedding, chunk_size=1, openai_api_key=OpenAiKey)
        logging.info("LLM Setup done")
elif embeddingModelType == "openai":
        openai.api_type = "open_ai"
        openai.api_base = "https://api.openai.com/v1"
        openai.api_version = '2020-11-07' 
        openai.api_key = OpenAiApiKey
        llm = ChatOpenAI(temperature=temperature,
        openai_api_key=OpenAiApiKey,
        model_name="gpt-3.5-turbo",
        max_tokens=tokenLength)
        embeddings = OpenAIEmbeddings(openai_api_key=OpenAiApiKey)

In [6]:
# Check if we already have document inserted into our index
documentResponse = searchEvaluatorDocument(SearchService, SearchKey, evaluatorDocumentIndex, fileName)
if documentResponse.get_count() > 0:
    for doc in documentResponse:
        documentId = doc["documentId"]
        break
else:
    documentId = str(uuid.uuid4())
    # Create the Evaluator Document Search Index
    createEvaluatorDocumentSearchIndex(SearchService, SearchKey, evaluatorDocumentIndex)
    # Insert the document metadata
    evaluatorDocument = []
    evaluatorDocument.append({
            "id": str(uuid.uuid4()),
            "documentId": documentId,
            "documentName": fileName,
            "sourceFile": fileName,
        })
    indexDocs(SearchService, SearchKey, evaluatorDocumentIndex, evaluatorDocument)

In [7]:
# Process our fileName
# TODO : Add support for other file types

fabricGetStartedPath = "Data/PDF/" + fileName
# Load the PDF with Document Loader available from Langchain
loader = PDFMinerLoader(fabricGetStartedPath)
rawDocs = loader.load()
# Set the source 
for doc in rawDocs:
    doc.metadata['source'] = fabricGetStartedPath

In [8]:
# Process the document and create the chunked Index with different split methods, chunk sizes and overlaps.
# Eventually we will add support for different models
# Add more Split Methods
splitMethods = ["RecursiveCharacterTextSplitter"]
model = "GPT3.5"
chunkSizes = ['500', '1000', '1500', '2000']
overlaps = ['0', '50', '100', '150']

# Create the Evaluator Data Search Index to store our vector Data
createEvaluatorDataSearchIndex(SearchService, SearchKey, evaluatorDataIndexName)
for splitMethod in splitMethods:
    for chunkSize in chunkSizes:
        for overlap in overlaps:
            # Check if we already have data inserted into our index
            dataResponse = searchEvaluatorDocumentIndexedData(SearchService, SearchKey, evaluatorDataIndexName, documentId, 
                                                 splitMethod, chunkSize, overlap)
            if dataResponse.get_count() == 0:
                print("Processing Split Method: " + splitMethod + " Chunk Size: " + chunkSize + " Overlap: " + overlap)
                # Split the document into chunks of 500 characters & 0 overlap
                splitter = RecursiveCharacterTextSplitter(chunk_size=chunkSize, chunk_overlap=overlap)
                docs = splitter.split_documents(rawDocs)
                indexEvaluatorDataSections(OpenAiService, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, 
                            SearchKey, embeddingModelType, OpenAiEmbedding, fileName, evaluatorDataIndexName, docs, 
                            splitMethod, chunkSize, overlap, model, embeddingModelType, documentId)

Search index evaluatordata already exists


In [9]:
def generateEvaluation(data, chunk):
    # Generate random starting index in the doc to draw question from
    noOfChar = len(data)
    startingIndex = random.randint(0, noOfChar-chunk)
    subSequence = data[startingIndex:startingIndex+chunk]
    # Set up QAGenerationChain chain using GPT 3.5 as default
    chain = QAGenerationChain.from_llm(llm)
    evalSet = []
    # Catch any QA generation errors and re-try until QA pair is generated
    awaitingAnswer = True
    while awaitingAnswer:
        try:
            qaPair = chain.run(subSequence)
            evalSet.append(qaPair)
            awaitingAnswer = False
        except JSONDecodeError:
            startingIndex = random.randint(0, noOfChar-chunk)
            subSequence = data[startingIndex:startingIndex+chunk]
    evalPair = list(itertools.chain.from_iterable(evalSet))
    return evalPair

In [10]:
# Now that we have indexed the documents, let's go ahead and create the set of the QA pairs for the document and store that in the index
# We will use the same QA Pair for evaluating all the different chunk sizes and overlap
# Check first if we have already generated the QA pairs for this document
# If we have, then we will just use that
# If not, then we will generate the QA pairs and store them in the index
r = searchEvaluatorQaData(SearchService, SearchKey, evaluatorQaDataIndexName, documentId)
evaluatorQaData = []
if r.get_count() == 0 or regenerateQa:
    generateTotalQuestions = 15
    generatedQAPairs = []
    for i in range(generateTotalQuestions):
        # Generate one question
        evalPair = generateEvaluation(rawDocs[0].page_content, 3000)
        if len(evalPair) == 0:
            # Error in eval generation
            continue
        else:
            # This returns a list, so we unpack to dict
            evalPair = evalPair[0]
            generatedQAPairs.append(evalPair)
    # Create the Evaluator Document Search Index
    createEvaluatorQaSearchIndex(SearchService, SearchKey, evaluatorQaDataIndexName)
    # Insert the document metadata
    if regenerateQa:
        i=0
        for qa in r:
            evaluatorQaData.append({
                "id": qa['id'],
                "documentId": qa['documentId'],
                "questionId": qa['questionId'],
                "question": generatedQAPairs[i]['question'],
                "answer": generatedQAPairs[i]['answer'],
            })
            i+=1
    else:
        for qa in generatedQAPairs:
            evaluatorQaData.append({
                "id": str(uuid.uuid4()),
                "documentId": documentId,
                "questionId": str(uuid.uuid4()),
                "question": qa['question'],
                "answer": qa['answer'],
            })
    indexDocs(SearchService, SearchKey, evaluatorQaDataIndexName, evaluatorQaData)
else:
    for qa in r:
            evaluatorQaData.append({
                "id": qa['id'],
                "documentId": qa['documentId'],
                "questionId": qa['questionId'],
                "question": qa['question'],
                "answer": qa['answer'],
            })

In [11]:
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""

QaChainPrompt = PromptTemplate(input_variables=["context", "question"],template=template,)

In [12]:
template = """You are a teacher grading a quiz. 
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: Correct or Incorrect here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! 

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:"""

promptStyleFast = PromptTemplate(input_variables=["query", "result", "answer"], template=template)

In [13]:
template = """You are a teacher grading a quiz. 
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.
You are also asked to identify potential sources of bias in the question and in the true answer.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: Correct or Incorrect here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! 

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:

Your response should be as follows:

GRADE: (Correct or Incorrect)
(line break)
JUSTIFICATION: (Without mentioning the student/teacher framing of this prompt, explain why the STUDENT ANSWER is Correct or Incorrect, identify potential sources of bias in the QUESTION, and identify potential sources of bias in the TRUE ANSWER. Use one or two sentences maximum. Keep the answer as concise as possible.)
"""

promptStyleBias = PromptTemplate(input_variables=["query", "result", "answer"], template=template)


In [14]:
template = """You are assessing a submitted student answer to a question relative to the true answer based on the provided criteria: 
    
    ***
    QUESTION: {query}
    ***
    STUDENT ANSWER: {result}
    ***
    TRUE ANSWER: {answer}
    ***
    Criteria: 
      relevance:  Is the submission referring to a real quote from the text?"
      conciseness:  Is the answer concise and to the point?"
      correct: Is the answer correct?"
    ***
    Does the submission meet the criterion? First, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print "Correct" or "Incorrect" (without quotes or punctuation) on its own line corresponding to the correct answer.
    Reasoning:
"""

promptStyleGrading = PromptTemplate(input_variables=["query", "result", "answer"], template=template)


In [15]:
template = """You are a teacher grading a quiz. 
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: Correct or Incorrect here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! 

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:

Your response should be as follows:

GRADE: (Correct or Incorrect)
(line break)
JUSTIFICATION: (Without mentioning the student/teacher framing of this prompt, explain why the STUDENT ANSWER is Correct or Incorrect. Use one or two sentences maximum. Keep the answer as concise as possible.)
"""

promptStyleDefault = PromptTemplate(input_variables=["query", "result", "answer"], template=template)


In [16]:
template = """ 
    Given the question: \n
    {query}
    Here are some documents retrieved in response to the question: \n
    {result}
    And here is the answer to the question: \n 
    {answer}
    Criteria: 
      relevance: Are the retrieved documents relevant to the question and do they support the answer?"
    Do the retrieved documents meet the criterion? Print "Correct" (without quotes or punctuation) if the retrieved context are relevant or "Incorrect" if not (without quotes or punctuation) on its own line. """

gradeDocsPromptFast = PromptTemplate(input_variables=["query", "result", "answer"], template=template)


In [17]:
template = """ 
    Given the question: \n
    {query}
    Here are some documents retrieved in response to the question: \n
    {result}
    And here is the answer to the question: \n 
    {answer}
    Criteria: 
      relevance: Are the retrieved documents relevant to the question and do they support the answer?"

    Your response should be as follows:

    GRADE: (Correct or Incorrect, depending if the retrieved documents meet the criterion)
    (line break)
    JUSTIFICATION: (Write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Use one or two sentences maximum. Keep the answer as concise as possible.)
    """

gradeDocsPromptDefault = PromptTemplate(input_variables=["query", "result", "answer"], template=template)

In [18]:
def gradeModelAnswer(predictedDataSet, predictions, promptStyle):
    if promptStyle == "Fast":
        prompt = promptStyleFast
    elif promptStyle == "Descriptive w/ bias check":
        prompt = promptStyleBias
    elif promptStyle == "OpenAI grading prompt":
        prompt = promptStyleGrading
    else:
        prompt = promptStyleDefault

    # Note: GPT-4 grader is advised by OAI 
    evalChain = QAEvalChain.from_llm(llm=llm,
                                      prompt=prompt)
    gradedOutputs = evalChain.evaluate(predictedDataSet,
                                         predictions,
                                         question_key="question",
                                         prediction_key="result")
    return gradedOutputs

In [19]:
def gradeModelRetrieval(getDataSet, predictions, gradeDocsPrompt):
    if gradeDocsPrompt == "Fast":
        prompt = gradeDocsPromptFast
    else:
        prompt = gradeDocsPromptDefault

    # Note: GPT-4 grader is advised by OAI
    evalChain = QAEvalChain.from_llm(llm=llm,prompt=prompt)
    gradedOutputs = evalChain.evaluate(getDataSet,
                                         predictions,
                                         question_key="question",
                                         prediction_key="result")
    return gradedOutputs

In [20]:
def runEvaluator(totalQuestions, chain, retriever, promptStyle, testDataSet):
    d = pd.DataFrame(columns=['question', 'answer', 'predictedAnswer', 'answerScore', 'retrievalScore', 'latency'])
    for i in range(totalQuestions):
        predictions = []
        retrievedDocs = []
        gtDataSet = []
        latency = []
        currentDataSet = testDataSet[i]
    
        try:
            startTime = time.time()
            predictions.append(chain({"query": currentDataSet["question"]}, return_only_outputs=True))
            gtDataSet.append(currentDataSet)
            endTime = time.time()
            elapsedTime = endTime - startTime
            latency.append(elapsedTime)
        except:
            predictions.append({'result': 'Error in prediction'})
            print("Error in prediction")

        # Extract text from retrieved docs
        retrievedDocText = ""
        docs = retriever.get_relevant_documents(currentDataSet["question"])
        for i, doc in enumerate(docs):
            retrievedDocText += "Doc %s: " % str(i+1) + \
                doc.page_content + " "

        # Log
        retrieved = {"question": currentDataSet["question"],
                    "answer": currentDataSet["answer"], "result": retrievedDocText}
        retrievedDocs.append(retrieved)

        # Grade
        gradedAnswer = gradeModelAnswer(gtDataSet, predictions, promptStyle)
        gradedRetrieval = gradeModelRetrieval(gtDataSet, retrievedDocs, promptStyle)

        # Assemble output
        # Summary statistics
        dfOutput = {'question': evaluatorQaData[i]['question'], 'answer': evaluatorQaData[i]['answer'],
                    'predictedAnswer': predictions[0]['result'], 'answerScore': [{'score': 1 if "Incorrect" not in text else 0,
                                'justification': text} for text in [g['text'] for g in gradedAnswer]], 
                                'retrievalScore': [{'score': 1 if "Incorrect" not in text else 0,
                                'justification': text} for text in [g['text'] for g in gradedRetrieval]],
                    'latency': latency}

        # Add to dataframe
        d = pd.concat([d, pd.DataFrame(dfOutput)], axis=0)

        # Convert dataframe to dict
    d_dict = d.to_dict('records')
    return d_dict


In [21]:
# Select retriever
#chunkSizes = ['1500']
#overlaps = ['150']
createEvaluatorResultIndex(SearchService, SearchKey, evaluatorRunResultIndexName)
# Check if we already have runId for this document
r = searchEvaluatorRunIdIndex(SearchService, SearchKey, evaluatorRunResultIndexName, documentId)
if r.get_count() == 0:
    runId = str(uuid.uuid4())
else:
    for run in r:
        runId = run['runId']
        break
for splitMethod in splitMethods:
    for chunkSize in chunkSizes:
        for overlap in overlaps:
            # Verify if we have created the Run ID
            r = searchEvaluatorRunIndex(SearchService, SearchKey, evaluatorRunResultIndexName, documentId, retrieverType, 
                                    promptStyle, splitMethod, chunkSize, overlap)
            if r.get_count() == 0 or reEvaluate:
                # Create the Run ID
                print("Processing: ", documentId, retrieverType, promptStyle, splitMethod, chunkSize, overlap)
                runIdData = []
                subRunId = str(uuid.uuid4())
               
                retriever = CognitiveSearchVsRetriever(contentKey="contentVector",
                            serviceName=SearchService,
                            apiKey=SearchKey,
                            indexName=evaluatorDataIndexName,
                            topK=topK,
                            splitMethod = splitMethod,
                            model = model,
                            chunkSize = chunkSize,
                            overlap = overlap,
                            openAiService = OpenAiService,
                            openAiKey = OpenAiKey,
                            openAiVersion = OpenAiVersion,
                            openAiApiKey = OpenAiApiKey,
                            documentId = documentId,
                            openAiEmbedding=OpenAiEmbedding,
                            returnFields=["id", "content", "sourceFile", "splitMethod", "chunkSize", "overlap", "model", "modelType", "documentId"]
                            )
                vectorStoreChain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, 
                                                chain_type_kwargs={"prompt": QaChainPrompt})
                runEvaluations = runEvaluator(totalQuestions, vectorStoreChain, retriever, promptStyle, evaluatorQaData)
                runEvaluationData = []
                for runEvaluation in runEvaluations:
                        runEvaluationData.append({
                            "id": str(uuid.uuid4()),
                            "runId": runId,
                            "subRunId": subRunId,
                            "documentId": documentId,
                            "retrieverType": retrieverType,
                            "promptStyle": promptStyle,
                            "splitMethod": splitMethod,
                            "chunkSize": chunkSize,
                            "overlap": overlap,
                            "question": runEvaluation['question'],
                            "answer": runEvaluation['answer'],
                            "predictedAnswer": runEvaluation['predictedAnswer'],
                            "answerScore": json.dumps(runEvaluation['answerScore']),
                            "retrievalScore": json.dumps(runEvaluation['retrievalScore']),
                            "latency": str(runEvaluation['latency']),
                        })
                indexDocs(SearchService, SearchKey, evaluatorRunResultIndexName, runEvaluationData)

Search index evaluatorrunresult already exists


In [29]:
finalOutput = []
r = getEvaluatorResult(SearchService, SearchKey, evaluatorRunResultIndexName, documentId)
for run in r:
    finalOutput.append({
        "Question": run['question'],
        "Answer": run['answer'],
        "Predicted Answer": run['predictedAnswer'],
        "Retriever Type": run['retrieverType'],
        "Prompt Style": run['promptStyle'],
        "Split Method": run['splitMethod'],
        "Chunk Size": run['chunkSize'],
        "Overlap": run['overlap'],
        "Answer Score": json.loads(run['answerScore']),
        "Retrieval Score": json.loads(run['retrievalScore']),
        "Latency": run['latency'],
    })
df = pd.DataFrame(finalOutput)
df

Unnamed: 0,Question,Answer,Predicted Answer,Retriever Type,Prompt Style,Split Method,Chunk Size,Overlap,Answer Score,Retrieval Score,Latency
0,What is the primary use case for the Pipeline ...,The primary use case for the Pipeline copy act...,The primary use case for the Pipeline copy act...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1000,0,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",3.0535309314727783
1,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",Fabric is a non-Power BI experience that requi...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1000,0,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",2.496325969696045
2,What is the primary use case for the Pipeline ...,The primary use case for the Pipeline copy act...,The purpose of a workspace in Microsoft Fabric...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1500,0,"{'score': 0, 'justification': 'GRADE: Incorrec...","{'score': 0, 'justification': 'GRADE: Incorrec...",3.873573064804077
3,What is the key difference between a per-user ...,The key difference is that a Fabric capacity i...,Fabric is a preview product by Microsoft that ...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1500,0,"{'score': 0, 'justification': 'GRADE: Incorrec...","{'score': 1, 'justification': 'GRADE: Correct ...",2.108778715133667
4,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",The 'Create a workspace' pane is used to creat...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1500,100,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",1.9552397727966309
...,...,...,...,...,...,...,...,...,...,...,...
75,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",The key difference is that the Fabric (Preview...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1000,150,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",2.2532236576080322
76,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",A workspace in Microsoft Fabric is a place whe...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1000,150,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",2.2411551475524902
77,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",Fabric is a Microsoft platform for data manage...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1000,150,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",2.4773612022399902
78,What is the purpose of a workspace in Microsof...,"In workspaces, you create collections of items...",A workspace in Microsoft Fabric is a place to ...,SimilaritySearch,Descriptive,RecursiveCharacterTextSplitter,1500,100,"{'score': 1, 'justification': 'GRADE: Correct ...","{'score': 1, 'justification': 'GRADE: Correct ...",2.587143659591675


In [24]:
# question = "What is Microsoft Fabric"
# #answer = retriever.get_relevant_documents(question)
# answer = vectorStoreChain({"query": question}, return_only_outputs=True)
# answer