In [1]:
import os

# Load environment variables via python-dotenv.
# The bare call is the cell's last expression, so its return value is what
# the notebook displays as this cell's output.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Create the chat LLM; `llm` is the generation step of the RAG chain built
# further down in this notebook.
from langchain_google_vertexai import ChatVertexAI
llm = ChatVertexAI(model="gemini-pro")

In [17]:
from langchain_community.document_loaders import DirectoryLoader

# Read every .txt file found (recursively) under ../data into `docs`.
# Only loading happens here; splitting and indexing are done in a later cell.
loader = DirectoryLoader(path="../data", glob="**/*.txt")
docs = loader.load()

In [19]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed the chunks and index them in a Chroma vector store.
# NOTE(review): re-running this cell in the same kernel may index the same
# chunks a second time — TODO confirm against the Chroma client in use.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/text-embedding-004"),
)

In [20]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Pull the "rlm/rag-prompt" prompt from the LangChain hub and expose the
# vector store as a retriever; both are inputs to the RAG chain below.
prompt = hub.pull("rlm/rag-prompt")
retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Join the ``page_content`` of every document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in iteration order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)


# RAG pipeline: the incoming question is passed through unchanged under
# "question", while "context" is produced by piping the retriever into
# format_docs. The resulting mapping is piped into the prompt, then the LLM,
# then StrOutputParser.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Single-question smoke test; the answer is stored in `result`, not displayed.
result = rag_chain.invoke("What is Colorado's capital?")

In [32]:
# Load the evaluation set: one tab-separated row per line of ../data/qas.tsv.
# The loop in the next cell unpacks each row as (question, ground truth).

import pandas as pd

# An explicit encoding keeps decoding independent of the platform default
# (assumes the TSV is UTF-8 — TODO confirm).
with open("../data/qas.tsv", encoding="utf-8") as f:
    evaluation = [
        line.strip().split("\t")
        for line in f
        # Skip blank lines (e.g. a trailing empty line): they would otherwise
        # yield [''] and break the two-value unpacking downstream.
        if line.strip()
    ]


In [None]:
# Run every evaluation question through the RAG chain and record the model
# answer next to the ground truth. The record keys ("q", "a", "truth") are
# kept as-is: "a" is the model's answer, "truth" is the reference answer.
#
# Loop-local names are deliberately distinct from `result` (set by the
# smoke-test cell), so running this cell does not overwrite that value, and
# the ground truth is no longer held in a variable called `a`.
experiments = []
for question, truth in evaluation:
    answer = rag_chain.invoke(question)
    experiments.append({"q": question, "a": answer, "truth": truth})

In [37]:
# Preview the first five question / answer / ground-truth records.
experiments[:5]

[{'q': 'In what country is Normandy located?',
  'a': '## Normandy is located in **France**. Information extracted from the provided context directly supports this statement.  \n\n',
  'truth': 'France'},
 {'q': 'What branch of theoretical computer science deals with broadly classifying computational problems by difficulty and class of relationship?',
  'a': 'Computational complexity theory is the branch of theoretical computer science that deals with classifying computational problems according to their inherent difficulty and their relationship to each other. It seeks to determine the time and memory resources required to solve different kinds of problems. \n',
  'truth': 'Computational complexity theory'},
 {'q': 'What is Southern California often abbreviated as?',
  'a': 'Southern California is often abbreviated as SoCal.  It encompasses the 10 southernmost counties in the state, including Los Angeles, San Diego, and Orange County.   ',
  'truth': 'SoCal'},
 {'q': 'What company was

In [39]:
# Build a ragas evaluator chain for the answer-relevancy metric.
# NOTE(review): the saved output of this cell is
#   AttributeError: 'AnswerRelevancy' object has no attribute 'init_model'
# so the constructor call at the bottom fails as written. Presumably the
# installed ragas release no longer matches this import path / wrapper API —
# TODO confirm the installed version and the LangChain integration entry
# point it documents.
from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    # context_precision,  # NOTE(review): disabled by the author; reason not recorded
    context_recall,
)

answer_rel_chain = RagasEvaluatorChain(metric=answer_relevancy)


AttributeError: 'AnswerRelevancy' object has no attribute 'init_model'

I0000 00:00:1720756594.414591   91926 tcp_posix.cc:809] IOMGR endpoint shutdown
I0000 00:00:1720756599.414654   91926 tcp_posix.cc:809] IOMGR endpoint shutdown
