# Import Libraries

In [None]:
import os
import requests
import json
import sys
import pandas as pd
from datetime import datetime
from pathlib import Path
import weaviate
from langchain_community.vectorstores import Weaviate
from langchain.chains import RetrievalQA
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI
from langchain.schema import Document
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings, StorageContext
from weaviate.classes.config import Configure, Property, DataType, Tokenization, VectorDistances
import weaviate.classes as wvc
from weaviate.classes.query import MetadataQuery
import weaviate.classes.query as wq
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.output_parsers import StructuredOutputParser
from langchain.output_parsers import ResponseSchema
from weaviate.classes.init import Auth
from llama_index.core.llms import ChatMessage
from llama_index.core.node_parser import LangchainNodeParser
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.schema import TextNode
from llama_index.vector_stores.weaviate import WeaviateVectorStore
import numpy as np

In [None]:
# Connection settings come from the environment so real credentials never have to be
# written into the notebook. The fallbacks are the original placeholders: export
# WEAVIATE_URL / WEAVIATE_API_KEY (or replace the placeholders) before running.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "url")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "key")

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
)

# Upload Nodes Directly with No Chunking

In [None]:
# Key for the OpenAI-compatible endpoint below. It is read from a dedicated environment
# variable (not OPENAI_API_KEY) so a real OpenAI key is never sent to this endpoint by
# accident; the fallback is the original placeholder.
OPENAI_API_KEY = os.environ.get("KSCOPE_API_KEY", 'api_key')
#GENERATOR_MODEL_NAME = "Mistral-7B-Instruct-v0.3"
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Generator LLM served behind an OpenAI-compatible API.
llm = OpenAILike(
    model=GENERATOR_MODEL_NAME,
    is_chat_model=True,
    temperature=0,
    max_tokens=None,
    api_base=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY
)

# Smoke test: send one chat message to confirm the model is reachable.
message = [
    ChatMessage(
        role="user",
        content="whats up?"
    )
]
try:
    result = llm.chat(message)
    print(f"Result: \n\n{result}")
except Exception as err:
    # Not every exception has a `.message` attribute, so inspect the string form;
    # reading `err.message` would raise AttributeError and hide the original error.
    if "Error code: 503" in str(err):
        print(f"The model {GENERATOR_MODEL_NAME} is not ready yet.")
    else:
        raise

Result: 

assistant: Not much! I'm just a language model, I don't have feelings or emotions like humans do, but I'm here to help you with any questions or topics you'd like to discuss. How about you? How's your day going?


# Create Nodes

In [None]:
import pandas as pd
from llama_index.core.schema import TextNode

df = pd.read_csv("/projects/RAG2/cibc-2/Bootcamp_Dataset_CIBC2.csv")

# Columns copied into each node's metadata
metadata_fields = ["datePublished", "rating", "topic", "headline","sentiment"]  # Manually specify which columns to use as metadata

# Replace missing values with None where the column dtype allows it.
# Numeric columns may still hold NaN afterwards, so the filter below uses pd.isna.
df = df.where(pd.notna(df), None)

# One TextNode per review: the review body is the text, and only the metadata
# fields that actually carry a value (not None/NaN/empty string) are attached.
nodes = [
    TextNode(
        text=row["reviewBody"],
        metadata={
            key: value
            for key, value in row.items()
            if key in metadata_fields and not pd.isna(value) and value != ""
        },
    )
    for _, row in df.iterrows()
]

# Output example: show only the first few nodes instead of dumping every review
for node in nodes[:5]:
    print(node)

Node ID: 21bf506b-a789-47a7-b80a-34e3e820b5b3
Text: ... this so called bank is a joke... customer service is
nonexistent  when trying to get answers they simply disappear... just
let me close my account!...thumbs down...
Node ID: d07c1f62-899a-4d99-b498-51461fe51686
Text: is a toy bank.Its not a serious institution in any way. You can
keep there a couple hundred dollars on groceries but thats about it.
For starters, limit 500 per transaction. I went to pay for my
insurance and it just refused to work because the bill was about 1000.
Quite inconvenient.Even worse, there is 200 daily withdrawal limit
with no possib...
Node ID: 3bf123cb-333a-48fb-a6df-be75a8f62034
Text: made a large payment on my  account to   credit card. Over 2000.
The web server was slow and the payment timed out saying incomplete
transaction try again later. I dis so and payment transacted as
normal. But the first payment was also credited resulting in my
account being in the red. Phone  they showed me how to reverse 

In [None]:
# Load the Hugging Face embedding model used to embed the review nodes.
print("Setting up the embeddings model...")
embedding_options = {
    "model_name": EMBEDDING_MODEL_NAME,
    "device": 'cuda',
    "trust_remote_code": True,
}
embeddings = HuggingFaceEmbedding(**embedding_options)

Setting up the embeddings model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Register the generator LLM and the embedding model on LlamaIndex's `Settings` object
# (presumably picked up as defaults by the indexes and query engines built below).
Settings.llm = llm
Settings.embed_model = embeddings

In [None]:
# Vector store backed by the "Reviews_Llamaindex" Weaviate collection; node text is kept under the "text" key.
vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="Reviews_Llamaindex", text_key="text"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index over the un-chunked review nodes using that storage context.
index = VectorStoreIndex(nodes, storage_context=storage_context)

In [None]:
# Top-6 similarity retriever over the un-chunked review index.
# (Pass vector_store_query_mode="hybrid" to as_retriever if hybrid search is wanted.)
retriever = index.as_retriever(similarity_top_k=6)

# Retrieve the most relevant context from the vector store based on the query
retrieved_docs = retriever.retrieve("Tell me about the worst reviews")
retrieved_docs

[NodeWithScore(node=TextNode(id_='7b0b2a12-6321-47fb-a679-738d74f07e36', embedding=[-0.0004456334572751075, -0.012357603758573532, 0.0186807531863451, -0.011129416525363922, 0.02621369995176792, 0.038296207785606384, -0.010588061064481735, 0.053543154150247574, 0.01688372902572155, 0.0023078236263245344, -0.005728745833039284, 0.031153835356235504, -0.05284224450588226, 0.027112219482660294, 0.0031023197807371616, 0.0266299806535244, 0.03124970942735672, 0.0144833168014884, 0.03676591068506241, -0.006164774764329195, 0.015934402123093605, 0.01159452460706234, 0.013986392877995968, -0.0016500272322446108, 0.06267523020505905, -0.01604161597788334, 0.002054613083600998, 0.020609481260180473, -0.08007912337779999, -0.03550311550498009, 0.057696059346199036, 0.020932376384735107, -0.017023591324687004, 0.04083847999572754, 0.04202541336417198, 0.004663525149226189, -0.03401796147227287, 0.03028033673763275, -0.0013789284275844693, -0.0014255524147301912, -0.038892894983291626, -0.026899768

In [None]:
# Wrap the retriever in a query engine and ask the same sample question end-to-end.
# NOTE(review): the saved output of this cell shows an empty answer — confirm the un-chunked setup works.
query_engine = RetrieverQueryEngine(retriever=retriever)
result = query_engine.query("Tell me about the worst reviews")
print(f"Result: \n\n{result}")

Result: 



# Upload Data with Chunking

## Upload Documents

In [None]:
from llama_index.core import Document

df = pd.read_csv("/projects/RAG2/cibc-2/Bootcamp_Dataset_CIBC2.csv")

# Replace missing values with None where the column dtype allows it.
# Numeric columns may still hold NaN afterwards, so the metadata below checks with pd.isna.
df = df.where(pd.notna(df), None)

# Columns copied into each document's metadata
metadata_fields = ["datePublished", "rating", "topic", "headline","sentiment"]  # Manually specify which columns to use as metadata

# One Document per review. Every metadata field is always present:
# missing values (None/NaN) and empty strings are stored as "N/A".
documents = [
    Document(
        text=row['reviewBody'],
        metadata={
            key: ("N/A" if pd.isna(value) or value == "" else value)
            for key, value in row.items()
            if key in metadata_fields
        },
    )
    for _, row in df.iterrows()
]

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Split every review document into chunks (chunk_size 256, overlap 32).
parser = SentenceSplitter(chunk_size=256, chunk_overlap=32)
nodes = parser.get_nodes_from_documents(documents)

# Report how many chunks the documents produced.
node_count, document_count = len(nodes), len(documents)
print(f"Created {node_count} nodes from {document_count} documents")

Created 795 nodes from 678 documents


In [None]:
from collections import defaultdict

# Group chunks by the headline of the review they were split from
headline_groups = defaultdict(list)

for node in nodes:
    # Fall back to "N/A" rather than raising if a node carries no headline
    headline_groups[node.metadata.get("headline", "N/A")].append(node)

# Print the chunks of each review, one headline at a time
for headline, chunks in headline_groups.items():
    print(f"Head: {headline}\n")
    for chunk_no, chunk in enumerate(chunks, start=1):
        print(f"Chunk {chunk_no}:")
        print("Text:")
        print(chunk.text)
        print("------------------")
    print("\n")  # Extra space between groups

Head: simplii the ghost bank

Chunk 1:
Text:
... this so called bank is a joke... customer service is nonexistent  when trying to get answers they simply disappear... just let me close my account!...thumbs down...
------------------


Head:  is a toy bank.

Chunk 1:
Text:
is a toy bank.Its not a serious institution in any way. You can keep there a couple hundred dollars on groceries but thats about it. For starters, limit 500 per transaction. I went to pay for my insurance and it just refused to work because the bill was about 1000. Quite inconvenient.Even worse, there is 200 daily withdrawal limit with no possibilities of increasing it more. Thats insane! So, if you need cash, you cannot retrieve it from there unless you go to the local  for multiple days in a row.That said, it could work okay if maybe you dont earn very much, or are looking for a secondary bank. Some people have accounts in more than two banks, that can be convenient my main one is mostly for savings and expensive pu

In [None]:
# Vector store backed by the "Reviews_Chunk" Weaviate collection; chunk text is kept under the "text" key.
chunk_vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="Reviews_Chunk", text_key="text"
)
chunk_storage_context = StorageContext.from_defaults(vector_store=chunk_vector_store)
# Build the index over the chunked nodes (`nodes` now holds the SentenceSplitter output).
chunk_index = VectorStoreIndex(nodes, storage_context=chunk_storage_context)

In [None]:
# Top-6 retriever over the chunked index, wrapped in a query engine.
chunk_retriever = chunk_index.as_retriever(similarity_top_k=6)
chunk_query_engine = RetrieverQueryEngine(retriever=chunk_retriever)

# Sanity-check the chunked pipeline with the same sample question as before.
sample_question = "Tell me about the worst reviews"
result = chunk_query_engine.query(sample_question)
print(f"Result: \n\n{result}")

Result: 

It appears that the bank in question has a history of receiving extremely negative reviews from customers. Many reviewers have expressed frustration with the bank's customer service, citing rude and unhelpful staff, long wait times, and a lack of knowledge or competence. Some reviewers have also reported difficulty in accessing their accounts, and have been left feeling deceived or taken advantage of.

One reviewer described the bank's processes as "archaic" and noted that they can only send withdrawal forms by post. Another reviewer reported being put on hold multiple times and being read the same scripted answer by phone staff.

Several reviewers have also expressed concerns about the bank's management, with one reviewer describing a manager as "not helpful" and another reporting that a manager claimed to be the highest authority and refused to transfer the call to a superior.

Overall, the reviews suggest that the bank has a reputation for poor customer service, unhelpful 

# RAGAS Eval for Chunking

In [None]:
from datasets import Dataset

# NOTE(review): `test_set` is not defined in the visible part of this notebook;
# it must be loaded (as a pandas DataFrame) before this cell runs.
dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

# Answer every test question with the chunked query engine.
# The counter is deliberately NOT called `index`: that name holds the
# VectorStoreIndex built earlier and must not be overwritten by an int.
for row_idx, row in enumerate(dataset):
    query = row["question"]

    answer = chunk_query_engine.query(query)

    print(f"Result {row_idx}\nQuestion: {query}\nAnswer: {answer}\n")

    # Store the result
    answers[row_idx] = str(answer)

Result 0
Question: How do the reviewer's experiences with Simplii's systems contribute to their negative sentiment? 


This question directly addresses the keyphrase "Unreliable systems" within the context of the review, prompting an answer that focuses on the reviewer's frustrations with the systems' failures. 

Answer: The reviewer's experiences with Simplii's systems contribute to their negative sentiment by revealing a pattern of failures and inefficiencies. They report being unable to access their funds, experiencing holds on transfers, and encountering issues with their Simplii card, including it stopping work without explanation. These incidents suggest a lack of reliability and trustworthiness in Simplii's systems, leading to frustration and a negative perception of the company.

Result 1
Question: How long was the reviewer's money held up by the bank? 



Answer: The reviewer's money was held up for 3 weeks and counting, as mentioned in the review dated July 20, 2021.

Result 

In [None]:
# Attach the generated answers as a new "answer" column.
# NOTE(review): presumably fails if re-run without rebuilding `dataset`, since the column would already exist — confirm.
dataset = dataset.add_column("answer", answers)
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 5
})

In [None]:
import ast


def convert_string_to_list(example):
    """Parse the stringified ``contexts`` field of one dataset row into a list.

    Defined here because this is the first cell that needs it. A value that is
    not a string (e.g. already converted) is left untouched, so the mapping can
    be applied more than once.

    Args:
        example: one dataset row (a dict) with a ``contexts`` entry.

    Returns:
        The same row with ``contexts`` as a Python object.
    """
    contexts = example['contexts']
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example


# Convert the `contexts` column from string to list of strings, then inspect the schema.
dataset = dataset.map(convert_string_to_list)
dataset.features

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

{'question': Value(dtype='string', id=None),
 'contexts': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'ground_truth': Value(dtype='string', id=None),
 'evolution_type': Value(dtype='string', id=None),
 'metadata': Value(dtype='string', id=None),
 'episode_done': Value(dtype='bool', id=None),
 'answer': Value(dtype='string', id=None)}

In [None]:
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Needed for `HuggingFaceEmbeddings` below; without it this cell fails on a fresh kernel.
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Key for the OpenAI-compatible endpoint below. It is read from a dedicated environment
# variable (not OPENAI_API_KEY) so a real OpenAI key is never sent to this endpoint by
# accident; the fallback is the original placeholder.
OPENAI_API_KEY = os.environ.get("KSCOPE_API_KEY", 'key')
#GENERATOR_MODEL_NAME = "Mistral-7B-Instruct-v0.3"
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Embedding model handed to RAGAS (LangChain wrapper, not the LlamaIndex one used for indexing).
# NOTE: this rebinds the notebook-level name `embeddings`.
model_kwargs = {'device': 'cuda', 'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM for the RAGAS metrics, served from the same OpenAI-compatible endpoint.
generator_llm = ChatOpenAI(
    #model="Meta-Llama-3.1-8B-Instruct",
    model="gemma-2-9b-it",
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

score = evaluate(
    dataset=dataset,
    metrics=[
        Faithfulness(),
        ContextPrecision(),
        AnswerCorrectness(),
    ],
    llm=generator_llm,  # evaluator (judge) model defined above
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(


In [None]:
# Show the per-question metric scores as a DataFrame.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,The reviewer's experiences with Simplii's syst...,The reviewer expresses frustration with Simpli...,0.625,1.0,0.974735
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,The reviewer's money was held up for 3 weeks a...,"The reviewer's money was held up for 14 days, ...",0.333333,1.0,0.408285
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,The customer was unable to activate their appr...,The customer couldn't activate their account b...,0.4,1.0,0.313508
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,A loan approval.,The context does not provide information about...,0.0,0.0,0.131695
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,A secured line of credit.,A secured line of credit,1.0,1.0,0.994893


In [None]:
# Persist the chunking scores. `data` was never defined in this notebook;
# the evaluation result lives in `score`, so export its DataFrame form.
score.to_pandas().to_csv('/path/chunking_scores.csv')

(chunk_size = 256, chunk_overlap = 32)

# RAGAS with ReRanker with same chunk sizes (chunk_size = 256, chunk_overlap = 32)

## Loading Weaviate DB

In [None]:
from llama_index.core.postprocessor import LongContextReorder

# Re-attach to the existing "Reviews_Chunk" collection instead of re-uploading nodes.
chunk_vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="Reviews_Chunk", text_key="text"
)

# Retriever built with library defaults (no similarity_top_k given here,
# unlike the top-6 retriever used in the plain chunking section).
chunk_retriever = VectorStoreIndex.from_vector_store(chunk_vector_store).as_retriever(
)

# NOTE(review): LongContextReorder presumably only reorders the retrieved nodes
# rather than re-scoring them — confirm this is the intended "reranker".
reorder = LongContextReorder()

# assemble query engine
chunkrerank_query_engine = RetrieverQueryEngine(
    retriever=chunk_retriever,
    #response_synthesizer=response_synthesizer,
    node_postprocessors=[reorder],
)

## Evaluation

In [None]:
from datasets import Dataset

# NOTE(review): `test_set` is not defined in the visible part of this notebook;
# it must be loaded (as a pandas DataFrame) before this cell runs.
dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

# Answer every test question with the reorder-enabled query engine.
# The counter is deliberately NOT called `index`: that name holds the
# VectorStoreIndex built earlier and must not be overwritten by an int.
for row_idx, row in enumerate(dataset):
    query = row["question"]

    answer = chunkrerank_query_engine.query(query)

    print(f"Result {row_idx}\nQuestion: {query}\nAnswer: {answer}\n")

    # Store the result
    answers[row_idx] = str(answer)

Result 0
Question: How do the reviewer's experiences with Simplii's systems contribute to their negative sentiment? 


This question directly addresses the keyphrase "Unreliable systems" within the context of the review, prompting an answer that focuses on the reviewer's frustrations with the systems' failures. 

Answer: The reviewer's experiences with Simplii's systems contribute to their negative sentiment by causing frustration and inconvenience, as evidenced by their inability to access their funds in a timely manner and their repeated failed attempts to resolve the issue through daily calls. This suggests that the systems are unreliable and unable to meet the reviewer's needs, leading to a negative perception of the service.

Result 1
Question: How long was the reviewer's money held up by the bank? 



Answer: The reviewer's money was held up for a couple of days, and then again for an unspecified period of time, but it was eventually corrected within 24 hours.

Result 2
Question:

In [None]:
# Attach the generated answers as a new "answer" column.
# NOTE(review): presumably fails if re-run without rebuilding `dataset`, since the column would already exist — confirm.
dataset = dataset.add_column("answer", answers)
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 5
})

In [None]:
import ast

# Method 1: Using map
def convert_string_to_list(example):
    """Parse the stringified ``contexts`` field of one dataset row into a list.

    A value that is not a string (e.g. already converted by an earlier run of
    the mapping) is left untouched, so applying the function twice is safe.

    Args:
        example: one dataset row (a dict) with a ``contexts`` entry.

    Returns:
        The same row with ``contexts`` as a Python object.
    """
    contexts = example['contexts']
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example

# Apply the conversion to every row, then inspect the resulting schema.
dataset = dataset.map(convert_string_to_list)
dataset.features

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

{'question': Value(dtype='string', id=None),
 'contexts': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'ground_truth': Value(dtype='string', id=None),
 'evolution_type': Value(dtype='string', id=None),
 'metadata': Value(dtype='string', id=None),
 'episode_done': Value(dtype='bool', id=None),
 'answer': Value(dtype='string', id=None)}

In [None]:
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Key for the OpenAI-compatible endpoint below. It is read from a dedicated environment
# variable (not OPENAI_API_KEY) so a real OpenAI key is never sent to this endpoint by
# accident; the fallback is the original placeholder.
OPENAI_API_KEY = os.environ.get("KSCOPE_API_KEY", 'key')
#GENERATOR_MODEL_NAME = "Mistral-7B-Instruct-v0.3"
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Embedding model handed to RAGAS (LangChain wrapper, not the LlamaIndex one used for indexing).
# NOTE: this rebinds the notebook-level name `embeddings`.
model_kwargs = {'device': 'cuda', 'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM for the RAGAS metrics, served from the same OpenAI-compatible endpoint.
generator_llm = ChatOpenAI(
    #model="Meta-Llama-3.1-8B-Instruct",
    model="gemma-2-9b-it",
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

score = evaluate(
    dataset=dataset,
    metrics=[
        Faithfulness(),
        ContextPrecision(),
        AnswerCorrectness(),
    ],
    llm=generator_llm,  # evaluator (judge) model defined above
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


  user_id = json.load(open(uuid_filepath))["userid"]


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(


In [None]:
# Show the per-question metric scores as a DataFrame.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,The reviewer's experiences with Simplii's syst...,The reviewer expresses frustration with Simpli...,0.833333,1.0,0.669092
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,The reviewer's money was held up for a couple ...,"The reviewer's money was held up for 14 days, ...",0.666667,1.0,0.194264
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,The customer was unable to activate their acco...,The customer couldn't activate their account b...,0.25,1.0,0.357388
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,The delay is caused by a lack of response from...,The context does not provide information about...,0.0,0.0,0.187277
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,The reviewers targeted a financial product off...,A secured line of credit,0.666667,1.0,0.155886


# RAG with Semantic Chunking

In [None]:
from llama_index.core import Document

df = pd.read_csv("/projects/RAG2/cibc-2/Bootcamp_Dataset_CIBC2.csv")

# Replace missing values with None where the column dtype allows it.
# Numeric columns may still hold NaN afterwards, so the metadata below checks with pd.isna.
df = df.where(pd.notna(df), None)

# Columns copied into each document's metadata
metadata_fields = ["datePublished", "rating", "topic", "headline","sentiment"]  # Manually specify which columns to use as metadata

# One Document per review. Every metadata field is always present:
# missing values (None/NaN) and empty strings are stored as "N/A".
documents = [
    Document(
        text=row['reviewBody'],
        metadata={
            key: ("N/A" if pd.isna(value) or value == "" else value)
            for key, value in row.items()
            if key in metadata_fields
        },
    )
    for _, row in df.iterrows()
]

In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser, SentenceSplitter
from llama_index.core.postprocessor import SimilarityPostprocessor

# Embedding model that drives the semantic splitter.
print("Setting up the embeddings model...")
embeddings = HuggingFaceEmbedding(
    model_name=EMBEDDING_MODEL_NAME, device='cuda', trust_remote_code=True
)

# Semantic splitter configured with buffer size 1 and a 95th-percentile breakpoint threshold.
splitter = SemanticSplitterNodeParser(
    embed_model=embeddings,
    buffer_size=1,
    breakpoint_percentile_threshold=95,
)

# Fixed-size baseline splitter; created here but not applied in this cell.
base_splitter = SentenceSplitter(chunk_size=256)

# Chunk the review documents with the semantic splitter.
nodes = splitter.get_nodes_from_documents(documents)

Setting up the embeddings model...


In [None]:
from collections import defaultdict

# Group chunks by the headline of the review they were split from
headline_groups = defaultdict(list)

for node in nodes:
    # Fall back to "N/A" rather than raising if a node carries no headline
    headline_groups[node.metadata.get("headline", "N/A")].append(node)

# Print the chunks of each review, one headline at a time
for headline, chunks in headline_groups.items():
    print(f"Head: {headline}\n")
    for chunk_no, chunk in enumerate(chunks, start=1):
        print(f"Chunk {chunk_no}:")
        print("Text:")
        print(chunk.text)
        print("------------------")
    print("\n")  # Extra space between groups

Head: simplii the ghost bank

Chunk 1:
Text:
... this so called bank is a joke... customer service is nonexistent  when trying to get answers they simply disappear... just let me close my account!...thumbs down...
------------------


Head:  is a toy bank.

Chunk 1:
Text:
is a toy bank.Its not a serious institution in any way. You can keep there a couple hundred dollars on groceries but thats about it. For starters, limit 500 per transaction. 
------------------
Chunk 2:
Text:
I went to pay for my insurance and it just refused to work because the bill was about 1000. Quite inconvenient.Even worse, there is 200 daily withdrawal limit with no possibilities of increasing it more. Thats insane! So, if you need cash, you cannot retrieve it from there unless you go to the local  for multiple days in a row.That said, it could work okay if maybe you dont earn very much, or are looking for a secondary bank. Some people have accounts in more than two banks, that can be convenient my main one is 

In [None]:
# Upload the semantically split nodes into their own Weaviate collection.
chunk_vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Reviews_SemChunk256",
    text_key="text",
)
chunk_storage_context = StorageContext.from_defaults(vector_store=chunk_vector_store)
chunk_index = VectorStoreIndex(nodes, storage_context=chunk_storage_context)

# Top-6 retriever and query engine over the semantic chunks.
chunk_retriever = chunk_index.as_retriever(similarity_top_k=6)
chunk_query_engine = RetrieverQueryEngine(retriever=chunk_retriever)

# Sanity-check with the usual sample question.
sample_question = "Tell me about the worst reviews"
result = chunk_query_engine.query(sample_question)
print(f"Result: \n\n{result}")

Result: 

It appears that several individuals have had extremely poor experiences with a particular bank. They have reported being treated rudely, having difficulty accessing their accounts, and being unimpressed with the bank's customer service. Some have even gone so far as to close their accounts and warn others to stay away. The bank's processes and policies have also been criticized as being outdated and inefficient. Overall, it seems that this bank has a long way to go in terms of improving its reputation and customer satisfaction.


In [None]:
from llama_index.core.postprocessor import LongContextReorder, SimilarityPostprocessor

# Retriever with library defaults over the semantic-chunk index.
chunk_retriever = chunk_index.as_retriever()

# Post-processing pipeline: the similarity filter (cutoff 0.75) runs first, then the reorder step.
processor = SimilarityPostprocessor(similarity_cutoff=0.75)
reorder = LongContextReorder()

chunk_query_engine = RetrieverQueryEngine(
    retriever=chunk_retriever,
    node_postprocessors=[processor, reorder],
)

## Eval

In [None]:
from datasets import Dataset

# NOTE(review): `test_set` is not defined in the visible part of this notebook;
# it must be loaded (as a pandas DataFrame) before this cell runs.
dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

# Answer every test question with the semantic-chunk query engine.
# The counter is deliberately NOT called `index`: that name holds the
# VectorStoreIndex built earlier and must not be overwritten by an int.
for row_idx, row in enumerate(dataset):
    query = row["question"]

    answer = chunk_query_engine.query(query)

    print(f"Result {row_idx}\nQuestion: {query}\nAnswer: {answer}\n")

    # Store the result
    answers[row_idx] = str(answer)

Result 0
Question: How do the reviewer's experiences with Simplii's systems contribute to their negative sentiment? 


This question directly addresses the keyphrase "Unreliable systems" within the context of the review, prompting an answer that focuses on the reviewer's frustrations with the systems' failures. 

Answer: The reviewer's experiences with Simplii's systems have been marred by difficulties and failures, leading to a sense of frustration and disappointment. This has resulted in a negative perception of the company's ability to provide reliable and efficient services, ultimately contributing to their overall negative sentiment.

Result 1
Question: How long was the reviewer's money held up by the bank? 



Answer: The reviewer's money was held up for a week, and then an additional holding extension was added, making it even longer.

Result 2
Question: Why couldn't the customer activate their approved acct?
Answer: The customer was unable to activate their approved account bec

In [None]:
# Attach the generated answers as a new "answer" column.
# NOTE(review): presumably fails if re-run without rebuilding `dataset`, since the column would already exist — confirm.
dataset = dataset.add_column("answer", answers)
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 5
})

In [None]:
import ast
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Method 1: Using map
def convert_string_to_list(example):
    """Parse the stringified ``contexts`` field of one dataset row into a list.

    A value that is not a string (e.g. already converted by an earlier run of
    the mapping) is left untouched, so applying the function twice is safe.

    Args:
        example: one dataset row (a dict) with a ``contexts`` entry.

    Returns:
        The same row with ``contexts`` as a Python object.
    """
    contexts = example['contexts']
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example

# Apply the conversion to the `contexts` field of every row.
dataset = dataset.map(convert_string_to_list)

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

In [None]:
# Key for the OpenAI-compatible endpoint below. It is read from a dedicated environment
# variable (not OPENAI_API_KEY) so a real OpenAI key is never sent to this endpoint by
# accident; the fallback is the original placeholder.
OPENAI_API_KEY = os.environ.get("KSCOPE_API_KEY", 'key')
#GENERATOR_MODEL_NAME = "Mistral-7B-Instruct-v0.3"
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Embedding model handed to RAGAS (LangChain wrapper, not the LlamaIndex one used for indexing).
# NOTE: this rebinds the notebook-level name `embeddings`.
model_kwargs = {'device': 'cuda', 'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM for the RAGAS metrics, served from the same OpenAI-compatible endpoint.
# NOTE(review): the earlier evaluation cells in this notebook judge with "gemma-2-9b-it";
# this one uses the Llama model, so scores across sections are not strictly comparable.
generator_llm = ChatOpenAI(
    model="Meta-Llama-3.1-8B-Instruct",
    #model="gemma-2-9b-it",
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

score = evaluate(
    dataset=dataset,
    metrics=[
        Faithfulness(),
        ContextPrecision(),
        AnswerCorrectness(),
    ],
    llm=generator_llm,  # evaluator (judge) model defined above
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(


In [None]:
# Show the per-question metric scores as a DataFrame.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,The reviewer's experiences with Simplii's syst...,The reviewer expresses frustration with Simpli...,1.0,1.0,0.719939
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,"The reviewer's money was held up for a week, a...","The reviewer's money was held up for 14 days, ...",0.333333,1.0,0.433811
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,The customer was unable to activate their appr...,The customer couldn't activate their account b...,0.25,1.0,0.336159
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,Account access.,The context does not provide information about...,0.0,0.0,0.139817
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,The credit product in question is a loan or cr...,A secured line of credit,0.5,1.0,0.192114


# RAG 256 Chunking with LLM Rank

In [None]:
from llama_index.core.postprocessor import LongContextReorder
from llama_index.core import PromptTemplate
from llama_index.core import get_response_synthesizer

# Vector store backed by the "Reviews_Chunk" Weaviate collection.
chunk_vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="Reviews_Chunk", text_key="text"
)

# Build the index once and reuse it for both the retriever and the chat engine.
# NOTE: despite its name, `chunk_retriever` is the VectorStoreIndex itself; the
# name is kept so any other cell referring to it keeps working.
chunk_retriever = VectorStoreIndex.from_vector_store(chunk_vector_store)

# Retriever returning the 5 most similar chunks.
retriever = chunk_retriever.as_retriever(similarity_top_k=5)

# The text-QA template is filled with both the retrieved context and the user
# query. The previous template had no {query_str} placeholder, so the question
# itself never reached the LLM and answers could only summarize the context.
qa_prompt_tmpl = (
    "Context information is customer review data for the Simplii bank.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "When answering please reference which specific reviews led you to your answer using their datepublished and headline.\n"
    "Query: {query_str}\n"
    "Answer: "
)

qa_prompt = PromptTemplate(qa_prompt_tmpl)
response_synthesizer = get_response_synthesizer(text_qa_template=qa_prompt)

# ReAct-style chat engine over the same index; verbose=True prints the agent's steps.
chat_engine = chunk_retriever.as_chat_engine(chat_mode="react", llm=llm, response_synthesizer=response_synthesizer, verbose=True)

# Assemble the query engine from the retriever and the custom synthesizer.
# NOTE(review): no node postprocessor / reranker is attached here even though
# LongContextReorder is imported in this cell — confirm whether one was intended.
chunkrerank_query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [None]:
# Ask the ReAct chat engine a free-form question; the engine was created with
# verbose=True, so its intermediate steps are printed while it runs.
response = chat_engine.chat(
    "Tell me about customers' experience withdrawing money"
)

> Running step 78030cb1-6820-4dc3-bfcd-84896a2cf07c. Step input: Tell me about customers' experience withdrawing money
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: query_engine_tool
Action Input: {'input': 'Simplii App customer reviews withdrawing money', 'num_beams': 5}
[0m[1;3;34mObservation: Based on the provided reviews, it appears that Simplii Financial has a history of holding onto customers' money without releasing it, and in some cases, even taking away a portion of it as a penalty for requesting a refund. This suggests that Simplii Financial may have a policy of withholding funds from customers, which can be frustrating and time-consuming to resolve.

Specifically, the review from 3/27/2023, "Beware of Simplii", mentions that the author deposited two checks and had to wait for 3 weeks for them to be released, and even after multiple calls, the money was still not available to withdraw. This

In [None]:
# Run a single query through the retriever-based query engine and print the synthesized answer.
print(chunkrerank_query_engine.query("What are the main areas the clients commend in positive customer reviews?"))

Based on the provided context information, it appears that the Simplii bank has a strong focus on customer service and satisfaction. The reviews suggest that the bank's staff are knowledgeable, professional, and helpful in resolving customer issues and providing information.

One review in particular highlights the bank's commitment to customer satisfaction, stating that the reviewer has been with the bank for 30 years and has been pleased with the service (datePublished: 10/23/2024, headline: was very helpful). This suggests that the bank has a long-term relationship with its customers and is dedicated to providing consistent and high-quality service.

Another review mentions that the bank's staff are willing to go above and beyond to help customers, even in difficult situations (datePublished: 9/26/2024, headline: Best help ever!). This suggests that the bank values its customers and is committed to providing exceptional service.

Overall, based on the reviews, it appears that the Si

In [None]:
from datasets import Dataset
import pandas as pd

# Load the test set and keep every column except the first one.
test_set = pd.read_csv('/path/new_testset.csv').iloc[:, 1:]

In [None]:
# Build this section's evaluation dataset from the freshly loaded test set and
# generate one answer per question with the chunked query engine. Doing it here
# keeps the section runnable top-to-bottom instead of depending on `dataset` /
# `answers` left over from another section (and guarantees `contexts` is still
# in string form for the conversion cell that follows).
dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

for index, row in enumerate(dataset):
    query = row["question"]

    answer = chunkrerank_query_engine.query(query)

    print(f"Result {index}\nQuestion: {query}\nAnswer: {answer}\n")

    # Store the answer text only
    answers[index] = str(answer)

# Attach the generated answers as the "answer" column and show the dataset summary.
dataset = dataset.add_column("answer", answers)
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 5
})

In [None]:
import ast
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset

# Row-wise converter intended for use with `dataset.map`.
def convert_string_to_list(example):
    """Turn the row's ``contexts`` value from a Python-literal string into a list.

    Args:
        example: A single dataset row (dict-like) with a ``contexts`` entry.

    Returns:
        The same row object, with ``contexts`` parsed via ``ast.literal_eval``
        when it was a string. Values that are not strings (e.g. already a
        list) are left untouched, so applying the conversion twice is safe.
    """
    contexts = example['contexts']
    # literal_eval only accepts strings/AST nodes; skip rows already converted
    # so re-running the cell does not raise.
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example

# Apply the conversion to every row so `contexts` is parsed from its string form.
dataset = dataset.map(convert_string_to_list)

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

In [None]:
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Endpoint / model configuration for the evaluation run.
OPENAI_API_KEY = 'key'
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"  # alternative: "Mistral-7B-Instruct-v0.3"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Settings for the RAG embeddings model (different from the OpenAI embedding
# model used for test set generation).
model_kwargs = dict(device='cuda', trust_remote_code=True)
encode_kwargs = dict(normalize_embeddings=True)  # normalized vectors -> cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM: an OpenAI-compatible client pointed at GENERATOR_BASE_URL.
generator_llm = ChatOpenAI(
    model=GENERATOR_MODEL_NAME,
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

# Score the dataset on faithfulness, context precision and answer correctness.
score = evaluate(
    dataset=dataset,
    metrics=[Faithfulness(), ContextPrecision(), AnswerCorrectness()],
    llm=generator_llm,  # evaluator model served from GENERATOR_BASE_URL
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(
Exception raised in Job[14]: AttributeError('StringIO' object has no attribute 'sentences')
Exception raised in Job[12]: AttributeError('StringIO' object has no attribute 'sentences')
Exception raised in Job[5]: LLMDidNotFinishException(The LLM generation was not completed. Please increase try increasing the max_tokens and try again.)
Exception raised in Job[2]: BadRequestError(Error code: 400 - {'detail': {'object': 'error', 'message': "This model's maximum context length is 4096 tokens. However, you requested 4385 tokens in the messages, Please reduce the length of the messages.", 'type': 'BadRequestError', 'param': None, 'code': 400}})


In [None]:
# Display the per-question metric scores; NaN entries correspond to evaluation jobs that raised.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,"Based on the provided reviews, the reviewer's ...",The reviewer expresses frustration with Simpli...,0.545455,1.0,
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,"According to the reviews, the reviewer's money...","The reviewer's money was held up for 14 days, ...",0.0,1.0,
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,"Based on the provided reviews, it appears that...",The customer couldn't activate their account b...,0.181818,1.0,0.339935
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,"Based on the provided reviews, it appears that...",The context does not provide information about...,0.333333,0.0,0.188307
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,"Unfortunately, the provided information does n...",A secured line of credit,,1.0,


# RAGAS with Vanilla Decoupled RAG

In [None]:
from langchain_openai import ChatOpenAI

# Endpoint / model configuration for the vanilla (framework-free) RAG pipeline.
OPENAI_API_KEY = 'key'
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"

# OpenAI-compatible chat client pointed at GENERATOR_BASE_URL.
van_llm = ChatOpenAI(
    model=GENERATOR_MODEL_NAME,
    temperature=0,
    max_tokens=None,
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY
)

# Smoke test: send one trivial message to confirm the model is reachable.
message = [
    ("human", "what's up"),
]
try:
    result = van_llm.invoke(message)
    print(f"Result: \n\n{result.content}")
except Exception as err:
    # Inspect the error text via str(): a generic Exception has no `.message`
    # attribute, so reading it would raise AttributeError and hide the real error.
    if "Error code: 503" in str(err):
        print(f"The model {GENERATOR_MODEL_NAME} is not ready yet.")
    else:
        raise

Result: 

Not much! It's nice to chat with you. Is there something on your mind that you'd like to talk about, or are you just looking for some casual conversation?


In [None]:
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

print("Setting up the embeddings model...")

# Embedding model used to vectorize queries for the hybrid search.
van_embeddings = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME, device='cuda', trust_remote_code=True)

# Handle to the "Reviews" collection on the connected Weaviate client.
collection = client.collections.get("Reviews")

def get_embeddings(text):
    """Return the embedding that ``van_embeddings`` computes for ``text``."""
    return van_embeddings.get_text_embedding(text)

def convert_date(date):
    """Return only the calendar date of an ISO-format date/time string.

    Args:
        date: A string accepted by ``datetime.fromisoformat``.

    Returns:
        A ``datetime.date`` with the time-of-day portion dropped.
    """
    return datetime.fromisoformat(date).date()

def retrieve_docs(query, limit=6):
    """Fetch reviews relevant to ``query`` and format them as one context string.

    Runs a hybrid search on ``collection``: the raw query text feeds the
    keyword part and the query embedding feeds the vector part.

    Args:
        query: The user question.
        limit: Maximum number of reviews to retrieve (defaults to 6).

    Returns:
        A newline-joined string with one "Title / Date / body" entry per
        retrieved review, or an empty string when nothing is retrieved.
        (Previously the no-result case fell through and returned ``None``.)
    """
    query_vector = get_embeddings(query)

    response = collection.query.hybrid(
        query=query,  # For BM25 part of the hybrid search
        vector=query_vector,  # For vector part of the hybrid search
        limit=limit,
        return_metadata=wq.MetadataQuery(score=True),
    )

    # datePublished is stringified before parsing so only its date part is shown.
    return "\n".join(
        f"Title: {o.properties['headline']}, Date: {convert_date(str(o.properties['datePublished']))}\n{o.properties['reviewBody']}\n"
        for o in response.objects
    )

def RAG(query):
    """Answer ``query`` with retrieved review context using ``van_llm``.

    Args:
        query: The user question.

    Returns:
        The message object returned by ``van_llm.invoke``; the answer text is
        available on its ``content`` attribute.
    """
    context = retrieve_docs(query)
    formatted_prompt = f"Context:\n{context}\n\nQuery:\n{query}\n\nYou are an assistant supposed to provide insights about customer reviews. Take the query and context and answer the query. If you're referencing specific reviews, make sure you use their title and date. If the review has no headline, account for that too:"

    # The print that used to follow the return could never run and was removed.
    return van_llm.invoke(formatted_prompt)

Setting up the embeddings model...


In [None]:
# Quick manual check of the vanilla pipeline; the returned message is the cell's last expression.
RAG("whats the worst review?")



## Evaluation

In [None]:
from datasets import Dataset
import pandas as pd

# Load the test set and drop its first column.
test_set = pd.read_csv('/path/new_testset.csv')
test_set = test_set.iloc[: , 1:]

dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

# Generate one answer per test question with the vanilla RAG pipeline.
# NOTE(review): only answers are generated here; the `contexts` column comes
# from the test-set CSV rather than from this pipeline's retriever — confirm
# that is what the context-based metrics should be scored against.
for index, row in enumerate(dataset):
    query = row["question"]

    answer = RAG(query)

    # Print the answer text rather than the whole message object's repr.
    print(f"Result {index}\nQuestion: {query}\nAnswer: {answer.content}\n")

    # Store the result
    answers[index] = answer.content

Result 0
Question: How do the reviewer's experiences with Simplii's systems contribute to their negative sentiment? 


This question directly addresses the keyphrase "Unreliable systems" within the context of the review, prompting an answer that focuses on the reviewer's frustrations with the systems' failures. 

Answer: content="The reviewer's experiences with Simplii's systems contribute to their negative sentiment in several ways:\n\n1. **Failed card validation**: The reviewer's Simplii card stopped working after a few days, and they had to call to validate it, which didn't work. This led to a month-long struggle to get a new barcode, which couldn't be sent to them. (Title: Simplii card, Date: 2022-07-23)\n2. **Inability to cancel the card**: The reviewer was unable to cancel the card before validation, which added to their frustration. (Title: Simplii card, Date: 2022-07-23)\n3. **ATM transaction errors**: The reviewer experienced an ATM transaction error, where they were charged t

In [None]:
# Attach the generated answers to the dataset as the "answer" column.
dataset = dataset.add_column("answer", answers)

In [None]:
import ast

# Row-wise converter intended for use with `dataset.map`.
def convert_string_to_list(example):
    """Turn the row's ``contexts`` value from a Python-literal string into a list.

    Args:
        example: A single dataset row (dict-like) with a ``contexts`` entry.

    Returns:
        The same row object, with ``contexts`` parsed via ``ast.literal_eval``
        when it was a string. Values that are not strings (e.g. already a
        list) are left untouched, so applying the conversion twice is safe.
    """
    contexts = example['contexts']
    # literal_eval only accepts strings/AST nodes; skip rows already converted
    # so re-running the cell does not raise.
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example

# Apply the conversion to every row, then display the resulting schema.
dataset = dataset.map(convert_string_to_list)
dataset.features

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

{'question': Value(dtype='string', id=None),
 'contexts': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'ground_truth': Value(dtype='string', id=None),
 'evolution_type': Value(dtype='string', id=None),
 'metadata': Value(dtype='string', id=None),
 'episode_done': Value(dtype='bool', id=None),
 'answer': Value(dtype='string', id=None)}

In [None]:
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Endpoint / model configuration for the evaluation run.
OPENAI_API_KEY = 'key'
# Re-declared for reference only; the judge model below is set explicitly.
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"  # alternative: "Mistral-7B-Instruct-v0.3"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Settings for the RAG embeddings model (different from the OpenAI embedding
# model used for test set generation).
model_kwargs = dict(device='cuda', trust_remote_code=True)
encode_kwargs = dict(normalize_embeddings=True)  # normalized vectors -> cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM: gemma-2-9b-it through the OpenAI-compatible endpoint
# (previous choice: "Meta-Llama-3.1-8B-Instruct").
generator_llm = ChatOpenAI(
    model="gemma-2-9b-it",
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

# Score the dataset on faithfulness, context precision and answer correctness.
score = evaluate(
    dataset=dataset,
    metrics=[Faithfulness(), ContextPrecision(), AnswerCorrectness()],
    llm=generator_llm,  # evaluator model served from GENERATOR_BASE_URL
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(


In [None]:
# Display the per-question metric scores for the vanilla RAG pipeline.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,The reviewer's experiences with Simplii's syst...,The reviewer expresses frustration with Simpli...,0.217391,1.0,0.341715
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,"According to the reviews, the reviewer's money...","The reviewer's money was held up for 14 days, ...",0.181818,1.0,0.368305
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,"Based on the provided context, it appears that...",The customer couldn't activate their account b...,0.3,1.0,0.262063
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,"Based on the context, it appears that the fina...",The context does not provide information about...,0.083333,0.0,0.182126
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,"Based on the provided context, it appears that...",A secured line of credit,0.666667,1.0,0.485777


# RAGAS Evaluation for No Chunking RAG

Read existing Weaviate DB

In [None]:
# Connect to the existing "Reviews_Llamaindex" collection instead of re-uploading nodes.
no_chunk_vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Reviews_Llamaindex",
    text_key="text",
)

# Retriever returning the 8 most similar nodes from that store.
no_chunk_retriever = (
    VectorStoreIndex
    .from_vector_store(no_chunk_vector_store)
    .as_retriever(similarity_top_k=8)
)

# Query engine built on the retriever with no custom synthesizer supplied.
no_chunk_query_engine = RetrieverQueryEngine(retriever=no_chunk_retriever)


In [None]:
import pandas as pd

# Load the test set, keep every column except the first one, and display it.
test_set = pd.read_csv('/path/new_testset.csv').iloc[:, 1:]
test_set

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,How do the reviewer's experiences with Simplii...,['datePublished: 3/14/2024\nheadline: Horrible...,The reviewer expresses frustration with Simpli...,simple,[{'source': '/projects/RAG2/cibc-2/Bootcamp_Da...,True
1,How long was the reviewer's money held up by t...,['datePublished: 11/1/2024\nheadline: Dont was...,"The reviewer's money was held up for 14 days, ...",simple,[{'source': '/projects/RAG2/cibc-2/Bootcamp_Da...,True
2,Why couldn't the customer activate their appro...,['datePublished: 2/6/2020\nheadline: Prejudice...,The customer couldn't activate their account b...,reasoning,[{'source': '/projects/RAG2/cibc-2/Bootcamp_Da...,True
3,What financial service is causing the delay?,['datePublished: 5/11/2021\nheadline: I overpa...,The context does not provide information about...,reasoning,[{'source': '/projects/RAG2/cibc-2/Bootcamp_Da...,True
4,What credit product did both reviewers target?,['datePublished: 2/6/2024\nheadline: Experienc...,A secured line of credit,multi_context,[{'source': '/projects/RAG2/cibc-2/Bootcamp_Da...,True


In [None]:
from datasets import Dataset

# Wrap the test set as a Hugging Face Dataset and pre-allocate one answer slot per row.
dataset = Dataset.from_pandas(test_set)
answers = np.empty(len(dataset), dtype=object)

# Generate one answer per test question with the no-chunking query engine.
# NOTE(review): only answers are generated here; the `contexts` column comes
# from the test-set CSV rather than from this engine's retriever — confirm
# that is what the context-based metrics should be scored against.
for index, row in enumerate(dataset):
    query = row["question"]

    answer = no_chunk_query_engine.query(query)

    print(f"Result {index}\nQuestion: {query}\nAnswer: {answer}\n")

    # Store the response as plain text
    answers[index] = str(answer)

Result 0
Question: How do the reviewer's experiences with Simplii's systems contribute to their negative sentiment? 


This question directly addresses the keyphrase "Unreliable systems" within the context of the review, prompting an answer that focuses on the reviewer's frustrations with the systems' failures. 

Answer: The reviewer's experiences with Simplii's systems are marred by frequent failures and inefficiencies. They often find themselves unable to access their funds, with holds being placed on their accounts for extended periods of time. This is evident in the numerous complaints about delayed or stuck transactions, including transfers between Simplii accounts. The reviewer's frustration with these issues is compounded by the lack of effective communication and resolution from Simplii's customer service team. As a result, the reviewer feels that their money is "stuck in limbo" and that they are unable to use it when they need to. This consistent pattern of system failures and

In [None]:
# Attach the generated answers as the "answer" column and show the dataset summary.
dataset = dataset.add_column("answer", answers)
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 5
})

In [None]:
# Inspect the schema before conversion: `contexts` is still a plain string here.
dataset.features

{'question': Value(dtype='string', id=None),
 'contexts': Value(dtype='string', id=None),
 'ground_truth': Value(dtype='string', id=None),
 'evolution_type': Value(dtype='string', id=None),
 'metadata': Value(dtype='string', id=None),
 'episode_done': Value(dtype='bool', id=None),
 'answer': Value(dtype='string', id=None)}

In [None]:
import ast

# Row-wise converter intended for use with `dataset.map`.
def convert_string_to_list(example):
    """Turn the row's ``contexts`` value from a Python-literal string into a list.

    Args:
        example: A single dataset row (dict-like) with a ``contexts`` entry.

    Returns:
        The same row object, with ``contexts`` parsed via ``ast.literal_eval``
        when it was a string. Values that are not strings (e.g. already a
        list) are left untouched, so applying the conversion twice is safe.
    """
    contexts = example['contexts']
    # literal_eval only accepts strings/AST nodes; skip rows already converted
    # so re-running the cell does not raise.
    if isinstance(contexts, str):
        example['contexts'] = ast.literal_eval(contexts)
    return example

# Apply the conversion to every row, then display the resulting schema.
dataset = dataset.map(convert_string_to_list)
dataset.features

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

{'question': Value(dtype='string', id=None),
 'contexts': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'ground_truth': Value(dtype='string', id=None),
 'evolution_type': Value(dtype='string', id=None),
 'metadata': Value(dtype='string', id=None),
 'episode_done': Value(dtype='bool', id=None),
 'answer': Value(dtype='string', id=None)}

In [None]:
from ragas import evaluate
from ragas.metrics import Faithfulness, ContextPrecision, AnswerCorrectness
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Endpoint / model configuration for the evaluation run.
OPENAI_API_KEY = 'pkey'
# Re-declared for reference only; the judge model below is set explicitly.
GENERATOR_MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"  # alternative: "Mistral-7B-Instruct-v0.3"
GENERATOR_BASE_URL = "https://kscope.vectorinstitute.ai/v1"
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"

# Settings for the RAG embeddings model (different from the OpenAI embedding
# model used for test set generation).
model_kwargs = dict(device='cuda', trust_remote_code=True)
encode_kwargs = dict(normalize_embeddings=True)  # normalized vectors -> cosine similarity

print("Setting up the RAG embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Judge LLM: gemma-2-9b-it through the OpenAI-compatible endpoint
# (previous choice: "Meta-Llama-3.1-8B-Instruct").
generator_llm = ChatOpenAI(
    model="gemma-2-9b-it",
    base_url=GENERATOR_BASE_URL,
    api_key=OPENAI_API_KEY,
)

# Score the dataset on faithfulness, context precision and answer correctness.
score = evaluate(
    dataset=dataset,
    metrics=[Faithfulness(), ContextPrecision(), AnswerCorrectness()],
    llm=generator_llm,  # evaluator model served from GENERATOR_BASE_URL
    embeddings=embeddings,
)

Setting up the RAG embeddings model...


Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(
  similarity_score = await self.answer_similarity.ascore(
Exception raised in Job[2]: BadRequestError(Error code: 400 - {'detail': {'object': 'error', 'message': "This model's maximum context length is 4096 tokens. However, you requested 4175 tokens in the messages, Please reduce the length of the messages.", 'type': 'BadRequestError', 'param': None, 'code': 400}})


In [None]:
# Display the per-question metric scores; NaN entries correspond to evaluation jobs that raised.
score.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,answer_correctness
0,How do the reviewer's experiences with Simplii...,[datePublished: 3/14/2024\nheadline: Horrible ...,The reviewer's experiences with Simplii's syst...,The reviewer expresses frustration with Simpli...,0.5,1.0,
1,How long was the reviewer's money held up by t...,[datePublished: 11/1/2024\nheadline: Dont wast...,The reviewer's money was held up for 12 busine...,"The reviewer's money was held up for 14 days, ...",0.5,1.0,0.628838
2,Why couldn't the customer activate their appro...,[datePublished: 2/6/2020\nheadline: Prejudice ...,It seems that the bank's process for account a...,The customer couldn't activate their account b...,0.111111,1.0,0.174177
3,What financial service is causing the delay?,[datePublished: 5/11/2021\nheadline: I overpai...,A second mortgage secured line of credit.,The context does not provide information about...,0.0,0.0,0.119559
4,What credit product did both reviewers target?,[datePublished: 2/6/2024\nheadline: Experience...,Secured line of credit.,A secured line of credit,1.0,1.0,0.99139
