<a href="https://colab.research.google.com/github/kevalshah90/llms/blob/main/financial_rag_gemini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import getpass
import os

# Set your Google API key (read later by genai.configure to set up Gemini)
os.environ["GOOGLE_API_KEY"] = getpass.getpass()

In [None]:
# For GPT-4 grading. The notebook also builds OpenAIEmbeddings for the vector
# store (presumably authenticated with this same key — confirm).
os.environ["OPENAI_API_KEY"] = getpass.getpass()

In [None]:
# For Claude grading (passed to ChatAnthropic in the Claude evaluation cell)
os.environ["ANTHROPIC_API_KEY"] = getpass.getpass()

In [None]:
# For Cohere reranker (passed to cohere.Client when the reranker is set up)
os.environ["COHERE_API_KEY"] = getpass.getpass()

In [None]:
!pip install -U -q langchain cohere ragas arxiv pymupdf chromadb wandb tiktoken unstructured==0.12.5 datasets langchain_anthropic openai pandas google-generativeai

# Download SEC filing

In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader

# Filing to analyse, fetched straight from SEC EDGAR (file abnb-20231231.htm)
url = "https://www.sec.gov/Archives/edgar/data/1559720/000155972024000006/abnb-20231231.htm"
# NOTE(review): the User-Agent value looks like a placeholder identity —
# presumably SEC EDGAR expects a real contact here; confirm before reuse.
loader = UnstructuredURLLoader(urls=[url], headers={'User-Agent': 'virat virat@virat.com'})
# Download and parse the page into LangChain documents
documents = loader.load()

# Chunk and store filing in vector DB

In [None]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings import OpenAIEmbeddings

# Naively chunk the SEC filing by tokens: chunks of 256 tokens, with 20 tokens
# of overlap between consecutive chunks
token_splitter = TokenTextSplitter(chunk_size=256, chunk_overlap=20)
docs = token_splitter.split_documents(documents)

In [None]:
# Save the chunked docs in vector DB: each chunk is embedded with OpenAI's
# "text-embedding-3-large" model and stored in a Chroma collection
vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings(model="text-embedding-3-large"))

# Load Q&A Dataset

In [None]:
import requests
import pandas as pd

# URL of the JSON file holding the question/answer pairs
url = 'https://raw.githubusercontent.com/virattt/datasets/main/abnb-2023-10k.json'

# Fetch the JSON content from the URL. The timeout keeps a stalled connection
# from hanging the notebook indefinitely.
response = requests.get(url, timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON decode error when
# the download did not succeed (e.g. 404 or rate limiting)
response.raise_for_status()
data = response.json()

# Convert the JSON content to a pandas DataFrame
df = pd.json_normalize(data)

# Rename 'answer' to 'ground_truth' for eval later on; this frees the
# 'answer' column name for the answers generated by the RAG pipeline
df.rename(columns={'answer': 'ground_truth'}, inplace=True)

# Display the DataFrame
df.head()

# Generate answers using LLM

In [None]:
# Instruction text that answer_question() places in front of every question
# before sending it to the LLM
prompt = """
You are an expert language model designed to
answer questions about financial documents like
SEC filings.

Given financial documents, your primary role is to extract key information
and providing accurate answers to questions
related to these filings.

In your response, optimize for conciseness, accuracy, and correctness.
"""

In [None]:
import google.generativeai as genai

# Authenticate the Gemini SDK with the key stored in the environment
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# NOTE(review): this looks like a quick check that the key and model work —
# the prompt contains a literal "..." placeholder rather than a real document.
gemini = genai.GenerativeModel('gemini-1.0-pro')
response = gemini.generate_content('Please summarise this document: ...')

# Show the generated text
print(response.text)

In [None]:
import google.generativeai as genai
import cohere
from typing import List

# Setup Gemini (same API key and model name as the earlier Gemini cell);
# `gemini` is the model object answer_question() calls
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
gemini = genai.GenerativeModel('gemini-1.0-pro')

# Setup Cohere; `co` is the client rerank_documents() calls
co = cohere.Client(os.environ["COHERE_API_KEY"])

def rerank_documents(query: str, documents: list, top_k: int) -> List[dict]:
  """Rerank candidate documents against a query using the Cohere reranker.

  Args:
    query: The question the documents are ranked against.
    documents: Candidate documents in a form accepted by ``co.rerank``
      (the notebook passes ``{"text": ...}`` dicts).
    top_k: Maximum number of documents to keep (sent to Cohere as ``top_n``).

  Returns:
    A list of ``{"text": ...}`` dicts, in the order the reranker returned
    them. An empty list when ``documents`` is empty.
  """
  # Nothing to rank, so skip the API call entirely
  if not documents:
    return []

  response = co.rerank(
      query=query,
      documents=documents,
      top_n=top_k,
      model="rerank-english-v2.0",
      # Ask Cohere to echo the document text back so it can be read below
      return_documents=True
  )
  # `result` (not `docs`) avoids reusing the name of the notebook's chunk list
  return [{"text": result.document.text} for result in response.results]

from google.generativeai import client

def answer_question(query: str, documents: list, prompt: str) -> str:
    """Ask Gemini to answer a question from the supplied documents.

    Args:
        query: The question to answer.
        documents: Supporting documents; they are interpolated into the
            message using their default string representation.
        prompt: Instruction text placed at the start of the message.

    Returns:
        The text of the model's response.
    """
    # One message: instructions, then the question, then the documents
    full_message = (
        f"{prompt}. Please answer the question: ```{query}``` "
        f"using the following documents: ```{documents}```."
    )
    return gemini.generate_content(full_message).text

In [None]:
import time

# Generated answers, one per row of df, in row order
answers = []
# Number of documents to retrieve and to keep after reranking
k = 3

# Fields for computing inference speed
total_time = 0.0
num_iterations = 0

# Execute RAG pipeline: retrieve -> rerank -> generate, one question per row
for index, row in df.iterrows():
  # Get start time. perf_counter is the clock intended for measuring
  # intervals; unlike time.time() it is not affected by system clock changes.
  start_time = time.perf_counter()

  # Extract the question
  question = row['question']

  # Print current question (index + 1 assumes the default 0-based integer index)
  print(f"Answering question {index + 1}:   {question}")

  # Query vector DB for documents
  top_k_docs = vectorstore.similarity_search(question, k)

  # Extract the text content from documents. Loop-specific names are used so
  # the module-level `documents` (the loaded filing) is not overwritten, which
  # would break re-running the chunking cell afterwards.
  candidate_docs = [{"text": doc.page_content} for doc in top_k_docs]

  # Rerank the documents
  # NOTE(review): retrieval and reranking both use k, so the reranker can only
  # reorder the same k documents; consider retrieving more candidates than k.
  reranked_docs = rerank_documents(question, candidate_docs, k)

  # Ask the LLM
  answer = answer_question(question, reranked_docs, prompt)

  # Add generated answer to our list of answers
  answers.append(answer)

  # Get end time
  end_time = time.perf_counter()
  # Update total execution time (excluding sleep time)
  total_time += (end_time - start_time)
  num_iterations += 1

  # Sleep for 1 second to avoid overloading the LLM
  time.sleep(1)

# Add the generated answers as a new column in the DataFrame
df['answer'] = answers

In [None]:
# Calculate the average execution time. If the loop processed no questions,
# report NaN rather than raising ZeroDivisionError.
avg_time = total_time / num_iterations if num_iterations else float("nan")

print(f"Took {avg_time} avg seconds for each RAG call")

# Visually inspect the answers

In [None]:
# Preview the first rows, which now include the generated 'answer' column
df.head()

# Evaluate answers using RAGAS

In [None]:
from datasets import Dataset

# Convert the DataFrame into a HuggingFace DataSet for RAGAS evaluation
# (it carries the 'question', 'ground_truth' and generated 'answer' columns)
dataset = Dataset.from_pandas(df)

# Use GPT-4 for Evaluation

In [None]:
from langchain_openai import ChatOpenAI
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness

# GPT-4 acts as the judge model, with temperature=0
# (NOTE(review): presumably to keep grading as repeatable as possible)
gpt = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)

# Run RAGAS over the dataset with GPT-4 as the LLM, computing only the
# answer_correctness metric.
# NOTE(review): `faithfulness` is imported above but not used here.
gpt_result = evaluate(
    dataset,
    llm=gpt,
    metrics=[answer_correctness],
)

print(gpt_result)

# Use Claude 3 Opus for Evaluation

In [None]:
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate

# Claude 3 Opus acts as the second judge model, with temperature=0 and the
# Anthropic key read from the environment
claude = ChatAnthropic(
    temperature=0,
    model_name="claude-3-opus-20240229",
    anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
)

# Run the same RAGAS evaluation (answer_correctness only) with Claude as the
# LLM; `evaluate` and `answer_correctness` come from the GPT-4 evaluation cell
claude_result = evaluate(
    dataset,
    llm=claude,
    metrics=[answer_correctness],
)

print(claude_result)

# Get mean of both GPT and Claude scores

In [None]:
# Unweighted mean of the answer_correctness scores from the two judge models
print(f"Average score: {(gpt_result['answer_correctness'] + claude_result['answer_correctness']) / 2}")