<a href="https://colab.research.google.com/gist/virattt/bf13f748c6b4763b6c6215c8659c02f6/private_rag-reranking-gpt-colbert-mistral-cohere.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install dependencies

In [None]:
!pip install openai

In [None]:
!pip install chromadb

In [None]:
!pip install langchain

In [None]:
!pip install tiktoken

In [None]:
!pip install pypdf

In [None]:
import getpass
import os

# Set your OpenAI API key.
# The prompt names the key explicitly: this notebook asks for several different
# provider keys, and the default getpass prompt does not say which one is wanted.
os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="OpenAI API key: ")

# Download and prepare SEC filing

In [None]:
# PyPDFLoader is imported from langchain_community, the same package the
# vector-store cell already imports Chroma from, instead of the legacy
# langchain.document_loaders path.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load $ABNB's financial report. This may take 1-2 minutes since the PDF is large
sec_filing_pdf = "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001559720/8a9ebed0-815a-469a-87eb-1767d21d8cec.pdf"

# Create your PDF loader
loader = PyPDFLoader(sec_filing_pdf)

# Load the PDF document
documents = loader.load()

# Chunk the financial report into pieces of at most 1024 characters with no overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Load the SEC filing into vector store

In [None]:
from langchain_community.vectorstores import Chroma
# Imported from langchain_community to match the Chroma import above, instead of
# the legacy langchain.embeddings.openai path.
from langchain_community.embeddings import OpenAIEmbeddings

# Embed every chunk with OpenAI embeddings and load the result into Chroma
embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embedding_function)

# Query the vector store

In [None]:
# Question used for the vector search and passed to every re-ranker in this notebook
query = "What are the specific factors contributing to Airbnb's increased operational expenses in the last fiscal year?"
# Retrieve the chunks most similar to the query.
# NOTE: this rebinds `docs` from the full list of chunks to the search hits;
# the re-ranking cells consume these hits.
docs = db.similarity_search(query)

# Rerank using GPT-4

In [None]:
from openai import OpenAI
import time
import json

# Time the whole re-ranking round trip (client creation plus the API call)
start = time.time()
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
response = client.chat.completions.create(
    model='gpt-4-1106-preview',
    # Request JSON mode so the reply can be parsed with json.loads
    response_format={"type": "json_object"},
    temperature=0,
    messages=[
        {
            "role": "system",
            # The prompt spells out the top-level "documents" key and the
            # per-document "score" field because the sorting cell reads exactly
            # those names from the parsed reply.
            "content": (
                "You are an expert relevance ranker. Given a list of documents and a query, "
                "your job is to determine how relevant each document is for answering the query. "
                "Your output is a JSON object with a single key, documents, whose value is a list of documents.  "
                "Each document has two fields, content and score.  "
                "score is from 0.0 to 100.0. Higher relevance means higher score."
            ),
        },
        # The retrieved documents are interpolated into the user message as-is
        {"role": "user", "content": f"Query: {query} Docs: {docs}"},
    ],
)

print(f"Took {time.time() - start} seconds to re-rank documents with GPT-4.")

In [None]:
# Sort the scores by highest to lowest and print.
# The reply is parsed as JSON and must be an object whose "documents" entry is a
# list of items that each carry a 'score' field.
scores = json.loads(response.choices[0].message.content)["documents"]
sorted_data = sorted(scores, key=lambda x: x['score'], reverse=True)
print(json.dumps(sorted_data, indent=2))

# Rerank using ColBERT

In [None]:
!pip install --quiet transformers torch

In [None]:
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and the model from the "colbert-ir/colbertv2.0" checkpoint.
# NOTE(review): AutoModel presumably returns only the base encoder — confirm
# whether the checkpoint's ColBERT projection layer is needed for faithful scores.
tokenizer = AutoTokenizer.from_pretrained("colbert-ir/colbertv2.0")
model = AutoModel.from_pretrained("colbert-ir/colbertv2.0")

In [None]:
import torch

# Start the wall-clock timer reported at the end of this cell
start = time.time()
# Collects one {"score", "document"} entry per scored document
scores = []

# Function to compute MaxSim
def maxsim(query_embedding, document_embedding):
    """Score documents by the mean of each query token's best cosine match.

    Args:
        query_embedding: tensor of shape [batch_size, query_length, embedding_size].
        document_embedding: tensor of shape [batch_size, doc_length, embedding_size].

    Returns:
        Tensor of shape [batch_size] with one score per batch item.
    """
    # Insert singleton axes so the two inputs broadcast against each other:
    #   query    -> [batch_size, query_length, 1, embedding_size]
    #   document -> [batch_size, 1, doc_length, embedding_size]
    query_tokens = query_embedding[:, :, None]
    doc_tokens = document_embedding[:, None]

    # Pairwise cosine similarity over the embedding axis:
    # [batch_size, query_length, doc_length]
    pairwise = torch.nn.functional.cosine_similarity(query_tokens, doc_tokens, dim=-1)

    # Keep the best-matching document token for every query token ...
    best_per_query_token = pairwise.max(dim=2).values

    # ... and average those best matches over the query tokens
    return best_per_query_token.mean(dim=1)

# Inference only: no_grad avoids building autograd graphs for every forward pass
with torch.no_grad():
    # Encode the query, keeping one embedding per token. MaxSim matches every
    # query token against the document tokens, so the token embeddings must not
    # be pooled into a single vector before scoring.
    # Shape expected by maxsim: [batch_size, query_length, embedding_size]
    query_encoding = tokenizer(query, return_tensors='pt')
    query_embedding = model(**query_encoding).last_hidden_state

    # Get score for each document
    for document in docs:
        # Documents longer than 512 tokens are truncated by the tokenizer
        document_encoding = tokenizer(document.page_content, return_tensors='pt', truncation=True, max_length=512)
        document_embedding = model(**document_encoding).last_hidden_state

        # Calculate MaxSim score between the query tokens and the document tokens
        score = maxsim(query_embedding, document_embedding)
        scores.append({
            "score": score.item(),
            "document": document.page_content,
        })

print(f"Took {time.time() - start} seconds to re-rank documents with ColBERT.")

In [None]:
# Sort the scores by highest to lowest and print.
# `scores` is the list of {"score", "document"} entries filled by the scoring loop.
sorted_data = sorted(scores, key=lambda x: x['score'], reverse=True)
print(json.dumps(sorted_data, indent=2))

# Rerank using Mistral

In [None]:
!pip install mistralai

In [None]:
# Set your Mistral API key.
# The prompt names the key explicitly: this notebook asks for several different
# provider keys, and the default getpass prompt does not say which one is wanted.
os.environ["MISTRAL_API_KEY"] = getpass.getpass(prompt="Mistral API key: ")

In [None]:
import json
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

# Time the whole re-ranking round trip (client creation plus the API call)
start = time.time()
client = MistralClient(api_key=os.environ["MISTRAL_API_KEY"])
response = client.chat(
    model="mistral-medium",
    messages=[
        # The prompt uses the single field name "score" throughout and asks for
        # bare JSON, because the sorting cell passes the raw reply to json.loads
        # and sorts the resulting list by each item's 'score'.
        ChatMessage(
            role="system",
            content=(
                "You are an expert relevance ranker. Given a list of documents and a query, "
                "your job is to determine how relevant each document is for answering the query. "
                "Your output is JSON, which is a list of documents.  "
                "Each document has two fields, content and score.  "
                "score is from 0.0 to 100.0. Higher relevance means higher score. "
                "Respond with the JSON only, with no surrounding text or markdown."
            ),
        ),
        # The retrieved documents are interpolated into the user message as-is
        ChatMessage(role="user", content=f"Query: {query} Docs: {docs}"),
    ],
)

print(f"Took {time.time() - start} seconds to re-rank documents with mistral-medium.")

In [None]:
# Show the raw reply text (the next cell parses it as JSON)
response.choices[0].message.content

In [None]:
# Sort the scores by highest to lowest and print.
# The raw reply is handed straight to json.loads, so it must be a bare JSON list
# of items that each carry a 'score' field.
scores = json.loads(response.choices[0].message.content)
sorted_data = sorted(scores, key=lambda x: x['score'], reverse=True)
print(json.dumps(sorted_data, indent=2))

# Rerank using Cohere

In [None]:
!pip install cohere

In [None]:
# Set your Cohere API key.
# The prompt names the key explicitly: this notebook asks for several different
# provider keys, and the default getpass prompt does not say which one is wanted.
os.environ["COHERE_API_KEY"] = getpass.getpass(prompt="Cohere API key: ")

In [None]:
import cohere

# Get your cohere API key on: www.cohere.com
co = cohere.Client(os.environ["COHERE_API_KEY"])

# Plain-text contents of the retrieved documents.
# NOTE: this rebinds `documents`, which earlier held the pages loaded from the PDF.
documents = [doc.page_content for doc in docs]

# Start the timer for the re-ranking request
start = time.time()

# NOTE(review): top_n=4 presumably caps how many ranked results come back —
# confirm against the Cohere rerank documentation.
results = co.rerank(query=query, documents=documents, top_n=4, model="rerank-english-v2.0")
print(f"Took {time.time() - start} seconds to re-rank documents with Cohere.")