# Answer questions with citations from Reddit's SEC filing using Opper and Mistral

This cookbook is an appendix to: https://opper.ai/blog/simple-rag-with-citations

In [None]:
# Install the Opper SDK (-U upgrades it to the latest release) and pydantic
!pip install opperai -U
!pip install pydantic


## Imports

In [None]:
import os
from opperai import Opper, fn, Client, AsyncClient
from opperai.types.indexes import RetrievalResponse
from pydantic import BaseModel, Field
from typing import List, Literal

# Placeholder API key: replace "op-..." with a real key before running.
# NOTE(review): this assignment overwrites any OPPER_API_KEY that is already
# set in the environment of the running kernel.
os.environ["OPPER_API_KEY"] = "op-..."

# presumably the client reads its credentials from OPPER_API_KEY — confirm
# against the opperai SDK documentation
opper = Opper()


## Index PDF

In [None]:

# Look the index up by name first, so re-running this cell reuses it
index = opper.indexes.get(name="mistral-rag4")

if not index:
    # Nothing found under that name: create the index and upload the PDF.
    # The upload only happens on this creation path.
    index = opper.indexes.create(name="mistral-rag4")
    index.upload_file(file_path="./reddit-sec.pdf")

print(index)


## Retrieve relevant content for answering the question

In [None]:
# The question to answer; swap in one of the commented alternatives to try others
question = "What are the key financial and growth numbers for Reddit?"
#question = "What is Reddits strategy around AI and future value from AI?"
#question = "Who are the biggest shareholders of Reddit?" 
#question = "Is the company making money and how much?"
#question = "Who are the main competitors of Reddit?"


# Query the index with the question (k=3) and show the raw hits
results = index.query(query=question, k=3)

print(results)


In [None]:
# Typed record for one retrieved result: its content plus the file name and
# page number extracted from the result's metadata.
class Source(BaseModel):
    # Name of the originating file (filled from the "file_name" metadata key)
    file_name: str
    # Content of the retrieved result
    content: str
    # Page number within the file (filled from the "page" metadata key)
    page_number: int

# Wrapper model holding a list of Source records.
# NOTE(review): not referenced by any of the visible cells.
class Sources(BaseModel):
    sources: List[Source]

# Turn every raw query hit into a typed Source record
processed_results = [
    Source(
        file_name=hit.metadata.get("file_name"),
        content=hit.content,
        page_number=hit.metadata.get("page"),
    )
    for hit in results
]

print(processed_results)


## Create response with citations

In [None]:
# One citation supporting the answer: where it comes from and the cited text.
# Comments are used instead of a docstring so the model's schema is unchanged.
class Citation(BaseModel):
    # Name of the file the citation comes from
    file_name: str 
    # Page number of the citation within that file
    page_number: int 
    # The cited text itself (printed in quotes by the final cell)
    citation: str 

# Final result of the pipeline: the answer text plus the citations backing it.
# Comments are used instead of a docstring so the model's schema is unchanged.
class Response(BaseModel):
    # Answer to the question; the prompt asks for inline [1], [2] ... references
    answer: str 
    # Citations the answer refers to
    citations: List[Citation]

# Step 1: turn the retrieved sources into a list of citations for the question.
# NOTE(review): the body is only a docstring — presumably the opperai `fn`
# decorator sends that docstring to the configured model as the instruction,
# so treat it as runtime text and do not reword it casually. Confirm against
# the SDK documentation.
@fn(path="test/mistral-rag/citations", model="azure/mistral-large-eu")
def extract_citations(question: str, sources: List[Source]) -> List[Citation]:
    """ Build a list of citations for the question from the sources"""

# Step 2: write the answer from the citations, referencing them inline.
# NOTE(review): the body is only a docstring — presumably the opperai `fn`
# decorator sends that docstring to the configured model as the instruction,
# so treat it as runtime text and do not reword it casually. Confirm against
# the SDK documentation.
@fn(path="test/mistral-rag/response", model="azure/mistral-large-eu")
def produce_response(question: str, citations: List[Citation]) -> Response:
    """ Produce an answer to the question using the possible citations. Refer to any statements or facts from citations inline in the answer with [1], [2] etc. """

# First pass: build candidate citations from the retrieved sources
citations = extract_citations(question, processed_results)

# Second pass: produce the answer from those citations
response = produce_response(question, citations)

print(response)


## Print it! 

In [None]:
# Print the answer, a blank line, then a numbered list of the cited passages.
print(response.answer)
print()
# enumerate(start=1) produces the 1-based numbers without a manual counter.
# The counter is deliberately not called `index`: that name holds the Opper
# index object, and rebinding it to an int would break a later re-run of the
# retrieval cell (`index.query(...)`).
for number, citation in enumerate(response.citations, start=1):
    print(f"[{number}]", f'"{citation.citation}"')
    
