In [1]:
import os
from getpass import getpass

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate

from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser

from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank


In [None]:
# Configure the OpenAI credential.
# The key is taken from the environment when already present; otherwise the user
# is prompted for it without echoing. This avoids depending on a `token`
# variable that no cell in this notebook defines (NameError on a fresh kernel)
# and keeps the secret out of the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [3]:
# Load the PDF as one Document per page.
# Splitting is left entirely to the dedicated splitter cell below:
# `load_and_split()` would first apply LangChain's default text splitter, so the
# text would be split twice with different overlap settings and the start
# offsets recorded by the later splitter would no longer refer to the page.

pdf_path = "DOC-SF238339076816-20230503.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load()

# Number of loaded page documents (displayed as the cell output).
len(pages)

31

In [4]:
# Instantiate the OpenAI clients used by the rest of the notebook:
# an embedding model for the vector store and a chat model for answering.

EMBEDDING_MODEL_NAME = "text-embedding-3-small"
CHAT_MODEL_NAME = "gpt-3.5-turbo"
MAX_ANSWER_TOKENS = 300  # forwarded as `max_tokens` to the chat model

embeddings_model = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)
llm = ChatOpenAI(model=CHAT_MODEL_NAME, max_tokens=MAX_ANSWER_TOKENS)

In [5]:
# Split the loaded pages into chunks.
# Sizes are measured with the built-in `len`, i.e. in characters.

splitter_options = {
    "chunk_size": 4000,
    "chunk_overlap": 20,
    "length_function": len,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

chunks = text_splitter.split_documents(pages)

In [6]:
# Open (or create) the Chroma collection persisted under ./naiveDB.
vectorstore = Chroma(embedding_function=embeddings_model, persist_directory="naiveDB")

# Index the chunks only when the collection is still empty. Without this step
# the notebook silently relies on a database populated elsewhere and retrieves
# nothing on a fresh checkout; the emptiness guard keeps re-runs from inserting
# the same chunks again.
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(chunks)

  vectorstore = Chroma(embedding_function=embeddings_model, persist_directory="naiveDB")


In [7]:
# Expose the vector store as a retriever, passing k=10 as the search argument.
naive_retriever = vectorstore.as_retriever(search_kwargs=dict(k=10))

In [None]:
# Configure the Cohere credential used by the reranker.
# The key is taken from the environment when already present; otherwise the user
# is prompted for it without echoing. This avoids depending on a `token_cohere`
# variable that no cell in this notebook defines (NameError on a fresh kernel).
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass("Cohere API key: ")

In [9]:
# Wrap the naive retriever with a Cohere reranker (model rerank-v3.5, top_n=3):
# the base retriever supplies candidates and the reranker acts as the compressor.
rerank = CohereRerank(top_n=3, model="rerank-v3.5")

compressor_retriever = ContextualCompressionRetriever(
    base_retriever=naive_retriever,
    base_compressor=rerank,
)

In [None]:
# Prompt text sent to the chat model. It exposes two placeholders,
# {question} and {context}, which are filled in when the chain runs.
TEMPLATE = """
You are an expert in legislation and technology. Answer the question below using the provided context.

Query:
{question}

Context:
{context}
"""

# Build the chat prompt object from the template string above.
rag_prompt = ChatPromptTemplate.from_template(template=TEMPLATE)


In [11]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of documents exposing a ``page_content`` attribute.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Fan the incoming question out to both prompt variables:
#   - "question": passed through unchanged;
#   - "context":  reranked documents rendered as plain text. Feeding the raw
#     document list to the prompt would insert Python reprs (metadata, escaped
#     newlines) instead of the passages themselves.
setup_retriever = RunnableParallel(
    {
        "question": RunnablePassthrough(),
        "context": compressor_retriever | format_docs,
    }
)

In [12]:
# Parser placed at the end of the chain, after the chat model.
output_parser: StrOutputParser = StrOutputParser()

In [13]:
# Full RAG pipeline: retrieval step -> prompt -> chat model -> output parser.
compressor_retriever_chain = (
    setup_retriever
    | rag_prompt
    | llm
    | output_parser
)

In [None]:
# Run the pipeline on a sample question; the result is shown as the cell output.
question = "What are the main risks of the AI legal framework?"
compressor_retriever_chain.invoke(question)


'No contexto atual, os principais riscos do marco legal de IA incluem a falta de regulamentações claras para proteger a privacidade dos dados dos usuários, a possibilidade de viéses algorítmicos prejudiciais e a dificuldade em responsabilizar as empresas pelo uso inadequado da inteligência artificial. Além disso, a falta de transparência das decisões tomadas por algoritmos de IA pode levar a desconfiança por parte dos consumidores e da sociedade em geral. É importante que o marco legal de IA aborde essas questões de forma abrangente e equilibrada para garantir o desenvolvimento ético e responsável dessa tecnologia.'