In [1]:
from dotenv import load_dotenv
import os
import pickle

# Load the environment variables from the .env file.
load_dotenv()

# LangChain's OpenAI wrappers look the key up under OPENAI_API_KEY, not
# OPEN_API_KEY. Keep honouring the legacy name first (so API_KEY is unchanged
# for existing .env files), fall back to the standard name, and export the
# value under the standard name so OpenAIEmbeddings() / OpenAI() in the later
# cells can authenticate without an explicit key argument.
API_KEY = os.environ.get("OPEN_API_KEY") or os.environ.get("OPENAI_API_KEY")
if API_KEY:
    # setdefault: never override an OPENAI_API_KEY that is already configured.
    os.environ.setdefault("OPENAI_API_KEY", API_KEY)

In [2]:
from langchain.document_loaders import PyPDFLoader  # for loading the pdf
from langchain.embeddings import OpenAIEmbeddings  # for creating embeddings
from langchain.vectorstores import Chroma  # for the vectorization part
from langchain.chains import ChatVectorDBChain  # for chatting with the pdf
from langchain.llms import OpenAI  # the LLM model we'll use (ChatGPT)



In [3]:

# 1. Loading the PDF
# The path is relative, so it is resolved against the kernel's working directory.
pdf_path = "The-AI-Act.pdf"
loader = PyPDFLoader(pdf_path)
# load_and_split() yields the sequence of documents that the following cells
# count with len() and hand to the vector store.
pages = loader.load_and_split()


In [4]:
# Sanity check: how many documents load_and_split() produced.
len(pages)

120

In [5]:
# 2. Creating embeddings and Vectorization
# OpenAIEmbeddings() is built without an explicit key, so it must find its
# credentials in the environment prepared in the first cell.
embeddings = OpenAIEmbeddings()
# Embed every document in `pages` and store the vectors in a Chroma collection.
# NOTE(review): persist_directory="." puts the store's files directly in the
# current working directory, and re-running this cell presumably adds the same
# documents to the persisted collection again - confirm, and consider a
# dedicated sub-folder.
vectordb = Chroma.from_documents(pages, embedding=embeddings,
                                 persist_directory=".")
# Write the collection to persist_directory.
vectordb.persist()



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


In [13]:
# Retrieval check: ask for the 2 most similar documents, restricted by the
# metadata filter to entries whose "page" value is 1.
# NOTE(review): the question looks unrelated to the loaded PDF (file name
# suggests the AI Act) - presumably an off-topic probe; confirm it is intended.
query = "What is the bitcoin?"
vectordb.similarity_search(query, k=2, filter={"page": 1})

[Document(page_content='EN 1  EN EXPLANATORY MEMORANDUM  \n1. CONTEXT  OF THE  PROPOSAL  \n1.1. Reasons for and objectives of the proposal  \nThis explanatory memorandum accompanies the proposal for a Regulation laying down \nharmonised rules on artificial intelligence (Artificial Intelligence Act). Artificial Intelligence \n(AI) is a fast evolving family of technologies that can bring a wide array of economic and \nsocietal benefits across the entire s pectrum of industries and social activities. By improving \nprediction, optimising operations and resource allocation, and personalising service delivery, \nthe use of artificial intelligence can support socially and environmentally beneficial outcomes \nand pro vide key competitive advantages to companies and the European economy. Such \naction is especially needed in high -impact sectors, including climate change, environment and \nhealth, the public sector, finance, mobility, home affairs and agriculture. However, t he same \nelement

In [10]:
# Same `query` as the similarity-search cell, this time via max-marginal-relevance
# search: return k=2 documents, with fetch_k=3 presumably being the size of the
# candidate pool they are chosen from - confirm against the Chroma docs.
vectordb.max_marginal_relevance_search(query,k=2, fetch_k=3)

[Document(page_content="EN 91  EN LEGISLATIVE FINANCIAL STATEMENT  \n1. FRAMEWORK  OF THE  PROPOSAL/INITIATIVE   \n1.1. Title of the proposal/initiative  \nRegulation of the European Parliament and of the Council Laying Down Harmonised \nRules on Artificial Intelligence (Artificial Intelligence Act) and Amending Certain \nUnion Legislative Acts  \n1.2. Policy area(s) concerned  \nCommunications Networks, Content and Technology;  \nInternal Market, Industry, Entrepreneurship and SMEs;  \nThe budgetary impact concerns the new tasks entrusted with the Commission, \nincluding the support to the EU AI Board;  \nActivity: Shaping Europe's digital future.  \n1.3. The proposal/initiative relates to:  \nX  a new action   \n\uf0a8 a new action following a pilot project/preparatory action64  \n\uf0a8 the extension of an existing action   \n\uf0a8 an action redirected towards a new action   \n1.4. Objective(s)  \n1.4.1.  General objective( s)  \nThe general objective of the intervention is to ensu

In [None]:


# 3. Querying
# Build a conversational question-answering chain on top of the vector store.
# return_source_documents=True requests that the retrieved documents are
# included in the chain's result alongside the answer.
llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo")
pdf_qa = ChatVectorDBChain.from_llm(
    llm, vectordb, return_source_documents=True)

# First turn: there are no previous exchanges yet. The chain consumes
# chat_history as a sequence of (question, answer) pairs, so start from an
# empty list instead of an empty string - this matches the list of tuples
# passed on the second turn.
chat_history = []
query = "What is the bitcoin?"
result = pdf_qa({"question": query, "chat_history": chat_history})
print("Answer:")
print(result["answer"])

# Second turn: record the first exchange so the follow-up question can be
# interpreted in the context of the previous answer.
chat_history = [(query, result["answer"])]
query2 = "When it be found?"
result = pdf_qa({"question": query2, "chat_history": chat_history})
print("Answer2:")
print(result["answer"])