In [None]:
# Install packages
!pip install langchain
!pip install torch
!pip install sentence_transformers
!pip install faiss-cpu
!pip install huggingface-hub
!pip install pypdf
!pip -q install accelerate
!pip install llama-cpp-python
!pip -q install git+https://github.com/huggingface/transformers

In [1]:
# Import classes
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Create a PyPDFDirectoryLoader object with the directory path "Data/"
loader = PyPDFDirectoryLoader("Data/")

# Load data using the PyPDFDirectoryLoader object
data = loader.load()

In [3]:
# Create a RecursiveCharacterTextSplitter object with max size of each chunk and overlap between chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)

# Split documents into chunks using the RecursiveCharacterTextSplitter object
text_chunks = text_splitter.split_documents(data)

In [4]:
# Get the number of text chunks generated by the splitting process
len(text_chunks)

459

In [5]:
# Access the first text chunk generated by the splitting process
text_chunks[0]

Document(page_content='European Parliament\n2019-2024\nTEXTS ADOPTED\nP9_TA(2024)0138\nArtificial Intelligence Act\nEuropean Parliament legislative resolution of 13 March 2024 on the proposal for a \nregulation of the European Parliament and of the Council on laying down harmonised \nrules on Artificial Intelligence (Artificial Intelligence Act) and amending certain Union \nLegislative Acts (COM(2021)0206 – C9-0146/2021 – 2021/0106(COD))\n(Ordinary legislative procedure: first reading)\nThe European Parliament,\n– having regard to the Commission proposal to Parliament and the Council \n(COM(2021)0206),\n– having regard to Article 294(2) and Articles 16 and 114 of the Treaty on the \nFunctioning of the European Union, pursuant to which the Commission submitted the \nproposal to Parliament (C9-0146/2021),\n– having regard to Article 294(3) of the Treaty on the Functioning of the European Union,\n– having regard to the opinion of the European Central Bank of 29 December 20211,\n– having r

In [6]:
# Initialize HuggingFaceEmbeddings with the specified model_name
# This will load the pre-trained model "all-MiniLM-L6-v2" for generating embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [7]:
# Create a FAISS vector store from the text chunks using the specified embeddings
# This will generate vectors for each text chunk based on the provided embeddings
vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)

In [8]:
#   Initialize LlamaCpp with specified parameters:
#   - streaming: Enables streaming mode for continuous input processing.
#   - model_path: Path to the LLM model file "mistral-7b-instruct-v0.1.Q4_K_M.gguf".
#   - temperature: Softmax temperature for controlling generation randomness.
#   - top_p: Top-p (nucleus) sampling threshold for controlling generation diversity.
#   - verbose: Enables verbose mode for detailed logging.
#   - n_ctx: Size of the input context window for the model.
llm = LlamaCpp(
    streaming = True,
    model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    temperature=0.85,
    top_p=1,
    verbose=True,
    n_ctx=4096
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.atte

In [9]:
# Create a RetrievalQA object using the specified parameters:
#   - llm: LlamaCpp instance used for language model inference.
#   - chain_type: Type of QA chain to create ("stuff" in this case).
#   - retriever: Vector store converted to a retriever with search arguments {"k": 2}.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 2})
)

In [10]:
# Define a query for the QA system to process
query_01 = "What is the purpose of the AI Act?"

In [11]:
# Execute the QA system on the provided query to retrieve an answer
qa.run(query_01)


llama_print_timings:        load time =   15356.83 ms
llama_print_timings:      sample time =      27.89 ms /    90 runs   (    0.31 ms per token,  3226.62 tokens per second)
llama_print_timings: prompt eval time =  354614.48 ms /   515 tokens (  688.57 ms per token,     1.45 tokens per second)
llama_print_timings:        eval time =   17266.07 ms /    89 runs   (  194.00 ms per token,     5.15 tokens per second)
llama_print_timings:       total time =  372631.41 ms /   604 tokens


' The purpose of the AI Act is to improve the functioning of the internal market and promote the uptake of human-centric and trustworthy artificial intelligence (AI), while ensuring a high level of protection of health, safety, fundamental rights enshrined in the Charter of Fundamental Rights, including democracy, the rule of law and environmental protection, against the harmful effects of AI systems in the Union, and to support innovation.'

In [12]:
# Define a query for the QA system to process
query_02 = "I want to share AI-generated content, does it have to be tagged somehow? Can you give me the original article in the AI Act that you have used as a source?" 

In [13]:
# Execute the QA system on the provided query to retrieve an answer
qa.run(query_02)

Llama.generate: prefix-match hit



llama_print_timings:        load time =   15356.83 ms
llama_print_timings:      sample time =      85.33 ms /   256 runs   (    0.33 ms per token,  3000.08 tokens per second)
llama_print_timings: prompt eval time =  281770.90 ms /   648 tokens (  434.83 ms per token,     2.30 tokens per second)
llama_print_timings:        eval time =   51928.48 ms /   256 runs   (  202.85 ms per token,     4.93 tokens per second)
llama_print_timings:       total time =  335597.25 ms /   904 tokens


" No, it doesn't have to be tagged. The question is asking for a summary of the content used for training a general-purpose AI model and that information does not need to be tagged. Here is the relevant paragraph from Article 105 of the AI Act: (105) General-purpose models, in particular large generative models, capable of generating text, images, and other content, present unique innovation opportunities but also challenges to artists, authors, and other creators and the way their creative content is created, distributed, used and consumed. The development and training of such models require access to vast amounts of text, images, videos, and other data. Text and data mining techniques may be used extensively in this context for the retrieval and analysis of such content, which may be protected by copyright and related rights. Any use of copyright protected content requires the authorisation of the rightsholder concerned unless relevant copyright exceptions and limitations apply. Dire

In [14]:
# Define a query for the QA system to process
query_03 = "Can you give me some examples of unacceptable AI practices according to the AI Act?"

In [15]:
# Execute the QA system on the provided query to retrieve an answer
qa.run(query_03)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   15356.83 ms
llama_print_timings:      sample time =      28.12 ms /    93 runs   (    0.30 ms per token,  3306.78 tokens per second)
llama_print_timings: prompt eval time =  115448.80 ms /   523 tokens (  220.74 ms per token,     4.53 tokens per second)
llama_print_timings:        eval time =   18121.56 ms /    92 runs   (  196.97 ms per token,     5.08 tokens per second)
llama_print_timings:       total time =  134362.13 ms /   615 tokens


" According to the AI Act, one example of an unacceptable AI practice is placing on the market, putting into service or using an AI system that deploys subliminal techniques beyond a person's consciousness or purposefully manipulative or deceptive techniques, with the objective, or the effect of, materially distorting the behavior of a person or a group of persons by appreciably impairing their ability to make an informed decision."