In [1]:
from langchain_ollama import ChatOllama

In [6]:
# Chat model served through Ollama (gemma3:1b), sampled at temperature 0.9.
llm = ChatOllama(model='gemma3:1b', temperature=0.9)

In [3]:
from langchain_core.messages import AIMessage

## Invocation

In [9]:
# Conversation as (role, content) pairs: a system instruction, then the
# sentence the user wants translated.
messages = [
    ("system", "You are a helpful assistant that translates English to German. Translate the user sentence."),
    ("human", "I like to take photographs."),
]

# Send the conversation to the model and show only the text of its reply.
ai_msg = llm.invoke(messages)
print(ai_msg.content)

I like to take photographs.  **Ich mag es, Fotos zu machen.** 

Or, more casually: **Ich fotografiere gern.** (I like to photograph.) 

Do you want me to translate it into another way?


In [11]:
import pandas as pd

In [12]:
# Load the GDPR question/answer dataset; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='GDPR_10QA_dataset_filtered.csv')

In [13]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,Content,Article Number,Article Name,Chapter Number,Chapter Name,Article Word Count,Question,Answer,Question Word Count,Answer Word Count
0,\nArticle 1\nSubject-matter and objectives\n\n...,1,Subject-matter and objectives,1,General provisions,91,What is the purpose of GDPR?,The purpose of GDPR is to lay down rules relat...,6,34
1,\nArticle 2\nMaterial scope\n\n1. This Regulat...,2,Material scope,1,General provisions,232,What is the scope of GDPR?,GDPR applies to the processing of personal dat...,6,43
2,\nArticle 3\nTerritorial scope\n\n1. This Regu...,3,Territorial scope,1,General provisions,154,What is the territorial scope of GDPR?,GDPR applies to the processing of personal dat...,7,38
3,\nArticle 4\nDefinitions\n\nFor the purposes o...,4,Definitions,1,General provisions,935,What is the definition of personal data accord...,Personal data means any information relating t...,10,69
4,\nArticle 4 (continued)\nDefinitions\n\nFor th...,4,Definitions,1,General provisions,405,What is the definition of 'group of undertakin...,"For the purposes of this Regulation, 'group of...",11,17


In [15]:
# Show the full article text of the row labelled 0 (single label-based lookup).
print(df.loc[0, 'Content'])


Article 1
Subject-matter and objectives

1. This Regulation lays down rules relating to the protection of natural persons with regard to the processing of
   personal data and rules relating to the free movement of personal data.
2. This Regulation protects fundamental rights and freedoms of natural persons and in particular their right to the
   protection of personal data.
3. The free movement of personal data within the Union shall be neither restricted nor prohibited for reasons
   connected with the protection of natural persons with regard to the processing of personal data.



In [16]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [18]:
# Sentence-embedding model used to vectorise the text chunks.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
# Prepare the documents: one text per dataset row, with the article and
# chapter headings placed ahead of the article body.
# The text is assembled from explicit lines so that no source-code indentation
# or stray leading/trailing blank lines end up in what is chunked and embedded.
documents = []
for _, row in df.iterrows():
    # Combine relevant information
    text = "\n".join(
        [
            f"Article {row['Article Number']}: {row['Article Name']}",
            f"Chapter {row['Chapter Number']}: {row['Chapter Name']}",
            "",
            "Content:",
            # The raw content is padded with newlines; trim them.
            str(row['Content']).strip(),
        ]
    )
    documents.append(text)

In [20]:
# Split every prepared document into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) ready for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
texts = text_splitter.create_documents(documents)

In [22]:
# Create vector store
# Each chunk gets a stable, position-based id so that re-running this cell in
# the same kernel overwrites the existing entries instead of adding duplicate
# copies of every chunk to the collection.
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    ids=[f"chunk-{i}" for i in range(len(texts))],
)

In [23]:
# Custom prompt text for the QA chain. The {context} and {question}
# placeholders are filled in when the prompt is formatted.
prompt_template = (
    "\n"
    "You are a GDPR expert assistant. Use the following pieces of context to answer the question at the end.\n"
    "If you do not know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Please provide a clear and concise answer based on the GDPR articles provided in the context\n"
)

In [24]:
# Wrap the template text in a PromptTemplate and declare the two placeholders
# it contains. The keyword is `input_variables` (plural); the previous
# singular spelling is not a PromptTemplate field.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [30]:
# Set up a RAG chain
# Rebind `llm` to a lower-temperature (0.4) instance of the same model for
# question answering.
llm = ChatOllama(model="gemma3:1b", temperature=0.4)

# "stuff" chain: the retriever is configured with k=3 and the custom PROMPT
# is handed to the chain through chain_type_kwargs.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
)

In [31]:
def ask_gdpr_question(question: str) -> dict:
    """
    Ask a question about GDPR and get the QA chain's response.

    Args:
        question: Natural-language question, passed to ``qa_chain`` unchanged.

    Returns:
        The mapping returned by ``qa_chain.invoke`` (not a bare string). It
        holds the original question under ``'query'`` and the generated
        answer under ``'result'``.
    """
    return qa_chain.invoke(question)

In [32]:
# Example query; the cell displays the chain's response mapping.
ask_gdpr_question(question="What is anonymity?")

{'query': 'What is anonymity?',
 'result': 'According to the provided context, anonymity refers to the processing of personal data in a manner that prevents it from being attributed to a specific data subject without additional information.\n\n'}