In [1]:
import logging
# Logging verbosity for the whole notebook.
# 15 is a custom numeric level that sits between logging.DEBUG (10) and
# logging.INFO (20). The captured output shows "DEV:" and "ANALYSIS:" records,
# so the project registers custom level names somewhere outside this cell.
# NOTE(review): presumably 15 is the project's "DEV" level - confirm against
# regulations_rag.
# Alternatives kept for quick switching (only one assignment should be live):
# log_level = logging.INFO
# log_level = logging.DEBUG
log_level = 15

# Configure the root handler and this notebook's own logger at the same level.
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)
logger.setLevel(log_level)


In [2]:
import os
import pandas as pd
from openai import OpenAI

from regulations_rag.embeddings import  EmbeddingParameters
from regulations_rag.rerank import RerankAlgos
from regulations_rag.corpus_chat import ChatParameters

import importlib
import cemad_rag.cemad_corpus_index
importlib.reload(cemad_rag.cemad_corpus_index)
from cemad_rag.cemad_corpus_index import CEMADCorpusIndex

import cemad_rag.corpus_chat_cemad
importlib.reload(cemad_rag.corpus_chat_cemad)
from cemad_rag.corpus_chat_cemad import CorpusChatCEMAD


In [3]:
# --- Clients and model parameters -------------------------------------------
# The API key is read from the environment so no secret is stored in the notebook.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"),)
chat_parameters = ChatParameters(chat_model = "gpt-4o", temperature = 0, max_tokens = 500)
#chat_parameters = ChatParameters(chat_model = "gpt-4-turbo", temperature = 0, max_tokens = 500)

# NOTE(review): the second argument (1024) is presumably the embedding
# dimension requested from the model - confirm against EmbeddingParameters.
embedding_parameters = EmbeddingParameters("text-embedding-3-large", 1024)

# --- Corpus index -------------------------------------------------------------
# os.getenv returns None when the variable is unset; in that case None is
# passed straight through to CEMADCorpusIndex.
key = os.getenv('excon_encryption_key')

corpus_index = CEMADCorpusIndex(key)

# --- Re-ranking ---------------------------------------------------------------
# The LLM re-ranker is configured by writing into the `params` mapping on the
# RerankAlgos.LLM member itself.
rerank_algo  = RerankAlgos.LLM
if rerank_algo == RerankAlgos.LLM:
    rerank_algo.params["openai_client"] = openai_client
    # NOTE(review): ChatParameters is constructed with `chat_model=` but read
    # back here as `.model` - confirm the attribute name.
    rerank_algo.params["model_to_use"] = chat_parameters.model
    rerank_algo.params["user_type"] = corpus_index.user_type
    rerank_algo.params["corpus_description"] = corpus_index.corpus_description
    rerank_algo.params["final_token_cap"] = 5000 # can go large with the new models

# --- Chat object --------------------------------------------------------------
# Pass the `rerank_algo` variable configured above rather than a second
# hard-coded RerankAlgos.LLM, so changing the selection in one place is enough.
chat = CorpusChatCEMAD(openai_client = openai_client,
                    embedding_parameters = embedding_parameters,
                    chat_parameters = chat_parameters,
                    corpus_index = corpus_index,
                    rerank_algo = rerank_algo,
                    user_name_for_logging = 'test_user')




ANALYSIS:regulations_rag.corpus_chat:test_user: Reset Conversation History


In [4]:

# Candidate opening questions. Exactly one assignment should be live; the
# others are kept as comments so they can be swapped in quickly.
#user_content = "Can I collect data from children?"
#user_content = "What is offshoring?"
#user_content = "can a foreigner buy property in south africa?"
user_content = "Who can trade gold?"

# Clear the conversation history, then submit the question as the first turn.
chat.reset_conversation_history()
chat.user_provides_input(user_content)

ANALYSIS:regulations_rag.corpus_chat:test_user: Reset Conversation History
ANALYSIS:regulations_rag.corpus_chat:test_user question: Who can trade gold?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
DEV:regulations_rag.corpus_chat:#################   Similarity Search       #################
DEV:regulations_rag.corpus_chat:No relevant workflow found
DEV:regulations_rag.corpus_index:--   No relevant definitions found
DEV:regulations_rag.corpus_index:Selecting the top 15 items based on cosine-similarity score
DEV:regulations_rag.corpus_index:0.3476:                CEMAD:                C.(C):        question: What approvals are required to acquire gold for trade purposes?
DEV:regulations_rag.corpus_index:0.3709:                CEMAD:                C.(B):        question: Who should I contact if I want to export gold?
DEV:regulations_rag.corpus_index:--   Relevant sections found
DEV:regulations_rag.rerank:Re-ranking using LLM
INFO:httpx:HTTP Request:

In [5]:
# Follow-up question. This cell does not reset the conversation history, so
# the input is added to the conversation started in the previous cell.
user_content = "what documentation is required?"

chat.user_provides_input(user_content)

ANALYSIS:regulations_rag.corpus_chat:test_user question: what documentation is required?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
DEV:regulations_rag.corpus_chat:#################   Similarity Search       #################
INFO:regulations_rag.corpus_chat:Found a potentially relevant workflow: documentation
DEV:regulations_rag.corpus_index:--   No relevant definitions found
DEV:regulations_rag.corpus_index:Selecting the top 15 items based on cosine-similarity score
DEV:regulations_rag.corpus_index:0.3603:                CEMAD:              B.12(A):        question: What documentation is required to conduct a merchanting trade?
DEV:regulations_rag.corpus_index:--   Relevant sections found
DEV:regulations_rag.rerank:Re-ranking using LLM
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
DEV:regulations_rag.rerank:--   results requested by LLM filter
DEV:regulations_rag.rerank:B.12(A)
INFO:cemad_rag.corpu

In [6]:
from IPython.display import Markdown, display

# Content of the most recent message in the conversation.
last_message = chat.messages[-1]["content"]

# The stripped text is the lookup key into chat.references; the printed
# version is the raw, unstripped content.
m = last_message.strip()
print(last_message)

# Render the text of every reference attached to that message as Markdown.
references = chat.references[m]
for _, reference_row in references.iterrows():
    display(Markdown(reference_row["text"]))


The documentation required includes:
1. Approval from the South African Diamond and Precious Metals Regulator.
2. A permit from SARS.  
Reference:  
Section C.(C) from Currency and Exchange Control Manual for Authorised Dealers  



C. Gold

&nbsp;&nbsp;&nbsp;&nbsp;(C) Acquisition of gold for trade purposes

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(i) The acquisition of gold for legitimate trade purposes by e.g. manufacturing jewellers, dentists, is subject to the approval of the South African Diamond and Precious Metals Regulator.

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(ii) After receiving such approval, a permit must be obtained from SARS which will entitle the permit holder to approach Rand Refinery Limited for an allocation of gold.

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(iii) The holders of gold, having received the approvals outlined above, are exempt from the provisions of Regulation 5(1).

In [9]:
from IPython.display import Markdown, display

# Ad-hoc lookup of a single section's text from the corpus held by the chat
# object. get_text is called with a document name and a section reference.
# The commented-out lines are earlier probes against other document names.

# m = chat.corpus.get_text("Forgotten", "1")
# display(Markdown(m))    

# Fetch section A.3(A)(i) of the "CEMAD" document; the bare `d` on the last
# line makes the notebook show the returned value as the cell output.
d = chat.corpus.get_text("CEMAD", "A.3(A)(i)")
d
#print(chat.corpus.get_text("CovidLocation", "1"))
#print(chat.corpus.get_text("Consent", "7.1.3").strip())
#print(chat.corpus.get_text("GDPR", "8").strip())

'A.3 Duties and responsibilities of Authorised Dealers\n    (A) Introduction\n        (i) Authorised Dealers should note that when approving requests in terms of the Authorised Dealer Manual, they are in terms of the Regulations, not allowed to grant permission to clients and must refrain from using wording that approval/permission is granted in correspondence with their clients. Instead reference should be made to the specific section of the Authorised Dealer Manual in terms of which the client is permitted to transact.'

In [6]:
# Run the similarity search on its own for one question and unpack its three
# return values.
# NOTE(review): presumably wf / dfn / section are the matched workflow,
# definitions and sections (the log output reports on each) - confirm.
wf, dfn, section = chat.similarity_search("Who can trade gold?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
DEV:cemad_rag.corpus_chat:#################   Similarity Search       #################
DEV:cemad_rag.corpus_chat:No relevant workflow found
DEV:cemad_rag.cemad_corpus_index:--   No relevant definitions found
DEV:cemad_rag.cemad_corpus_index:Selecting the top 15 items based on cosine-similarity score
DEV:cemad_rag.cemad_corpus_index:0.3476:                CEMAD:                C.(C):        question: What approvals are required to acquire gold for trade purposes?
DEV:cemad_rag.cemad_corpus_index:0.3709:                CEMAD:                C.(B):        question: Who should I contact if I want to export gold?
DEV:cemad_rag.cemad_corpus_index:--   Relevant sections found
DEV:regulations_rag.rerank:Re-ranking using LLM
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
DEV:regulations_rag.rerank:--   results requested by LLM filter
DEV:regulations_rag.rerank:C.(C)


In [5]:
# Print the system message the chat object builds for an argument of 3.
# NOTE(review): _create_system_message is a private (underscore-prefixed)
# method and the meaning of the argument is not visible here - confirm.
print(chat._create_system_message(3))

You are answering questions about the General Data Protection Regulation (GDPR) for a Controller based only on the reference extracts provided. You have 3 options:
1) Answer the question. Preface an answer with the tag 'ANSWER:'. All referenced extracts must be quoted at the end of the answer, not in the body, by number, in a comma separated list starting after the keyword 'Reference: '. Do not include the word Extract, only provide the number(s).
2) Request additional documentation. If, in the body of the extract(s) provided, there is a reference to another section that is directly relevant and not already provided, respond with the word 'SECTION:' followed by 'Extract extract_number, Reference section_reference' - for example SECTION: Extract 1, Reference Article 98.
3) State 'NONE:' and nothing else in all other cases



In [None]:
# Catalogue of GDPR test questions, grouped by topic.
#
# This cell used to assign every question to `user_content` in turn, so only
# the very last assignment was ever live. The questions are now kept in a
# mapping so each one stays addressable, and `user_content` is set explicitly
# at the bottom to the same final value as before.
# Questions that were commented out remain commented out, in their original
# positions.
GDPR_QUESTIONS_BY_TOPIC = {
    # BCR
    "bcr": [
        "What are binding corporate rules?",
        "How do binding corporate rules become approved?",
        "What requirements must binding corporate rules meet?",
        "What details must be spelled out in binding corporate rules?",
        "What rights do individuals have under binding corporate rules?",
        "Who is liable if binding corporate rules are breached?",
        "How are individuals informed about their rights under binding corporate rules?",
        "What is the role of a data protection officer within a group adopting binding corporate rules?",
        "What mechanisms ensure compliance with binding corporate rules?",
        "How is the effectiveness of binding corporate rules verified?",
        "What happens if local laws conflict with the standards of binding corporate rules?",
        "What kind of training is required for employees with access to personal data under binding corporate rules?",
        "What is the process for reporting changes in binding corporate rules?",
    ],

    # Decision Making
    "decision_making": [
        "What rights do individuals have regarding automated decision-making and profiling?",
        "What does it mean for a decision to have a legal effect on a person?",
        "Under what conditions can an individual be subject to automated decision-making?",
        "What safeguards must be in place when automated decisions are necessary for contracts?",
        "Can special categories of personal data be used in automated decision-making?",
    ],

    # DPIA
    "dpia": [
        "When must you conduct a data protection impact assessment?",
        "Who should you consult when carrying out a data protection impact assessment?",
        " What triggers the requirement for a data protection impact assessment?",
        "How do supervisory authorities contribute to the DPIA process?",
        "How do supervisory authorities contribute to the data protection impact assessment process?",
        "What content is required in a data protection impact assessment?",
        "What content is required in a DPIA?",
        "Should individuals' views be considered in the assessment process?",
        "How often should you review the data protection impact assessment?",
    ],

    # DPO
    "dpo": [
        "When must you designate a data protection officer?",
        "Can multiple organisations share a DPO?",
        "What criteria must a data protection officer meet?",
        "Can a data protection officer be a contractor?",
        "How should the contact details of the DPO be shared?",
        "What types of processing activities require you to appoint a data protection officer?",
        "How does the scale of data processing affect the requirement to appoint a data protection officer?",
        "What is large scale?",
        "What is the difference between a data protection officer and a controller?",
    ],

    # intl_transfer
    "intl_transfer": [
        "What are the conditions for transferring personal data to another country or international organisation?",
        "When is an individual's consent required for data transfer?",
        "How does a contract affect the transfer of personal data?",
        "What constitutes an important reason of public interest for transferring personal data?",
        "When can data be transferred for the establishment, exercise, or defence of legal claims?",
        "Under what circumstances can personal data be transferred to protect someone's vital interests?",
        "What are the rules for transferring data from public registers?",
        "What are the requirements for a non-repetitive transfer based on compelling legitimate interests?",
        "When are public authorities exempt from certain transfer conditions?",
        "How does the law recognise public interest for data transfers?",
        "What documentation is required for assessing data transfers?",
    ],

    # lead_sa (no questions written yet)
    "lead_sa": [],

    # data breach
    "data_breach": [
        "What action should you take if a personal data breach occurs?",
        "How soon must you notify the appropriate authority when a data breach is discovered?",
        "Is there any situation where you might not need to notify a supervisory authority about a data breach?",
        "What information must you include when reporting a data breach?",
        "What should be done if all details about a breach can't be reported immediately?",
        "Why is it important to document all personal data breaches and the response to them?",

        "When should you inform individuals about a personal data breach?",
        "What information should be included when notifying an individual about a personal data breach?",
        "Under what conditions can you avoid informing individuals about a personal data breach?",
        "What happens if you don't voluntarily notify individuals about a personal data breach?",
    ],

    # portability
    "portability": [
        "What is the right to data portability?",
        "What does data portability mean for the transfer of data between controllers?",
        "Is the right to data portability unlimited?",
    ],

    # transparency
    "transparency": [
        "What information must be provided to individuals when collecting their personal data?",
        "What details about the data controller need to be disclosed during data collection?",
        "What are the required disclosures about the use and processing of personal data?",
        "When is it necessary to inform individuals about the transfer of their personal data to third countries or international organisations?",
        "What rights must individuals be informed about concerning their personal data?",
        "How does the requirement for providing information change if the purpose of data processing changes?",
        "What information must be provided to individuals when their data is collected indirectly?",
        "Within what timeframe must information be provided to the data subject when their data is collected indirectly?",
        # "What actions must be taken if the purpose of data processing changes after initial collection?",
        # "Are there any circumstances under which the obligation to provide information to data subjects does not apply?",
        # "How are data subjects informed about their rights concerning their personal data?",
    ],

    # Codes
    "codes": [
        "Who is encouraged to create codes of conduct for GDPR compliance?",
        "What purposes do codes of conduct serve?",
        "Can codes of conduct address the protection of children's data?",
        "How can codes of conduct assist with personal data breaches?",
        "Can organisations outside the EU adhere to codes of conduct?",
        "What requirements must a code of conduct meet for approval?",
        # "Who approves codes of conduct for local purposes?",
        # "How are codes of conduct evaluated for processing activities across multiple Member States?",
        # "What role does the Commission play in the validity of codes of conduct?",
        # "How is the public informed about approved codes of conduct?",
        # "Where can one find a register of approved codes of conduct?",
        # "Who can monitor compliance with a code of conduct?",
        # "What qualifications must a body meet to monitor compliance with a code of conduct?",
        "How does a body become accredited to monitor a code of conduct?",
        # "What responsibilities does an accredited body have when a code of conduct is breached?",
        # "What actions can an accredited monitoring body take if a code of conduct is infringed?",
        # "Under what circumstances can an accreditation be revoked?",

        # "Can you give me a worfklow for the approval of a code of conduct?",
    ],

    # territorial scope
    "territorial_scope": [
        "When does GDPR apply to non-EU entities?",
        "I am not based in the EU. Does GDPR apply to me?",
    ],

    # video
    "video": [
        "Is video surveillance regulated under gdpr?",
    ],

    # Covid Health
    "covid_health": [
        "What specific purposes allow healthcare data to be processed without explicit consent?",
        "What are the permissible legal bases for processing health data for scientific research?",
    ],

    # covid location
    "covid_location": [
        "What types of data should not be collected by contact tracing applications?",
    ],

    # Consent
    "consent": [
        "Can children consent to data collection?",
    ],

    # Forgotten
    "forgotten": [
        "What is the right to erasure and when does it apply?",
    ],
}

# Active question. This is the same value the original chain of assignments
# left in `user_content` (its final line); change the topic or index to try
# another question.
user_content = GDPR_QUESTIONS_BY_TOPIC["forgotten"][-1]


In [1]:
from gdpr_rag.documents.dpo import DPO
from gdpr_rag.documents.consent import Consent
# Instantiate the DPO and Consent document classes (both constructed with no
# arguments) so they can be inspected interactively.
dpo = DPO()
consent = Consent()

In [23]:
# Read the parquet index file for the "video" document into a DataFrame and
# show it as the cell output (the bare `df` on the last line).
# The path is relative, so it is resolved against the notebook's current
# working directory. `pd` is not imported in this cell; it must already be in
# the kernel namespace.
file = "./inputs/index/video.parquet"
df = pd.read_parquet(file)
df

Unnamed: 0,section_reference,text,source,embedding,document
0,2.1,gAAAAABmUH9YDpyn75A6P5UBv2tn3enDzLAPpQYXhXxsuk...,summary,"[-0.05341560021042824, 0.0171151515096426, -0....",Video
1,2.2,gAAAAABmUH9YEm2qVPRmHM-J5GQxVp8K7JCYlxzpbBAt6h...,summary,"[-0.05042532831430435, -0.034690551459789276, ...",Video
2,2.3,gAAAAABmUH9Yt_9fPYROaTi2ZIdE-IGpQpFtdHQ1tI6IpM...,summary,"[-0.05431677773594856, 0.031087687239050865, -...",Video
3,3,gAAAAABmUH9YI-aMX31OE0KoFJtf5__giOrAD1XTEiVt1q...,summary,"[0.011479574255645275, -0.04576420783996582, -...",Video
4,3.1.1,gAAAAABmUH9YMHnezbumcNuKAdDdVAelDbD5cu0VuRpGAz...,summary,"[0.024606412276625633, -0.012803575955331326, ...",Video
...,...,...,...,...,...
123,9.3.2,gAAAAABmUH9YxhYBbjnCjhfyzJSnOqOND6Egd3ErKrbHPX...,question,"[-0.004779711831361055, 0.0069802324287593365,...",Video
124,9.3.2,gAAAAABmUH9YaTg645WLz3LZdFQ_zBII1t0IXvpW96hTbw...,question,"[-0.045810651034116745, 0.048128221184015274, ...",Video
125,9.3.2,gAAAAABmUH9YSxCyP9WC7aFGTLMJSCrcL84pCwxTkSPlhz...,question,"[-0.02516673319041729, -0.024208365008234978, ...",Video
126,10,gAAAAABmUH9YJjSfd9EKsOg7w3xxvlvNGZ5J0T1SWUEaug...,question,"[-0.039777971804142, -0.014415937475860119, -0...",Video
