In [1]:
import os
# Move the working directory to the parent of the directory the kernel started
# in. The starting directory is remembered the first time this cell runs, so
# re-running the cell always lands in the same place instead of climbing one
# more level on every execution (which would break the relative "data/" path
# used by later cells).
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [2]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Read key/value pairs from a .env file into the process environment before
# the API key is looked up below.
load_dotenv()

# Gemini chat model used by the RAG chain further down the notebook.
# `os` comes from the first cell of the notebook.
llm = ChatGoogleGenerativeAI(
    google_api_key=os.getenv("GOOGLE_API_KEY"),  # None when the variable is unset
    model="gemini-2.5-flash",
    temperature=0.3,
    transport="rest",
)

print("LLM Initialized")

  from .autonotebook import tqdm as notebook_tqdm

All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  from google.generativeai.caching import CachedContent  # type: ignore[import]


LLM Initialized


In [3]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader

def load_pipeda(data_path: str) -> list:
    """Load every ``*.pdf`` file found directly under ``data_path``.

    Args:
        data_path: Directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matching PDFs,
        each file being parsed with ``PyPDFLoader``.
    """
    return DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader).load()

# Load the PDFs under data/ (relative to the current working directory) and
# report how many entries came back.
docs = load_pipeda("data/")
print("Loaded {} pages from PIPEDA Act.".format(len(docs)))

Loaded 66 pages from PIPEDA Act.


In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings: chunks of at most 800 characters, with up to 100
# characters shared between neighbouring chunks.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
text_chunks = text_splitter.split_documents(docs)

print(f" Created {len(text_chunks)} chunks.")

# Preview the first 100 characters of the first chunk as a sanity check.
first_chunk_preview = text_chunks[0].page_content[:100]
print(f"Sample Content: {first_chunk_preview}...")

 Created 389 chunks.
Sample Content: Current to December 29, 2025
Last amended on March 4, 2025
À jour au 29 décembre 2025
Dernière modif...


In [5]:
import os
import time

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

# Load environment variables from .env
load_dotenv()

# Local sentence-transformers model used to embed both chunks and queries.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Pinecone client and the name of the index this notebook reads and writes.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "pipeda-bot-huggingface"

# Upper bound (seconds) on how long to wait for the index to report ready.
INDEX_READY_TIMEOUT_S = 120

# Create the index only when it does not exist yet.
if index_name not in [idx.name for idx in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding size; all-MiniLM-L6-v2 emits 384-d vectors
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# A freshly created index is not usable immediately. Poll its status so the
# "Ready" message below is true and the upload cell does not race the
# index initialisation.
ready_deadline = time.monotonic() + INDEX_READY_TIMEOUT_S
while not pc.describe_index(index_name).status["ready"]:
    if time.monotonic() > ready_deadline:
        raise TimeoutError(
            f"Pinecone index '{index_name}' was not ready after "
            f"{INDEX_READY_TIMEOUT_S} seconds."
        )
    time.sleep(1)

print(f" Pinecone Index '{index_name}' is Ready.")

 Pinecone Index 'pipeda-bot-huggingface' is Ready.


In [6]:
# Create vectorstore and upload.
#
# Each chunk gets a deterministic ID so that re-running this cell overwrites
# the vectors already in the index instead of adding a second copy of every
# chunk (without explicit ids, new IDs are generated on each upload).
import hashlib


def _stable_chunk_id(position, chunk):
    """Return a reproducible vector ID for one chunk.

    The ID is the SHA-256 hex digest of the chunk's ``source`` and ``page``
    metadata (``None`` when absent), its position in ``text_chunks`` and its
    text. Including the position keeps IDs unique within one upload even when
    two chunks have identical text.
    """
    meta = chunk.metadata
    fingerprint = "|".join(
        [
            str(meta.get("source")),
            str(meta.get("page")),
            str(position),
            chunk.page_content,
        ]
    )
    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()


chunk_ids = [_stable_chunk_id(i, chunk) for i, chunk in enumerate(text_chunks)]

vectorstore = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
    ids=chunk_ids,
)

print(" Successfully indexed PIPEDA into Pinecone.")

 Successfully indexed PIPEDA into Pinecone.


In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# System instructions for the assistant. The trailing "{context}" placeholder
# is filled in by the prompt template with the retrieved text.
_SYSTEM_PROMPT_PARTS = [
    "You are a professional Canadian Legal Assistant specializing in PIPEDA. ",
    "Use the following retrieved context from the PIPEDA Act to answer the question. ",
    "If you cannot answer based on the context, say you do not know. ",
    "Always cite the specific Section or Schedule.\n\n",
    "{context}",
]
system_prompt = "".join(_SYSTEM_PROMPT_PARTS)

# Two-message chat template: the system turn carries the instructions and the
# {context} placeholder, the human turn carries the {input} placeholder.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

def format_docs(retrieved_docs):
    """Join the text of the retrieved chunks into one context string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr, i.e. with metadata dicts and escaped newlines
    mixed into the legal text.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


retriever = vectorstore.as_retriever()

# The incoming question is fanned out to both branches: the retriever branch
# turns it into formatted context, the passthrough branch forwards it
# unchanged as {input}.
rag_chain = (
    {
        "context": retriever | format_docs,
        "input": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Run one sample question through the chain and print the answer under a heading.
query = "What are the rules for individual access to personal information?"
response = rag_chain.invoke(query)

print(" AI LEGAL ANALYSIS:\n", response, sep="\n")



 AI LEGAL ANALYSIS:

Under PIPEDA, the rules for individual access to personal information are as follows:

Upon request, an individual must be informed of the existence, use, and disclosure of their personal information and shall be given access to that information. Additionally, an individual has the right to challenge the accuracy and completeness of their information and have it amended as appropriate.

However, there are exceptions to this access requirement. In certain situations, an organization may not be able to provide access to all the personal information it holds about an individual. These exceptions should be limited and specific, and the reasons for denying access must be provided to the individual upon request. One example of an exception mentioned is information that is prohibitively costly to provide.

**Citation:** 4.9 Principle 9 — Individual Access
