In [None]:
# !pip install -qU langchain-google-genai langchain-google-genai langchain langchain-community
# !pip install -qU pypdf faiss-cpu gradio
# !pip install -qU langchain-google-genai dotenv


^C


# **DEPENDENCIES AND HELPER FUNCTION**

In [3]:
# from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains import LLMChain, RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.memory import ConversationBufferMemory, VectorStoreRetrieverMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from IPython.display import Markdown, display
from langchain.vectorstores import FAISS
from pprint import pprint
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

from dotenv import load_dotenv

import os
import faiss
import pandas as pd
import gradio as gr

# Run this if you're running on collab
# GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

load_dotenv()
GOOGLE_API_KEY  = os.getenv('GOOGLE_API_KEY')

def printmd(string, color=None):
    colorstr = "<span style='color:{}'>{}</span>".format(color, string)

    display(Markdown(colorstr))

  from .autonotebook import tqdm as notebook_tqdm


# **DOCUMENT PROCESSING**

In [12]:
# @title Initiate loader
document_path = "data/PROPOSAL JUNGE LEITER.pdf"
loader = PyPDFLoader(document_path)

In [13]:
# @title Document Splitter

# load document
document = loader.load()

# split instance
doc_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Split the documnet
docs = doc_splitter.split_documents(document)

# Peak store chunks in dataframe for better visual
chunks = {'Chunk_Number': [], 'Page_Content': []} # Initialize lists for columns
for i, doc in enumerate(docs):
  chunks['Chunk_Number'].append(f'Chunk Number: {i+1}') # Append values to lists
  chunks['Page_Content'].append(doc.page_content)

docs_df = pd.DataFrame(chunks)
docs_df.head(5)

Unnamed: 0,Chunk_Number,Page_Content
0,Chunk Number: 1,PROPOSAL PEMBENTUKAN MENTOR AKADEMIK \n ...
1,Chunk Number: 2,DAFTAR ISI \nBAB I PENDAHULUAN 3 I. Lat...
2,Chunk Number: 3,BAB I PENDAHULUAN I. Latar Belakang Seg...
3,Chunk Number: 4,pembelajaran\n \nyang\n \ndapat\n \ndiikuti\n ...
4,Chunk Number: 5,"dapat\n \nmemahami\n \ndengan\n \nbaik,\n \nte..."


In [14]:
# @title Document Embeddings

# initiate the embedding model
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=GOOGLE_API_KEY
)

# Store Vector in db
db_vector = FAISS.from_documents(docs, embeddings)

# Apply vectors into dataframe for better visual
docs_df['text_embedding'] = docs_df['Page_Content'].apply(lambda x: embeddings.embed_query(x))
docs_df.head(5)

Unnamed: 0,Chunk_Number,Page_Content,text_embedding
0,Chunk Number: 1,PROPOSAL PEMBENTUKAN MENTOR AKADEMIK \n ...,"[-0.03115140274167061, -0.02489153854548931, -..."
1,Chunk Number: 2,DAFTAR ISI \nBAB I PENDAHULUAN 3 I. Lat...,"[-0.010875840671360493, 0.05974109470844269, -..."
2,Chunk Number: 3,BAB I PENDAHULUAN I. Latar Belakang Seg...,"[0.0026714575942605734, 0.03974543511867523, -..."
3,Chunk Number: 4,pembelajaran\n \nyang\n \ndapat\n \ndiikuti\n ...,"[0.008158914744853973, 0.014861694537103176, -..."
4,Chunk Number: 5,"dapat\n \nmemahami\n \ndengan\n \nbaik,\n \nte...","[0.0003005028993356973, -0.0002814330509863794..."


# **APPLY RAG CHAT**

In [15]:
# @title **Define the base model**


base_model = "gemini-2.0-flash"
llm_model = ChatGoogleGenerativeAI(
    model=base_model,
    api_key=GOOGLE_API_KEY,
    temperature=0.5,
)

In [16]:
# @title **Convertation Document Chain**

# Set the vector retriever
retriever = db_vector.as_retriever()

# Set the memory for storing vector history
memory = VectorStoreRetrieverMemory(retriever=retriever)

# Prompt Template
prompt_template =  PromptTemplate(
    input_variables=['context','question'],
    template="""
    You are a helpful AI assistant. The answer must be like Customer service assistant.
    Be clear and complete to the answer. You may modify the context to answer the question.

    Be warm and polite, you may ask their name if they didn't give you in the first place to show that you are warm asistant.

    Use the following context to answer the question at the end.
    Context: {context}

    Question: {question}
    Answer:
    """
)

qa_chain = load_qa_chain(
    llm=llm_model,
    prompt=prompt_template,
    chain_type="stuff"
)

# Declare chat object
qa_retriever = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=qa_chain,
    memory=memory
)

  memory = VectorStoreRetrieverMemory(retriever=retriever)
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  qa_chain = load_qa_chain(
  qa_retriever = RetrievalQA(


In [17]:
# @title **Helper Function for Chat Inference**
def get_answer(question, chat_history=[]):
  result = qa_retriever.invoke(question)
  return result['result']

def printmd_answer(question):
  printmd(get_answer(question))

def reset_memory():
    # Clear FAISS index and document store
    db_vector.index.reset() # Clears the FAISS index
    # db_vector.docstore._dict.clear()

# reset_memory()
printmd_answer("Saya mau mengetahui lebih lanjut mengenai tujuan Junge Leiter")

<span style='color:None'>Halo! Dengan senang hati saya akan menjelaskan lebih lanjut mengenai tujuan Junge Leiter.

Tujuan utama dibentuknya Junge Leiter adalah untuk membantu mahasiswa dalam menyelesaikan persyaratan SKPI (Surat Keterangan Pendamping Ijazah) dan pengayaan. Junge Leiter, yang berarti "Pemimpin Muda" dalam bahasa Jerman, berperan sebagai mentor yang akan membimbing mahasiswa lain dalam menyelesaikan tugas-tugas dan kegiatan yang diwajibkan.

Secara lebih rinci, berikut adalah beberapa tujuan utama Junge Leiter:

1.  **Membantu mahasiswa:** Memberikan bantuan kepada mahasiswa dalam memahami dan menyelesaikan dokumen SKPI dan pengayaan.
2.  **Peran Mentor:** Memanfaatkan mahasiswa yang telah berpengalaman dalam menyelesaikan SKPI dan pengayaan sebagai mentor bagi mahasiswa lain.
3.  **Membangun Pemimpin Muda:** Mencetak pemimpin muda dari para anggota (member) melalui peran-peran mentor yang diemban.

Apakah ada hal lain yang ingin Anda ketahui tentang Junge Leiter?</span>

In [18]:
# @title **Chat History**

pprint(db_vector.docstore._dict)

{'05907a5f-78a1-4bc6-af41-9d1c5cc0dde3': Document(id='05907a5f-78a1-4bc6-af41-9d1c5cc0dde3', metadata={'producer': 'Skia/PDF m136 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'PROPOSAL JUNGE LEITER', 'source': 'data/PROPOSAL JUNGE LEITER.pdf', 'total_pages': 8, 'page': 1, 'page_label': '2'}, page_content='DAFTAR  ISI   \nBAB  I  PENDAHULUAN  3 I.  Latar  Belakang  3 II.  Rumusan  Masalah  4 III.  Maksud  dan  Tujuan  4 IV.  Manfaat  4 V.  Sasaran  Kepengurusan  dan  Keanggotaan  5 BAB  II  RINCIAN  PERAN  PENGURUS  6 I.  Peran  Kepengurusan  6 BAB  III  PENUTUPAN  8 I.  Harapan  8'),
 '2e55cda5-127b-4e27-ae39-f679f008bac2': Document(id='2e55cda5-127b-4e27-ae39-f679f008bac2', metadata={'producer': 'Skia/PDF m136 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'PROPOSAL JUNGE LEITER', 'source': 'data/PROPOSAL JUNGE LEITER.pdf', 'total_pages': 8, 'page': 5, 'page_label': '6'}, page_content='dengan\n \ninternal\n \nJunge\n \nLeiter\n \ndan

# **CHATBOT**

In [11]:
print("QA Retriever:", qa_retriever)
print("Vector Index:", db_vector.index)

QA Retriever: memory=VectorStoreRetrieverMemory(retriever=VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7d8ace3e1b50>, search_kwargs={}), exclude_input_keys=()) verbose=False combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\n    You are a helpful AI assistant. The answer must be like Customer service assistant.\n    Be clear and complete to the answer. You may modify the context to answer the question.\n\n    Be warm and polite, you may ask their name if they didn't give you in the first place to show that you are warm asistant.\n\n    Use the following context to answer the question at the end.\n    Context: {context}\n\n    Question: {question}\n    Answer:\n    "), llm=ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key

In [None]:
import time

chat_history = []
def respond(message):
    global chat_history

    # Run the chatbot pipeline
    response = qa_retriever.invoke(message)


    # Update history for the UI
    chat_history.extend([
        {'role': 'user', 'content': message},
        {'role': 'assistant', 'content': response},
    ])
    time.sleep(2)
    return chat_history, ""

def reset_memory():
    global chat_history
    chat_history = []

    # Reset memory in FAISS
    db_vector.index.reset()
    # db_vector.docstore._dict.clear()

    return [], "Memory has been reset!"

with gr.Blocks() as demo:
    gr.Markdown("##RAG Chatbot with Memory (FAISS + Gradio)")

    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Type your question here...", label="Your Message")
    clear_btn = gr.Button("Reset Memory")
    status = gr.Label()

    msg.submit(respond, inputs=msg, outputs=[chatbot, msg])
    clear_btn.click(reset_memory, outputs=[chatbot, status])

demo.launch()

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

