<a href="https://colab.research.google.com/github/suryaddede/next-js-rag/blob/master/prototype/rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Packages

In [None]:
!pip install -q langchain langchain-community langchain-google-genai langchain-chroma

# Set Environment Variables

In [None]:
import os
from google.colab import userdata

# Copy each required Colab secret into the process environment under the same name.
for secret_name in ('USER_AGENT', 'GOOGLE_API_KEY'):
  os.environ[secret_name] = userdata.get(secret_name)

# Import Libraries

In [None]:
from langchain import PromptTemplate
from langchain import hub
from langchain.docstore.document import Document
from langchain.document_loaders import WebBaseLoader
from langchain.schema import StrOutputParser
from langchain.schema.prompt_template import format_document
from langchain.schema.runnable import RunnablePassthrough
from langchain_chroma import Chroma

# Load Website Information

In [None]:
# Page whose content is loaded into `docs` for the rest of the notebook.
SOURCE_URL = "https://sisfo.upnjatim.ac.id/struktur-organisasi/"

loader = WebBaseLoader(SOURCE_URL)
docs = loader.load()
print(docs)

[Document(metadata={'source': 'https://sisfo.upnjatim.ac.id/struktur-organisasi/', 'title': 'STRUKTUR ORGANISASI | Sistem Informasi', 'language': 'en-US'}, page_content='\n\n\n\n\n\n\nSTRUKTUR ORGANISASI | Sistem Informasi\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHOME\nPROFIL\n\nVISI KEILMUAN\nSTRUKTUR ORGANISASI\nPROFIL LULUSAN\nDOSEN\nTENDIK\n\n\nTRI DHARMA\n\nKURIKULUM\nPENELITIAN\n\nSOLUSI SI\nMANAJEMEN SI\n\n\nPENGABDIAN MASYARAKAT\n\n\nINFORMASI\n\nBERITA PRODI\nMBKM PRODI\nLAYANAN AKADEMIK MAHASISWA\n\nPRAKTIK KERJA LAPANGAN\nSEMINAR PROPOSAL\nBIMBINGAN SKRIPSI\nSEMINAR HASIL SKRIPSI\n\n\nPENGUMUMAN\nAGENDA\nLABORATORIUM\n\nLab MSI\nLab SOLUSI\n\n\nJURNAL SIBC\nLOWONGAN\n\n\nPORTAL\n\nOBESESI\nKREASI\nKOORDINATOR SITASI\nSITASI MAHASISWA\nSITASI DOSEN\nSIMPEL\n\n\nSTUDENT PROJECTS\nALUMNI\n \n\n\n\n\n\nSelect Page\n\n\n  \n \n\n\n\n \n\n\n\n \n\n\n\n\n\n\n\n\n\nSTRUKTUR ORGANISASI\n\n \n\n\n \n\n\n\n\nKoordinator Program Studi\nAgung Brast

# Extract Website Data

In [None]:
def _collapse_whitespace(text):
  """Strip every line of `text` and drop the lines that end up empty.

  The loaded page text contains long runs of blank lines and padded lines;
  removing them keeps the visible words while cutting the layout noise.
  """
  stripped_lines = (line.strip() for line in text.splitlines())
  return '\n'.join(line for line in stripped_lines if line)

# Rebuild every loaded document with cleaned text; metadata is carried over
# unchanged. Iterating (instead of indexing docs[0]) also copes with an empty
# or multi-document load.
docs = [
  Document(page_content=_collapse_whitespace(doc.page_content), metadata=doc.metadata)
  for doc in docs
]
print(docs)

[Document(metadata={'source': 'https://sisfo.upnjatim.ac.id/struktur-organisasi/', 'title': 'STRUKTUR ORGANISASI | Sistem Informasi', 'language': 'en-US'}, page_content='\n\n\n\n\n\n\nSTRUKTUR ORGANISASI | Sistem Informasi\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHOME\nPROFIL\n\nVISI KEILMUAN\nSTRUKTUR ORGANISASI\nPROFIL LULUSAN\nDOSEN\nTENDIK\n\n\nTRI DHARMA\n\nKURIKULUM\nPENELITIAN\n\nSOLUSI SI\nMANAJEMEN SI\n\n\nPENGABDIAN MASYARAKAT\n\n\nINFORMASI\n\nBERITA PRODI\nMBKM PRODI\nLAYANAN AKADEMIK MAHASISWA\n\nPRAKTIK KERJA LAPANGAN\nSEMINAR PROPOSAL\nBIMBINGAN SKRIPSI\nSEMINAR HASIL SKRIPSI\n\n\nPENGUMUMAN\nAGENDA\nLABORATORIUM\n\nLab MSI\nLab SOLUSI\n\n\nJURNAL SIBC\nLOWONGAN\n\n\nPORTAL\n\nOBESESI\nKREASI\nKOORDINATOR SITASI\nSITASI MAHASISWA\nSITASI DOSEN\nSIMPEL\n\n\nSTUDENT PROJECTS\nALUMNI\n \n\n\n\n\n\nSelect Page\n\n\n  \n \n\n\n\n \n\n\n\n \n\n\n\n\n\n\n\n\n\nSTRUKTUR ORGANISASI\n\n \n\n\n \n\n\n\n\nKoordinator Program Studi\nAgung Brast

# Initialize Embedding Model

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Name of the embedding model used to build `gemini_embeddings`.
EMBEDDING_MODEL = 'models/text-embedding-004'
gemini_embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

# Store the Data with ChromaDB

In [None]:
# Give every document a stable id built from its source URL and its position.
# Without explicit ids each run of this cell stores the documents under fresh
# ids, so the persisted collection accumulates duplicates of the same page;
# with stable ids a re-run is intended to overwrite the existing entries.
doc_ids = [
  f"{doc.metadata.get('source', 'document')}#{position}"
  for position, doc in enumerate(docs)
]

vectorstore = Chroma.from_documents(
  documents=docs,
  embedding=gemini_embeddings,
  ids=doc_ids,
  persist_directory='./drive/MyDrive/Kuliah/Semester 8 (6 SKS)/Skripsi/Proof Of Concept/ChromaDB'
)

# Create a Retriever

In [None]:
# Open the collection stored in the persist directory, using the same
# embedding function that was used to index it.
chroma_dir = './drive/MyDrive/Kuliah/Semester 8 (6 SKS)/Skripsi/Proof Of Concept/ChromaDB'
vectorstore_disk = Chroma(
  embedding_function=gemini_embeddings,
  persist_directory=chroma_dir,
)

# k=1: the retriever is asked for one document per query.
retriever = vectorstore_disk.as_retriever(search_kwargs={'k': 1})

# Initialize Gemini

In [None]:
from langchain_google_genai import GoogleGenerativeAI

# Model id used for answer generation.
# NOTE(review): this looks like an experimental model id - confirm it is still served.
LLM_MODEL = 'gemini-exp-1206'
llm = GoogleGenerativeAI(model=LLM_MODEL)

# Create Prompt Template

In [None]:
# Prompt text for the question-answering chain. `{question}` and `{context}`
# are the two placeholders in the template.
# The string is not a raw string, so every `\n` escape below becomes a real
# newline in addition to the literal line breaks.
llm_prompt_template = """You are an assistant for question-answering tasks at Universitas Pembangunan Nasional "Veteran" Jawa Timur.
Use the following context to answer the question.
If you don't know the answer, just say that you don't know concisely.
Use five sentences maximum and keep the answer concise.\n
Question: {question} \nContext: {context} \nAnswer:"""

# Build the prompt template object from the text above.
llm_prompt = PromptTemplate.from_template(llm_prompt_template)

# Pass the Context to the Model along with the Question

In [None]:
def format_docs(docs):
  """Join the `page_content` of each item in `docs`, separated by a blank line."""
  contents = [doc.page_content for doc in docs]
  separator = '\n\n'
  return separator.join(contents)

# Map the chain input to the two prompt variables: "question" passes the input
# through as-is, "context" sends it to the retriever and joins the results
# with format_docs.
chain_inputs = {
  "question": RunnablePassthrough(),
  "context": retriever | format_docs,
}

# Inputs -> prompt -> model -> plain string output.
rag_chain = chain_inputs | llm_prompt | llm | StrOutputParser()

# Prompt the Model

In [None]:
# Example question; the value of the last expression is shown as the cell output.
question = 'Siapa koordinator prodi sistem informasi?'
rag_chain.invoke(question)

'Koordinator Program Studi Sistem Informasi adalah Agung Brastama Putra, S.Kom, M.Kom. Beliau merupakan dosen di program studi tersebut. Dia bertanggung jawab atas koordinasi seluruh kegiatan di program studi. Informasi lebih lanjut dapat ditemukan di situs web resmi Sistem Informasi Fasilkom UPNVJT. Semoga informasi ini membantu.\n'