In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
import nltk
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from IPython.display import Markdown as md
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()  
key = os.getenv("GOOGLE_API_KEY")

In [4]:
chat_model = ChatGoogleGenerativeAI(google_api_key=key, 
                                   model="gemini-1.5-pro-latest")
loader = PyPDFLoader("../data/1neural_network.pdf")
pages = loader.load_and_split()
pages

[Document(metadata={'producer': 'ÈÅ©Áî®Êñº Microsoft 365 ÁöÑ Microsoft¬Æ PowerPoint¬Æ', 'creator': 'ÈÅ©Áî®Êñº Microsoft 365 ÁöÑ Microsoft¬Æ PowerPoint¬Æ', 'creationdate': '2024-02-19T15:15:35+08:00', 'title': 'Ê∑±Â∫¶Â≠∏ÁøíÁ∞°‰ªã', 'author': 'fhwang', 'moddate': '2024-02-19T15:15:35+08:00', 'source': '../data/1neural_network.pdf', 'total_pages': 20, 'page': 0, 'page_label': '1'}, page_content='È°ûÁ•ûÁ∂ìÁ∂≤Ë∑ØÂü∫Á§é\nÁéãË±êÁ∑í\nÈäòÂÇ≥Â§ßÂ≠∏Ë≥áÂ∑•Á≥ª'),
 Document(metadata={'producer': 'ÈÅ©Áî®Êñº Microsoft 365 ÁöÑ Microsoft¬Æ PowerPoint¬Æ', 'creator': 'ÈÅ©Áî®Êñº Microsoft 365 ÁöÑ Microsoft¬Æ PowerPoint¬Æ', 'creationdate': '2024-02-19T15:15:35+08:00', 'title': 'Ê∑±Â∫¶Â≠∏ÁøíÁ∞°‰ªã', 'author': 'fhwang', 'moddate': '2024-02-19T15:15:35+08:00', 'source': '../data/1neural_network.pdf', 'total_pages': 20, 'page': 1, 'page_label': '2'}, page_content='Â≠∏ÁøíÁõÆÊ®ô\n‚Ä¢ ÁêÜËß£È°ûÁ•ûÁ∂ìÂÖÉÁöÑÂü∫Êú¨ÁµêÊßãËàáÈÅã‰ΩúÊñπÂºè\n‚Ä¢ ÁêÜËß£‰ΩïË¨ÇPerceptronÈ°ûÁ•ûÁ∂ìÁ∂≤Ë∑Ø\n‚Ä¢ ÁêÜËß£È°ûÁ•ûÁ∂ìÁöÑÂ≠∏ÁøíÊñπÂºè\n‚Ä

In [5]:
text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)

chunks = text_splitter.split_documents(pages)
print(len(chunks))
print(type(chunks[0]))

20
<class 'langchain_core.documents.base.Document'>


In [6]:
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=key, model="models/embedding-001")
db = Chroma.from_documents(chunks, embedding_model, persist_directory="../chroma_db_")
db.persist()
db_connection = Chroma(persist_directory="../chroma_db_", embedding_function=embedding_model)

  db.persist()
  db_connection = Chroma(persist_directory="../chroma_db_", embedding_function=embedding_model)


In [7]:
retriever = db_connection.as_retriever(search_kwargs={"k": 5})

print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [8]:
chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content="""You are a teacher in Scaffolding Instruction education.
                  Given a context and question from user,
                  you should answer based on the given context."""),
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)
])

output_parser = StrOutputParser()


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | chat_model
    | output_parser
)

In [9]:
response = rag_chain.invoke("""Please summarize what is a neural network""")
md(response)

Based on the context provided, a neural network, inspired by biological neurons in the brain, is a computing system composed of interconnected processing units called neurons (or perceptrons in the context of a Perceptron neural network, an early type of multi-neuron network).  These artificial neurons receive weighted inputs, sum them, and apply an activation function to produce an output.  Learning occurs by adjusting the connection weights between neurons based on the difference between the network's output and the desired output (using a learning rate). This process, aimed at strengthening or weakening signal intensity, allows the network to improve its performance over time through training and testing phases.  Matrix operations are involved in the calculations within the network.