In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
import nltk
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from IPython.display import Markdown as md
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()  
key = os.getenv("GOOGLE_API_KEY")

In [4]:
chat_model = ChatGoogleGenerativeAI(google_api_key=key, 
                                   model="gemini-1.5-pro-latest")
loader = PyPDFLoader("../data/1neural_network.pdf")
pages = loader.load_and_split()
pages

[Document(metadata={'producer': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creator': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creationdate': '2024-02-19T15:15:35+08:00', 'title': '深度學習簡介', 'author': 'fhwang', 'moddate': '2024-02-19T15:15:35+08:00', 'source': '../data/1neural_network.pdf', 'total_pages': 20, 'page': 0, 'page_label': '1'}, page_content='類神經網路基礎\n王豐緒\n銘傳大學資工系'),
 Document(metadata={'producer': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creator': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creationdate': '2024-02-19T15:15:35+08:00', 'title': '深度學習簡介', 'author': 'fhwang', 'moddate': '2024-02-19T15:15:35+08:00', 'source': '../data/1neural_network.pdf', 'total_pages': 20, 'page': 1, 'page_label': '2'}, page_content='學習目標\n• 理解類神經元的基本結構與運作方式\n• 理解何謂Perceptron類神經網路\n• 理解類神經的學習方式\n• 理解類神經的訓練與測試過程\n• 理解矩陣運算與類神經的關聯\n2'),
 Document(metadata={'producer': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creator': '適用於 Microsoft 365 的 Microsoft® PowerPoint®', 'creationd

In [5]:
text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)

chunks = text_splitter.split_documents(pages)
print(len(chunks))
print(type(chunks[0]))

20
<class 'langchain_core.documents.base.Document'>


In [6]:
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=key, model="models/embedding-001")
db = Chroma.from_documents(chunks, embedding_model, persist_directory="../chroma_db_")
db.persist()
db_connection = Chroma(persist_directory="../chroma_db_", embedding_function=embedding_model)

  db.persist()
  db_connection = Chroma(persist_directory="../chroma_db_", embedding_function=embedding_model)


In [7]:
retriever = db_connection.as_retriever(search_kwargs={"k": 5})

print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [8]:
chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content="""You are a teacher in Scaffolding Instruction education.
                  Given a context and question from user,
                  you should answer based on the given context."""),
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)
])

output_parser = StrOutputParser()


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | chat_model
    | output_parser
)

In [9]:
response = rag_chain.invoke("""Please summarize what is a neural network""")
md(response)

Based on the context provided, a neural network, inspired by biological neurons in the brain, is a computing system composed of interconnected processing units called neurons (or perceptrons in the context of a Perceptron neural network, an early type of multi-neuron network).  These artificial neurons receive weighted inputs, sum them, and apply an activation function to produce an output.  Learning occurs by adjusting the connection weights between neurons based on the difference between the network's output and the desired output (using a learning rate). This process, aimed at strengthening or weakening signal intensity, allows the network to improve its performance over time through training and testing phases.  Matrix operations are involved in the calculations within the network.

In [10]:
response = rag_chain.invoke("""詳細解釋權重""")

md(response)

提供的文本沒有詳細解釋權重（weights）本身的含義和作用，只在公式中展示了權重符號 *Wᵢ* 以及它如何與輸入 *Xᵢ*  一起使用。

要詳細解釋權重，需要補充以下信息：

* **權重代表輸入的重要性：**  每個輸入 *Xᵢ* 都有一個对应的權重 *Wᵢ*.  權重值越大，表示該輸入對神經元最終輸出的影響越大。反之，權重值越小，則表示該輸入對輸出的影響越小。

* **權重如何影響輸出：** 神經元將所有輸入乘以它們各自的權重，然後將這些乘積加總起來。  這個加總的結果，再加上一個偏置值 (bias)，將決定神經元的激活狀態。

* **權重是學習的關鍵：**  在訓練神經網絡的過程中，不斷調整權重值，以使網絡的輸出更接近預期的結果。  學習的過程本質上就是找到一組最佳的權重值。

* **權重與突觸的類比：**  可以將權重比作生物神經元之間突觸的強度。  突觸強度越大，信號傳遞就越有效。  權重在人工神經網絡中扮演着类似的角色。

* **權重初始化：**  在開始訓練之前，需要對權重進行初始化。  初始化方法有很多，例如随机初始化、使用特定分布初始化等。  初始化值的选择会影响训练的效果。


總而言之，權重是神經網絡的核心組成部分，它們決定了網絡如何處理輸入並產生輸出。  通過調整權重，神經網絡可以學習複雜的模式和關係。  提供的文本提到了權重，但缺乏對其更深入的解釋。