In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

# DeepSeek API KEY
import os
api_key = os.getenv("DEEPSEEK_API_KEY")  # 你的 .env 要存這個 API KEY

# 初始化 DeepSeek
model = ChatOpenAI(
    openai_api_key=api_key,
    base_url="https://api.deepseek.com",  
    model="deepseek-chat",  
)

# 建立訊息
messages = [
    SystemMessage(content="將下列的英文翻譯成中文"),
    HumanMessage(content="hi!"),
]

# 呼叫
response = model.invoke(messages)
print(response.content)


你好！


In [11]:
from dotenv import load_dotenv
load_dotenv()

from langchain.chat_models import ChatOpenAI
# from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate

system_template = "將下列英文翻譯成 {language}"

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)

prompt = prompt_template.invoke({"language": "中文", "text": "hi"})
print(prompt)
response = model.invoke(prompt)
print(response.content)


messages=[SystemMessage(content='將下列英文翻譯成 中文', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi', additional_kwargs={}, response_metadata={})]
嗨 / 你好


In [14]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-win_amd64.whl.metadata (4.5 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-win_amd64.whl (13.7 MB)
   ---------------------------------------- 0.0/13.7 MB ? eta -:--:--
   --------------- ------------------------ 5.2/13.7 MB 39.8 MB/s eta 0:00:01
   ---------------------------------------- 13.7/13.7 MB 61.3 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [20]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
import re
import os


api_key = os.getenv("DEEPSEEK_API_KEY")  # 你的 .env 要存這個 API KEY

# 清理文字資料（Data Cleaning）
def clean_text(text):
    text = re.sub(r"[^a-zA-Z\s]", "", text)  # 只留英文字母和空白
    text = re.sub(r"\s+", " ", text).strip()  # 把多餘的空白合成一個空白
    text = text.lower()                      # 全部轉小寫
    return text

# 載入文字檔案
documents = TextLoader("./dream.txt").load()
cleaned_documents = [clean_text(doc.page_content) for doc in documents] # 讀取 dream.txt 這個檔案內容。

# 切割文字成小段（分段）
# chunk_size=500：每段最多500個字     
# chunk_overlap=100：前後段落重疊100個字。
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100) 
texts = text_splitter.split_documents(documents)
texts = [clean_text(text.page_content) for text in texts]

# 把小段轉成向量 Embedding
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# 建立檢索器 Retriever
retriever = Chroma.from_texts(texts, embeddings).as_retriever()

# 查詢 retriever
query = "請以精要點概述演講內容"
docs = retriever.invoke(query) # 最有可能回答你問題的文件段落集合
# print(docs)

# Chat with the model and our docs
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
    "Please use the following docs {docs},and answer the following question {query}",
)


# 初始化 DeepSeek
model = ChatOpenAI(
    openai_api_key=api_key,
    base_url="https://api.deepseek.com",  # DeepSeek官方API URL
    model="deepseek-chat",  # 也可以是 deepseek-coder
)
chain = prompt | model | StrOutputParser()

response = chain.invoke({"docs": docs, "query": query})
print(response)

這篇演講的核心內容可精要概述如下：

1. **夢想的主軸**  
   反覆強調「我有一個夢想」（I have a dream），表達對平等與自由的深切渴望，並以宗教意象（如「神的榮耀顯現」）和自然隱喻（山谷升高、山嶺降低）象徵社會不平等的消弭。

2. **美國夢的實踐**  
   呼籲國家實現立國精神，引用《獨立宣言》中「人皆生而平等」的信念，期待不同種族（如昔日奴隸與奴隸主的後代）能和解共融，體現「兄弟情誼」。

3. **當下與未來的挑戰**  
   承認當前困境，但仍以「希望」和「信念」為動力，強調夢想根植於美國夢，需持續奮鬥以達成社會轉型。

4. **重複修辭的感染力**  
   透過重複句式強化訴求，如三次「I have a dream today」及對「one day」的描繪，凸顯理想的迫切性與普世性。

全文結合宗教願景、國家理想與現實批判，傳遞對種族平等與社會正義的堅定呼籲。


In [21]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load your document 
text_loader = TextLoader("./dream.txt")  
documents = text_loader.load()  

# Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)

# Split documents
splits = text_splitter.split_documents(documents)
for i, split in enumerate(splits):
    print(f"Split {i+1}:\n{split}\n")

Split 1:
page_content='And so even though we face the difficulties of today and tomorrow, I still have a dream. It is a' metadata={'source': './dream.txt'}

Split 2:
page_content='a dream. It is a dream deeply rooted in the American dream.' metadata={'source': './dream.txt'}

Split 3:
page_content='I have a dream that one day this nation will rise up and live out the true meaning of its creed:' metadata={'source': './dream.txt'}

Split 4:
page_content='We hold these truths to be self-evident, that all men are created equal.' metadata={'source': './dream.txt'}

Split 5:
page_content='I have a dream that one day on the red hills of Georgia, the sons of former slaves and the sons of' metadata={'source': './dream.txt'}

Split 6:
page_content='and the sons of former slave owners will be able to sit down together at the table of brotherhood.' metadata={'source': './dream.txt'}

Split 7:
page_content='I have a dream that one day even the state of Mississippi, a state sweltering with the heat 

LangChain DirectoryLoader

In [None]:
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, DirectoryLoader

dir_loader = DirectoryLoader("./data/", glob="**/*.txt", loader_cls=TextLoader)
dir_documents = dir_loader.load()

print("Directory Text Documents:", dir_documents)

Directory Text Documents: [Document(metadata={'source': 'data\\dream.txt'}, page_content='And so even though we face the difficulties of today and tomorrow, I still have a dream. It is a dream deeply rooted in the American dream.\n \nI have a dream that one day this nation will rise up and live out the true meaning of its creed:\n \nWe hold these truths to be self-evident, that all men are created equal.\n \nI have a dream that one day on the red hills of Georgia, the sons of former slaves and the sons of former slave owners will be able to sit down together at the table of brotherhood.\n \nI have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice, sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice.\n \nI have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character.\n \nI have a dream today

In [24]:
!pip install unstructured

Collecting unstructured
  Downloading unstructured-0.17.2-py3-none-any.whl.metadata (24 kB)
Collecting chardet (from unstructured)
  Using cached chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Collecting filetype (from unstructured)
  Using cached filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting lxml (from unstructured)
  Downloading lxml-5.3.2-cp310-cp310-win_amd64.whl.metadata (3.7 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2025.2.18-py3-none-any.whl.metadata (14 kB)
Collecting langdetect (from unstructured)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ---------------------------------------- 0.0/981.5 kB ? eta -:--:--
     ------------------------------------- 981.5/981.5 kB 11.4 MB/s eta 0:00:00
  Preparing meta

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gradio 5.15.0 requires aiofiles<24.0,>=22.0, but you have aiofiles 24.1.0 which is incompatible.


In [None]:
# LangChain DirectoryLoader

In [2]:
from langchain_community.document_loaders import (
    TextLoader,
    PyPDFLoader,
    CSVLoader,
    DirectoryLoader,
)

pdf_loader = PyPDFLoader("./data/DeepSeek-Coder When the Large Language Model Meets.pdf")

docs = pdf_loader.load()
print("PDF Documents:", docs)

PDF Documents: [Document(metadata={'source': './data/DeepSeek-Coder When the Large Language Model Meets.pdf', 'page': 0}, page_content='DeepSeek-Coder: When the Large Language Model Meets\nProgramming - The Rise of Code Intelligence\nDaya Guo*1, Qihao Zhu∗1,2, Dejian Yang1, Zhenda Xie1, Kai Dong1, Wentao Zhang1\nGuanting Chen1, Xiao Bi 1, Y. Wu1, Y.K. Li1, Fuli Luo1, Yingfei Xiong2, Wenfeng Liang1\n1DeepSeek-AI\n2Key Lab of HCST (PKU), MOE; SCS, Peking University\n{zhuqh, guodaya}@deepseek.com\nhttps://github.com/deepseek-ai/DeepSeek-Coder\nAbstract\nThe rapid development of large language models has revolutionized code intelligence in\nsoftware development. However, the predominance of closed-source models has restricted\nextensive research and development. To address this, we introduce the DeepSeek-Coder series,\na range of open-source code models with sizes from 1.3B to 33B, trained from scratch on 2\ntrillion tokens. These models are pre-trained on a high-quality project-level code

In [1]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


# Define a prompt template
prompt = ChatPromptTemplate.from_template("我今天心情{mood}")

# Create a chat model：初始化 DeepSeek
model = ChatOpenAI(
    openai_api_key=api_key,
    base_url="https://api.deepseek.com",  # DeepSeek官方API URL
    model="deepseek-chat",  # 也可以是 deepseek-coder
)

# Chain the prompt, model, and output parser
chain = prompt | model | StrOutputParser()

# Run the chain
response = chain.invoke({"mood": "真的非常糟"})
print(response)

NameError: name 'api_key' is not defined

In [None]:
# 
