In [8]:
from langchain_community.document_loaders import TextLoader

file_path = "./a.txt"

textloader = TextLoader(
    file_path=file_path,
    encoding="utf-8"
)

docs = textloader.load()

In [5]:
print(docs)

print(len(docs))

print(docs[0])

[Document(metadata={'source': './a.txt'}, page_content="what's your name？")]
1
page_content='what's your name？' metadata={'source': './a.txt'}


In [6]:
print(docs[0].metadata)

print(docs[0].page_content)

{'source': './a.txt'}
what's your name？


加载pdf文档

In [None]:
from langchain_community.document_loaders.pdf import PyPDFLoader
pdf_loader = PyPDFLoader(
    file_path = "./a.pdf"
)

docs = pdf_loader.load()

print(docs)

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-10-30T09:41:47+08:00', 'author': '琦 夏', 'moddate': '2025-10-30T09:41:47+08:00', 'source': './a.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='what’s your name? \nmy name is Bob.')]


加载网络中的一个文件

In [6]:
from langchain_community.document_loaders.pdf import PyPDFLoader
pdf_loader = PyPDFLoader(
    file_path = "https://arxiv.org/pdf/2302.03803"
)

docs = pdf_loader.load()

print(len(docs))

for doc in docs:
    print(doc)

8
page_content='arXiv:2302.03803v1  [math.AG]  7 Feb 2023
A WEAK (k, k )-LEFSCHETZ THEOREM FOR PROJECTIVE
TORIC ORBIFOLDS
William D. Montoya
Instituto de Matem´ atica, Estat´ ıstica e Computa¸ c˜ ao Cient´ ıﬁca,
Universidade Estadual de Campinas (UNICAMP),
Rua S´ ergio Buarque de Holanda 651, 13083-859, Campinas, SP , Brazil
February 9, 2023
Abstract
Firstly we show a generalization of the (1,1)-Lefschetz theorem for projective
toric orbifolds and secondly we prove that on 2k-dimensional quasi-smooth hyper-
surfaces coming from quasi-smooth intersection surfaces, under the Cayley trick,
every rational (k, k)-cohomology class is algebraic, i.e., the Hodge conjectureholds
on them.
1 Introduction
In [3] we proved that, under suitable conditions, on a very general codimension s quasi-
smooth intersection subvariety X in a projective toric orbifold Pd
Σ with d +s = 2(k +1)
the Hodge conjecture holds, that is, every (p, p )-cohomology class, under the Poincar´ e
duality is a rational linear 

加载CSV文档

In [None]:
from langchain_community.document_loaders import CSVLoader

csv_loader = CSVLoader(
    file_path= "./a.csv",
)

csv_docs = csv_loader.load()

print(len(csv_docs))

for doc in csv_docs:
    print(doc)

3
page_content='﻿1: 3
2: 4' metadata={'source': './a.csv', 'row': 0}
page_content='﻿1: 5
2: 6' metadata={'source': './a.csv', 'row': 1}
page_content='﻿1: 7
2: 8' metadata={'source': './a.csv', 'row': 2}


加载JSON文档

加载json文件中所有的数据

In [10]:
from langchain_community.document_loaders import JSONLoader

json_loader = JSONLoader(
    file_path="./a.json",
    jq_schema=".",  #表示加载所有的字段
    text_content=False  #将加载的json对象转换为json字符串
)

docs = json_loader.load()

print(docs)

[Document(metadata={'source': '/home/qixia/langchain/chapter07/a.json', 'seq_num': 1}, page_content='{"messages": [{"sender": "Alice", "content": "Hello, how are you today?", "timestamp": "2023-05-15T10:00:00"}, {"sender": "Bob", "content": "I\'m doing well, thanks for asking!", "timestamp": "2023-05-15T10:02:00"}, {"sender": "Alice", "content": "Would you like to meet for lunch?", "timestamp": "2023-05-15T10:05:00"}, {"sender": "Bob", "content": "Sure, that sounds great!", "timestamp": "2023-05-15T10:07:00"}], "conversation_id": "conv_12345", "participants": ["Alice", "Bob"]}')]


加载json文件中messages[]中的所有的conten字段

In [13]:
from langchain_community.document_loaders import JSONLoader

json_loader = JSONLoader(
    file_path="./a.json",
    jq_schema=".messages[].content",  #表示加载messages[]的所有的content
    # text_content=False  #将加载的json对象转换为json字符串
)

docs = json_loader.load()

print(docs)

for doc in docs:
    print(doc.page_content)

[Document(metadata={'source': '/home/qixia/langchain/chapter07/a.json', 'seq_num': 1}, page_content='Hello, how are you today?'), Document(metadata={'source': '/home/qixia/langchain/chapter07/a.json', 'seq_num': 2}, page_content="I'm doing well, thanks for asking!"), Document(metadata={'source': '/home/qixia/langchain/chapter07/a.json', 'seq_num': 3}, page_content='Would you like to meet for lunch?'), Document(metadata={'source': '/home/qixia/langchain/chapter07/a.json', 'seq_num': 4}, page_content='Sure, that sounds great!')]
Hello, how are you today?
I'm doing well, thanks for asking!
Would you like to meet for lunch?
Sure, that sounds great!


In [None]:
from langchain_community.document_loaders import JSONLoader

#方式1
json_loader = JSONLoader(
    file_path="./a.json",
    jq_schema=".data.items[].content"
)

#方式2
json_loader = JSONLoader(
    file_path="./a.json",
    jq_schema=".data.items[]",
    content_key=".content",
    is_content_key_jq_parsable=True #用jq解析content key
)

docs = json_loader.load()

for doc in docs:
    print(doc.page_content)

In [None]:
from langchain_community.document_loaders import JSONLoader

loader = JSONLoader(
    file_path="./a.json",
    jq_schema=".data.items[]",
    content_key='.title + "\\n\n" + .content',
    is_content_key_jq_parsable=True
)

data = loader.load()

for doc in data:
    print(doc.page_content)