# DuckDB Loader

Load the results of a DuckDB query, producing one document per row.

In [1]:
from langchain.document_loaders.duckdb_loader import DuckDBLoader

In [3]:
%%file example.csv
Team,Payroll
Nationals,81.34
Reds,82.20

Writing example.csv


In [6]:
# Query the CSV written above; load() yields one Document per result row.
query = "SELECT * FROM read_csv_auto('example.csv')"
loader = DuckDBLoader(query)
data = loader.load()

In [7]:
# With no column options, every column lands in page_content as "name: value"
# lines and metadata is left empty.
print(data)

[Document(page_content='Team: Nationals\nPayroll: 81.34', lookup_str='', metadata={}, lookup_index=0), Document(page_content='Team: Reds\nPayroll: 82.2', lookup_str='', metadata={}, lookup_index=0)]


## Specifying Which Columns are Content vs Metadata

In [8]:
# Split the columns: "Team" is rendered into page_content, "Payroll" is kept as metadata.
query = "SELECT * FROM read_csv_auto('example.csv')"
loader = DuckDBLoader(
    query,
    page_content_columns=["Team"],
    metadata_columns=["Payroll"],
)
data = loader.load()

In [9]:
# page_content now holds only Team; Payroll appears under metadata instead.
print(data)

[Document(page_content='Team: Nationals', lookup_str='', metadata={'Payroll': 81.34}, lookup_index=0), Document(page_content='Team: Reds', lookup_str='', metadata={'Payroll': 82.2}, lookup_index=0)]


## Adding Source to Metadata

In [10]:
# Alias the Team column as "source" in SQL, then route that alias into metadata.
query = "SELECT Team, Payroll, Team As source FROM read_csv_auto('example.csv')"
loader = DuckDBLoader(query, metadata_columns=["source"])
data = loader.load()

In [11]:
# "source" is now in metadata; page_content_columns was not set above, so it
# also still appears in page_content alongside Team and Payroll.
print(data)

[Document(page_content='Team: Nationals\nPayroll: 81.34\nsource: Nationals', lookup_str='', metadata={'source': 'Nationals'}, lookup_index=0), Document(page_content='Team: Reds\nPayroll: 82.2\nsource: Reds', lookup_str='', metadata={'source': 'Reds'}, lookup_index=0)]
