# DirectoryLoader

In [1]:
from langchain_community.document_loaders.directory import DirectoryLoader

Load all .csv files in a directory:

In [2]:
# Match only the CSV files sitting directly inside ./example_data.
loader = DirectoryLoader(
    "./example_data",
    glob="*.csv",
)

# load() reads every matched file up front and returns a list of Documents.
data = loader.load()
data

[Document(page_content='\n\n\nTeam\n"Payroll (millions)"\n"Wins"\n\n\nNationals\n81.34\n98\n\n\nReds\n82.20\n97\n\n\nYankees\n197.96\n95\n\n\nGiants\n117.62\n94\n\n\nBraves\n83.31\n94\n\n\nAthletics\n55.37\n94\n\n\nRangers\n120.51\n93\n\n\nOrioles\n81.43\n93\n\n\nRays\n64.17\n90\n\n\nAngels\n154.49\n89\n\n\nTigers\n132.30\n88\n\n\nCardinals\n110.30\n88\n\n\nDodgers\n95.14\n86\n\n\nWhite Sox\n96.92\n85\n\n\nBrewers\n97.65\n83\n\n\nPhillies\n174.54\n81\n\n\nDiamondbacks\n74.28\n81\n\n\nPirates\n63.43\n79\n\n\nPadres\n55.24\n76\n\n\nMariners\n81.97\n75\n\n\nMets\n93.35\n74\n\n\nBlue Jays\n75.48\n73\n\n\nRoyals\n60.91\n72\n\n\nMarlins\n118.07\n69\n\n\nRed Sox\n173.18\n69\n\n\nIndians\n78.43\n68\n\n\nTwins\n94.08\n66\n\n\nRockies\n78.06\n64\n\n\nCubs\n88.19\n61\n\n\nAstros\n60.65\n55\n\n\n', metadata={'source': 'example_data\\mlb_teams_2012.csv'})]

Recursively search for files:

In [3]:
# recursive=True extends the search for "*.csv" beyond the top-level directory.
loader = DirectoryLoader(
    "./example_data",
    glob="*.csv",
    recursive=True,
)

data = loader.load()
data

[Document(page_content='\n\n\nTeam\n"Payroll (millions)"\n"Wins"\n\n\nNationals\n81.34\n98\n\n\nReds\n82.20\n97\n\n\nYankees\n197.96\n95\n\n\nGiants\n117.62\n94\n\n\nBraves\n83.31\n94\n\n\nAthletics\n55.37\n94\n\n\nRangers\n120.51\n93\n\n\nOrioles\n81.43\n93\n\n\nRays\n64.17\n90\n\n\nAngels\n154.49\n89\n\n\nTigers\n132.30\n88\n\n\nCardinals\n110.30\n88\n\n\nDodgers\n95.14\n86\n\n\nWhite Sox\n96.92\n85\n\n\nBrewers\n97.65\n83\n\n\nPhillies\n174.54\n81\n\n\nDiamondbacks\n74.28\n81\n\n\nPirates\n63.43\n79\n\n\nPadres\n55.24\n76\n\n\nMariners\n81.97\n75\n\n\nMets\n93.35\n74\n\n\nBlue Jays\n75.48\n73\n\n\nRoyals\n60.91\n72\n\n\nMarlins\n118.07\n69\n\n\nRed Sox\n173.18\n69\n\n\nIndians\n78.43\n68\n\n\nTwins\n94.08\n66\n\n\nRockies\n78.06\n64\n\n\nCubs\n88.19\n61\n\n\nAstros\n60.65\n55\n\n\n', metadata={'source': 'example_data\\mlb_teams_2012.csv'}),
 Document(page_content='\n\n\nID\nTimestamp\nContents\nAttachments\n\n\n7.73264E+18\n2023-04-19T15:14:45.904819+00:00\nlaocgfgbxyqfigvtyyygjzypx

By default, `DirectoryLoader` uses `UnstructuredFileLoader` to load each file. You can specify a different loader class with the `loader_cls` argument, and pass keyword arguments to that class via the optional `loader_kwargs` argument.

In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Swap the default loader for CSVLoader so that each CSV row becomes its own
# Document (with a "row" index in its metadata) instead of one Document per file.
loader = DirectoryLoader(
    "./example_data",
    glob="*.csv",
    loader_cls=CSVLoader,
)

data = loader.load()

# Show the total number of Documents, then preview only the first ten.
print(len(data))
data[:10]

30


[Document(page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 0}),
 Document(page_content='Team: Reds\n"Payroll (millions)": 82.20\n"Wins": 97', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 1}),
 Document(page_content='Team: Yankees\n"Payroll (millions)": 197.96\n"Wins": 95', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 2}),
 Document(page_content='Team: Giants\n"Payroll (millions)": 117.62\n"Wins": 94', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 3}),
 Document(page_content='Team: Braves\n"Payroll (millions)": 83.31\n"Wins": 94', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 4}),
 Document(page_content='Team: Athletics\n"Payroll (millions)": 55.37\n"Wins": 94', metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 5}),
 Document(page_content='Team: Rangers\n"Payroll (millions)": 120.51\n"Wins": 93', metadata={'source': 'ex

If there are too many documents to load at once, you may want to load each document separately. You can use the `lazy_load` method for that; it yields documents one at a time instead of returning the whole list:

In [5]:
from itertools import islice

loader = DirectoryLoader("./example_data", glob="*.csv", loader_cls=CSVLoader)

# Print only the first ten documents. islice stops pulling from the lazy
# generator as soon as ten items have been yielded, so no extra document is
# loaded just to discover that the preview is finished.
for doc in islice(loader.lazy_load(), 10):
    print(doc)

page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 0}
page_content='Team: Reds\n"Payroll (millions)": 82.20\n"Wins": 97' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 1}
page_content='Team: Yankees\n"Payroll (millions)": 197.96\n"Wins": 95' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 2}
page_content='Team: Giants\n"Payroll (millions)": 117.62\n"Wins": 94' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 3}
page_content='Team: Braves\n"Payroll (millions)": 83.31\n"Wins": 94' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 4}
page_content='Team: Athletics\n"Payroll (millions)": 55.37\n"Wins": 94' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 5}
page_content='Team: Rangers\n"Payroll (millions)": 120.51\n"Wins": 93' metadata={'source': 'example_data\\mlb_teams_2012.csv', 'row': 6}
page_content='Team: Orioles\n"Payroll (million