In [4]:
from dotenv import load_dotenv,find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from pathlib import Path

from langchain_community.document_loaders.csv_loader import CSVLoader

In [2]:
dataPath=Path("data").joinpath("mlb_teams_2012.csv")

In [5]:
loader=CSVLoader(file_path=dataPath)

In [6]:
data=loader.load()

In [7]:
data

[Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 0}, page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 1}, page_content='Team: Reds\n"Payroll (millions)": 82.20\n"Wins": 97'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 2}, page_content='Team: Yankees\n"Payroll (millions)": 197.96\n"Wins": 95'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 3}, page_content='Team: Giants\n"Payroll (millions)": 117.62\n"Wins": 94'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 4}, page_content='Team: Braves\n"Payroll (millions)": 83.31\n"Wins": 94'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 5}, page_content='Team: Athletics\n"Payroll (millions)": 55.37\n"Wins": 94'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 6}, page_content='Team: Rangers\n"Payroll (millions)": 120.51\n"Wins": 93'),
 Document(m

<h3>Customizing CSV Parsing and Loading</h3>

In [20]:
loader=CSVLoader(file_path=dataPath,
                 csv_args={"delimiter":",",
                           "quotechar":'"',
                           "fieldnames":["MLB Team","Payroll in Millions","Wins"]
})

In [28]:
for l in loader.lazy_load():
    print(l)
    print("\n")

page_content='MLB Team: Team
Payroll in Millions: "Payroll (millions)"
Wins: "Wins"' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 0}


page_content='MLB Team: Nationals
Payroll in Millions: 81.34
Wins: 98' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 1}


page_content='MLB Team: Reds
Payroll in Millions: 82.20
Wins: 97' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 2}


page_content='MLB Team: Yankees
Payroll in Millions: 197.96
Wins: 95' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 3}


page_content='MLB Team: Giants
Payroll in Millions: 117.62
Wins: 94' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 4}


page_content='MLB Team: Braves
Payroll in Millions: 83.31
Wins: 94' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 5}


page_content='MLB Team: Athletics
Payroll in Millions: 55.37
Wins: 94' metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 6}


page_content='MLB Team: Rangers
Payroll in Millions: 120.51
Wins: 93' metadata={

In [29]:
data=loader.load()

In [30]:
data

[Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 0}, page_content='MLB Team: Team\nPayroll in Millions: "Payroll (millions)"\nWins: "Wins"'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 1}, page_content='MLB Team: Nationals\nPayroll in Millions: 81.34\nWins: 98'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 2}, page_content='MLB Team: Reds\nPayroll in Millions: 82.20\nWins: 97'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 3}, page_content='MLB Team: Yankees\nPayroll in Millions: 197.96\nWins: 95'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 4}, page_content='MLB Team: Giants\nPayroll in Millions: 117.62\nWins: 94'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 5}, page_content='MLB Team: Braves\nPayroll in Millions: 83.31\nWins: 94'),
 Document(metadata={'source': 'data\\mlb_teams_2012.csv', 'row': 6}, page_content='MLB Team: Athletics\nPayroll in Millions: 55.37\nW

<h3>Specifying Column to identify the Data Source</h3>

In [31]:
loader=CSVLoader(file_path=dataPath,source_column="Team")

In [33]:
data=loader.load()

In [34]:
data

[Document(metadata={'source': 'Nationals', 'row': 0}, page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98'),
 Document(metadata={'source': 'Reds', 'row': 1}, page_content='Team: Reds\n"Payroll (millions)": 82.20\n"Wins": 97'),
 Document(metadata={'source': 'Yankees', 'row': 2}, page_content='Team: Yankees\n"Payroll (millions)": 197.96\n"Wins": 95'),
 Document(metadata={'source': 'Giants', 'row': 3}, page_content='Team: Giants\n"Payroll (millions)": 117.62\n"Wins": 94'),
 Document(metadata={'source': 'Braves', 'row': 4}, page_content='Team: Braves\n"Payroll (millions)": 83.31\n"Wins": 94'),
 Document(metadata={'source': 'Athletics', 'row': 5}, page_content='Team: Athletics\n"Payroll (millions)": 55.37\n"Wins": 94'),
 Document(metadata={'source': 'Rangers', 'row': 6}, page_content='Team: Rangers\n"Payroll (millions)": 120.51\n"Wins": 93'),
 Document(metadata={'source': 'Orioles', 'row': 7}, page_content='Team: Orioles\n"Payroll (millions)": 81.43\n"Wins": 93'),
 Docume

In [39]:
for l in loader.lazy_load():
    print(f"Team:{l.metadata['source']}")

Team:Nationals
Team:Reds
Team:Yankees
Team:Giants
Team:Braves
Team:Athletics
Team:Rangers
Team:Orioles
Team:Rays
Team:Angels
Team:Tigers
Team:Cardinals
Team:Dodgers
Team:White Sox
Team:Brewers
Team:Phillies
Team:Diamondbacks
Team:Pirates
Team:Padres
Team:Mariners
Team:Mets
Team:Blue Jays
Team:Royals
Team:Marlins
Team:Red Sox
Team:Indians
Team:Twins
Team:Rockies
Team:Cubs
Team:Astros
