# CSV
---

Un archivo de valores separados por comas (CSV) es un archivo de texto delimitado que utiliza una coma para separar valores. 

Cada línea del archivo es un registro de datos. 

Cada registro consta de uno o más campos, separados por comas.


Haremos un ejemplo de cómo cargar el archivo CSV; en este caso usaremos las opciones por defecto.

In [3]:
from langchain.document_loaders.csv_loader import CSVLoader

# Build a loader for the sample CSV using the default parsing options and
# read it. The printed result shows one Document per CSV row, each tagged
# with its source path and row index in the metadata.
loader = CSVLoader(file_path="../../example_data/mlb_teams_2012.csv")
data = loader.load()

Veamos el contenido de los documentos cargados:

In [4]:
# Print the list of loaded Document objects; each one carries the row text in
# page_content and the source path / row index in metadata.
print(data)

[Document(page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 0}), Document(page_content='Team: Reds\n"Payroll (millions)": 82.20\n"Wins": 97', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 1}), Document(page_content='Team: Yankees\n"Payroll (millions)": 197.96\n"Wins": 95', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 2}), Document(page_content='Team: Giants\n"Payroll (millions)": 117.62\n"Wins": 94', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 3}), Document(page_content='Team: Braves\n"Payroll (millions)": 83.31\n"Wins": 94', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 4}), Document(page_content='Team: Athletics\n"Payroll (millions)": 55.37\n"Wins": 94', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 5}), Document(page_content='Team: Rangers\n"Payroll (millions)": 120.51\n"Wins": 93',

Usaremos pandas para mostrar de mejor forma cómo fue cargado el documento.

In [13]:
import pandas as pd

# Show full cell contents instead of truncating long strings.
# Use the canonical option key: the previous "max.colwidth" only worked
# because pandas treats the key as a regex and "." happened to match "_".
pd.set_option("display.max_colwidth", None)

# Each Document becomes one row; its fields show up as (name, value) tuples
# in the columns, as seen in the rendered table.
df = pd.DataFrame(data)
df

Unnamed: 0,0,1
0,"(page_content, Team: Nationals\n""Payroll (millions)"": 81.34\n""Wins"": 98)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 0})"
1,"(page_content, Team: Reds\n""Payroll (millions)"": 82.20\n""Wins"": 97)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 1})"
2,"(page_content, Team: Yankees\n""Payroll (millions)"": 197.96\n""Wins"": 95)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 2})"
3,"(page_content, Team: Giants\n""Payroll (millions)"": 117.62\n""Wins"": 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 3})"
4,"(page_content, Team: Braves\n""Payroll (millions)"": 83.31\n""Wins"": 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 4})"
5,"(page_content, Team: Athletics\n""Payroll (millions)"": 55.37\n""Wins"": 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 5})"
6,"(page_content, Team: Rangers\n""Payroll (millions)"": 120.51\n""Wins"": 93)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 6})"
7,"(page_content, Team: Orioles\n""Payroll (millions)"": 81.43\n""Wins"": 93)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 7})"
8,"(page_content, Team: Rays\n""Payroll (millions)"": 64.17\n""Wins"": 90)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 8})"
9,"(page_content, Team: Angels\n""Payroll (millions)"": 154.49\n""Wins"": 89)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 9})"


# Personalización del análisis y la carga del CSV
---

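Consulte la documentación del módulo `csv` de Python para obtener más información sobre qué argumentos de `csv_args` son compatibles. Tenga en cuenta que, al indicar `fieldnames` explícitamente, la línea de encabezado del archivo se lee como un registro más (aparece como la fila 0 en la salida).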

In [15]:
# Load the same file again, this time passing explicit parsing options
# through csv_args.
# NOTE: because "fieldnames" is supplied, the file's own header line is read
# as a regular record -- it shows up as row 0 in the printed output.
loader = CSVLoader(
    file_path="../../example_data/mlb_teams_2012.csv",
    csv_args=dict(
        delimiter=",",
        quotechar="\"",
        fieldnames=["Team", "Payroll (millions)", "Wins"],
    ),
)
data = loader.load()

In [16]:
# Print the re-loaded documents; row 0 now holds the header line itself,
# and the keys in page_content are the names given in "fieldnames".
print(data)

[Document(page_content='Team: Team\nPayroll (millions): "Payroll (millions)"\nWins: "Wins"', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 0}), Document(page_content='Team: Nationals\nPayroll (millions): 81.34\nWins: 98', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 1}), Document(page_content='Team: Reds\nPayroll (millions): 82.20\nWins: 97', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 2}), Document(page_content='Team: Yankees\nPayroll (millions): 197.96\nWins: 95', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 3}), Document(page_content='Team: Giants\nPayroll (millions): 117.62\nWins: 94', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 4}), Document(page_content='Team: Braves\nPayroll (millions): 83.31\nWins: 94', metadata={'source': '../../example_data/mlb_teams_2012.csv', 'row': 5}), Document(page_content='Team: Athletics\nPayroll (millions): 55.37\nWins: 94', metadata={'s

In [18]:
# Show full cell contents instead of truncating long strings.
# Use the canonical option key: the previous "max.colwidth" only worked
# because pandas treats the key as a regex and "." happened to match "_".
pd.set_option("display.max_colwidth", None)

# Tabulate the documents loaded with the custom csv_args; each Document
# becomes one row of (field name, value) tuples.
df = pd.DataFrame(data)
df

Unnamed: 0,0,1
0,"(page_content, Team: Team\nPayroll (millions): ""Payroll (millions)""\nWins: ""Wins"")","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 0})"
1,"(page_content, Team: Nationals\nPayroll (millions): 81.34\nWins: 98)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 1})"
2,"(page_content, Team: Reds\nPayroll (millions): 82.20\nWins: 97)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 2})"
3,"(page_content, Team: Yankees\nPayroll (millions): 197.96\nWins: 95)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 3})"
4,"(page_content, Team: Giants\nPayroll (millions): 117.62\nWins: 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 4})"
5,"(page_content, Team: Braves\nPayroll (millions): 83.31\nWins: 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 5})"
6,"(page_content, Team: Athletics\nPayroll (millions): 55.37\nWins: 94)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 6})"
7,"(page_content, Team: Rangers\nPayroll (millions): 120.51\nWins: 93)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 7})"
8,"(page_content, Team: Orioles\nPayroll (millions): 81.43\nWins: 93)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 8})"
9,"(page_content, Team: Rays\nPayroll (millions): 64.17\nWins: 90)","(metadata, {'source': '../../example_data/mlb_teams_2012.csv', 'row': 9})"
