Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

IndexError: list index out of range #210

@tsoernes

Description

@tsoernes

Describe the bug

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/core.py:443, in Table.set_edges(self, vertical, horizontal, joint_tol)
    441     i = len(self.rows) - 1
    442     for j in range(start, end):
--> 443         self.cells[i][j].bottom = True
    444 elif i == 0:  # only top edge
    445     for j in range(start, end):

IndexError: list index out of range

Steps to reproduce the bug

Expected behavior

Code

    tables_li = pypdf_table_extraction.read_pdf(
        str(pdf_path),
        pages=page_numbers,
        strip_text="\n",
    )

PDF

Cannot add, confidential

Screenshots

Environment

  • OS: [e.g. macOS]
  • Python version: 3.12
  • Numpy version: '1.26.4'
  • OpenCV version:
  • Ghostscript version: GPL Ghostscript 10.02.1 (2023-11-01)
  • pypdf_table_extraction version: 0.0.2

Additional context

Full traceback

2024-10-21T09:47:14 - INFO - Processing page-149
Processing page-149
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
File ~/code/ai-pilot-wellreport/table_extraction/no_conda/extraction.py:175
    171         page_to_tables[table.page].append(TableInfo(table._bbox[1], table._bbox[3], df))
    172     return dict(page_to_tables)
--> 175 page_to_tables = get_tables(pdf_path, page_numbers)
    176 markdown_content = pdf_to_markdown(pdf_path, page_numbers, page_to_tables)
    177 print(markdown_content)

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/extraction.py:163, in get_tables(pdf_path, page_numbers)
    161 elif isinstance(page_numbers, list):
    162     page_numbers = ",".join(map(str, page_numbers))
--> 163 tables_li = camelot.read_pdf(
    164     str(pdf_path),
    165     pages=page_numbers,
    166     strip_text="\n",
    167 )
    168 page_to_tables = defaultdict(list)
    169 for table in tables_li:

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/io.py:125, in read_pdf(filepath, pages, password, flavor, suppress_stdout, parallel, layout_kwargs, **kwargs)
    123 p = PDFHandler(filepath, pages=pages, password=password)
    124 kwargs = remove_extra(kwargs, flavor=flavor)
--> 125 tables = p.parse(
    126     flavor=flavor,
    127     suppress_stdout=suppress_stdout,
    128     parallel=parallel,
    129     layout_kwargs=layout_kwargs,
    130     **kwargs
    131 )
    132 return tables

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/handlers.py:202, in PDFHandler.parse(self, flavor, suppress_stdout, parallel, layout_kwargs, **kwargs)
    200     else:
    201         for p in self.pages:
--> 202             t = self._parse_page(
    203                 p, tempdir, parser, suppress_stdout, layout_kwargs
    204             )
    205             tables.extend(t)
    207 return TableList(sorted(tables))

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/handlers.py:232, in PDFHandler._parse_page(self, page, tempdir, parser, suppress_stdout, layout_kwargs)
    230 self._save_page(self.filepath, page, tempdir)
    231 page_path = os.path.join(tempdir, f"page-{page}.pdf")
--> 232 tables = parser.extract_tables(
    233     page_path, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
    234 )
    235 return tables

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/parsers/lattice.py:412, in Lattice.extract_tables(self, filename, suppress_stdout, layout_kwargs)
    408 for table_idx, tk in enumerate(
    409     sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
    410 ):
    411     cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
--> 412     table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
    413     table._bbox = tk
    414     _tables.append(table)

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/parsers/lattice.py:334, in Lattice._generate_table(self, table_idx, cols, rows, **kwargs)
    332 table = Table(cols, rows)
    333 # set table edges to True using ver+hor lines
--> 334 table = table.set_edges(v_s, h_s, joint_tol=self.joint_tol)
    335 # set table border edges to True
    336 table = table.set_border()

File ~/code/ai-pilot-wellreport/table_extraction/no_conda/.venv/lib/python3.12/site-packages/camelot/core.py:443, in Table.set_edges(self, vertical, horizontal, joint_tol)
    441     i = len(self.rows) - 1
    442     for j in range(start, end):
--> 443         self.cells[i][j].bottom = True
    444 elif i == 0:  # only top edge
    445     for j in range(start, end):

IndexError: list index out of range

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions