Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
In alphabetical order:

* `Mark Skelton <https://github.com/mtskelton>`_
* `Pierre-Louis Peeters <https://github.com/PLPeeters>`_
5 changes: 5 additions & 0 deletions changelog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ name: pyexcel-xlsxr
organisation: pyexcel
releases:
- changes:
- action: Fixed
details:
- 'Fix freeze when parsing certain corrupt XLSX files'
date: 31.10.2025
version: 0.6.3
- action: Fixed
details:
- 'Fix reading of files with more than 26 columns'
Expand Down
26 changes: 11 additions & 15 deletions pyexcel_xlsxr/messy_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,12 @@
STYLE_FILENAME = "xl/styles.xml"
SHARED_STRING = "xl/sharedStrings.xml"
WORK_BOOK = "xl/workbook.xml"
SHEET_MATCHER = "xl/worksheets/(work)?sheet([0-9]+)?.xml"
SHEET_INDEX_MATCHER = "xl/worksheets/(work)?sheet(([0-9]+)?).xml"
XLSX_ROW_MATCH = re.compile(rb".*?(<row.*?<\/.*?row>).*?", re.MULTILINE)
NUMBER_FMT_MATCHER = re.compile(
rb".*?(<numFmts.*?<\/.*?numFmts>).*?", re.MULTILINE
)
XFS_FMT_MATCHER = re.compile(
rb".*?(<cellXfs.*?<\/.*?cellXfs>).*?", re.MULTILINE
)
SHEET_FMT_MATCHER = re.compile(rb".*?(<sheet .*?\/>).*?", re.MULTILINE)
DATE_1904_MATCHER = re.compile(rb".*?(<workbookPr.*?\/>).*?", re.MULTILINE)
SHEET_MATCHER = re.compile(r"xl/worksheets/(?:work)?sheet([0-9]+)?.xml")
XLSX_ROW_MATCH = re.compile(rb"<row\b[^>]*>.*?</row>", re.DOTALL)
NUMBER_FMT_MATCHER = re.compile(rb"<numFmts\b[^>]*>.*?</numFmts>", re.DOTALL)
XFS_FMT_MATCHER = re.compile(rb"<cellXfs\b[^>]*>.*?</cellXfs>", re.DOTALL)
SHEET_FMT_MATCHER = re.compile(rb"<sheet\b.*?/>", re.DOTALL)
DATE_1904_MATCHER = re.compile(rb"<workbookPr\b.*?/>", re.DOTALL)
# "xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"
# But it is not used for now
X14AC_NAMESPACE = b'xmlns:x14ac="http://not.used.com/"'
Expand Down Expand Up @@ -159,14 +154,15 @@ def find_sheets(file_list):
return [
sheet_file
for sheet_file in file_list
if re.match(SHEET_MATCHER, sheet_file)
if SHEET_MATCHER.match(sheet_file)
]


def get_sheet_index(file_name):
if re.match(SHEET_MATCHER, file_name):
result = re.search(SHEET_INDEX_MATCHER, file_name)
index = int(result.group(3)) if result.group(3) else 1
sheet_match = SHEET_MATCHER.match(file_name)

if sheet_match:
index = int(sheet_match.group(1)) if sheet_match.group(1) else 1
return index - 1
else:
raise Exception("Invalid sheet file name")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def filter_out_test_code(file_handle):
keywords=KEYWORDS,
python_requires=PYTHON_REQUIRES,
extras_require=EXTRAS_REQUIRE,
tests_require=["nose"],
tests_require=["pytest~=8.4"],
install_requires=INSTALL_REQUIRES,
packages=PACKAGES,
include_package_data=True,
Expand Down
2 changes: 1 addition & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
nose
pytest~=8.4
mock;python_version<"3"
codecov
coverage
Expand Down
Loading