Skip to content

Commit

Permalink
rework xlsx mode for working with remote sources
Browse files Browse the repository at this point in the history
  • Loading branch information
Juarez Rudsatz committed Aug 8, 2020
1 parent fab8a44 commit f12dd9b
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 33 deletions.
57 changes: 36 additions & 21 deletions petl/io/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,41 +110,56 @@ def toxlsx(tbl, filename, sheet=None, write_header=True, mode="replace"):
mode is used with a named sheet. In the latter case, the file
must exist and be a valid .xlsx file.
"""
wb = _load_or_create_workbook(filename, mode, sheet)
ws = _insert_sheet_on_workbook(mode, sheet, wb)
if write_header:
rows = tbl
else:
rows = data(tbl)
for row in rows:
ws.append(row)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


def _load_or_create_workbook(filename, mode, sheet):
    """Return the openpyxl workbook that ``toxlsx`` should write into.

    An existing workbook is loaded from `filename` unless the whole file is
    going to be overwritten anyway, i.e. unless `mode` is ``"overwrite"`` or
    `mode` is ``"replace"`` with no `sheet` given. If the file cannot be
    found, or no load was attempted, a new write-only workbook is created.

    The `mode` value is not validated here; unknown modes are rejected by
    ``_insert_sheet_on_workbook``.

    :param filename: source argument handed to ``read_source_from_arg``
    :param mode: one of ``"replace"``, ``"overwrite"`` or ``"add"``
    :param sheet: target sheet name, or None
    :returns: an ``openpyxl.Workbook`` instance
    """
    # Python 2 has no FileNotFoundError; a missing file raises IOError there.
    if PY3:
        FileNotFound = FileNotFoundError
    else:
        FileNotFound = IOError

    # Imported lazily so openpyxl is only required when xlsx output is used.
    import openpyxl
    wb = None
    if mode != "overwrite" and (mode != "replace" or sheet is not None):
        try:
            # Go through the source abstraction and hand openpyxl an open
            # binary stream instead of a path.
            source = read_source_from_arg(filename)
            with source.open('rb') as source2:
                wb = openpyxl.load_workbook(filename=source2, read_only=False)
        except FileNotFound:
            # Nothing to load: fall through and start from an empty workbook.
            wb = None
    if wb is None:
        wb = openpyxl.Workbook(write_only=True)
    return wb


def _insert_sheet_on_workbook(mode, sheet, wb):
    """Return the worksheet of `wb` that rows should be appended to.

    * ``"replace"``: reuse the sheet named ``str(sheet)`` after deleting all
      of its rows; create the sheet when the workbook has none by that name.
    * ``"add"``: create a new sheet and fail if the name was already taken.
    * ``"overwrite"``: create a new sheet.

    :param mode: one of ``"replace"``, ``"add"`` or ``"overwrite"``
    :param sheet: sheet title, or None to let the workbook pick a name
    :param wb: workbook exposing ``wb[name]`` lookup and ``create_sheet``
    :returns: the worksheet to write into
    :raises ValueError: if `mode` is unknown, or if `mode` is ``"add"`` and
        a sheet called `sheet` already exists
    """
    if mode == "replace":
        try:
            ws = wb[str(sheet)]
            # Empty the existing sheet so the new rows replace its content.
            ws.delete_rows(1, ws.max_row)
        except KeyError:
            ws = wb.create_sheet(title=sheet)
    elif mode == "add":
        ws = wb.create_sheet(title=sheet)
        # create_sheet does not fail on a duplicate title: it creates a
        # sheet named "foo1" if "foo" exists, so compare the resulting title.
        if sheet is not None and ws.title != sheet:
            raise ValueError("Sheet %s already exists in file" % sheet)
    elif mode == "overwrite":
        ws = wb.create_sheet(title=sheet)
    else:
        raise ValueError("Unknown mode '%s'" % mode)
    return ws


# Bind toxlsx as an attribute of Table so it is reachable from table objects.
Table.toxlsx = toxlsx
Expand Down
34 changes: 22 additions & 12 deletions petl/test/io/test_remotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import sys
import os
import time
from importlib import import_module

from petl.compat import PY3
Expand Down Expand Up @@ -87,17 +88,17 @@ def _write_read_from_env_url(env_var_name):


def _write_read_into_url(base_url):
    """Round-trip the sample table through each file format under `base_url`.

    Every call writes one file and reads it back. The third positional
    argument is the compression codec; `pkg` names the package the format
    depends on (presumably used to skip the case when it is not installed --
    confirm against ``_write_read_file_into_url``).

    All formats stay enabled: commenting out everything except the format
    being worked on silently drops coverage for the others.
    """
    _write_read_file_into_url(base_url, "filename10.csv")
    _write_read_file_into_url(base_url, "filename11.csv", "gz")
    _write_read_file_into_url(base_url, "filename12.csv", "xz")
    _write_read_file_into_url(base_url, "filename13.csv", "zst")
    _write_read_file_into_url(base_url, "filename14.csv", "lz4")
    _write_read_file_into_url(base_url, "filename15.csv", "snappy")
    _write_read_file_into_url(base_url, "filename20.json")
    _write_read_file_into_url(base_url, "filename21.json", "gz")
    _write_read_file_into_url(base_url, "filename30.avro", pkg='fastavro')
    _write_read_file_into_url(base_url, "filename40.xlsx", pkg='openpyxl')
    _write_read_file_into_url(base_url, "filename50.xls", pkg='xlwt')


def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
Expand Down Expand Up @@ -126,8 +127,9 @@ def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
toavro(_table, source_url)
actual = fromavro(source_url)
elif ".xlsx" in filename:
toxlsx(_table, source_url, 'test')
actual = fromxlsx(source_url, 'test')
toxlsx(_table, source_url, 'test1', mode='overwrite')
toxlsx(_table2, source_url, 'test2', mode='add')
actual = fromxlsx(source_url, 'test1')
elif ".xls" in filename:
toxls(_table, source_url, 'test')
actual = fromxls(source_url, 'test')
Expand Down Expand Up @@ -173,4 +175,12 @@ def _is_installed(package_name):
(u"Ted", "23", "51"),
)

# Second sample table: a header row followed by four data rows. The .xlsx
# round-trip writes it as an additional sheet ('test2') using mode='add'.
_table2 = (
    (u"name", u"friends", u"age"),
    (u"Giannis", "31", "12"),
    (u"James", "38", "8"),
    (u"Stephen", "28", "4"),
    (u"Jason", "23", "12"),
)

# endregion

0 comments on commit f12dd9b

Please sign in to comment.