Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow toxlsx() to overwrite, add or replace a worksheet #509

Merged
merged 9 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion petl/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from petl.io.xls import fromxls, toxls

from petl.io.xlsx import fromxlsx, toxlsx
from petl.io.xlsx import fromxlsx, toxlsx, appendxlsx

from petl.io.numpy import fromarray, toarray, torecarray

Expand Down
48 changes: 28 additions & 20 deletions petl/io/xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import locale


from petl.compat import izip_longest, next, xrange
from petl.compat import izip_longest, next, xrange, BytesIO
from petl.util.base import Table
from petl.io.sources import read_source_from_arg, write_source_from_arg


def fromxls(filename, sheet=None, use_view=True, **kwargs):
Expand Down Expand Up @@ -36,26 +37,31 @@ def __iter__(self):
# converted
if self.use_view:
from petl.io import xlutils_view
wb = xlutils_view.View(self.filename)
if self.sheet is None:
ws = wb[0]
else:
ws = wb[self.sheet]
for row in ws:
yield tuple(row)

else:
import xlrd
with xlrd.open_workbook(filename=self.filename,
on_demand=True, **self.kwargs) as wb:
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
source3 = source2.read()
wb = xlutils_view.View(source3)
if self.sheet is None:
ws = wb.sheet_by_index(0)
elif isinstance(self.sheet, int):
ws = wb.sheet_by_index(self.sheet)
ws = wb[0]
else:
ws = wb.sheet_by_name(str(self.sheet))
for rownum in xrange(ws.nrows):
yield tuple(ws.row_values(rownum))
ws = wb[self.sheet]
for row in ws:
yield tuple(row)
else:
import xlrd
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
source3 = source2.read()
with xlrd.open_workbook(file_contents=source3,
on_demand=True, **self.kwargs) as wb:
if self.sheet is None:
ws = wb.sheet_by_index(0)
elif isinstance(self.sheet, int):
ws = wb.sheet_by_index(self.sheet)
else:
ws = wb.sheet_by_name(str(self.sheet))
for rownum in xrange(ws.nrows):
yield tuple(ws.row_values(rownum))


def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
Expand Down Expand Up @@ -92,7 +98,9 @@ def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
fillvalue=None)):
ws.write(r+1, c, label=v, style=style)

wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


Table.toxls = toxls
92 changes: 77 additions & 15 deletions petl/io/xlsx.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import locale


from petl.compat import PY3
from petl.util.base import Table, data
from petl.io.sources import read_source_from_arg, write_source_from_arg


def fromxlsx(filename, sheet=None, range_string=None, min_row=None,
Expand Down Expand Up @@ -58,9 +56,11 @@ def __init__(self, filename, sheet=None, range_string=None,

def __iter__(self):
import openpyxl
wb = openpyxl.load_workbook(filename=self.filename,
read_only=self.read_only,
**self.kwargs)
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
wb = openpyxl.load_workbook(filename=source2,
read_only=self.read_only,
**self.kwargs)
if self.sheet is None:
ws = wb[wb.sheetnames[0]]
elif isinstance(self.sheet, int):
Expand All @@ -86,22 +86,80 @@ def __iter__(self):
pass


def toxlsx(tbl, filename, sheet=None, write_header=True):
def toxlsx(tbl, filename, sheet=None, write_header=True, mode="replace"):
"""
Write a table to a new Excel .xlsx file.

"""
N.B., the sheet name is case sensitive.

import openpyxl
wb = openpyxl.Workbook(write_only=True)
ws = wb.create_sheet(title=sheet)
The `mode` argument controls how the file and sheet are treated:

- `replace`: This is the default. It either replaces or adds a
named sheet, or if no sheet name is provided, all sheets
(overwrites the entire file).

- `overwrite`: Always overwrites the file. This produces a file
with a single sheet.

- `add`: Adds a new sheet. Raises `ValueError` if a named sheet
already exists.

The `sheet` argument can be omitted in all cases. The new sheet
will then get a default name.
If the file does not exist, it will be created, unless `replace`
mode is used with a named sheet. In the latter case, the file
must exist and be a valid .xlsx file.
"""
wb = _load_or_create_workbook(filename, mode, sheet)
ws = _insert_sheet_on_workbook(mode, sheet, wb)
if write_header:
rows = tbl
else:
rows = data(tbl)
for row in rows:
ws.append(row)
wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


def _load_or_create_workbook(filename, mode, sheet):
    """
    Return the workbook that `toxlsx` should write into.

    The existing file is only opened when its contents may have to be kept,
    i.e. for every combination except ``mode == "overwrite"`` and
    ``mode == "replace"`` without a sheet name. If opening the source raises
    the file-not-found error, or the file is not opened at all, a new
    write-only workbook is created instead.

    :param filename: passed to `read_source_from_arg` to open the file
    :param mode: the `mode` value given to `toxlsx`
    :param sheet: the sheet name given to `toxlsx`, or `None`
    :returns: an openpyxl workbook, either loaded or newly created
    """
    # Python 2 has no FileNotFoundError; the name is only evaluated on PY3.
    missing_file_error = FileNotFoundError if PY3 else IOError

    import openpyxl

    starts_from_scratch = (
        mode == "overwrite" or (mode == "replace" and sheet is None)
    )
    workbook = None
    if not starts_from_scratch:
        try:
            src = read_source_from_arg(filename)
            with src.open('rb') as handle:
                workbook = openpyxl.load_workbook(filename=handle,
                                                  read_only=False)
        except missing_file_error:
            # nothing to preserve: fall through and create a new workbook
            workbook = None
    if workbook is None:
        workbook = openpyxl.Workbook(write_only=True)
    return workbook


def _insert_sheet_on_workbook(mode, sheet, wb):
    """
    Return the worksheet of `wb` that the table rows should be appended to.

    :param mode: one of ``"replace"``, ``"add"`` or ``"overwrite"``
    :param sheet: the sheet name, or `None` to let the workbook pick a
        default title
    :param wb: the workbook, accessed through ``wb[name]`` and
        ``wb.create_sheet(title=...)``
    :returns: the worksheet to write into
    :raises ValueError: if `mode` is unknown, or if `mode` is ``"add"`` and
        a sheet with the requested name already exists

    Behaviour per mode:

    - ``replace``: if a sheet called `sheet` exists, its rows are deleted and
      it is reused; otherwise a new sheet is created.
    - ``add``: a new sheet is created; a name clash raises `ValueError`.
    - ``overwrite``: a new sheet is created.
    """
    # Validate first so that an invalid mode never touches the workbook.
    if mode not in ("replace", "add", "overwrite"):
        raise ValueError("Unknown mode '%s'" % (mode,))

    # Only look up an existing sheet when a name was actually given;
    # str(None) would otherwise match a sheet literally titled "None".
    if mode == "replace" and sheet is not None:
        try:
            ws = wb[str(sheet)]
        except KeyError:
            pass  # no such sheet yet: create it below
        else:
            ws.delete_rows(1, ws.max_row)
            return ws

    ws = wb.create_sheet(title=sheet)
    # it creates a sheet named "foo1" if "foo" exists.
    if mode == "add" and sheet is not None and ws.title != sheet:
        raise ValueError("Sheet %s already exists in file" % (sheet,))
    return ws


Table.toxlsx = toxlsx
Expand All @@ -113,7 +171,9 @@ def appendxlsx(tbl, filename, sheet=None, write_header=False):
"""

import openpyxl
wb = openpyxl.load_workbook(filename=filename, read_only=False)
source = read_source_from_arg(filename)
with source.open('rb') as source2:
wb = openpyxl.load_workbook(filename=source2, read_only=False)
if sheet is None:
ws = wb[wb.sheetnames[0]]
elif isinstance(sheet, int):
Expand All @@ -126,7 +186,9 @@ def appendxlsx(tbl, filename, sheet=None, write_header=False):
rows = data(tbl)
for row in rows:
ws.append(row)
wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


Table.appendxlsx = appendxlsx
5 changes: 3 additions & 2 deletions petl/io/xlutils_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ class View(object):
#: :class:`SheetView` for the views of sheets returned.
class_ = SheetView

def __init__(self, path, class_=None):
def __init__(self, file_contents, class_=None, **kwargs):
self.class_ = class_ or self.class_
from xlrd import open_workbook
self.book = open_workbook(path, formatting_info=0, on_demand=True)
self.book = open_workbook(file_contents=file_contents,
on_demand=True, **kwargs)

def __getitem__(self, item):
"""
Expand Down
83 changes: 56 additions & 27 deletions petl/test/io/test_remotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@

import sys
import os
import time
juarezr marked this conversation as resolved.
Show resolved Hide resolved
from importlib import import_module

from petl.compat import PY3
from petl.test.helpers import ieq, eq_
from petl.io.avro import fromavro, toavro
from petl.io.csv import fromcsv, tocsv
from petl.io.json import fromjson, tojson
from petl.io.xlsx import fromxlsx, toxlsx
from petl.io.xls import fromxls, toxls
from petl.util.vis import look

# region Codec test cases
Expand Down Expand Up @@ -69,7 +74,7 @@ def _write_read_from_env_matching(prefix):
if q < 1:
msg = """SKIPPED
For testing remote source define a environment variable:
$ export PETL_TEST_<protocol>='<protocol>://myuser:mypassword@host:port/path/to/file.ext'"""
$ export PETL_TEST_<protocol>='<protocol>://myuser:mypassword@host:port/path/to/folder'"""
print(msg, file=sys.stderr)


Expand All @@ -83,19 +88,24 @@ def _write_read_from_env_url(env_var_name):


def _write_read_into_url(base_url):
_write_read_file_into_url(base_url, "filename1.csv")
_write_read_file_into_url(base_url, "filename2.avro")
_write_read_file_into_url(base_url, "filename3.csv", "gz")
_write_read_file_into_url(base_url, "filename4.avro", "gz")
_write_read_file_into_url(base_url, "filename5.csv", "xz")
_write_read_file_into_url(base_url, "filename6.csv", "zst")
_write_read_file_into_url(base_url, "filename7.csv", "lz4")
_write_read_file_into_url(base_url, "filename8.csv", "snappy")


def _write_read_file_into_url(base_url, filename, compression=None):
if ".avro" in filename and not _has_avro:
return
_write_read_file_into_url(base_url, "filename10.csv")
_write_read_file_into_url(base_url, "filename11.csv", "gz")
_write_read_file_into_url(base_url, "filename12.csv", "xz")
_write_read_file_into_url(base_url, "filename13.csv", "zst")
_write_read_file_into_url(base_url, "filename14.csv", "lz4")
_write_read_file_into_url(base_url, "filename15.csv", "snappy")
_write_read_file_into_url(base_url, "filename20.json")
_write_read_file_into_url(base_url, "filename21.json", "gz")
_write_read_file_into_url(base_url, "filename30.avro", pkg='fastavro')
_write_read_file_into_url(base_url, "filename40.xlsx", pkg='openpyxl')
_write_read_file_into_url(base_url, "filename50.xls", pkg='xlwt')


def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
if pkg is not None:
if not _is_installed(pkg):
print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
return
is_local = base_url.startswith("./")
if compression is not None:
if is_local:
Expand All @@ -112,40 +122,51 @@ def _write_read_file_into_url(base_url, filename, compression=None):
else:
source_url = os.path.join(base_url, filename)

_show__rows_from("Expected:", _table)

actual = None
if ".avro" in filename:
toavro(_table, source_url)
actual = fromavro(source_url)
else:
elif ".xlsx" in filename:
toxlsx(_table, source_url, 'test1', mode='overwrite')
toxlsx(_table2, source_url, 'test2', mode='add')
actual = fromxlsx(source_url, 'test1')
elif ".xls" in filename:
toxls(_table, source_url, 'test')
actual = fromxls(source_url, 'test')
elif ".json" in filename:
tojson(_table, source_url)
actual = fromjson(source_url)
elif ".csv" in filename:
tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
actual = fromcsv(source_url, encoding="ascii")

_show__rows_from("Actual:", actual)
ieq(_table, actual)
ieq(_table, actual) # verify can iterate twice
if actual is not None:
_show__rows_from("Expected:", _table)
_show__rows_from("Actual:", actual)
ieq(_table, actual)
ieq(_table, actual) # verify can iterate twice
else:
print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")


def _show__rows_from(label, test_rows, limit=0):
    """Print `label`, then the `look` rendering of `test_rows`.

    `limit` is forwarded unchanged to `look`.
    """
    print(label)
    preview = look(test_rows, limit=limit)
    print(preview)


def _test_avro_too():
def _is_installed(package_name):
try:
import fastavro

return True
except:
mod = import_module(package_name)
return mod is not None
except Exception as exm:
print(exm, file=sys.stderr)
return False


# endregion

# region Mockup data

_has_avro = _test_avro_too()

_table = (
(u"name", u"friends", u"age"),
(u"Bob", "42", "33"),
Expand All @@ -154,4 +175,12 @@ def _test_avro_too():
(u"Ted", "23", "51"),
)

# Second fixture table, with the same header row as `_table`. In the ".xlsx"
# branch of `_write_read_file_into_url` it is written to the 'test2' sheet
# with mode='add', after `_table` has been written to 'test1'.
_table2 = (
(u"name", u"friends", u"age"),
(u"Giannis", "31", "12"),
(u"James", "38", "8"),
(u"Stephen", "28", "4"),
(u"Jason", "23", "12"),
)

# endregion
Loading