Skip to content

Commit

Permalink
Merge f12dd9b into d713928
Browse files Browse the repository at this point in the history
  • Loading branch information
juarezr authored Aug 8, 2020
2 parents d713928 + f12dd9b commit d86e03a
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 70 deletions.
2 changes: 1 addition & 1 deletion petl/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from petl.io.xls import fromxls, toxls

from petl.io.xlsx import fromxlsx, toxlsx
from petl.io.xlsx import fromxlsx, toxlsx, appendxlsx

from petl.io.numpy import fromarray, toarray, torecarray

Expand Down
48 changes: 28 additions & 20 deletions petl/io/xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import locale


from petl.compat import izip_longest, next, xrange
from petl.compat import izip_longest, next, xrange, BytesIO
from petl.util.base import Table
from petl.io.sources import read_source_from_arg, write_source_from_arg


def fromxls(filename, sheet=None, use_view=True, **kwargs):
Expand Down Expand Up @@ -36,26 +37,31 @@ def __iter__(self):
# converted
if self.use_view:
from petl.io import xlutils_view
wb = xlutils_view.View(self.filename)
if self.sheet is None:
ws = wb[0]
else:
ws = wb[self.sheet]
for row in ws:
yield tuple(row)

else:
import xlrd
with xlrd.open_workbook(filename=self.filename,
on_demand=True, **self.kwargs) as wb:
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
source3 = source2.read()
wb = xlutils_view.View(source3)
if self.sheet is None:
ws = wb.sheet_by_index(0)
elif isinstance(self.sheet, int):
ws = wb.sheet_by_index(self.sheet)
ws = wb[0]
else:
ws = wb.sheet_by_name(str(self.sheet))
for rownum in xrange(ws.nrows):
yield tuple(ws.row_values(rownum))
ws = wb[self.sheet]
for row in ws:
yield tuple(row)
else:
import xlrd
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
source3 = source2.read()
with xlrd.open_workbook(file_contents=source3,
on_demand=True, **self.kwargs) as wb:
if self.sheet is None:
ws = wb.sheet_by_index(0)
elif isinstance(self.sheet, int):
ws = wb.sheet_by_index(self.sheet)
else:
ws = wb.sheet_by_name(str(self.sheet))
for rownum in xrange(ws.nrows):
yield tuple(ws.row_values(rownum))


def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
Expand Down Expand Up @@ -92,7 +98,9 @@ def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
fillvalue=None)):
ws.write(r+1, c, label=v, style=style)

wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


Table.toxls = toxls
92 changes: 77 additions & 15 deletions petl/io/xlsx.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import locale


from petl.compat import PY3
from petl.util.base import Table, data
from petl.io.sources import read_source_from_arg, write_source_from_arg


def fromxlsx(filename, sheet=None, range_string=None, min_row=None,
Expand Down Expand Up @@ -58,9 +56,11 @@ def __init__(self, filename, sheet=None, range_string=None,

def __iter__(self):
import openpyxl
wb = openpyxl.load_workbook(filename=self.filename,
read_only=self.read_only,
**self.kwargs)
source = read_source_from_arg(self.filename)
with source.open('rb') as source2:
wb = openpyxl.load_workbook(filename=source2,
read_only=self.read_only,
**self.kwargs)
if self.sheet is None:
ws = wb[wb.sheetnames[0]]
elif isinstance(self.sheet, int):
Expand All @@ -86,22 +86,80 @@ def __iter__(self):
pass


def toxlsx(tbl, filename, sheet=None, write_header=True):
def toxlsx(tbl, filename, sheet=None, write_header=True, mode="replace"):
"""
Write a table to a new Excel .xlsx file.
"""
N.B., the sheet name is case sensitive.
import openpyxl
wb = openpyxl.Workbook(write_only=True)
ws = wb.create_sheet(title=sheet)
The `mode` argument controls how the file and sheet are treated:
- `replace`: This is the default. It either replaces or adds a
named sheet, or if no sheet name is provided, all sheets
(overwrites the entire file).
- `overwrite`: Always overwrites the file. This produces a file
with a single sheet.
- `add`: Adds a new sheet. Raises `ValueError` if a named sheet
already exists.
The `sheet` argument can be omitted in all cases. The new sheet
will then get a default name.
If the file does not exist, it will be created, unless `replace`
mode is used with a named sheet. In the latter case, the file
must exist and be a valid .xlsx file.
"""
wb = _load_or_create_workbook(filename, mode, sheet)
ws = _insert_sheet_on_workbook(mode, sheet, wb)
if write_header:
rows = tbl
else:
rows = data(tbl)
for row in rows:
ws.append(row)
wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


def _load_or_create_workbook(filename, mode, sheet):
    """
    Return the openpyxl workbook that `toxlsx` should write into.

    In `overwrite` mode, or in `replace` mode without a sheet name, no
    attempt is made to read `filename` and a new write-only workbook is
    returned. In every other case the existing file is opened through
    `read_source_from_arg` and loaded; if that fails with a file-not-found
    error, a new write-only workbook is returned instead.
    """
    # Python 2 has no FileNotFoundError; there the IOError is caught instead.
    missing_file_error = FileNotFoundError if PY3 else IOError

    import openpyxl

    starts_fresh = mode == "overwrite" or (mode == "replace" and sheet is None)
    if not starts_fresh:
        try:
            with read_source_from_arg(filename).open('rb') as stream:
                return openpyxl.load_workbook(filename=stream, read_only=False)
        except missing_file_error:
            # Nothing to load: fall through and start from an empty workbook.
            pass
    return openpyxl.Workbook(write_only=True)


def _insert_sheet_on_workbook(mode, sheet, wb):
if mode == "replace":
try:
ws = wb[str(sheet)]
ws.delete_rows(1, ws.max_row)
except KeyError:
ws = wb.create_sheet(title=sheet)
elif mode == "add":
ws = wb.create_sheet(title=sheet)
# it creates a sheet named "foo1" if "foo" exists.
if sheet is not None and ws.title != sheet:
raise ValueError("Sheet %s already exists in file" % sheet)
elif mode == "overwrite":
ws = wb.create_sheet(title=sheet)
else:
raise ValueError("Unknown mode '%s'" % mode)
return ws


Table.toxlsx = toxlsx
Expand All @@ -113,7 +171,9 @@ def appendxlsx(tbl, filename, sheet=None, write_header=False):
"""

import openpyxl
wb = openpyxl.load_workbook(filename=filename, read_only=False)
source = read_source_from_arg(filename)
with source.open('rb') as source2:
wb = openpyxl.load_workbook(filename=source2, read_only=False)
if sheet is None:
ws = wb[wb.sheetnames[0]]
elif isinstance(sheet, int):
Expand All @@ -126,7 +186,9 @@ def appendxlsx(tbl, filename, sheet=None, write_header=False):
rows = data(tbl)
for row in rows:
ws.append(row)
wb.save(filename)
target = write_source_from_arg(filename)
with target.open('wb') as target2:
wb.save(target2)


Table.appendxlsx = appendxlsx
5 changes: 3 additions & 2 deletions petl/io/xlutils_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ class View(object):
#: :class:`SheetView` for the views of sheets returned.
class_ = SheetView

def __init__(self, path, class_=None):
def __init__(self, file_contents, class_=None, **kwargs):
self.class_ = class_ or self.class_
from xlrd import open_workbook
self.book = open_workbook(path, formatting_info=0, on_demand=True)
self.book = open_workbook(file_contents=file_contents,
on_demand=True, **kwargs)

def __getitem__(self, item):
"""
Expand Down
83 changes: 56 additions & 27 deletions petl/test/io/test_remotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@

import sys
import os
import time
from importlib import import_module

from petl.compat import PY3
from petl.test.helpers import ieq, eq_
from petl.io.avro import fromavro, toavro
from petl.io.csv import fromcsv, tocsv
from petl.io.json import fromjson, tojson
from petl.io.xlsx import fromxlsx, toxlsx
from petl.io.xls import fromxls, toxls
from petl.util.vis import look

# region Codec test cases
Expand Down Expand Up @@ -69,7 +74,7 @@ def _write_read_from_env_matching(prefix):
if q < 1:
msg = """SKIPPED
For testing remote source define a environment variable:
$ export PETL_TEST_<protocol>='<protocol>://myuser:mypassword@host:port/path/to/file.ext'"""
$ export PETL_TEST_<protocol>='<protocol>://myuser:mypassword@host:port/path/to/folder'"""
print(msg, file=sys.stderr)


Expand All @@ -83,19 +88,24 @@ def _write_read_from_env_url(env_var_name):


def _write_read_into_url(base_url):
_write_read_file_into_url(base_url, "filename1.csv")
_write_read_file_into_url(base_url, "filename2.avro")
_write_read_file_into_url(base_url, "filename3.csv", "gz")
_write_read_file_into_url(base_url, "filename4.avro", "gz")
_write_read_file_into_url(base_url, "filename5.csv", "xz")
_write_read_file_into_url(base_url, "filename6.csv", "zst")
_write_read_file_into_url(base_url, "filename7.csv", "lz4")
_write_read_file_into_url(base_url, "filename8.csv", "snappy")


def _write_read_file_into_url(base_url, filename, compression=None):
if ".avro" in filename and not _has_avro:
return
# _write_read_file_into_url(base_url, "filename10.csv")
# _write_read_file_into_url(base_url, "filename11.csv", "gz")
# _write_read_file_into_url(base_url, "filename12.csv", "xz")
# _write_read_file_into_url(base_url, "filename13.csv", "zst")
# _write_read_file_into_url(base_url, "filename14.csv", "lz4")
# _write_read_file_into_url(base_url, "filename15.csv", "snappy")
# _write_read_file_into_url(base_url, "filename20.json")
# _write_read_file_into_url(base_url, "filename21.json", "gz")
# _write_read_file_into_url(base_url, "filename30.avro", pkg='fastavro')
_write_read_file_into_url(base_url, "filename40.xlsx", pkg='openpyxl')
# _write_read_file_into_url(base_url, "filename50.xls", pkg='xlwt')


def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
if pkg is not None:
if not _is_installed(pkg):
print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
return
is_local = base_url.startswith("./")
if compression is not None:
if is_local:
Expand All @@ -112,40 +122,51 @@ def _write_read_file_into_url(base_url, filename, compression=None):
else:
source_url = os.path.join(base_url, filename)

_show__rows_from("Expected:", _table)

actual = None
if ".avro" in filename:
toavro(_table, source_url)
actual = fromavro(source_url)
else:
elif ".xlsx" in filename:
toxlsx(_table, source_url, 'test1', mode='overwrite')
toxlsx(_table2, source_url, 'test2', mode='add')
actual = fromxlsx(source_url, 'test1')
elif ".xls" in filename:
toxls(_table, source_url, 'test')
actual = fromxls(source_url, 'test')
elif ".json" in filename:
tojson(_table, source_url)
actual = fromjson(source_url)
elif ".csv" in filename:
tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
actual = fromcsv(source_url, encoding="ascii")

_show__rows_from("Actual:", actual)
ieq(_table, actual)
ieq(_table, actual) # verify can iterate twice
if actual is not None:
_show__rows_from("Expected:", _table)
_show__rows_from("Actual:", actual)
ieq(_table, actual)
ieq(_table, actual) # verify can iterate twice
else:
print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")


def _show__rows_from(label, test_rows, limit=0):
    """Print `label`, then the `look` rendering of `test_rows`."""
    print(label)
    rendering = look(test_rows, limit=limit)
    print(rendering)


def _test_avro_too():
def _is_installed(package_name):
try:
import fastavro

return True
except:
mod = import_module(package_name)
return mod is not None
except Exception as exm:
print(exm, file=sys.stderr)
return False


# endregion

# region Mockup data

_has_avro = _test_avro_too()

_table = (
(u"name", u"friends", u"age"),
(u"Bob", "42", "33"),
Expand All @@ -154,4 +175,12 @@ def _test_avro_too():
(u"Ted", "23", "51"),
)

# Second fixture table, with the same header row as `_table`. The xlsx
# round-trip writes it to a 'test2' sheet using mode='add'.
_table2 = (
(u"name", u"friends", u"age"),
(u"Giannis", "31", "12"),
(u"James", "38", "8"),
(u"Stephen", "28", "4"),
(u"Jason", "23", "12"),
)

# endregion
Loading

0 comments on commit d86e03a

Please sign in to comment.