Skip to content

Commit

Permalink
Support eliding output name; return string or bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
pwaller committed Sep 25, 2017
1 parent 8441db9 commit 0e53acc
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 7 deletions.
32 changes: 25 additions & 7 deletions pdftables_api/pdftables_api.py
Expand Up @@ -39,52 +39,70 @@
'.xlsx': FORMAT_XLSX,
'.xml': FORMAT_XML,
}

# Output formats that convert() returns as decoded text (response.text)
# rather than raw bytes (response.content) when no output path is given.
_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML}

class Client(object):
def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT):
    """
    Create a client that remembers the given connection settings.

    The API key, API URL and timeout are stored unchanged on the
    instance as `api_key`, `api_url` and `timeout`.
    """
    self.timeout = timeout
    self.api_url = api_url
    self.api_key = api_key

def xlsx(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX with multiple sheets.

    This is an alias for `xlsx_multiple`: both arguments are passed
    straight through and its result is returned.
    If xlsx_path is None, returns the output as a byte string.
    """
    return self.xlsx_multiple(pdf_path, xlsx_path)

def xlsx_single(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX with a single sheet.

    Calls `convert` with `out_format=FORMAT_XLSX_SINGLE` and returns
    its result.
    If xlsx_path is None, returns the output as a byte string.
    """
    return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_SINGLE)

def xlsx_multiple(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX with multiple sheets.

    Calls `convert` with `out_format=FORMAT_XLSX_MULTIPLE` and returns
    its result.
    If xlsx_path is None, returns the output as a byte string.
    """
    return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_MULTIPLE)

def xml(self, pdf_path, xml_path=None):
    """
    Convenience method to convert PDF to XML.

    Calls `convert` with `out_format=FORMAT_XML` and returns its result.
    If xml_path is None, returns the output as a string.
    """
    return self.convert(pdf_path, xml_path, out_format=FORMAT_XML)

def csv(self, pdf_path, csv_path=None):
    """
    Convenience method to convert PDF to CSV.

    Calls `convert` with `out_format=FORMAT_CSV` and returns its result.
    If csv_path is None, returns the output as a string.
    """
    return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV)

def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params):
    """
    Convert the PDF at `pdf_path` into `out_format`, writing to `out_path`.

    `out_path` and `out_format` are first passed through
    `Client.ensure_format_ext`, which returns the pair actually used.
    `query_params` and any extra keyword arguments are forwarded
    unchanged to `self.request`.

    If `out_path` is still None after that step, nothing is written to
    disk: the converted document is returned as a string for the formats
    listed in `_STRING_FORMATS`, or as bytes for binary output types
    (e.g. XLSX). Otherwise the response body is copied to `out_path` and
    None is returned.
    """
    (out_path, out_format) = Client.ensure_format_ext(out_path, out_format)
    with open(pdf_path, 'rb') as pdf_fo:
        response = self.request(pdf_fo, out_format, query_params,
                                **requests_params)

        if out_path is None:
            # Text formats are returned decoded; everything else as raw bytes.
            use_text = out_format in _STRING_FORMATS
            return response.text if use_text else response.content

        # Binary mode: the response stream is copied to disk byte-for-byte.
        with open(out_path, 'wb') as out_fo:
            copyfileobj(response.raw, out_fo)

Expand Down
26 changes: 26 additions & 0 deletions test/test_pdftables_api.py
Expand Up @@ -92,6 +92,32 @@ def test_successful_conversion(self):
except OSError:
pass

def test_successful_conversion_bytes(self):
    # With neither out_path nor out_format given, convert() is expected to
    # hand back the mocked response body as bytes.
    with requests_mock.mock() as mocked_api:
        mocked_api.post('https://pdftables.com/api?key=fake_key', content=b'xlsx output')

        with NamedTemporaryFile(suffix="test.pdf") as pdf_file:
            pdf_name = pdf_file.name
            pdf_file.write(b"Hello world")
            # Close the underlying file so convert() can reopen it by name.
            pdf_file.file.close()

            result = Client('fake_key').convert(pdf_name)

            self.assertEqual(b'xlsx output', result)

def test_successful_conversion_string(self):
    # With out_format="csv" and no out_path, convert() is expected to hand
    # back the mocked response body as a text string.
    with requests_mock.mock() as mocked_api:
        mocked_api.post('https://pdftables.com/api?key=fake_key', text='csv output')

        with NamedTemporaryFile(suffix="test.pdf") as pdf_file:
            pdf_name = pdf_file.name
            pdf_file.write(b"Hello world")
            # Close the underlying file so convert() can reopen it by name.
            pdf_file.file.close()

            result = Client('fake_key').convert(pdf_name, out_format="csv")

            self.assertEqual('csv output', result)

def test_different_api_url(self):
with requests_mock.mock() as m:
m.post('http://example.com/api?key=fake_key', text='xlsx output')
Expand Down

0 comments on commit 0e53acc

Please sign in to comment.