diff --git a/pdftables_api/pdftables_api.py b/pdftables_api/pdftables_api.py index 65e6241..2b5f31d 100644 --- a/pdftables_api/pdftables_api.py +++ b/pdftables_api/pdftables_api.py @@ -39,7 +39,7 @@ '.xlsx': FORMAT_XLSX, '.xml': FORMAT_XML, } - +_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML} class Client(object): def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT): @@ -47,44 +47,62 @@ def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT): self.api_url = api_url self.timeout = timeout - def xlsx(self, pdf_path, xlsx_path): + def xlsx(self, pdf_path, xlsx_path=None): """ Convenience method to convert PDF to XLSX multiple sheets. + + If xlsx_path is None, returns the output as a byte string. """ return self.xlsx_multiple(pdf_path, xlsx_path) - def xlsx_single(self, pdf_path, xlsx_path): + def xlsx_single(self, pdf_path, xlsx_path=None): """ Convenience method to convert PDF to XLSX single sheet. + + If xlsx_path is None, returns the output as a byte string. """ return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_SINGLE) - def xlsx_multiple(self, pdf_path, xlsx_path): + def xlsx_multiple(self, pdf_path, xlsx_path=None): """ Convenience method to convert PDF to XLSX multiple sheets. + + If xlsx_path is None, returns the output as a byte string. """ return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_MULTIPLE) - def xml(self, pdf_path, xml_path): + def xml(self, pdf_path, xml_path=None): """ Convenience method to convert PDF to XML. + + If xml_path is None, returns the output as a string. """ return self.convert(pdf_path, xml_path, out_format=FORMAT_XML) - def csv(self, pdf_path, csv_path): + def csv(self, pdf_path, csv_path=None): """ Convenience method to convert PDF to CSV. + + If csv_path is None, returns the output as a string. 
""" return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV) - def convert(self, pdf_path, out_path, out_format=None, query_params=None, **requests_params): + def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params): """ Convert PDF given by `pdf_path` into `format` at `out_path`. + + If `out_path` is None, returns a string containing the contents, or a + bytes object for binary output types (e.g., XLSX) """ (out_path, out_format) = Client.ensure_format_ext(out_path, out_format) with open(pdf_path, 'rb') as pdf_fo: response = self.request(pdf_fo, out_format, query_params, **requests_params) + + if out_path is None: + use_text = out_format in _STRING_FORMATS + return response.text if use_text else response.content + with open(out_path, 'wb') as out_fo: copyfileobj(response.raw, out_fo) diff --git a/test/test_pdftables_api.py b/test/test_pdftables_api.py index 512f07a..9348641 100644 --- a/test/test_pdftables_api.py +++ b/test/test_pdftables_api.py @@ -92,6 +92,32 @@ def test_successful_conversion(self): except OSError: pass + def test_successful_conversion_bytes(self): + with requests_mock.mock() as m: + m.post('https://pdftables.com/api?key=fake_key', content=b'xlsx output') + + with NamedTemporaryFile(suffix="test.pdf") as tf: + filename = tf.name + tf.write(b"Hello world") + tf.file.close() + + output = Client('fake_key').convert(filename) + + self.assertEqual(b'xlsx output', output) + + def test_successful_conversion_string(self): + with requests_mock.mock() as m: + m.post('https://pdftables.com/api?key=fake_key', text='csv output') + + with NamedTemporaryFile(suffix="test.pdf") as tf: + filename = tf.name + tf.write(b"Hello world") + tf.file.close() + + output = Client('fake_key').convert(filename, out_format="csv") + + self.assertEqual('csv output', output) + def test_different_api_url(self): with requests_mock.mock() as m: m.post('http://example.com/api?key=fake_key', text='xlsx output')