From 67a7114415fde06b687af3d15f09f8869e54e003 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Tue, 10 Jan 2017 13:47:48 -0600 Subject: [PATCH 01/21] add support for pulling data from google sheets --- docs/io.rst | 18 +++++ petl/io/__init__.py | 2 + petl/io/gsheet.py | 141 ++++++++++++++++++++++++++++++++++++ petl/test/io/test_gsheet.py | 116 +++++++++++++++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 petl/io/gsheet.py create mode 100644 petl/test/io/test_gsheet.py diff --git a/docs/io.rst b/docs/io.rst index c5ffcfe4..6b2491cb 100644 --- a/docs/io.rst +++ b/docs/io.rst @@ -210,6 +210,24 @@ Supported File Formats .. module:: petl.io.xls .. _io_xls: +Google sheet files (gsheet) +---------------------------- + +.. note:: + + The following functions require `gspread + `_ to be installed, + e.g.:: + + $ pip install gspread + +.. autofunction:: petl.io.gsheet.fromgsheet +.. autofunction:: petl.io.gsheet.togsheet + + +.. module:: petl.io.gsheet +.. _io_gsheet: + Excel .xls files (xlrd/xlwt) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/petl/io/__init__.py b/petl/io/__init__.py index 630735eb..cd50e710 100644 --- a/petl/io/__init__.py +++ b/petl/io/__init__.py @@ -43,3 +43,5 @@ from petl.io.remotes import RemoteSource from petl.io.remotes import SMBSource + +from petl.io.gsheet import fromgsheet, togsheet diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py new file mode 100644 index 00000000..b51382bd --- /dev/null +++ b/petl/io/gsheet.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function, division + + +from petl.util.base import Table + + +def fromgsheet(filename, credentials, filekey=False, sheet=None, + range_string=None): + """ + Extract a table from a google spreadsheet. + + The `filename` can either be the key of the spreadsheet or its name. If the + filename is a key, set `filekey` to True. + + Credentials are used to authenticate with the google apis. 
+ For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html + + Set `filekey` to `True` if accessing the sheet from a key rather than name. + + N.B., the sheet name is case sensitive. + + The `sheet` argument can be omitted, in which case the first sheet in + the workbook is used by default. + + The `range_string` argument can be used to provide a range string + specifying a range of cells to extract. (i.e. 'A1:C7'). + + Example usage follows:: + >>> import petl as etl + >>> from oauth2client.service_account import ServiceAccountCredentials + >>> scope = ['https://spreadsheets.google.com/feeds'] + >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) + >>> tbl = etl.fromgsheet('example', credentials) + + This module relies heavily on the work by @burnash and his great gspread + module: http://gspread.readthedocs.io/en/latest/index.html + + + """ + + return GoogleSheetView(filename, credentials, filekey=filekey, sheet=sheet, + range_string=range_string) + + +class GoogleSheetView(Table): + """This module resembles XLSXView, as both use abstracted modules.""" + + def __init__(self, filename, credentials, filekey, sheet, range_string): + self.filename = filename + self.credentials = credentials + self.filekey = filekey + self.sheet = sheet + self.range_string = range_string + + def __iter__(self): + import gspread + gspread_client = gspread.authorize(self.credentials) + # @TODO Find a cleaner way to differentiate between the two + if self.filekey: + wb = gspread_client.open_by_key(self.filename) + else: + wb = gspread_client.open(self.filename) + + # Allow for user to specify no sheet, sheet index or sheet name + if self.sheet is None: + ws = wb.sheet1 + elif isinstance(self.sheet, int): + ws = wb.get_worksheet(self.sheet) + else: + ws = wb.worksheet(str(self.sheet)) + + # grab the range or grab the whole sheet + if self.range_string: + start, end = self.range_string.split(':') + start_row, start_col = 
gspread.utils.a1_to_rowcol(start) + end_row, end_col = gspread.utils.a1_to_rowcol(end) + print(start_col, end_col, start_row, end_row) + for i, row in enumerate(ws.get_all_values(), start=1): + if i in range(start_row, end_row + 1): + machine_start_col = start_col - 1 + yield tuple(row[machine_start_col:end_col]) + else: + # This function returns the value of each cell + for row in ws.get_all_values(): + yield tuple(row) + + +def togsheet(tbl, filename, credentials, sheet=None, user_email=None): + """ + Write a table to a new google sheet. + + filename will be the title of the sheet when uploaded to google sheets. + + credentials are the credentials used to authenticate with the google apis. + For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html + + If user_email is entered, that will be the account that the sheet will be + shared to automatically upon creation with write privileges. + + If sheet is specified, the first sheet in the spreadsheet will be renamed + to sheet. 
+ + Note: necessary scope for using togsheet is: + 'https://spreadsheets.google.com/feeds' + 'https://www.googleapis.com/auth/drive' + + Example usage:: + >>> import petl as etl + >>> from oauth2client.service_account import ServiceAccountCredentials + >>> scope = ['https://spreadsheets.google.com/feeds', + 'https://www.googleapis.com/auth/drive'] + >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) + >>> cols = [[0, 1, 2], ['a', 'b', 'c']] + >>> tbl = etl.fromcolumns(cols) + >>> etl.togsheet(tbl, 'example', credentials) + """ + + import gspread + gspread_client = gspread.authorize(credentials) + spread = gspread_client.create(filename) + rows = len(tbl) + # even in a blank table, the header row will be an empty tuple + cols = len(tbl[0]) + # output to first sheet + worksheet = spread.sheet1 + # match row and column length + worksheet.resize(rows=rows, cols=cols) + # rename sheet if set + if sheet: + worksheet.update_title(title=sheet) + # enumerate from 1 instead of from 0 (compat. with p2.6+) + for x, row in enumerate(tbl, start=1): + for y, val in enumerate(row, start=1): + worksheet.update_cell(x, y, val) + # specify the user account to share to + if user_email: + spread.share(user_email, perm_type='user', role='writer') + + +Table.togsheet = togsheet diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py new file mode 100644 index 00000000..9e664f73 --- /dev/null +++ b/petl/test/io/test_gsheet.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function, division + + +import petl as etl +from petl.io.gsheet import fromgsheet, togsheet +from petl.test.helpers import ieq + +""" +In order to run these tests, follow the steps described at +http://gspread.readthedocs.io/en/latest/oauth2.html to create a json +authorization file. Change `JSON_PATH` to point to the created local file. 
+Afterwards, create a spreadsheet modeled after this: +https://docs.google.com/spreadsheets/d/12oFimWB81Jk7dzjdnH8WiYnSo4rl6Xe1xdOadbvAsJI/edit#gid=0 +and share it with the service_account specified in the JSON file. +""" + +SCOPE = [ + 'https://spreadsheets.google.com/feeds', + 'https://www.googleapis.com/auth/drive.file' +] +JSON_PATH = 'path/to/your/auth/json/here.json' + +try: + # noinspection PyUnresolvedReferences + import gspread + from oauth2client.service_account import ServiceAccountCredentials as sac +except ImportError as e: + print('SKIP gsheet tests: %s' % e, file=sys.stderr) +else: + + def test_fromgsheet(): + filename = 'test' + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = fromgsheet(filename, credentials, sheet='Sheet1') + expect = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + ieq(expect, tbl) + + def test_fromgsheet_int(): + filename = 'test' + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = fromgsheet(filename, credentials, sheet=0) + expect = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + ieq(expect, tbl) + + def test_fromgsheet_key(): + filename = '12oFimWB81Jk7dzjdnH8WiYnSo4rl6Xe1xdOadbvAsJI' + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = fromgsheet(filename, credentials, sheet='Sheet1', filekey=True) + expect = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + ieq(expect, tbl) + + def test_fromgsheet_nosheet(): + filename = 'test' + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = fromgsheet(filename, credentials) + expect = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + ieq(expect, tbl) + + def test_fromgsheet_range(): + filename = 'test' + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = fromgsheet(filename, credentials, sheet='Sheet2', + range_string='B2:C6') + expect = (('foo', 'bar'), + ('A', '1'), + 
('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + ieq(expect, tbl) + + def test_togsheet(): + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + filename = 'test_togsheet' + togsheet(tbl, filename, credentials, sheet='Sheet1') + actual = fromgsheet(filename, credentials, sheet='Sheet1') + ieq(tbl, actual) + # clean up created table + client = gspread.authorize(credentials) + client.del_spreadsheet(client.open(filename).id) + + def test_togsheet_nosheet(): + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + tbl = (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')) + filename = 'test_togsheet_nosheet' + togsheet(tbl, filename, credentials) + actual = fromgsheet(filename, credentials) + ieq(tbl, actual) + # clean up created table + client = gspread.authorize(credentials) + client.del_spreadsheet(client.open(filename).id) From d88e005a034e0a781e966e9feb99926fdd00c738 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Tue, 10 Jan 2017 15:08:44 -0600 Subject: [PATCH 02/21] make gsheet json environ config+ add sys and os dependencies --- petl/test/io/test_gsheet.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 9e664f73..5627b9b1 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division +import sys +import os import petl as etl from petl.io.gsheet import fromgsheet, togsheet @@ -9,8 +11,9 @@ """ In order to run these tests, follow the steps described at http://gspread.readthedocs.io/en/latest/oauth2.html to create a json -authorization file. Change `JSON_PATH` to point to the created local file. -Afterwards, create a spreadsheet modeled after this: +authorization file. 
Point `JSON_PATH` to local file or put the path in the +env variable at `GSHEET_JSON_PATH`. +Afterwards, create a spreadsheet modeled on: https://docs.google.com/spreadsheets/d/12oFimWB81Jk7dzjdnH8WiYnSo4rl6Xe1xdOadbvAsJI/edit#gid=0 and share it with the service_account specified in the JSON file. """ @@ -19,7 +22,8 @@ 'https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive.file' ] -JSON_PATH = 'path/to/your/auth/json/here.json' + +JSON_PATH = os.getenv("GSHEET_JSON_PATH", 'default/fallback.json') try: # noinspection PyUnresolvedReferences From 9eff3a0bb9caa7ffdfa8c0d12f39cd45b71c0757 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Wed, 11 Jan 2017 16:40:56 -0600 Subject: [PATCH 03/21] make tests easily extendable, get rid of user requirement, rename module objects for clarity --- petl/io/gsheet.py | 105 ++++++++++++--------- petl/test/io/test_gsheet.py | 180 ++++++++++++++++++++---------------- 2 files changed, 158 insertions(+), 127 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index b51382bd..7b7750c4 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -5,26 +5,29 @@ from petl.util.base import Table -def fromgsheet(filename, credentials, filekey=False, sheet=None, +def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, range_string=None): """ Extract a table from a google spreadsheet. - The `filename` can either be the key of the spreadsheet or its name. If the - filename is a key, set `filekey` to True. + The `filename` can either be the key of the spreadsheet or its name. + If you want to force the module to see it as a name, set `forcename=True`. + NOTE: Only the top level of google drive will be searched for the filename + due to API limitations. Credentials are used to authenticate with the google apis. For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html - Set `filekey` to `True` if accessing the sheet from a key rather than name. 
+ Set `forcename` to `True` in order to treat filename as a name - N.B., the sheet name is case sensitive. + N.B., the worksheet name is case sensitive. - The `sheet` argument can be omitted, in which case the first sheet in - the workbook is used by default. + The `worksheet_title` argument can be omitted, in which case the first + sheet in the workbook is used by default. The `range_string` argument can be used to provide a range string - specifying a range of cells to extract. (i.e. 'A1:C7'). + specifying the top left and bottom right corners of a set of cells to + extract. (i.e. 'A1:C7'). Example usage follows:: >>> import petl as etl @@ -32,74 +35,86 @@ def fromgsheet(filename, credentials, filekey=False, sheet=None, >>> scope = ['https://spreadsheets.google.com/feeds'] >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) >>> tbl = etl.fromgsheet('example', credentials) + or + >>> tbl = etl.fromgsheet('9zDNETemfau0uY8ZJF0YzXEPB_5GQ75JV', credentials) This module relies heavily on the work by @burnash and his great gspread module: http://gspread.readthedocs.io/en/latest/index.html - """ - return GoogleSheetView(filename, credentials, filekey=filekey, sheet=sheet, + return GoogleSheetView(filename, + credentials, + forcename=forcename, + worksheet_title=worksheet_title, range_string=range_string) class GoogleSheetView(Table): - """This module resembles XLSXView, as both use abstracted modules.""" + """This module resembles XLSXView.""" - def __init__(self, filename, credentials, filekey, sheet, range_string): + def __init__(self, filename, credentials, forcename, worksheet_title, + range_string): self.filename = filename self.credentials = credentials - self.filekey = filekey - self.sheet = sheet + self.forcename = forcename + self.worksheet_title = worksheet_title self.range_string = range_string def __iter__(self): import gspread gspread_client = gspread.authorize(self.credentials) - # @TODO Find a cleaner way 
to differentiate between the two - if self.filekey: - wb = gspread_client.open_by_key(self.filename) - else: + if self.forcename: wb = gspread_client.open(self.filename) + else: + try: + wb = gspread_client.open_by_key(self.filename) + except gspread.exceptions.SpreadsheetNotFound: + wb = gspread_client.open(self.filename) # Allow for user to specify no sheet, sheet index or sheet name - if self.sheet is None: + if self.worksheet_title is None: ws = wb.sheet1 - elif isinstance(self.sheet, int): - ws = wb.get_worksheet(self.sheet) + elif isinstance(self.worksheet_title, int): + ws = wb.get_worksheet(self.worksheet_title) else: - ws = wb.worksheet(str(self.sheet)) + ws = wb.worksheet(str(self.worksheet_title)) # grab the range or grab the whole sheet if self.range_string: - start, end = self.range_string.split(':') - start_row, start_col = gspread.utils.a1_to_rowcol(start) - end_row, end_col = gspread.utils.a1_to_rowcol(end) - print(start_col, end_col, start_row, end_row) + # start_cell -> top left, end_cell -> bottom right + start_cell, end_cell = self.range_string.split(':') + start_row, start_col = gspread.utils.a1_to_rowcol(start_cell) + end_row, end_col = gspread.utils.a1_to_rowcol(end_cell) + # gspread starts its indices at 1 for i, row in enumerate(ws.get_all_values(), start=1): if i in range(start_row, end_row + 1): - machine_start_col = start_col - 1 - yield tuple(row[machine_start_col:end_col]) + start_col_index = start_col - 1 + yield tuple(row[start_col_index:end_col]) else: - # This function returns the value of each cell + # no range specified, so return all the rows for row in ws.get_all_values(): yield tuple(row) -def togsheet(tbl, filename, credentials, sheet=None, user_email=None): +def togsheet(tbl, filename, credentials, worksheet_title=None, + share_emails=[], role='writer'): """ Write a table to a new google sheet. filename will be the title of the sheet when uploaded to google sheets. 
- credentials are the credentials used to authenticate with the google apis. - For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html + credentials are used to authenticate with the google apis. + For more info, visit: http://gspread.readthedocs.io/en/latest/oauth2.html + + If worksheet_title is specified, the first worksheet in the spreadsheet + will be renamed to the value of worksheet_title. - If user_email is entered, that will be the account that the sheet will be - shared to automatically upon creation with write privileges. + The spreadsheet will be shared with all emails in `share_emails` with + `role` permissions granted. - If sheet is specified, the first sheet in the spreadsheet will be renamed - to sheet. + set the permissions of all `share_emails` as `role`. + For more info, visit: https://developers.google.com/drive/v3/web/manage-sharing Note: necessary scope for using togsheet is: 'https://spreadsheets.google.com/feeds' @@ -118,24 +133,24 @@ def togsheet(tbl, filename, credentials, sheet=None, user_email=None): import gspread gspread_client = gspread.authorize(credentials) - spread = gspread_client.create(filename) + spreadsheet = gspread_client.create(filename) rows = len(tbl) - # even in a blank table, the header row will be an empty tuple - cols = len(tbl[0]) + # get the max length and add [0] to take care of empty iterables + cols = max([0] + [len(row) for row in tbl]) # output to first sheet - worksheet = spread.sheet1 + worksheet = spreadsheet.sheet1 # match row and column length worksheet.resize(rows=rows, cols=cols) # rename sheet if set - if sheet: - worksheet.update_title(title=sheet) - # enumerate from 1 instead of from 0 (compat. 
with p2.6+) + if worksheet_title: + worksheet.update_title(title=worksheet_title) + # gspread indices start at 1, therefore row/col index starts at 1 for x, row in enumerate(tbl, start=1): for y, val in enumerate(row, start=1): worksheet.update_cell(x, y, val) # specify the user account to share to - if user_email: - spread.share(user_email, perm_type='user', role='writer') + for user_email in share_emails: + spreadsheet.share(user_email, perm_type='user', role=role) Table.togsheet = togsheet diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 5627b9b1..527c93e3 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -3,6 +3,7 @@ import sys import os +import datetime import petl as etl from petl.io.gsheet import fromgsheet, togsheet @@ -13,9 +14,6 @@ http://gspread.readthedocs.io/en/latest/oauth2.html to create a json authorization file. Point `JSON_PATH` to local file or put the path in the env variable at `GSHEET_JSON_PATH`. -Afterwards, create a spreadsheet modeled on: -https://docs.google.com/spreadsheets/d/12oFimWB81Jk7dzjdnH8WiYnSo4rl6Xe1xdOadbvAsJI/edit#gid=0 -and share it with the service_account specified in the JSON file. 
""" SCOPE = [ @@ -29,92 +27,110 @@ # noinspection PyUnresolvedReferences import gspread from oauth2client.service_account import ServiceAccountCredentials as sac + import uuid except ImportError as e: print('SKIP gsheet tests: %s' % e, file=sys.stderr) else: + args = [ # straight copy test + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012')), + None, + None, + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012'))), - def test_fromgsheet(): - filename = 'test' - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = fromgsheet(filename, credentials, sheet='Sheet1') - expect = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - ieq(expect, tbl) + # Uneven row test + ((('foo', 'bar'), + ('A', '1'), + ('B', '2', '3'), + ('C', '2'), + (u'é', '1/1/2012')), + None, + None, + (('foo', 'bar', ''), + ('A', '1', ''), + ('B', '2', '3'), + ('C', '2', ''), + (u'é', '1/1/2012', ''))), - def test_fromgsheet_int(): - filename = 'test' - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = fromgsheet(filename, credentials, sheet=0) - expect = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - ieq(expect, tbl) + # datetime to string representation test + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + 'Sheet1', + None, + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '2012-01-01'))), - def test_fromgsheet_key(): - filename = '12oFimWB81Jk7dzjdnH8WiYnSo4rl6Xe1xdOadbvAsJI' - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = fromgsheet(filename, credentials, sheet='Sheet1', filekey=True) - expect = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - ieq(expect, tbl) + # empty table test + ((), + None, + None, + ()), - def test_fromgsheet_nosheet(): - filename = 'test' - credentials = sac.from_json_keyfile_name(JSON_PATH, 
SCOPE) - tbl = fromgsheet(filename, credentials) - expect = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - ieq(expect, tbl) + # range_string specified test + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + None, + 'B1:B4', + (('bar',), + ('1',), + ('2',), + ('2',))), - def test_fromgsheet_range(): - filename = 'test' - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = fromgsheet(filename, credentials, sheet='Sheet2', - range_string='B2:C6') - expect = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - ieq(expect, tbl) + # range_string+sheet specified test + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + u'random_stuff-in+_名字', + 'B1:B4', + (('bar',), + ('1',), + ('2',), + ('2',))) + ] + def test_gsheet(): + def test_tofromgsheet(table, worksheet, range_string, expected_result): + filename = 'test-{}'.format(str(uuid.uuid4())) + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - def test_togsheet(): - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')) - filename = 'test_togsheet' - togsheet(tbl, filename, credentials, sheet='Sheet1') - actual = fromgsheet(filename, credentials, sheet='Sheet1') - ieq(tbl, actual) - # clean up created table - client = gspread.authorize(credentials) - client.del_spreadsheet(client.open(filename).id) + # test to from gsheet + togsheet(table, filename, credentials, worksheet_title=worksheet) + result = fromgsheet(filename, + credentials, + worksheet_title=worksheet, + range_string=range_string) + # make sure the expected_result matches the result + ieq(result, expected_result) - def test_togsheet_nosheet(): - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - tbl = (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', 
'1/1/2012')) - filename = 'test_togsheet_nosheet' - togsheet(tbl, filename, credentials) - actual = fromgsheet(filename, credentials) - ieq(tbl, actual) - # clean up created table - client = gspread.authorize(credentials) - client.del_spreadsheet(client.open(filename).id) + # test open by key + client = gspread.authorize(credentials) + # get spreadsheet id (key) of previously created sheet + filekey = client.open(filename).id + key_result = fromgsheet(filekey, + credentials, + worksheet_title=worksheet, + range_string=range_string) + ieq(key_result, expected_result) + # clean up created table + client.del_spreadsheet(filekey) + + + for argset in args: + test_tofromgsheet(*argset) From 078fd02d567650a19c26b9a10c66da198dbd1d40 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Thu, 12 Jan 2017 11:04:10 -0600 Subject: [PATCH 04/21] yield test for each variant, fix tuple formatting --- petl/test/io/test_gsheet.py | 108 ++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 527c93e3..f65e7b94 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -39,72 +39,73 @@ (u'é', '1/1/2012')), None, None, - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012'))), + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '1/1/2012'))), # Uneven row test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2', '3'), - ('C', '2'), - (u'é', '1/1/2012')), - None, - None, - (('foo', 'bar', ''), - ('A', '1', ''), - ('B', '2', '3'), - ('C', '2', ''), - (u'é', '1/1/2012', ''))), + ((('foo', 'bar'), + ('A', '1'), + ('B', '2', '3'), + ('C', '2'), + (u'é', '1/1/2012')), + None, + None, + (('foo', 'bar', ''), + ('A', '1', ''), + ('B', '2', '3'), + ('C', '2', ''), + (u'é', '1/1/2012', ''))), # datetime to string representation test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - 
'Sheet1', - None, - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '2012-01-01'))), + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + 'Sheet1', + None, + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '2012-01-01'))), # empty table test - ((), - None, - None, - ()), + ((), + None, + None, + ()), # range_string specified test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), None, 'B1:B4', - (('bar',), - ('1',), - ('2',), - ('2',))), + (('bar',), + ('1',), + ('2',), + ('2',))), # range_string+sheet specified test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), u'random_stuff-in+_名字', 'B1:B4', - (('bar',), - ('1',), - ('2',), - ('2',))) + (('bar',), + ('1',), + ('2',), + ('2',))) ] + def test_gsheet(): def test_tofromgsheet(table, worksheet, range_string, expected_result): filename = 'test-{}'.format(str(uuid.uuid4())) @@ -132,5 +133,6 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): client.del_spreadsheet(filekey) - for argset in args: - test_tofromgsheet(*argset) + # yield a test for each tuple of arguments in order to display with nose + for arg_tuple in args: + yield (test_tofromgsheet, *arg_tuple) From 9782c00bfc93c675816bf2aa126e03a5087532ee Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Thu, 12 Jan 2017 11:55:30 -0600 Subject: [PATCH 05/21] make changes for cross version compatibility --- petl/io/gsheet.py | 4 +++- petl/test/io/test_gsheet.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index 7b7750c4..99936c83 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -3,6 +3,7 @@ from 
petl.util.base import Table +from petl.compat import text_type def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, @@ -78,7 +79,8 @@ def __iter__(self): elif isinstance(self.worksheet_title, int): ws = wb.get_worksheet(self.worksheet_title) else: - ws = wb.worksheet(str(self.worksheet_title)) + # use text_type for cross version compatibility + ws = wb.worksheet(text_type(self.worksheet_title)) # grab the range or grab the whole sheet if self.range_string: diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index f65e7b94..d6d17166 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -135,4 +135,9 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): # yield a test for each tuple of arguments in order to display with nose for arg_tuple in args: - yield (test_tofromgsheet, *arg_tuple) + table, worksheet, range_string, expected_result = arg_tuple + yield (test_tofromgsheet, + table, + worksheet, + range_string, + expected_result) From 02b00a83b063f945b256eae86c78ce5451baada3 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Mon, 16 Jan 2017 13:36:15 -0600 Subject: [PATCH 06/21] fix docstring issues --- petl/io/gsheet.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index 99936c83..d55762b4 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -16,10 +16,10 @@ def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, NOTE: Only the top level of google drive will be searched for the filename due to API limitations. - Credentials are used to authenticate with the google apis. + `credentials` are used to authenticate with the google apis. For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html - Set `forcename` to `True` in order to treat filename as a name + Set `forcename` to `True` in order to treat `filename` as a name N.B., the worksheet name is case sensitive. 
@@ -104,18 +104,16 @@ def togsheet(tbl, filename, credentials, worksheet_title=None, """ Write a table to a new google sheet. - filename will be the title of the sheet when uploaded to google sheets. + `filename` will be the title of the workbook when uploaded to google sheets. - credentials are used to authenticate with the google apis. + `credentials` are used to authenticate with the google apis. For more info, visit: http://gspread.readthedocs.io/en/latest/oauth2.html - If worksheet_title is specified, the first worksheet in the spreadsheet - will be renamed to the value of worksheet_title. + If `worksheet_title` is specified, the first worksheet in the spreadsheet + will be renamed to the value of `worksheet_title`. The spreadsheet will be shared with all emails in `share_emails` with `role` permissions granted. - - set the permissions of all `share_emails` as `role`. For more info, visit: https://developers.google.com/drive/v3/web/manage-sharing Note: necessary scope for using togsheet is: From 95e3502fc2c15a6f91369d4a1e7f2853ea42c2ea Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Tue, 17 Jan 2017 12:23:33 -0600 Subject: [PATCH 07/21] use for for better performance --- petl/io/gsheet.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index d55762b4..cbb5959f 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -134,20 +134,15 @@ def togsheet(tbl, filename, credentials, worksheet_title=None, import gspread gspread_client = gspread.authorize(credentials) spreadsheet = gspread_client.create(filename) - rows = len(tbl) - # get the max length and add [0] to take care of empty iterables - cols = max([0] + [len(row) for row in tbl]) - # output to first sheet worksheet = spreadsheet.sheet1 - # match row and column length - worksheet.resize(rows=rows, cols=cols) + # make smallest table possible + worksheet.resize(rows=1, cols=1) # rename sheet if set if worksheet_title: 
worksheet.update_title(title=worksheet_title) - # gspread indices start at 1, therefore row/col index starts at 1 - for x, row in enumerate(tbl, start=1): - for y, val in enumerate(row, start=1): - worksheet.update_cell(x, y, val) + # gspread indices start at 1, therefore row index insert starts at 1 + for index, row in enumerate(tbl, start=1): + worksheet.insert_row(row, index) # specify the user account to share to for user_email in share_emails: spreadsheet.share(user_email, perm_type='user', role=role) From 84425ef1183497aac923de86dfb864f0af2e93a4 Mon Sep 17 00:00:00 2001 From: Henry Rizzi Date: Tue, 24 Jan 2017 12:59:22 -0600 Subject: [PATCH 08/21] added in tests, function, and docs for appending to gsheet --- docs/io.rst | 1 + petl/io/__init__.py | 2 +- petl/io/gsheet.py | 33 +++++++++++++++++++ petl/test/io/test_gsheet.py | 64 ++++++++++++++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/docs/io.rst b/docs/io.rst index 6b2491cb..037552aa 100644 --- a/docs/io.rst +++ b/docs/io.rst @@ -223,6 +223,7 @@ Google sheet files (gsheet) .. autofunction:: petl.io.gsheet.fromgsheet .. autofunction:: petl.io.gsheet.togsheet +.. autofunction:: petl.io.gsheet.appendgsheet .. 
module:: petl.io.gsheet diff --git a/petl/io/__init__.py b/petl/io/__init__.py index cd50e710..09199dcf 100644 --- a/petl/io/__init__.py +++ b/petl/io/__init__.py @@ -44,4 +44,4 @@ from petl.io.remotes import SMBSource -from petl.io.gsheet import fromgsheet, togsheet +from petl.io.gsheet import fromgsheet, togsheet, appendgsheet diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index cbb5959f..7ead4801 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -148,4 +148,37 @@ def togsheet(tbl, filename, credentials, worksheet_title=None, spreadsheet.share(user_email, perm_type='user', role=role) +def appendgsheet(tbl, filename, credentials, worksheet_title="Sheet1"): + """ + Append a table to an existing google shoot at either a new worksheet + or the end of an existing worksheet + + `filename` is the name of the workbook to append to. + + `credentials` are used to authenticate with the google apis. + For more info, visit: http://gspread.readthedocs.io/en/latest/oauth2.html + + `worksheet_title` is the title of the worksheet to append to or create if + the worksheet does not exist. NOTE: sheet index cannot be used, and None is + not an option. 
+ """ + import gspread + gspread_client = gspread.authorize(credentials) + # be able to give filename or key for file + try: + wb = gspread_client.open_by_key(filename) + except gspread.exceptions.SpreadsheetNotFound: + wb = gspread_client.open(filename) + # check to see if worksheet_title exists, if so append, otherwise create + if worksheet_title in [worksheet.title for worksheet in wb.worksheets()]: + worksheet = wb.worksheet(text_type(worksheet_title)) + else: + worksheet = wb.add_worksheet(text_type(worksheet_title), 1, 1) + # efficiency loss, but get_all_values() will only return meaningful rows, + # therefore len(rows) + 1 gives the earliest open insert index + start_point = len(worksheet.get_all_values()) + 1 + for index, row in enumerate(tbl, start=start_point): + worksheet.insert_row(row, index) + + Table.togsheet = togsheet diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index d6d17166..7014002c 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -6,7 +6,7 @@ import datetime import petl as etl -from petl.io.gsheet import fromgsheet, togsheet +from petl.io.gsheet import fromgsheet, togsheet, appendgsheet from petl.test.helpers import ieq """ @@ -141,3 +141,65 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): worksheet, range_string, expected_result) + + append_args = [ + # appending to the first sheet + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + u'Sheet1', + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', '2012-01-01'), + ('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'))), + + # appending to a new sheet + ((('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'), + (u'é', datetime.date(2012,1,1))), + u'testing_time', + (('foo', 'bar'), + ('A', '1'), + ('B', '2'), + ('C', '2'))) + + ] + + + def test_appendgsheet(): + + def test_toappendfrom(table, append_worksheet, expected_result): + filename = 
'test-{}'.format(str(uuid.uuid4())) + credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) + + togsheet(table, filename, credentials) + appendgsheet(table[:-1], + filename, + credentials, + worksheet_title=append_worksheet) + result = fromgsheet(filename, + credentials, + worksheet_title=append_worksheet) + ieq(result, expected_result) + # get client to get information + client = gspread.authorize(credentials) + # get spreadsheet id (key) of previously created sheet + filekey = client.open(filename).id + # then delete the file + client.del_spreadsheet(filekey) + + for append_arg_tuple in append_args: + table, append_ws, expected_result = append_arg_tuple + yield (test_toappendfrom, + table, + append_ws, + expected_result) From 25287b74cd2c2b1105005190a30526dbab1b61bd Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Tue, 6 Aug 2019 22:16:17 +0100 Subject: [PATCH 09/21] minor style --- petl/io/gsheet.py | 8 +++++--- petl/test/io/test_gsheet.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index 7ead4801..f278644c 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -19,7 +19,7 @@ def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, `credentials` are used to authenticate with the google apis. For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html - Set `forcename` to `True` in order to treat `filename` as a name + Set `forcename` to `True` in order to treat `filename` as a name. N.B., the worksheet name is case sensitive. @@ -31,12 +31,12 @@ def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, extract. (i.e. 'A1:C7'). 
Example usage follows:: + >>> import petl as etl >>> from oauth2client.service_account import ServiceAccountCredentials >>> scope = ['https://spreadsheets.google.com/feeds'] >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) >>> tbl = etl.fromgsheet('example', credentials) - or >>> tbl = etl.fromgsheet('9zDNETemfau0uY8ZJF0YzXEPB_5GQ75JV', credentials) This module relies heavily on the work by @burnash and his great gspread @@ -121,6 +121,7 @@ def togsheet(tbl, filename, credentials, worksheet_title=None, 'https://www.googleapis.com/auth/drive' Example usage:: + >>> import petl as etl >>> from oauth2client.service_account import ServiceAccountCredentials >>> scope = ['https://spreadsheets.google.com/feeds', @@ -151,7 +152,7 @@ def togsheet(tbl, filename, credentials, worksheet_title=None, def appendgsheet(tbl, filename, credentials, worksheet_title="Sheet1"): """ Append a table to an existing google shoot at either a new worksheet - or the end of an existing worksheet + or the end of an existing worksheet. `filename` is the name of the workbook to append to. @@ -161,6 +162,7 @@ def appendgsheet(tbl, filename, credentials, worksheet_title="Sheet1"): `worksheet_title` is the title of the worksheet to append to or create if the worksheet does not exist. NOTE: sheet index cannot be used, and None is not an option. 
+ """ import gspread gspread_client = gspread.authorize(credentials) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 7014002c..f359d68c 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -183,9 +183,9 @@ def test_toappendfrom(table, append_worksheet, expected_result): togsheet(table, filename, credentials) appendgsheet(table[:-1], - filename, - credentials, - worksheet_title=append_worksheet) + filename, + credentials, + worksheet_title=append_worksheet) result = fromgsheet(filename, credentials, worksheet_title=append_worksheet) From f94a932630f7eccdbeefbf13e54844efc84232c8 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Tue, 8 Feb 2022 21:30:23 -0300 Subject: [PATCH 10/21] add package gspread to test from/togsheet --- requirements-formats.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-formats.txt b/requirements-formats.txt index c37c4fb1..c365dd95 100644 --- a/requirements-formats.txt +++ b/requirements-formats.txt @@ -11,3 +11,4 @@ xlrd>=2.0.1 xlwt>=1.3.0 fastavro>=0.24.2 ; python_version >= '3.4' fastavro==0.24.2 ; python_version < '3.0' +gspread>=3.4.0 ; python_version >= '3.4' From 7bac6ff0111f6b4057303bd3155c2d64abab9600 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Fri, 18 Feb 2022 10:56:16 -0300 Subject: [PATCH 11/21] gspread: migrated test_gsheet to pytest - step 1 --- petl/test/io/test_gsheet.py | 388 ++++++++++++++++++------------------ 1 file changed, 193 insertions(+), 195 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index f359d68c..2542d0e4 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -5,201 +5,199 @@ import os import datetime -import petl as etl +import pytest + from petl.io.gsheet import fromgsheet, togsheet, appendgsheet -from petl.test.helpers import ieq - -""" -In order to run these tests, follow the steps described at -http://gspread.readthedocs.io/en/latest/oauth2.html to create a json 
-authorization file. Point `JSON_PATH` to local file or put the path in the -env variable at `GSHEET_JSON_PATH`. -""" - -SCOPE = [ - 'https://spreadsheets.google.com/feeds', - 'https://www.googleapis.com/auth/drive.file' +from petl.test.helpers import ieq, get_env_vars_named + +gspread = pytest.importorskip("gspread") +uuid = pytest.importorskip("uuid") + + +def _has_gshet_credentials(): + if os.getenv("PETL_GCP_JSON_PATH", None) is not None: + return True + json_props = get_env_vars_named("PETL_GSPREAD_") + if json_props is not None: + return True + if os.path.isfile(os.path.expanduser("~/.config/gspread/service_account.json")): + return True + return False + + +if not _has_gshet_credentials(): + pytest.skip("""SKIPPED. to/from gspread needs json credentials for testing. +In order to run google spreadsheet tests, follow the steps bellow: +1. Create a json authorization file, following the steps described at + http://gspread.readthedocs.io/en/latest/oauth2.html, and save to a local path +2. Point the envvar `PETL_GSPREAD_JSON_PATH` to the json authorization file path +2. Or fill the properties inside the json authorization file in envrionment + variables named with prefix PETL_GSPREAD_: PETL_GSPREAD_project_id=petl +3. 
Or else save the file in one of the following paths: + unix: ~/.config/gspread/service_account.json + windows: %APPDATA%\gspread\service_account.json +""") + + +@pytest.fixture(autouse=True, scope="module") +def credentials(): + json_path = os.getenv("PETL_GCP_JSON_PATH", None) + if json_path is not None: + creds_from_file = gspread.service_account(filename=json_path) + return creds_from_file + json_props = get_env_vars_named("PETL_GSPREAD_") + if json_props is not None: + creds_from_env = gspread.service_account_from_dict(json_props) + return creds_from_env + default_path = os.path.expanduser("~/.config/gspread/service_account.json") + if os.path.isfile(default_path): + # gc = gspread.service_account() + gc = gspread.service_account_from_dict(default_path) + return gc + return None + + +TEST1 = [ + # straight copy test + ( + (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "1/1/2012")), + None, + None, + (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "1/1/2012")), + ), + # Uneven row test + ( + (("foo", "bar"), ("A", "1"), ("B", "2", "3"), ("C", "2"), ("é", "1/1/2012")), + None, + None, + ( + ("foo", "bar", ""), + ("A", "1", ""), + ("B", "2", "3"), + ("C", "2", ""), + ("é", "1/1/2012", ""), + ), + ), + # datetime to string representation test + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)), + ), + "Sheet1", + None, + (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "2012-01-01")), + ), + # empty table test + ((), None, None, ()), + # range_string specified test + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)), + ), + None, + "B1:B4", + (("bar",), ("1",), ("2",), ("2",)), + ), + # range_string+sheet specified test + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)), + ), + "random_stuff-in+_名字", + "B1:B4", + (("bar",), ("1",), ("2",), ("2",)), + ), ] -JSON_PATH = 
os.getenv("GSHEET_JSON_PATH", 'default/fallback.json') - -try: - # noinspection PyUnresolvedReferences - import gspread - from oauth2client.service_account import ServiceAccountCredentials as sac - import uuid -except ImportError as e: - print('SKIP gsheet tests: %s' % e, file=sys.stderr) -else: - args = [ # straight copy test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012')), - None, - None, - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '1/1/2012'))), - - # Uneven row test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2', '3'), - ('C', '2'), - (u'é', '1/1/2012')), - None, - None, - (('foo', 'bar', ''), - ('A', '1', ''), - ('B', '2', '3'), - ('C', '2', ''), - (u'é', '1/1/2012', ''))), - - # datetime to string representation test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - 'Sheet1', - None, - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '2012-01-01'))), - - # empty table test - ((), - None, - None, - ()), - - # range_string specified test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - None, - 'B1:B4', - (('bar',), - ('1',), - ('2',), - ('2',))), - - # range_string+sheet specified test - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - u'random_stuff-in+_名字', - 'B1:B4', - (('bar',), - ('1',), - ('2',), - ('2',))) - ] - - def test_gsheet(): - def test_tofromgsheet(table, worksheet, range_string, expected_result): - filename = 'test-{}'.format(str(uuid.uuid4())) - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - - # test to from gsheet - togsheet(table, filename, credentials, worksheet_title=worksheet) - result = fromgsheet(filename, - credentials, - worksheet_title=worksheet, - range_string=range_string) - # make sure the expected_result matches the result - ieq(result, expected_result) - - # test open by key - client = 
gspread.authorize(credentials) - # get spreadsheet id (key) of previously created sheet - filekey = client.open(filename).id - key_result = fromgsheet(filekey, - credentials, - worksheet_title=worksheet, - range_string=range_string) - ieq(key_result, expected_result) - # clean up created table - client.del_spreadsheet(filekey) - - - # yield a test for each tuple of arguments in order to display with nose - for arg_tuple in args: - table, worksheet, range_string, expected_result = arg_tuple - yield (test_tofromgsheet, - table, - worksheet, - range_string, - expected_result) - - append_args = [ - # appending to the first sheet - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - u'Sheet1', - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', '2012-01-01'), - ('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'))), - - # appending to a new sheet - ((('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'), - (u'é', datetime.date(2012,1,1))), - u'testing_time', - (('foo', 'bar'), - ('A', '1'), - ('B', '2'), - ('C', '2'))) - - ] - - - def test_appendgsheet(): - - def test_toappendfrom(table, append_worksheet, expected_result): - filename = 'test-{}'.format(str(uuid.uuid4())) - credentials = sac.from_json_keyfile_name(JSON_PATH, SCOPE) - - togsheet(table, filename, credentials) - appendgsheet(table[:-1], - filename, - credentials, - worksheet_title=append_worksheet) - result = fromgsheet(filename, - credentials, - worksheet_title=append_worksheet) - ieq(result, expected_result) - # get client to get information - client = gspread.authorize(credentials) - # get spreadsheet id (key) of previously created sheet - filekey = client.open(filename).id - # then delete the file - client.del_spreadsheet(filekey) - - for append_arg_tuple in append_args: - table, append_ws, expected_result = append_arg_tuple - yield (test_toappendfrom, - table, - append_ws, - expected_result) + +def test_tofromgsheet1(): + t1 = 
TEST1[0] + test_tofromgsheet(t1[0], t1[2], t1[2], t1[3]) + + +@pytest.mark.parametrize("table,worksheet,range_string,expected_result", TEST1) +def test_tofromgsheet(table, worksheet, range_string, expected_result): + filename = "test-{}".format(str(uuid.uuid4())) + # test to from gsheet + togsheet(table, filename, credentials, worksheet_title=worksheet) + result = fromgsheet( + filename, credentials, worksheet_title=worksheet, range_string=range_string + ) + # make sure the expected_result matches the result + ieq(result, expected_result) + + # test open by key + client = gspread.authorize(credentials) + # get spreadsheet id (key) of previously created sheet + filekey = client.open(filename).id + key_result = fromgsheet( + filekey, credentials, worksheet_title=worksheet, range_string=range_string + ) + ieq(key_result, expected_result) + # clean up created table + client.del_spreadsheet(filekey) + + +TEST2 = [ + # appending to the first sheet + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)), + ), + "Sheet1", + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", "2012-01-01"), + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ), + ), + # appending to a new sheet + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)), + ), + "testing_time", + (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2")), + ), +] + + +@pytest.mark.parametrize("table,append_worksheet,expected_result", TEST2) +def test_toappendfrom(table, append_worksheet, expected_result): + filename = "test-{}".format(str(uuid.uuid4())) + togsheet(table, filename, credentials) + appendgsheet(table[:-1], filename, credentials, worksheet_title=append_worksheet) + result = fromgsheet(filename, credentials, worksheet_title=append_worksheet) + ieq(result, expected_result) + # get client to get information + client = gspread.authorize(credentials) + # get spreadsheet id (key) of 
previously created sheet + filekey = client.open(filename).id + # then delete the file + client.del_spreadsheet(filekey) From ac8ab454255e06974ca7bdb5254ad197d4237b85 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 01:58:13 -0300 Subject: [PATCH 12/21] gsheet: refactored to work with recent gspread --- petl/io/gsheet.py | 277 ++++++++++++++++++++++++++-------------------- 1 file changed, 160 insertions(+), 117 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index f278644c..0739303f 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -1,186 +1,229 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division - from petl.util.base import Table from petl.compat import text_type +from petl.errors import ArgumentError as PetlArgError + + +def _get_gspread_client(auth_info): + import gspread + if isinstance(auth_info, gspread.Client): + return auth_info + if isinstance(auth_info, dict): + gd = gspread.service_account_from_dict(auth_info) + return gd + import google + + if isinstance(auth_info, google.oauth2.service_account.Credentials): + gc = gspread.authorize(auth_info) + return gc + if auth_info is None: + ga = gspread.service_account() + return ga + raise PetlArgError("gspread: Invalid account credentials") + + +def _open_spreadsheet(gspread_client, spreadsheet, open_by_key=False): + if open_by_key: + from gspread.exceptions import SpreadsheetNotFound + try: + wb = gspread_client.open_by_key(spreadsheet) + except SpreadsheetNotFound: + wb = gspread_client.open(spreadsheet) + elif spreadsheet is not None: + wb = gspread_client.open(spreadsheet) + else: + raise PetlArgError("gspread requires argument spreadsheet") + return wb + + +def _select_worksheet(wb, worksheet): + # Allow for user to specify no sheet, sheet index or sheet name + if worksheet is None: + ws = wb.sheet1 + elif isinstance(worksheet, int): + ws = wb.get_worksheet(worksheet) + else: + # use text_type for cross version 
compatibility + ws = wb.worksheet(text_type(worksheet)) + return ws -def fromgsheet(filename, credentials, forcename=False, worksheet_title=None, - range_string=None): + +def fromgsheet( + credentials_or_client, spreadsheet, open_by_key=False, + worksheet=None, cell_range=None +): """ Extract a table from a google spreadsheet. - The `filename` can either be the key of the spreadsheet or its name. - If you want to force the module to see it as a name, set `forcename=True`. - NOTE: Only the top level of google drive will be searched for the filename - due to API limitations. - - `credentials` are used to authenticate with the google apis. - For more info visit: http://gspread.readthedocs.io/en/latest/oauth2.html + The `credentials_or_client` are used to authenticate with the google apis. + For more info, check `authentication`_. - Set `forcename` to `True` in order to treat `filename` as a name. + The `spreadsheet` can either be the key of the spreadsheet or its name. - N.B., the worksheet name is case sensitive. + Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key. - The `worksheet_title` argument can be omitted, in which case the first + The `worksheet` argument can be omitted, in which case the first sheet in the workbook is used by default. - The `range_string` argument can be used to provide a range string + The `cell_range` argument can be used to provide a range string specifying the top left and bottom right corners of a set of cells to extract. (i.e. 'A1:C7'). + .. note:: + - Only the top level of google drive will be searched for the + spreadsheet filename due to API limitations. + - The worksheet name is case sensitive. 
+ Example usage follows:: - >>> import petl as etl - >>> from oauth2client.service_account import ServiceAccountCredentials - >>> scope = ['https://spreadsheets.google.com/feeds'] - >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) - >>> tbl = etl.fromgsheet('example', credentials) - >>> tbl = etl.fromgsheet('9zDNETemfau0uY8ZJF0YzXEPB_5GQ75JV', credentials) + >>> from petl import fromgsheet + >>> import gspread # doctest: +SKIP + >>> client = gspread.service_account() # doctest: +SKIP + >>> tbl1 = fromgsheet(client, 'example_spreadsheet', 'Sheet1') # doctest: +SKIP + >>> tbl2 = fromgsheet(client, '9zDNETemfau0uY8ZJF0YzXEPB_5GQ75JV', credentials) # doctest: +SKIP - This module relies heavily on the work by @burnash and his great gspread - module: http://gspread.readthedocs.io/en/latest/index.html + This functionality relies heavily on the work by @burnash and his great + `gspread module`_. + .. _gspread module: http://gspread.readthedocs.io/ + .. 
_authentication: http://gspread.readthedocs.io/en/latest/oauth2.html """ - return GoogleSheetView(filename, - credentials, - forcename=forcename, - worksheet_title=worksheet_title, - range_string=range_string) + return GoogleSheetView( + credentials_or_client, + spreadsheet, + open_by_key, + worksheet=worksheet, + cell_range=cell_range + ) class GoogleSheetView(Table): """This module resembles XLSXView.""" - def __init__(self, filename, credentials, forcename, worksheet_title, - range_string): - self.filename = filename - self.credentials = credentials - self.forcename = forcename - self.worksheet_title = worksheet_title - self.range_string = range_string + def __init__( + self, credentials_or_client, spreadsheet, open_by_key, worksheet, cell_range + ): + self.auth_info = credentials_or_client + self.spreadsheet = spreadsheet + self.open_by_key = open_by_key + self.worksheet = worksheet + self.cell_range = cell_range def __iter__(self): - import gspread - gspread_client = gspread.authorize(self.credentials) - if self.forcename: - wb = gspread_client.open(self.filename) - else: - try: - wb = gspread_client.open_by_key(self.filename) - except gspread.exceptions.SpreadsheetNotFound: - wb = gspread_client.open(self.filename) - - # Allow for user to specify no sheet, sheet index or sheet name - if self.worksheet_title is None: - ws = wb.sheet1 - elif isinstance(self.worksheet_title, int): - ws = wb.get_worksheet(self.worksheet_title) - else: - # use text_type for cross version compatibility - ws = wb.worksheet(text_type(self.worksheet_title)) - + gspread_client = _get_gspread_client(self.auth_info) + wb = _open_spreadsheet(gspread_client, self.spreadsheet, self.open_by_key) + ws = _select_worksheet(wb, self.worksheet) # grab the range or grab the whole sheet - if self.range_string: - # start_cell -> top left, end_cell -> bottom right - start_cell, end_cell = self.range_string.split(':') - start_row, start_col = gspread.utils.a1_to_rowcol(start_cell) - end_row, end_col = 
gspread.utils.a1_to_rowcol(end_cell) - # gspread starts its indices at 1 - for i, row in enumerate(ws.get_all_values(), start=1): - if i in range(start_row, end_row + 1): - start_col_index = start_col - 1 - yield tuple(row[start_col_index:end_col]) - else: - # no range specified, so return all the rows - for row in ws.get_all_values(): - yield tuple(row) - - -def togsheet(tbl, filename, credentials, worksheet_title=None, - share_emails=[], role='writer'): + if self.cell_range is not None: + return self._yield_by_range(ws) + return self._yield_all_rows(ws) + + def _yield_all_rows(self, ws): + # no range specified, so return all the rows + for row in ws.get_all_values(): + yield tuple(row) + + def _yield_by_range(self, ws): + # start_cell -> top left, end_cell -> bottom right + start_cell, end_cell = self.cell_range.split(":") + from gspread.utils import a1_to_rowcol + + start_row, start_col = a1_to_rowcol(start_cell) + end_row, end_col = a1_to_rowcol(end_cell) + # gspread starts its indices at 1 + for i, row in enumerate(ws.get_all_values(), start=1): + if i in range(start_row, end_row + 1): + start_col_index = start_col - 1 + yield tuple(row[start_col_index:end_col]) + + +def togsheet( + tbl, credentials_or_client, spreadsheet, + worksheet=None, share_emails=None, role="reader" +): """ Write a table to a new google sheet. - `filename` will be the title of the workbook when uploaded to google sheets. + The `credentials_or_client` are used to authenticate with the google apis. + For more info, check `authentication`_. - `credentials` are used to authenticate with the google apis. - For more info, visit: http://gspread.readthedocs.io/en/latest/oauth2.html + The `spreadsheet` will be the title of the workbook created google sheets. - If `worksheet_title` is specified, the first worksheet in the spreadsheet - will be renamed to the value of `worksheet_title`. + If `worksheet` is specified, the first worksheet in the spreadsheet + will be renamed to its value. 
The spreadsheet will be shared with all emails in `share_emails` with - `role` permissions granted. - For more info, visit: https://developers.google.com/drive/v3/web/manage-sharing + `role` permissions granted. For more info, check `sharing`_. - Note: necessary scope for using togsheet is: - 'https://spreadsheets.google.com/feeds' - 'https://www.googleapis.com/auth/drive' + .. _sharing: https://developers.google.com/drive/v3/web/manage-sharing + + .. note:: + The `gspread`_ package doesn't support serialization of `date` and + `datetime` types yet. Example usage:: - >>> import petl as etl - >>> from oauth2client.service_account import ServiceAccountCredentials - >>> scope = ['https://spreadsheets.google.com/feeds', - 'https://www.googleapis.com/auth/drive'] - >>> credentials = ServiceAccountCredentials.from_json_keyfile_name('path/to/credentials.json', scope) + >>> from petl import fromcolumns, togsheet + >>> import gspread # doctest: +SKIP + >>> client = gspread.service_account() # doctest: +SKIP >>> cols = [[0, 1, 2], ['a', 'b', 'c']] - >>> tbl = etl.fromcolumns(cols) - >>> etl.togsheet(tbl, 'example', credentials) + >>> tbl = fromcolumns(cols) + >>> togsheet(tbl, client, 'example_spreadsheet') # doctest: +SKIP """ - import gspread - gspread_client = gspread.authorize(credentials) - spreadsheet = gspread_client.create(filename) - worksheet = spreadsheet.sheet1 + gspread_client = _get_gspread_client(credentials_or_client) + wb = gspread_client.create(spreadsheet) + ws = wb.sheet1 # make smallest table possible - worksheet.resize(rows=1, cols=1) + ws.resize(rows=1, cols=1) # rename sheet if set - if worksheet_title: - worksheet.update_title(title=worksheet_title) + if worksheet: + ws.update_title(title=worksheet) # gspread indices start at 1, therefore row index insert starts at 1 for index, row in enumerate(tbl, start=1): - worksheet.insert_row(row, index) + ws.insert_row(row, index) # specify the user account to share to - for user_email in share_emails: - 
spreadsheet.share(user_email, perm_type='user', role=role) + if share_emails is not None: + for user_email in share_emails: + wb.share(user_email, perm_type="user", role=role) + return wb.id -def appendgsheet(tbl, filename, credentials, worksheet_title="Sheet1"): +def appendgsheet( + tbl, credentials_or_client, spreadsheet, open_by_key=False, worksheet="Sheet1" +): """ Append a table to an existing google shoot at either a new worksheet or the end of an existing worksheet. - `filename` is the name of the workbook to append to. + The `credentials_or_client` are used to authenticate with the google apis. + For more info, check `authentication`_. - `credentials` are used to authenticate with the google apis. - For more info, visit: http://gspread.readthedocs.io/en/latest/oauth2.html + The `spreadsheet` is the name of the workbook to append to. - `worksheet_title` is the title of the worksheet to append to or create if - the worksheet does not exist. NOTE: sheet index cannot be used, and None is - not an option. + The `worksheet` is the title of the worksheet to append to or create when it + does not exist yet. + .. note:: + The sheet index cannot be used, and None is not an option. 
""" - import gspread - gspread_client = gspread.authorize(credentials) + gspread_client = _get_gspread_client(credentials_or_client) # be able to give filename or key for file - try: - wb = gspread_client.open_by_key(filename) - except gspread.exceptions.SpreadsheetNotFound: - wb = gspread_client.open(filename) - # check to see if worksheet_title exists, if so append, otherwise create - if worksheet_title in [worksheet.title for worksheet in wb.worksheets()]: - worksheet = wb.worksheet(text_type(worksheet_title)) + wb = _open_spreadsheet(gspread_client, spreadsheet, open_by_key) + # check to see if worksheet exists, if so append, otherwise create + if worksheet in [wbs.title for wbs in wb.worksheets()]: + ws = wb.worksheet(text_type(worksheet)) else: - worksheet = wb.add_worksheet(text_type(worksheet_title), 1, 1) + ws = wb.add_worksheet(text_type(worksheet), 1, 1) # efficiency loss, but get_all_values() will only return meaningful rows, # therefore len(rows) + 1 gives the earliest open insert index - start_point = len(worksheet.get_all_values()) + 1 + start_point = len(ws.get_all_values()) + 1 for index, row in enumerate(tbl, start=start_point): - worksheet.insert_row(row, index) + ws.insert_row(row, index) Table.togsheet = togsheet From 0a99b33df10a7e975368d9b08c380a63f7c643be Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 01:59:50 -0300 Subject: [PATCH 13/21] pytest: gsheets test passing after refactoring --- petl/test/io/test_gsheet.py | 146 ++++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 2542d0e4..c56e0075 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -import sys import os -import datetime +import json import pytest +from petl.compat import text_type from petl.io.gsheet import fromgsheet, 
togsheet, appendgsheet from petl.test.helpers import ieq, get_env_vars_named @@ -14,18 +14,20 @@ uuid = pytest.importorskip("uuid") -def _has_gshet_credentials(): - if os.getenv("PETL_GCP_JSON_PATH", None) is not None: - return True +def _get_gspread_credentials(): + json_path = os.getenv("PETL_GCP_JSON_PATH", None) + if json_path is not None and os.path.isfile(json_path): + return json_path json_props = get_env_vars_named("PETL_GSPREAD_") if json_props is not None: - return True - if os.path.isfile(os.path.expanduser("~/.config/gspread/service_account.json")): - return True - return False + return json_props + user_path = os.path.expanduser("~/.config/gspread/service_account.json") + if os.path.isfile(user_path): + return user_path + return None -if not _has_gshet_credentials(): +if _get_gspread_credentials() is None: pytest.skip("""SKIPPED. to/from gspread needs json credentials for testing. In order to run google spreadsheet tests, follow the steps bellow: 1. Create a json authorization file, following the steps described at @@ -35,28 +37,43 @@ def _has_gshet_credentials(): variables named with prefix PETL_GSPREAD_: PETL_GSPREAD_project_id=petl 3. 
Or else save the file in one of the following paths: unix: ~/.config/gspread/service_account.json - windows: %APPDATA%\gspread\service_account.json + windows: %APPDATA%\\gspread\\service_account.json """) -@pytest.fixture(autouse=True, scope="module") -def credentials(): - json_path = os.getenv("PETL_GCP_JSON_PATH", None) - if json_path is not None: - creds_from_file = gspread.service_account(filename=json_path) - return creds_from_file - json_props = get_env_vars_named("PETL_GSPREAD_") - if json_props is not None: - creds_from_env = gspread.service_account_from_dict(json_props) - return creds_from_env - default_path = os.path.expanduser("~/.config/gspread/service_account.json") - if os.path.isfile(default_path): - # gc = gspread.service_account() - gc = gspread.service_account_from_dict(default_path) - return gc +def _load_creds_from_file(json_path): + with open(json_path, encoding="utf-8") as json_file: + creds = json.load(json_file) + return creds + + +def _get_env_credentials(): + creds = _get_gspread_credentials() + if isinstance(creds, dict): + return creds + if isinstance(creds, text_type): + props = _load_creds_from_file(creds) + return props return None +def _get_gspread_client(): + credentials = _get_env_credentials() + try: + if credentials is None: + gspread_client = gspread.service_account() + else: + gspread_client = gspread.service_account_from_dict(credentials) + except gspread.exceptions.APIError as ex: + pytest.skip("SKIPPED. 
to/from gspread authentication error: %s" % ex) + return None + return gspread_client + + +def _get_gspread_test_params(): + return "test-{}".format(str(uuid.uuid4())) + + TEST1 = [ # straight copy test ( @@ -85,7 +102,8 @@ def credentials(): ("A", "1"), ("B", "2"), ("C", "2"), - ("é", datetime.date(2012, 1, 1)), + # ("é", datetime.date(2012, 1, 1)), + ("é", "2012-01-01"), ), "Sheet1", None, @@ -93,27 +111,29 @@ def credentials(): ), # empty table test ((), None, None, ()), - # range_string specified test + # cell_range specified test ( ( ("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), - ("é", datetime.date(2012, 1, 1)), + # ("é", datetime.date(2012, 1, 1)), + ("é", "2012-01-01"), ), None, "B1:B4", (("bar",), ("1",), ("2",), ("2",)), ), - # range_string+sheet specified test + # cell_range+sheet specified test ( ( ("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), - ("é", datetime.date(2012, 1, 1)), + # ("é", datetime.date(2012, 1, 1)), + ("é", "2012-01-01"), ), "random_stuff-in+_名字", "B1:B4", @@ -122,35 +142,44 @@ def credentials(): ] -def test_tofromgsheet1(): - t1 = TEST1[0] - test_tofromgsheet(t1[0], t1[2], t1[2], t1[3]) - - -@pytest.mark.parametrize("table,worksheet,range_string,expected_result", TEST1) -def test_tofromgsheet(table, worksheet, range_string, expected_result): - filename = "test-{}".format(str(uuid.uuid4())) +@pytest.mark.parametrize("table,worksheet,cell_range,expected_result", TEST1) +def test_tofromgsheet(table, worksheet, cell_range, expected_result): + filename = _get_gspread_test_params() + gspread_client = _get_gspread_client() # test to from gsheet - togsheet(table, filename, credentials, worksheet_title=worksheet) + spread_id = togsheet(table, gspread_client, filename, title=worksheet) result = fromgsheet( - filename, credentials, worksheet_title=worksheet, range_string=range_string + gspread_client, filename, title=worksheet, cell_range=cell_range ) # make sure the expected_result matches the result ieq(result, expected_result) - # 
test open by key - client = gspread.authorize(credentials) - # get spreadsheet id (key) of previously created sheet - filekey = client.open(filename).id key_result = fromgsheet( - filekey, credentials, worksheet_title=worksheet, range_string=range_string + gspread_client, spread_id, open_by_key=True, title=worksheet, + cell_range=cell_range ) ieq(key_result, expected_result) # clean up created table - client.del_spreadsheet(filekey) + gspread_client.del_spreadsheet(spread_id) TEST2 = [ + # Simplest test + ( + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ), + "Sheet1", + ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("foo", "bar"), + ("A", "1"), + ), + ), # appending to the first sheet ( ( @@ -158,7 +187,8 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): ("A", "1"), ("B", "2"), ("C", "2"), - ("é", datetime.date(2012, 1, 1)), + # ("é", datetime.date(2012, 1, 1)), + ("é", "2012-01-01"), ), "Sheet1", ( @@ -180,7 +210,8 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): ("A", "1"), ("B", "2"), ("C", "2"), - ("é", datetime.date(2012, 1, 1)), + # ("é", datetime.date(2012, 1, 1)), + ("é", "2012-01-01"), ), "testing_time", (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2")), @@ -190,14 +221,13 @@ def test_tofromgsheet(table, worksheet, range_string, expected_result): @pytest.mark.parametrize("table,append_worksheet,expected_result", TEST2) def test_toappendfrom(table, append_worksheet, expected_result): - filename = "test-{}".format(str(uuid.uuid4())) - togsheet(table, filename, credentials) - appendgsheet(table[:-1], filename, credentials, worksheet_title=append_worksheet) - result = fromgsheet(filename, credentials, worksheet_title=append_worksheet) + filename = _get_gspread_test_params() + gspread_client = _get_gspread_client() + # test to append gsheet + spread_id = togsheet(table, gspread_client, filename) + table2 = table[:-1] + appendgsheet(table2, gspread_client, filename, title=append_worksheet) + result = 
fromgsheet(gspread_client, filename, title=append_worksheet) ieq(result, expected_result) - # get client to get information - client = gspread.authorize(credentials) - # get spreadsheet id (key) of previously created sheet - filekey = client.open(filename).id # then delete the file - client.del_spreadsheet(filekey) + gspread_client.del_spreadsheet(spread_id) From 00a1c25d3347d4007114c10ddb6bef0b4121ee15 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 02:08:23 -0300 Subject: [PATCH 14/21] docs: reorder gsheet and fix minor issues --- docs/io.rst | 54 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/io.rst b/docs/io.rst index 037552aa..ffac2485 100644 --- a/docs/io.rst +++ b/docs/io.rst @@ -210,25 +210,6 @@ Supported File Formats .. module:: petl.io.xls .. _io_xls: -Google sheet files (gsheet) ----------------------------- - -.. note:: - - The following functions require `gspread - `_ to be installed, - e.g.:: - - $ pip install gspread - -.. autofunction:: petl.io.gsheet.fromgsheet -.. autofunction:: petl.io.gsheet.togsheet -.. autofunction:: petl.io.gsheet.appendgsheet - - -.. module:: petl.io.gsheet -.. _io_gsheet: - Excel .xls files (xlrd/xlwt) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -409,6 +390,29 @@ Avro files (fastavro) :start-after: begin_complex_schema :end-before: end_complex_schema +.. module:: petl.io.gsheet +.. _io_gsheet: + +Google Sheets (gspread) +^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + + This is a experimental feature. API and behavior may change between releases + with some possible breaking changes. + +.. note:: + + The following functions require `gspread + `_ to be installed, + e.g.:: + + $ pip install gspread + +.. autofunction:: petl.io.gsheet.fromgsheet +.. autofunction:: petl.io.gsheet.togsheet +.. autofunction:: petl.io.gsheet.appendgsheet + .. module:: petl.io.db .. _io_db: @@ -418,7 +422,7 @@ Databases .. 
note:: For reading and writing to databases, the following functions require - `SQLAlchemy ` its the database specific driver + `SQLAlchemy ` and the database specific driver to be installed along petl, e.g.:: $ pip install sqlalchemy @@ -452,10 +456,14 @@ in the source path of the file. $ pip install fsspec -The supported filesystems with their URI formats can be found in: +The supported filesystems with their URI formats can be found in fsspec +documentation: -- fsspec `Built-in Implementations ` -- fsspec `Other Known Implementations ` +- `Built-in Implementations `__ +- `Other Known Implementations `__ + +Remote sources +^^^^^^^^^^^^^^ .. autoclass:: petl.io.remotes.RemoteSource .. autoclass:: petl.io.remotes.SMBSource From eb50ffb515b3025f2a626171d02982d0adac3cb5 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 02:30:44 -0300 Subject: [PATCH 15/21] gsheet: fix pytest.skip with allow_module_level --- petl/test/io/test_gsheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index c56e0075..36e87f99 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -38,7 +38,7 @@ def _get_gspread_credentials(): 3. 
Or else save the file in one of the following paths: unix: ~/.config/gspread/service_account.json windows: %APPDATA%\\gspread\\service_account.json -""") +""", allow_module_level=True) def _load_creds_from_file(json_path): From 493913617f1bb1da9ccdf55d7e562cdb13d136cb Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 23:40:53 -0300 Subject: [PATCH 16/21] gsheet: improve with pytest.mark.skipif --- petl/test/io/test_gsheet.py | 117 ++++++++++++++++++++++++------------ 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 36e87f99..a4244d3d 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division +import datetime import os import json +import time import pytest @@ -13,22 +15,25 @@ gspread = pytest.importorskip("gspread") uuid = pytest.importorskip("uuid") +# region helpers + def _get_gspread_credentials(): json_path = os.getenv("PETL_GCP_JSON_PATH", None) - if json_path is not None and os.path.isfile(json_path): + if json_path is not None and os.path.exists(json_path): return json_path json_props = get_env_vars_named("PETL_GSPREAD_") if json_props is not None: return json_props user_path = os.path.expanduser("~/.config/gspread/service_account.json") - if os.path.isfile(user_path): + if os.path.isfile(user_path) and os.path.exists(user_path): return user_path return None -if _get_gspread_credentials() is None: - pytest.skip("""SKIPPED. to/from gspread needs json credentials for testing. +found_gcp_credentials = pytest.mark.skipif( + _get_gspread_credentials() is None, + reason="""SKIPPED. to/from gspread needs json credentials for testing. In order to run google spreadsheet tests, follow the steps bellow: 1. 
Create a json authorization file, following the steps described at http://gspread.readthedocs.io/en/latest/oauth2.html, and save to a local path @@ -37,14 +42,8 @@ def _get_gspread_credentials(): variables named with prefix PETL_GSPREAD_: PETL_GSPREAD_project_id=petl 3. Or else save the file in one of the following paths: unix: ~/.config/gspread/service_account.json - windows: %APPDATA%\\gspread\\service_account.json -""", allow_module_level=True) - - -def _load_creds_from_file(json_path): - with open(json_path, encoding="utf-8") as json_file: - creds = json.load(json_file) - return creds + windows: %APPDATA%\\gspread\\service_account.json""" + ) def _get_env_credentials(): @@ -52,8 +51,9 @@ def _get_env_credentials(): if isinstance(creds, dict): return creds if isinstance(creds, text_type): - props = _load_creds_from_file(creds) - return props + with open(creds, encoding="utf-8") as json_file: + creds = json.load(json_file) + return creds return None @@ -74,6 +74,15 @@ def _get_gspread_test_params(): return "test-{}".format(str(uuid.uuid4())) +def teardown_function(): + # try to avoid: User rate limit exceeded. 
+ time.sleep(3) + + +# endregion + +# region test cases data + TEST1 = [ # straight copy test ( @@ -141,28 +150,6 @@ def _get_gspread_test_params(): ), ] - -@pytest.mark.parametrize("table,worksheet,cell_range,expected_result", TEST1) -def test_tofromgsheet(table, worksheet, cell_range, expected_result): - filename = _get_gspread_test_params() - gspread_client = _get_gspread_client() - # test to from gsheet - spread_id = togsheet(table, gspread_client, filename, title=worksheet) - result = fromgsheet( - gspread_client, filename, title=worksheet, cell_range=cell_range - ) - # make sure the expected_result matches the result - ieq(result, expected_result) - - key_result = fromgsheet( - gspread_client, spread_id, open_by_key=True, title=worksheet, - cell_range=cell_range - ) - ieq(key_result, expected_result) - # clean up created table - gspread_client.del_spreadsheet(spread_id) - - TEST2 = [ # Simplest test ( @@ -218,16 +205,66 @@ def test_tofromgsheet(table, worksheet, cell_range, expected_result): ), ] +# endregion -@pytest.mark.parametrize("table,append_worksheet,expected_result", TEST2) -def test_toappendfrom(table, append_worksheet, expected_result): +# region test cases execution + + +@found_gcp_credentials +@pytest.mark.parametrize("table,sheetname,cell_range,expected_result", TEST1) +def test_tofromgsheet(table, sheetname, cell_range, expected_result): + _test_tofromgsheet(table, sheetname, cell_range, expected_result) + + +@found_gcp_credentials +@pytest.mark.xfail( + raises=TypeError, + reason="When this stop failing, uncomment datetime.date in TEST1 and TEST2" + ) +def test_tofromgshee_fail_datetime(): + table_with_datetime = ( + ("foo", "bar"), + ("A", "1"), + ("B", "2"), + ("C", "2"), + ("é", datetime.date(2012, 1, 1)) + ) + _test_tofromgsheet(table_with_datetime, "Sheet1", None, table_with_datetime) + + +def _test_tofromgsheet(table, sheetname, cell_range, expected_result): + filename = _get_gspread_test_params() + gspread_client = _get_gspread_client() 
+ # test to from gsheet + spread_id = togsheet(table, gspread_client, filename, worksheet=sheetname) + result = fromgsheet( + gspread_client, filename, worksheet=sheetname, cell_range=cell_range + ) + # make sure the expected_result matches the result + ieq(result, expected_result) + + key_result = fromgsheet( + gspread_client, spread_id, open_by_key=True, worksheet=sheetname, + cell_range=cell_range + ) + ieq(key_result, expected_result) + # clean up created table + gspread_client.del_spreadsheet(spread_id) + + +@found_gcp_credentials +@pytest.mark.parametrize("table,sheetname,expected_result", TEST2) +def test_toappendfrom(table, sheetname, expected_result): filename = _get_gspread_test_params() gspread_client = _get_gspread_client() # test to append gsheet spread_id = togsheet(table, gspread_client, filename) table2 = table[:-1] - appendgsheet(table2, gspread_client, filename, title=append_worksheet) - result = fromgsheet(gspread_client, filename, title=append_worksheet) + appendgsheet(table2, gspread_client, filename, worksheet=sheetname) + result = fromgsheet(gspread_client, filename, worksheet=sheetname) ieq(result, expected_result) # then delete the file gspread_client.del_spreadsheet(spread_id) + + +# endregion From 7887357dadceb819ccc2fe37c816b34f422b9f4f Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 19 Feb 2022 23:41:07 -0300 Subject: [PATCH 17/21] gsheet: add some todos --- petl/io/gsheet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index 0739303f..1ca9e5e9 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -128,6 +128,7 @@ def _yield_all_rows(self, ws): yield tuple(row) def _yield_by_range(self, ws): + # TODO: try using: worksheet.get_values('A2:C10') # start_cell -> top left, end_cell -> bottom right start_cell, end_cell = self.cell_range.split(":") from gspread.utils import a1_to_rowcol @@ -184,6 +185,7 @@ def togsheet( if worksheet: ws.update_title(title=worksheet) # gspread indices 
start at 1, therefore row index insert starts at 1 + # TODO: batch insert with ws.insert_rows for index, row in enumerate(tbl, start=1): ws.insert_row(row, index) # specify the user account to share to From abed57da41bbbd967a150c8a822ed44597bb0703 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Thu, 10 Mar 2022 22:23:31 -0300 Subject: [PATCH 18/21] gsheet: use batch api to send rows --- petl/io/gsheet.py | 77 +++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index 1ca9e5e9..b10f4a67 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -39,21 +39,30 @@ def _open_spreadsheet(gspread_client, spreadsheet, open_by_key=False): return wb -def _select_worksheet(wb, worksheet): +def _select_worksheet(wb, worksheet, find_or_create=False): # Allow for user to specify no sheet, sheet index or sheet name if worksheet is None: ws = wb.sheet1 elif isinstance(worksheet, int): ws = wb.get_worksheet(worksheet) + elif isinstance(worksheet, text_type): + sheetname = text_type(worksheet) + if find_or_create: + if worksheet in [wbs.title for wbs in wb.worksheets()]: + ws = wb.worksheet(sheetname) + else: + ws = wb.add_worksheet(sheetname, 1, 1) + else: + # use text_type for cross version compatibility + ws = wb.worksheet(sheetname) else: - # use text_type for cross version compatibility - ws = wb.worksheet(text_type(worksheet)) + raise PetlArgError("Only can find worksheet by name or by number") return ws def fromgsheet( - credentials_or_client, spreadsheet, open_by_key=False, - worksheet=None, cell_range=None + credentials_or_client, spreadsheet, worksheet=None, cell_range=None, + open_by_key=False ): """ Extract a table from a google spreadsheet. 
@@ -95,23 +104,24 @@ def fromgsheet( return GoogleSheetView( credentials_or_client, spreadsheet, + worksheet, + cell_range, open_by_key, - worksheet=worksheet, - cell_range=cell_range ) class GoogleSheetView(Table): - """This module resembles XLSXView.""" + """Conects to a worksheet and iterates over its rows.""" def __init__( - self, credentials_or_client, spreadsheet, open_by_key, worksheet, cell_range + self, credentials_or_client, spreadsheet, worksheet, cell_range, + open_by_key ): self.auth_info = credentials_or_client self.spreadsheet = spreadsheet - self.open_by_key = open_by_key self.worksheet = worksheet self.cell_range = cell_range + self.open_by_key = open_by_key def __iter__(self): gspread_client = _get_gspread_client(self.auth_info) @@ -128,23 +138,14 @@ def _yield_all_rows(self, ws): yield tuple(row) def _yield_by_range(self, ws): - # TODO: try using: worksheet.get_values('A2:C10') - # start_cell -> top left, end_cell -> bottom right - start_cell, end_cell = self.cell_range.split(":") - from gspread.utils import a1_to_rowcol - - start_row, start_col = a1_to_rowcol(start_cell) - end_row, end_col = a1_to_rowcol(end_cell) - # gspread starts its indices at 1 - for i, row in enumerate(ws.get_all_values(), start=1): - if i in range(start_row, end_row + 1): - start_col_index = start_col - 1 - yield tuple(row[start_col_index:end_col]) + found = ws.get_values(self.cell_range) + for row in found: + yield tuple(row) def togsheet( - tbl, credentials_or_client, spreadsheet, - worksheet=None, share_emails=None, role="reader" + table, credentials_or_client, spreadsheet, worksheet=None, cell_range=None, + share_emails=None, role="reader" ): """ Write a table to a new google sheet. @@ -153,6 +154,7 @@ def togsheet( For more info, check `authentication`_. The `spreadsheet` will be the title of the workbook created google sheets. + If there is a spreadsheet with same title a new one will be created. 
If `worksheet` is specified, the first worksheet in the spreadsheet will be renamed to its value. @@ -160,6 +162,9 @@ def togsheet( The spreadsheet will be shared with all emails in `share_emails` with `role` permissions granted. For more info, check `sharing`_. + Returns: the spreadsheet key that can be used in `appendgsheet` further. + + .. _sharing: https://developers.google.com/drive/v3/web/manage-sharing .. note:: @@ -179,15 +184,12 @@ def togsheet( gspread_client = _get_gspread_client(credentials_or_client) wb = gspread_client.create(spreadsheet) ws = wb.sheet1 - # make smallest table possible - ws.resize(rows=1, cols=1) + ws.resize(rows=1, cols=1) # make smallest table possible # rename sheet if set - if worksheet: + if worksheet is not None: ws.update_title(title=worksheet) # gspread indices start at 1, therefore row index insert starts at 1 - # TODO: batch insert with ws.insert_rows - for index, row in enumerate(tbl, start=1): - ws.insert_row(row, index) + ws.append_rows(table, table_range=cell_range) # specify the user account to share to if share_emails is not None: for user_email in share_emails: @@ -196,7 +198,7 @@ def togsheet( def appendgsheet( - tbl, credentials_or_client, spreadsheet, open_by_key=False, worksheet="Sheet1" + table, credentials_or_client, spreadsheet, worksheet=None, open_by_key=False ): """ Append a table to an existing google shoot at either a new worksheet @@ -217,15 +219,10 @@ def appendgsheet( # be able to give filename or key for file wb = _open_spreadsheet(gspread_client, spreadsheet, open_by_key) # check to see if worksheet exists, if so append, otherwise create - if worksheet in [wbs.title for wbs in wb.worksheets()]: - ws = wb.worksheet(text_type(worksheet)) - else: - ws = wb.add_worksheet(text_type(worksheet), 1, 1) - # efficiency loss, but get_all_values() will only return meaningful rows, - # therefore len(rows) + 1 gives the earliest open insert index - start_point = len(ws.get_all_values()) + 1 - for index, row in 
enumerate(tbl, start=start_point): - ws.insert_row(row, index) + ws = _select_worksheet(wb, worksheet, True) + ws.append_rows(table) + return wb.id Table.togsheet = togsheet +Table.appendgsheet = appendgsheet From 276b38bb4c3d6181ed7454b56a125e84212c62ed Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Thu, 10 Mar 2022 22:25:23 -0300 Subject: [PATCH 19/21] gsheet: separate and simplify pytesting --- petl/test/io/test_gsheet.py | 326 ++++++++++++++++++------------------ 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index a4244d3d..69cffef1 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -22,7 +22,7 @@ def _get_gspread_credentials(): json_path = os.getenv("PETL_GCP_JSON_PATH", None) if json_path is not None and os.path.exists(json_path): return json_path - json_props = get_env_vars_named("PETL_GSPREAD_") + json_props = get_env_vars_named("PETL_GCP_CREDS_") if json_props is not None: return json_props user_path = os.path.expanduser("~/.config/gspread/service_account.json") @@ -37,9 +37,9 @@ def _get_gspread_credentials(): In order to run google spreadsheet tests, follow the steps bellow: 1. Create a json authorization file, following the steps described at http://gspread.readthedocs.io/en/latest/oauth2.html, and save to a local path -2. Point the envvar `PETL_GSPREAD_JSON_PATH` to the json authorization file path +2. Point the envvar `PETL_GCP_JSON_PATH` to the json authorization file path 2. Or fill the properties inside the json authorization file in envrionment - variables named with prefix PETL_GSPREAD_: PETL_GSPREAD_project_id=petl + variables named with prefix PETL_GCP_CREDS_: PETL_GCP_CREDS_project_id=petl 3. 
Or else save the file in one of the following paths: unix: ~/.config/gspread/service_account.json windows: %APPDATA%\\gspread\\service_account.json""" @@ -70,8 +70,61 @@ def _get_gspread_client(): return gspread_client +def _get_env_sharing_emails(): + emails = get_env_vars_named("PETL_GSHEET_EMAIL", remove_prefix=False) + if emails is not None: + return list(emails.values()) + return [] + + def _get_gspread_test_params(): - return "test-{}".format(str(uuid.uuid4())) + filename = "test-{}".format(str(uuid.uuid4())) + gspread_client = _get_gspread_client() + emails = _get_env_sharing_emails() + return filename, gspread_client, emails + + +def _test_to_fromg_sheet(table, sheetname, cell_range, expected): + filename, gspread_client, emails = _get_gspread_test_params() + # test to from gsheet + spread_id = togsheet( + table, gspread_client, filename, worksheet=sheetname, share_emails=emails + ) + try: + result = fromgsheet( + gspread_client, filename, worksheet=sheetname, cell_range=cell_range + ) + # make sure the expected_result matches the result + ieq(expected, result) + finally: + # clean up created table + gspread_client.del_spreadsheet(spread_id) + + +def _test_append_from_gsheet(table_list, expected, sheetname=None): + filename, gspread_client, emails = _get_gspread_test_params() + # append from the second table from the list + table1 = table_list[0] + other_tables = table_list[1:] + # create the spreadshteet and the 1st sheet + spread_id = togsheet( + table1, gspread_client, filename, worksheet=sheetname, share_emails=emails + ) + try: + for tableN in other_tables: + appendgsheet( + tableN, gspread_client, spread_id, worksheet=sheetname, + open_by_key=True + ) + # read the result appended to the sheet + result = fromgsheet( + gspread_client, spread_id, worksheet=sheetname, open_by_key=True + ) + # make sure the expected_result matches the result + ieq(expected, result) + finally: + # clean up created table + gspread_client.del_spreadsheet(spread_id) def 
teardown_function(): @@ -83,126 +136,14 @@ def teardown_function(): # region test cases data -TEST1 = [ - # straight copy test - ( - (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "1/1/2012")), - None, - None, - (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "1/1/2012")), - ), - # Uneven row test - ( - (("foo", "bar"), ("A", "1"), ("B", "2", "3"), ("C", "2"), ("é", "1/1/2012")), - None, - None, - ( - ("foo", "bar", ""), - ("A", "1", ""), - ("B", "2", "3"), - ("C", "2", ""), - ("é", "1/1/2012", ""), - ), - ), - # datetime to string representation test - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - # ("é", datetime.date(2012, 1, 1)), - ("é", "2012-01-01"), - ), - "Sheet1", - None, - (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2"), ("é", "2012-01-01")), - ), - # empty table test - ((), None, None, ()), - # cell_range specified test - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - # ("é", datetime.date(2012, 1, 1)), - ("é", "2012-01-01"), - ), - None, - "B1:B4", - (("bar",), ("1",), ("2",), ("2",)), - ), - # cell_range+sheet specified test - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - # ("é", datetime.date(2012, 1, 1)), - ("é", "2012-01-01"), - ), - "random_stuff-in+_名字", - "B1:B4", - (("bar",), ("1",), ("2",), ("2",)), - ), -] - -TEST2 = [ - # Simplest test - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ), - "Sheet1", - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("foo", "bar"), - ("A", "1"), - ), - ), - # appending to the first sheet - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - # ("é", datetime.date(2012, 1, 1)), - ("é", "2012-01-01"), - ), - "Sheet1", - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - ("é", "2012-01-01"), - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - ), - ), - # appending to a new sheet - ( - ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - # ("é", datetime.date(2012, 1, 
1)), - ("é", "2012-01-01"), - ), - "testing_time", - (("foo", "bar"), ("A", "1"), ("B", "2"), ("C", "2")), - ), +TEST_TABLE = [ + ["foo", "bar"], + ["A", "1"], + ["B", "2"], + ["C", "3"], + ["D", "random_stuff-in+_名字"], + ["é", "3/4/2012"], + ["F", "6"], ] # endregion @@ -211,9 +152,31 @@ def teardown_function(): @found_gcp_credentials -@pytest.mark.parametrize("table,sheetname,cell_range,expected_result", TEST1) -def test_tofromgsheet(table, sheetname, cell_range, expected_result): - _test_tofromgsheet(table, sheetname, cell_range, expected_result) +def test_tofromgsheet_01_basic(): + _test_to_fromg_sheet( TEST_TABLE[:], None, None, TEST_TABLE[:] ) + + +@found_gcp_credentials +def test_tofromgsheet_02_uneven_row(): + test_table_t1 = [x + ["3"] if i in [2] else x for i, x in enumerate(TEST_TABLE[:])] + test_table_f1 = [x + [""] if len(x) < 3 else x for x in test_table_t1[:]] + _test_to_fromg_sheet( test_table_t1, None, None, test_table_f1 ) + + +@found_gcp_credentials +def test_tofromgsheet_03_empty_table(): + _test_to_fromg_sheet( (), None, None, () ) + + +@found_gcp_credentials +def test_tofromgsheet_04_cell_range(): + test_table_f2 = [[x[1]] for x in TEST_TABLE[0:4]] + _test_to_fromg_sheet( TEST_TABLE[:], None, "B1:B4", test_table_f2 ) + + +@found_gcp_credentials +def test_tofromgsheet_05_sheet_title(): + _test_to_fromg_sheet( TEST_TABLE[:], "random_stuff-in+_名字", None, TEST_TABLE[:] ) @found_gcp_credentials @@ -221,50 +184,87 @@ def test_tofromgsheet(table, sheetname, cell_range, expected_result): raises=TypeError, reason="When this stop failing, uncomment datetime.date in TEST1 and TEST2" ) -def test_tofromgshee_fail_datetime(): - table_with_datetime = ( - ("foo", "bar"), - ("A", "1"), - ("B", "2"), - ("C", "2"), - ("é", datetime.date(2012, 1, 1)) - ) - _test_tofromgsheet(table_with_datetime, "Sheet1", None, table_with_datetime) +def test_tofromgsheet_06_datetime_date(): + test_table_dt = [[x[0], datetime.date(2012, 5, 6)] if i in [5] else x for i, x in 
enumerate(TEST_TABLE[:])] + _test_to_fromg_sheet( test_table_dt[:], None, "B1:B4", test_table_dt[:] ) -def _test_tofromgsheet(table, sheetname, cell_range, expected_result): - filename = _get_gspread_test_params() - gspread_client = _get_gspread_client() +@found_gcp_credentials +def test_tofromgsheet_07_open_by_key(): + filename, gspread_client, emails = _get_gspread_test_params() # test to from gsheet - spread_id = togsheet(table, gspread_client, filename, worksheet=sheetname) - result = fromgsheet( - gspread_client, filename, worksheet=sheetname, cell_range=cell_range - ) - # make sure the expected_result matches the result - ieq(result, expected_result) + table = TEST_TABLE[:] + # test to from gsheet + spread_id = togsheet(table, gspread_client, filename, share_emails=emails) + try: + result = fromgsheet(gspread_client, spread_id, open_by_key=True) + # make sure the expected_result matches the result + ieq(table, result) + finally: + # clean up created table + gspread_client.del_spreadsheet(spread_id) - key_result = fromgsheet( - gspread_client, spread_id, open_by_key=True, worksheet=sheetname, - cell_range=cell_range - ) - ieq(key_result, expected_result) - # clean up created table - gspread_client.del_spreadsheet(spread_id) + +@found_gcp_credentials +def test_tofromgsheet_08_recreate(): + filename, gspread_client, emails = _get_gspread_test_params() + # test to from gsheet + table1 = TEST_TABLE[:] + table2 = [[ x[0] , text_type(i)] if i > 0 else x for i, x in enumerate(table1)] + # test to from gsheet + spread_id = togsheet(table1, gspread_client, filename, share_emails=emails) + try: + result1 = fromgsheet(gspread_client, spread_id, open_by_key=True) + ieq(table1, result1) + spread_id2 = togsheet(table2, gspread_client, filename, share_emails=emails) + try: + result2 = fromgsheet(gspread_client, spread_id2, open_by_key=True) + ieq(table2, result2) + finally: + gspread_client.del_spreadsheet(spread_id2) + # make sure the expected_result matches the result + 
finally: + # clean up created table + gspread_client.del_spreadsheet(spread_id) + + +def _get_testcase_for_append(): + table_list = [TEST_TABLE[:], TEST_TABLE[:]] + expected = TEST_TABLE[:] + TEST_TABLE[:] + return table_list, expected @found_gcp_credentials -@pytest.mark.parametrize("table,sheetname,expected_result", TEST2) -def test_toappendfrom(table, sheetname, expected_result): - filename = _get_gspread_test_params() - gspread_client = _get_gspread_client() +def test_appendgsheet_10_double(): + table_list, expected = _get_testcase_for_append() + _test_append_from_gsheet(table_list, expected) + + +@found_gcp_credentials +def test_appendgsheet_11_named_sheet(): + table_list, expected = _get_testcase_for_append() + _test_append_from_gsheet(table_list, expected, sheetname="petl_append") + + +@found_gcp_credentials +def test_appendgsheet_12_other_sheet(): + filename, gspread_client, emails = _get_gspread_test_params() # test to append gsheet - spread_id = togsheet(table, gspread_client, filename) - table2 = table[:-1] - appendgsheet(table2, gspread_client, filename, worksheet=sheetname) - result = fromgsheet(gspread_client, filename, worksheet=sheetname) - ieq(result, expected_result) - # then delete the file - gspread_client.del_spreadsheet(spread_id) + table = TEST_TABLE[:] + spread_id = togsheet(table, gspread_client, filename, share_emails=emails) + try: + appendgsheet(table, gspread_client, filename, worksheet="petl") + result1 = fromgsheet(gspread_client, filename, worksheet=None) + ieq(result1, table) + result2 = fromgsheet(gspread_client, filename, worksheet="petl") + ieq(result2, table) + finally: + gspread_client.del_spreadsheet(spread_id) + + +@pytest.fixture() +def setup_emails(monkeypatch): + monkeypatch.setenv("PETL_GSHEET_EMAIL_J", "juarezr@gmail.com") # endregion From 6a08ceb3a0a8a8a1211f870ac62e56ad1812d191 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Thu, 10 Mar 2022 22:26:10 -0300 Subject: [PATCH 20/21] avro: fix missing Table.toavro --- 
petl/io/avro.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/petl/io/avro.py b/petl/io/avro.py index b2d3f023..a48cb123 100644 --- a/petl/io/avro.py +++ b/petl/io/avro.py @@ -573,4 +573,7 @@ def _ordered_dict_iterator(table): yield OrderedDict(items) +Table.toavro = toavro +Table.appendavro = appendavro + # endregion From a11c7bb02074865270681fe3130d0132c34a01aa Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Thu, 10 Mar 2022 23:29:36 -0300 Subject: [PATCH 21/21] gsheet: append rows without header --- petl/io/gsheet.py | 17 ++++++++++++----- petl/test/io/test_gsheet.py | 11 ++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/petl/io/gsheet.py b/petl/io/gsheet.py index b10f4a67..9c8293d6 100644 --- a/petl/io/gsheet.py +++ b/petl/io/gsheet.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -from petl.util.base import Table +from petl.util.base import Table, iterdata from petl.compat import text_type from petl.errors import ArgumentError as PetlArgError @@ -72,8 +72,6 @@ def fromgsheet( The `spreadsheet` can either be the key of the spreadsheet or its name. - Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key. - The `worksheet` argument can be omitted, in which case the first sheet in the workbook is used by default. @@ -81,6 +79,8 @@ def fromgsheet( specifying the top left and bottom right corners of a set of cells to extract. (i.e. 'A1:C7'). + Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key. + .. note:: - Only the top level of google drive will be searched for the spreadsheet filename due to API limitations. 
@@ -198,7 +198,8 @@ def togsheet( def appendgsheet( - table, credentials_or_client, spreadsheet, worksheet=None, open_by_key=False + table, credentials_or_client, spreadsheet, worksheet=None, + open_by_key=False, include_header=False ): """ Append a table to an existing google shoot at either a new worksheet @@ -212,6 +213,11 @@ def appendgsheet( The `worksheet` is the title of the worksheet to append to or create when it does not exist yet. + Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key. + + Set `include_header` to `True` to append the field names as the first + row as well; by default only the data rows are appended. + .. note:: The sheet index cannot be used, and None is not an option. """ @@ -220,7 +226,8 @@ def appendgsheet( wb = _open_spreadsheet(gspread_client, spreadsheet, open_by_key) # check to see if worksheet exists, if so append, otherwise create ws = _select_worksheet(wb, worksheet, True) - ws.append_rows(table) + rows = table if include_header else list(iterdata(table)) + ws.append_rows(rows) return wb.id diff --git a/petl/test/io/test_gsheet.py b/petl/test/io/test_gsheet.py index 69cffef1..cf14e9eb 100644 --- a/petl/test/io/test_gsheet.py +++ b/petl/test/io/test_gsheet.py @@ -230,7 +230,7 @@ def test_tofromgsheet_08_recreate(): def _get_testcase_for_append(): table_list = [TEST_TABLE[:], TEST_TABLE[:]] - expected = TEST_TABLE[:] + TEST_TABLE[:] + expected = TEST_TABLE[:] + TEST_TABLE[1:] return table_list, expected @@ -251,20 +251,17 @@ def test_appendgsheet_12_other_sheet(): filename, gspread_client, emails = _get_gspread_test_params() # test to append gsheet table = TEST_TABLE[:] + table2 = TEST_TABLE[1:] spread_id = togsheet(table, gspread_client, filename, share_emails=emails) try: appendgsheet(table, gspread_client, filename, worksheet="petl") + # get the results from the 2 sheets result1 = fromgsheet(gspread_client, filename, worksheet=None) ieq(result1, table) result2 = fromgsheet(gspread_client, filename, worksheet="petl") - ieq(result2,
table) + ieq(result2, table2) finally: gspread_client.del_spreadsheet(spread_id) -@pytest.fixture() -def setup_emails(monkeypatch): - monkeypatch.setenv("PETL_GSHEET_EMAIL_J", "juarezr@gmail.com") - - # endregion