From 8441ca5908542b1b5dbd9b19ac910d640c7283af Mon Sep 17 00:00:00 2001 From: Max Risuhin Date: Tue, 10 Mar 2020 00:33:48 -0700 Subject: [PATCH] gdrive: Google Service account support --- dvc/config.py | 4 ++ dvc/remote/gdrive.py | 82 ++++++++++++++++++++++++-------- setup.py | 2 +- tests/func/test_data_cloud.py | 24 ++++++---- tests/remotes.py | 7 +-- tests/unit/remote/test_gdrive.py | 6 +-- 6 files changed, 87 insertions(+), 38 deletions(-) diff --git a/dvc/config.py b/dvc/config.py index 1d6118ec45..096bcf6f59 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -172,9 +172,13 @@ class RelPath(str): **REMOTE_COMMON, }, "gdrive": { + "gdrive_use_service_account": Bool, "gdrive_client_id": str, "gdrive_client_secret": str, "gdrive_user_credentials_file": str, + "gdrive_service_account_email": str, + "gdrive_service_account_user_email": str, + "gdrive_service_account_p12_file_path": str, **REMOTE_COMMON, }, "http": {**HTTP_COMMON, **REMOTE_COMMON}, diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 80b4261bbb..56499e8537 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -76,7 +76,7 @@ class RemoteGDrive(RemoteBASE): DEFAULT_NO_TRAVERSE = False DEFAULT_VERIFY = True - GDRIVE_USER_CREDENTIALS_DATA = "GDRIVE_USER_CREDENTIALS_DATA" + GDRIVE_CREDENTIALS_DATA = "GDRIVE_CREDENTIALS_DATA" DEFAULT_USER_CREDENTIALS_FILE = "gdrive-user-credentials.json" def __init__(self, repo, config): @@ -93,17 +93,22 @@ def __init__(self, repo, config): ) self._bucket = self.path_info.bucket + self._use_service_account = config.get("gdrive_use_service_account") + self._service_account_email = config.get( + "gdrive_service_account_email" + ) + self._service_account_user_email = config.get( + "gdrive_service_account_user_email" + ) + self._service_account_p12_file_path = config.get( + "gdrive_service_account_p12_file_path" + ) self._client_id = config.get("gdrive_client_id") self._client_secret = config.get("gdrive_client_secret") - if not self._client_id or not self._client_secret: - raise DvcException( - "Please specify Google Drive's client id and " - "secret in DVC config. Learn more at " - "{}.".format(format_link("https://man.dvc.org/remote/add")) - ) + self._validate_config() self._gdrive_user_credentials_path = ( tmp_fname(os.path.join(self.repo.tmp_dir, "")) - if os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA) + if os.getenv(RemoteGDrive.GDRIVE_CREDENTIALS_DATA) else config.get( "gdrive_user_credentials_file", os.path.join( @@ -114,6 +119,33 @@ def __init__(self, repo, config): self._list_params = None + def _validate_config(self): + # Validate Service Account configuration + if self._use_service_account and ( + not self._service_account_email + or not self._service_account_p12_file_path + ): + raise DvcException( + "To use service account please specify {}, {} and " + "{} in DVC config. Learn more at " + "{}.".format( + "gdrive_service_account_email", + "gdrive_service_account_p12_file_path", + "gdrive_service_account_user_email (optional)", + format_link("https://man.dvc.org/remote/modify"), + ) + ) + + # Validate OAuth 2.0 Client ID configuration + if not self._use_service_account and ( + not self._client_id or not self._client_secret + ): + raise DvcException( + "Please specify Google Drive's client id and " + "secret in DVC config. Learn more at " + "{}.".format(format_link("https://man.dvc.org/remote/modify")) + ) + @wrap_prop(threading.RLock()) @cached_property def drive(self): @@ -121,23 +153,30 @@ def drive(self): from pydrive2.auth import GoogleAuth from pydrive2.drive import GoogleDrive - if os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA): + if os.getenv(RemoteGDrive.GDRIVE_CREDENTIALS_DATA): with open( self._gdrive_user_credentials_path, "w" ) as credentials_file: credentials_file.write( - os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA) + os.getenv(RemoteGDrive.GDRIVE_CREDENTIALS_DATA) ) GoogleAuth.DEFAULT_SETTINGS["client_config_backend"] = "settings" - GoogleAuth.DEFAULT_SETTINGS["client_config"] = { - "client_id": self._client_id, - "client_secret": self._client_secret, - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "revoke_uri": "https://oauth2.googleapis.com/revoke", - "redirect_uri": "", - } + if self._use_service_account: + GoogleAuth.DEFAULT_SETTINGS["service_config"] = { + "client_service_email": self._service_account_email, + "client_user_email": self._service_account_user_email, + "client_pkcs12_file_path": self._service_account_p12_file_path, + } + else: + GoogleAuth.DEFAULT_SETTINGS["client_config"] = { + "client_id": self._client_id, + "client_secret": self._client_secret, + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "revoke_uri": "https://oauth2.googleapis.com/revoke", + "redirect_uri": "", + } GoogleAuth.DEFAULT_SETTINGS["save_credentials"] = True GoogleAuth.DEFAULT_SETTINGS["save_credentials_backend"] = "file" GoogleAuth.DEFAULT_SETTINGS[ @@ -153,7 +192,10 @@ def drive(self): gauth = GoogleAuth(settings_file="") try: - gauth.CommandLineAuth() + if self._use_service_account: + gauth.ServiceAuth() + else: + gauth.CommandLineAuth() except RefreshError as exc: raise GDriveAccessTokenRefreshError from exc except KeyError as exc: @@ -164,7 +206,7 @@ def drive(self): except Exception as exc: raise DvcException("Google Drive authentication failed") from exc finally: - if os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA): + if os.getenv(RemoteGDrive.GDRIVE_CREDENTIALS_DATA): os.remove(self._gdrive_user_credentials_path) return GoogleDrive(gauth) diff --git a/setup.py b/setup.py index 1f7f67e847..35c22c121d 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ def run(self): # Extra dependencies for remote integrations gs = ["google-cloud-storage==1.19.0"] -gdrive = ["pydrive2>=1.4.5"] +gdrive = ["pydrive2>=1.4.6"] s3 = ["boto3>=1.9.201"] azure = ["azure-storage-blob==2.1.0"] oss = ["oss2==2.6.1"] diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index b28c2e438b..aaf1a12c5c 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -37,8 +37,6 @@ OSS, TEST_CONFIG, TEST_GCP_CREDS_FILE, - TEST_GDRIVE_CLIENT_ID, - TEST_GDRIVE_CLIENT_SECRET, TEST_REMOTE, ) @@ -193,8 +191,9 @@ def setup_gdrive_cloud(remote_url, dvc): config = copy.deepcopy(TEST_CONFIG) config["remote"][TEST_REMOTE] = { "url": remote_url, - "gdrive_client_id": TEST_GDRIVE_CLIENT_ID, - "gdrive_client_secret": TEST_GDRIVE_CLIENT_SECRET, + "gdrive_service_account_email": "test", + "gdrive_service_account_p12_file_path": "test.p12", + "gdrive_use_service_account": True, } dvc.config = config @@ -434,8 +433,8 @@ def _test(self): "remote", "modify", TEST_REMOTE, - "gdrive_client_id", - TEST_GDRIVE_CLIENT_ID, + "gdrive_service_account_email", + "modified", ] ) self.main( @@ -443,8 +442,17 @@ def _test(self): "remote", "modify", TEST_REMOTE, - "gdrive_client_secret", - TEST_GDRIVE_CLIENT_SECRET, + "gdrive_service_account_p12_file_path", + "modified.p12", + ] + ) + self.main( + [ + "remote", + "modify", + TEST_REMOTE, + "gdrive_use_service_account", + "True", ] ) diff --git a/tests/remotes.py b/tests/remotes.py index 55029312d3..a80619b174 100644 --- a/tests/remotes.py +++ b/tests/remotes.py @@ -35,11 +35,6 @@ # Ensure that absolute path is used os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = TEST_GCP_CREDS_FILE -TEST_GDRIVE_CLIENT_ID = ( - "217948389181-rs7it4a635b3qrf8dnmklmoj2kimun9n.apps.googleusercontent.com" -) -TEST_GDRIVE_CLIENT_SECRET = "LNg9n_cK7bohI8gEHn4bUeMX" - always_test = staticmethod(lambda: True) @@ -146,7 +141,7 @@ def put_objects(remote, objects): class GDrive: @staticmethod def should_test(): - return os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA) is not None + return os.getenv(RemoteGDrive.GDRIVE_CREDENTIALS_DATA) is not None def get_url(self): if not getattr(self, "_remote_url", None): diff --git a/tests/unit/remote/test_gdrive.py b/tests/unit/remote/test_gdrive.py index 9d7c511e18..e165903ccb 100644 --- a/tests/unit/remote/test_gdrive.py +++ b/tests/unit/remote/test_gdrive.py @@ -33,15 +33,15 @@ def test_init(self): def test_drive(self): remote = RemoteGDrive(Repo(), self.CONFIG) os.environ[ - RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA + RemoteGDrive.GDRIVE_CREDENTIALS_DATA ] = USER_CREDS_TOKEN_REFRESH_ERROR with pytest.raises(GDriveAccessTokenRefreshError): remote.drive - os.environ[RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA] = "" + os.environ[RemoteGDrive.GDRIVE_CREDENTIALS_DATA] = "" remote = RemoteGDrive(Repo(), self.CONFIG) os.environ[ - RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA + RemoteGDrive.GDRIVE_CREDENTIALS_DATA ] = USER_CREDS_MISSED_KEY_ERROR with pytest.raises(GDriveMissedCredentialKeyError): remote.drive