In [3]:
import requests
from dotenv import load_dotenv
import os
import json
# Load key/value pairs from a local .env file into the process environment so
# the os.getenv() lookups in the following cells can read them.
load_dotenv()

True

# 1. Exchange token and access GCP resources through API

In [41]:
# Azure Entra ID app registration used to obtain the initial token.
# A variable that is not set in the environment resolves to None.
TENANT_ID = os.environ.get("TENANT_ID")
CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")

# GCP project and the service account that will be impersonated.
PROJECT_ID = os.environ.get("PROJECT_ID")
PROJECT_NUMBER = os.environ.get("PROJECT_NUMBER")
SERVICE_ACCOUNT_EMAIL = os.environ.get("SERVICE_ACCOUNT_EMAIL")

# Workload Identity Federation pool and provider identifiers.
POOL_ID = os.environ.get("POOL_ID")
PROVIDER_ID = os.environ.get("PROVIDER_ID")

In [7]:
# OAuth 2.0 (v2.0) token endpoint of the Azure tenant that issues the initial token.
azure_token_issuer_url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"

#### Get an access_token from Azure Entra ID first. (Note: if the Azure Metadata Server is not accessible, register an app in Azure and create a client_secret.)

In [8]:
# The Azure token request below is sent as a URL-encoded form.
headers = {"Content-Type": "application/x-www-form-urlencoded"}

In [9]:
# Form fields for the OAuth 2.0 client-credentials grant. The requested scope
# is the app's own client ID followed by "/.default".
data = {
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET,
    "grant_type": "client_credentials",
    "scope": f"{CLIENT_ID}/.default",
}

In [10]:
# Request the Azure access token. The timeout keeps the cell from hanging
# indefinitely on a stalled connection (requests has no default timeout), and
# raise_for_status() surfaces a rejected request here rather than as a KeyError
# when the token is read from the response.
response = requests.post(azure_token_issuer_url, data=data, headers=headers, timeout=30)
response.raise_for_status()

In [11]:
# Decode the JSON body of the Azure token response and keep only the token.
azure_token_payload = response.json()
azure_access_token = azure_token_payload['access_token']

#### Exchange Azure token for a GCP token with STS - Security Token Service

In [None]:
# Google Security Token Service endpoint used for the token exchange.
gcp_sts_url = "https://sts.googleapis.com/v1/token"
headers = {'Content-Type': 'application/json'}

# Full resource name of the workload identity pool provider the Azure token is
# presented to.
wif_audience = f"//iam.googleapis.com/projects/{PROJECT_NUMBER}/locations/global/workloadIdentityPools/{POOL_ID}/providers/{PROVIDER_ID}"

# Token-exchange request body: the Azure token is the subject token, and a
# cloud-platform-scoped access token is requested in return.
data = {
    "audience": wif_audience,
    "grantType": "urn:ietf:params:oauth:grant-type:token-exchange",
    "requestedTokenType": "urn:ietf:params:oauth:token-type:access_token",
    "scope": "https://www.googleapis.com/auth/cloud-platform",
    "subjectTokenType": "urn:ietf:params:oauth:token-type:jwt",
    "subjectToken": azure_access_token,
}

In [13]:
# Exchange the Azure token at Google STS. The timeout prevents an indefinite
# hang (requests has no default timeout), and raise_for_status() makes a
# rejected exchange fail in this cell with the HTTP status instead of as a
# KeyError when the token is read from the response.
response = requests.post(gcp_sts_url, data=json.dumps(data), headers=headers, timeout=30)
response.raise_for_status()

In [14]:
# Decode the STS response and keep the federated access token.
sts_token_payload = response.json()
sts_access_token = sts_token_payload['access_token']

#### Exchange STS access_token for final access token

##### Approach 1 - Service Account Impersonation

Please refer to the GCP documentation for detailed instructions on how to set up service account impersonation.

In [15]:
# IAM Credentials generateAccessToken endpoint for the target service account;
# "-" is used in place of the project ID in the resource path.
iam_url = f"https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/{SERVICE_ACCOUNT_EMAIL}:generateAccessToken"

# The federated STS token authorizes the impersonation call.
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + sts_access_token,
}

# No delegation chain; request a cloud-platform-scoped token.
data = {"delegates": [], "scope": ["https://www.googleapis.com/auth/cloud-platform"]}

In [17]:
# Ask IAM Credentials for a service-account access token. The timeout prevents
# an indefinite hang (requests has no default timeout), and raise_for_status()
# reports a denied impersonation here instead of as a KeyError when the token is
# read from the response.
response = requests.post(iam_url, data=json.dumps(data), headers=headers, timeout=30)
response.raise_for_status()

In [20]:
# Decode the generateAccessToken response; the token is under "accessToken".
iam_token_payload = response.json()
access_token = iam_token_payload['accessToken']

##### Test that WIF works by calling the BigQuery REST API with the retrieved access token

In [21]:
# BigQuery datasets.get endpoint for the sample dataset. The path has a single
# slash after the host; the earlier "//bigquery" form was a typo.
test_url = f"https://bigquery.googleapis.com/bigquery/v2/projects/{PROJECT_ID}/datasets/melbourne_real_estate"

In [22]:
# Call BigQuery with the impersonated service-account token. The timeout
# prevents an indefinite hang (requests has no default timeout), and
# raise_for_status() makes an auth or permission failure explicit here.
response = requests.get(test_url, headers={'Authorization': 'Bearer ' + access_token}, timeout=30)
response.raise_for_status()

In [26]:
# Display the dataset ID returned in the response body; seeing the expected
# dataset name confirms the token was accepted by BigQuery.
response.json()['datasetReference']['datasetId']

'melbourne_real_estate'

##### Approach 2 - TODO: not yet documented

# 2. Exchange token and access GCP resources through Python libraries

In [22]:
from msal import ConfidentialClientApplication
from dotenv import load_dotenv
import os
# Load key/value pairs from a local .env file into the process environment so
# the os.getenv() lookups in this section can read them.
load_dotenv()

True

#### Generate the Azure access token

In [2]:
# Azure Entra ID app registration details, read from the environment.
# A variable that is not set resolves to None.
TENANT_ID = os.environ.get('TENANT_ID')
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')

In [3]:
# MSAL confidential client for the registered Azure app, authenticating with
# the client secret against the tenant's authority URL.
azure_authority = f"https://login.microsoftonline.com/{TENANT_ID}"
app = ConfidentialClientApplication(
    client_id=CLIENT_ID,
    client_credential=CLIENT_SECRET,
    authority=azure_authority,
)

In [4]:
# Acquire an app-only token for the app's own "/.default" scope.
# MSAL reports failures by returning a dict with "error" / "error_description"
# rather than raising, so check for the token explicitly and fail with the
# reason instead of an uninformative KeyError.
azure_token_result = app.acquire_token_for_client(scopes=[f"{CLIENT_ID}/.default"])
if "access_token" not in azure_token_result:
    raise RuntimeError(
        "Azure token request failed: {error}: {description}".format(
            error=azure_token_result.get("error"),
            description=azure_token_result.get("error_description"),
        )
    )
azure_access_token = azure_token_result["access_token"]

#### Use Google's STS client to get a federated token via Workload Identity Federation

In [5]:
from google.oauth2.sts import Client
from google.auth.transport.requests import Request

In [6]:
# GCP project identifiers, read from the environment (None when unset).
GCP_PROJECT_NUMBER = os.environ.get('PROJECT_NUMBER')
GCP_PROJECT_ID = os.environ.get('PROJECT_ID')

# Workload Identity Federation pool and provider identifiers.
POOL_ID = os.environ.get('POOL_ID')
PROVIDER_ID = os.environ.get('PROVIDER_ID')

In [7]:
# STS client bound to Google's token-exchange endpoint.
sts_client = Client(token_exchange_endpoint="https://sts.googleapis.com/v1/token")

# Full resource name of the workload identity pool provider the Azure token is
# presented to.
wif_audience = f"//iam.googleapis.com/projects/{GCP_PROJECT_NUMBER}/locations/global/workloadIdentityPools/{POOL_ID}/providers/{PROVIDER_ID}"

# Exchange the Azure JWT for a cloud-platform-scoped federated access token.
response = sts_client.exchange_token(
    request=Request(),
    grant_type="urn:ietf:params:oauth:grant-type:token-exchange",
    subject_token=azure_access_token,
    subject_token_type="urn:ietf:params:oauth:token-type:jwt",
    audience=wif_audience,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
    requested_token_type="urn:ietf:params:oauth:token-type:access_token",
)

In [8]:
# Keep the federated access token from the STS exchange response.
sts_access_token = response["access_token"]

#### Impersonate the target service account with STS token

In [9]:
from google.oauth2.credentials import Credentials
from google.auth import impersonated_credentials

In [10]:
# Email of the service account to impersonate, read from the environment
# (None when unset).
TARGET_SERVICE_ACCOUNT = os.environ.get('SERVICE_ACCOUNT_EMAIL')

In [11]:
# Wrap the federated STS token in a credentials object so it can be passed as
# the source credentials for impersonation.
sts_credentials = Credentials(token=sts_access_token)

In [12]:
# Impersonate the target service account, using the STS-backed credentials as
# the source identity and requesting cloud-platform scope.
impersonation_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
credentials = impersonated_credentials.Credentials(
    source_credentials=sts_credentials,
    target_principal=TARGET_SERVICE_ACCOUNT,
    target_scopes=impersonation_scopes,
    lifetime=3600,  # presumably seconds - confirm against the google-auth docs
)

In [13]:
# Refresh the impersonated credentials now, using a requests-based transport.
# NOTE(review): presumably this fetches the service-account token eagerly so an
# impersonation failure surfaces here rather than on the first API call - confirm.
credentials.refresh(Request())

#### Call your Google API (here BigQuery) for testing

In [23]:
from google.cloud import bigquery
# Dataset whose tables are listed in the test query, read from the environment
# (None when unset).
BIGQUERY_DATASET_ID = os.environ.get('BIGQUERY_DATASET_ID')

In [19]:
# BigQuery client that authenticates with the impersonated service-account
# credentials and targets the configured project.
client = bigquery.Client(credentials=credentials, project=GCP_PROJECT_ID)

In [26]:
# List the tables of the configured dataset via its INFORMATION_SCHEMA view.
query = f"SELECT * FROM {GCP_PROJECT_ID}.{BIGQUERY_DATASET_ID}.INFORMATION_SCHEMA.TABLES;"

In [27]:
# Submit the query; the returned job object is iterated in the next cell to
# read the result rows.
query_job = client.query(query)

In [28]:
# Print every row returned by the query; iterating the job yields Row objects
# (see the recorded output of this cell).
print("The query data:")
for row in query_job:
    print(row)

The query data:
Row(('robin-sandpit', 'melbourne_real_estate', 'mel_estate_data_til_2017_geo', 'BASE TABLE', 'YES', 'NO', datetime.datetime(2023, 5, 12, 0, 3, 12, 295000, tzinfo=datetime.timezone.utc), None, None, None, None, 'CREATE TABLE `robin-sandpit.melbourne_real_estate.mel_estate_data_til_2017_geo`\n(\n  ListingId INT64,\n  Agency STRING,\n  Price INT64,\n  DateSold STRING,\n  StreetAddress STRING,\n  Postcode INT64,\n  Locality STRING,\n  Latitude FLOAT64,\n  Longitude FLOAT64,\n  LandSize INT64,\n  LandSizeUnit STRING,\n  Bedrooms INT64,\n  Bathrooms INT64,\n  ParkingSpaces INT64,\n  PropertyType STRING,\n  cbdDistance FLOAT64,\n  `1PrimaryName` STRING,\n  `1PrimarySector` STRING,\n  `1PrimaryDistance` FLOAT64,\n  `1PrimaryScore` INT64,\n  `2PrimaryName` STRING,\n  `2PrimarySector` STRING,\n  `2PrimaryDistance` FLOAT64,\n  `2PrimaryScore` INT64,\n  `3PrimaryName` STRING,\n  `3PrimarySector` STRING,\n  `3PrimaryDistance` FLOAT64,\n  `3PrimaryScore` INT64,\n  `1SecondaryName` ST