In [149]:
# Import necessary libraries
import pandas as pd
from datetime import date
import datetime

from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage

from dotenv import load_dotenv
from pycarol.apps import Apps
from pycarol.bigquery import TokenManager
from pycarol import Carol

In [190]:
def generate_download_signed_url_v4(bucket_name, blob_name, credentials):
    """Generates a v4 signed URL for downloading a blob.

    Note that this method requires a service account key file. You can not use
    this if you are using Application Default Credentials from Google Compute
    Engine or from the Google Cloud SDK.
    """
    # bucket_name = 'your-bucket-name'
    # blob_name = 'your-object-name'

    storage_client = storage.Client(credentials=credentials)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    url = blob.generate_signed_url(
        version="v4",
        # This URL is valid for 15 minutes
        expiration=datetime.timedelta(minutes=15),
        # Allow GET requests using this URL.
        method="GET",
    )

    print("Generated GET signed URL:")
    print(url)
    print("You can use this URL with any user agent, for example:")
    print(f"curl '{url}'")
    return url

In [191]:
load_dotenv(".env")

True

In [192]:
carol = Carol()
tokenManager = TokenManager(carol, None, False)

In [193]:
tokenGcp = tokenManager.get_token()
credentials = service_account.Credentials.from_service_account_info(tokenGcp.to_dict()['service_account'])
client = bigquery.Client(credentials=credentials)

In [194]:
envId = tokenGcp.to_dict()['env']['env_id']
projectId = tokenGcp.to_dict()['service_account']['project_id']

In [195]:
job_config = bigquery.ExtractJobConfig()
job_config.compression = bigquery.Compression.SNAPPY
job_config.destination_format = (bigquery.DestinationFormat.PARQUET)

In [196]:
table_id = 'ingestion_mdbusinesspartner'
dateNow = date.today().isoformat()

destination_uri = "gs://{}/user_space/{}".format(projectId, "export-" + table_id + "-" + dateNow + ".parquet")
print(destination_uri)

table_ref = "{}.{}.{}".format(projectId, envId, table_id)

gs://carol-00b66d7bb91a4e43ae8e/user_space/export-ingestion_mdbusinesspartner-2024-02-26.parquet


In [197]:
extract_job = client.extract_table(
    table_ref,
    destination_uri,
    job_config=job_config,
    # Location must match that of the source table.
    location="US",
)  # API request
a = extract_job.result()  # Waits for job to complete.
print(a)
print("Exported {}:{}.{} to {}".format(projectId, envId, table_ref, destination_uri))

ExtractJob<project=carol-00b66d7bb91a4e43ae8e, location=US, id=413d0bb6-659e-4440-a15d-20da9746267b>
Exported carol-00b66d7bb91a4e43ae8e:00b66d7bb91a4e43ae8e17649fb1a8fb.carol-00b66d7bb91a4e43ae8e.00b66d7bb91a4e43ae8e17649fb1a8fb.ingestion_mdbusinesspartner to gs://carol-00b66d7bb91a4e43ae8e/user_space/export-ingestion_mdbusinesspartner-2024-02-26.parquet


In [198]:
generate_download_signed_url_v4(projectId, "user_space/export-{}-{}.parquet".format(table_id, dateNow), credentials)

Generated GET signed URL:
https://storage.googleapis.com/carol-00b66d7bb91a4e43ae8e/user_space/export-ingestion_mdbusinesspartner-2024-02-26.parquet?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=tmpaccess-20240227t071214105z%40carol-00b66d7bb91a4e43ae8e.iam.gserviceaccount.com%2F20240227%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240227T032204Z&X-Goog-Expires=900&X-Goog-SignedHeaders=host&X-Goog-Signature=ce87f7319392267a18bb543a3c6004c15d15e0c7d3d28bf4fa3b57956814f44c7bb04447ef466e978ec68fc23ebd50ed9e6271e855f608c6f8498195fffc99db39a40d3fe6154a369f653e85ae478a22eb2002820c8a4bb70de1bd4d82840f9b7cbc79afca90fd8edb3a0080b342457d773cd2f266823051f8537eb05d3e76a83767aa11c73a164bebb7af0c2f9f58e1e175c3a449ecd9cedc17772c554489ec737b63f8f084e698ec8029ab672cbb65d26e119730ee67359084d666005a791dc874d8da594f744eda385e595f7a74b10dea82c9cec5f3483f40e10b28edcc746dbceb8c9ac0175d8b6df104753710f7ded3ff66c45a6e2a65885634fa024408
You can use this URL with any user agent, for example:
curl 'https:/

'https://storage.googleapis.com/carol-00b66d7bb91a4e43ae8e/user_space/export-ingestion_mdbusinesspartner-2024-02-26.parquet?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=tmpaccess-20240227t071214105z%40carol-00b66d7bb91a4e43ae8e.iam.gserviceaccount.com%2F20240227%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240227T032204Z&X-Goog-Expires=900&X-Goog-SignedHeaders=host&X-Goog-Signature=ce87f7319392267a18bb543a3c6004c15d15e0c7d3d28bf4fa3b57956814f44c7bb04447ef466e978ec68fc23ebd50ed9e6271e855f608c6f8498195fffc99db39a40d3fe6154a369f653e85ae478a22eb2002820c8a4bb70de1bd4d82840f9b7cbc79afca90fd8edb3a0080b342457d773cd2f266823051f8537eb05d3e76a83767aa11c73a164bebb7af0c2f9f58e1e175c3a449ecd9cedc17772c554489ec737b63f8f084e698ec8029ab672cbb65d26e119730ee67359084d666005a791dc874d8da594f744eda385e595f7a74b10dea82c9cec5f3483f40e10b28edcc746dbceb8c9ac0175d8b6df104753710f7ded3ff66c45a6e2a65885634fa024408'