<a href="https://colab.research.google.com/github/max-ostapenko/ga4_data_import/blob/main/scripts/GA4_Data_Import.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @markdown 1. Authenticate Google Colab to your GCP project {display-mode: "form"}
# Or run `gcloud auth application-default login` in terminal if you don't use Colab

from google.colab import auth
auth.authenticate_user()

!pip install ga4-data-import -q

In [None]:
# @markdown 2. Define the input variables {display-mode: "form"}
from uuid import uuid4

GCP_PROJECT_ID = "max-ostapenko"  # @param {type: "string"}
ZONE = "us-central1-a"  # @param {type: "string"}
INSTANCE_NAME = "sftp-server-ga4"  # @param {type: "string"}
BUCKET_NAME = "sftp-bucket-ga4"  # @param {type: "string"}
SFTP_USERNAME = uuid4().hex  # @param {type: "string"}

REGION = ZONE[:-2]

!gcloud config set project {GCP_PROJECT_ID}

In [None]:
# @markdown 3. Reserve static IP for your server {display-mode: "form"}
from ga4_data_import.compute import create_static_address

# Reserve the address in the region derived from ZONE; the returned value is
# reused later when the VM is created and when the SFTP URL is printed.
INSTANCE_IP = create_static_address(
    GCP_PROJECT_ID,
    REGION,
    instance_name=INSTANCE_NAME,
)

# Console page listing the project's reserved addresses.
addresses_console_url = (
    "https://console.cloud.google.com/networking/addresses/list"
    f"?project={GCP_PROJECT_ID}"
)
print(f"Instance IP `{INSTANCE_IP}` is reserved in your project: {addresses_console_url}")


In [None]:
# @markdown 4. Create VM Instance for SFTP server and GCS bucket. Mount a bucket as read-only on SFTP server. {display-mode: "form"}
from ga4_data_import.compute import create_instance
from ga4_data_import.storage import create_bucket, add_bucket_read_access

# Provision the VM, handing it the reserved static address, the bucket name
# and the SFTP username defined in the previous cells.
instance = create_instance(
    project_id=GCP_PROJECT_ID,
    zone=ZONE,
    instance_name=INSTANCE_NAME,
    static_address=INSTANCE_IP,
    bucket_name=BUCKET_NAME,
    sftp_username=SFTP_USERNAME,
)
instance_console_url = (
    "https://console.cloud.google.com/compute/instancesDetail"
    f"/zones/{ZONE}/instances/{INSTANCE_NAME}?project={GCP_PROJECT_ID}"
)
print(f"VM instance `{INSTANCE_NAME}` is available in your project: {instance_console_url}")

print(
    "Now you can enter connection settings to Data Import UI:\n"
    f"    Server Username: {SFTP_USERNAME}"
)

# The first service account attached to the returned instance is reused below
# and by the workflow/scheduler deployment cell.
SERVICE_ACCOUNT_EMAIL = instance.service_accounts[0].email

# NOTE(review): the bucket is created after the instance that references it;
# confirm that `create_instance` does not require the bucket to exist already.
create_bucket(BUCKET_NAME, REGION)
add_bucket_read_access(BUCKET_NAME, SERVICE_ACCOUNT_EMAIL)
bucket_console_url = (
    f"https://console.cloud.google.com/storage/browser/{BUCKET_NAME}"
    f"?project={GCP_PROJECT_ID}"
)
print(f"Bucket `{BUCKET_NAME}` is available in your project: {bucket_console_url}")


In [None]:
# @markdown 5. Paste and push Public Key from GA4 to SFTP server for authorisation {display-mode: "form"}
from ga4_data_import.compute import add_server_pub_key

# Public key as shown by the GA4 Data Import UI; replace it with your own.
KEY_VALUE = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDeDrtHfPb8TM6/BqANwQJdSmAGjygLE+2jMdIxfJZFRdcEogMLMrEARhzj9fjRQKw7RhudOpxkyvMjVRXlv6zvcbvmJhQCxk6yp3Cgq3U7ux2PYppBWSxei1R4DH2TSjy/k/7nuKDvdDaBwUC/WLOgjvQ5nKLbYp53RbWoPrv63YEDDEyuLjMww9rlJ+J9iL/fd1TxlZxvwVvPCxweWCzfQfLP6MWGvDmUMfe7eTg11rt3p/wdNUa/RxKC1HOJRWloGCw7dX5LgZYoCq8tKOS8rpHt8BoTFhKjoPsIw8h4uNsjOb9y6YSOQrQFaXj3o2l4X0ssLxkKQcaG+EDZ8PtUr/WnuPpfHCuTNN1Q6TnaF2t2zm4J7C6LchPMgiP/vO+wwloQpgg9tUVGC4FZxQhHTLmCoW+19KIuBu4nm+zgaeIWXuHfnPLE4UdFsIB3o76tAfZprrykjaWVLPc6PFVlvRx1PIAPTL7dHaajnv1rxuQdQlNU2a2dOE4KNoxnffE= Google Analytics Data Import Key"  # @param {type: "string"}

# Register the key on the VM for the SFTP user defined in the input-variables cell.
add_server_pub_key(
    GCP_PROJECT_ID,
    ZONE,
    sftp_username=SFTP_USERNAME,
    key=KEY_VALUE,
    instance_name=INSTANCE_NAME,
)

print(
    "Public Key is added to your SFTP server. "
    "Now you can connect to your SFTP server with your private key."
)


In [None]:
# @markdown 6. Automate and schedule data export from BigQuery to GCS {display-mode: "form"}
from ga4_data_import.workflow import deploy_workflow, deploy_scheduler

WORKFLOW_ID = "BQ-to-GCS" # @param {type: "string"}
SCHEDULER_ID = "Cost-Data-Export_at_8amUTC" # @param {type: "string"}
# Cron fields are: minute hour day-of-month month day-of-week.
# "0 8 * * *" fires once a day at 08:00, matching the scheduler id above.
# (A "*" in the minute field would fire every minute from 08:00 to 08:59.)
SCHEDULE = "0 8 * * *" # @param {type: "string"}
QUERY = "SELECT * FROM `max-ostapenko.Public.cost_data`" # @param {type: "string"}
STORAGE_OBJECT = "cost_data.csv" # @param {type: "string"}

# Deploy the workflow, running as the VM's service account captured in the
# instance-creation cell.
deploy_workflow(
    GCP_PROJECT_ID,
    REGION,
    workflow_id=WORKFLOW_ID,
    service_account_email=SERVICE_ACCOUNT_EMAIL,
)

# Deploy the scheduler job that triggers the workflow with the query and the
# destination object inside the bucket.
deploy_scheduler(
    GCP_PROJECT_ID,
    REGION,
    scheduler_id=SCHEDULER_ID,
    service_account_email=SERVICE_ACCOUNT_EMAIL,
    schedule=SCHEDULE,
    workflow_id=WORKFLOW_ID,
    query=QUERY,
    storage_path=f"gs://{BUCKET_NAME}/{STORAGE_OBJECT}",
)

print(
    f"""Workflow and trigger are deployed. Now you can test your data export: https://console.cloud.google.com/cloudscheduler?project={GCP_PROJECT_ID}"""
)

# Final connection settings to paste into the GA4 Data Import UI.
print(f"""Now you can enter connection settings to Data Import UI:
    Server Username: {SFTP_USERNAME}
    Server url: sftp://{INSTANCE_IP}/{STORAGE_OBJECT}""")