# Google Analytics 4 Data Import

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/max-ostapenko/website-source/blob/main/src/posts/google_analytics_4_data_import/notebook.ipynb)

In [None]:
# Install the ga4-data-import package
%pip install ga4-data-import -q

## 0. Define inputs and authenticate



In [None]:
from google.colab import auth
auth.authenticate_user()
# or run `gcloud auth application-default login` in terminal if you don't use Colab

from uuid import uuid4

GCP_PROJECT_ID = "max-ostapenko"
ZONE = "us-central1-a"
INSTANCE_NAME = "sftp-server-ga4"
BUCKET_NAME = "sftp-bucket-ga4"
SFTP_USERNAME = uuid4().hex
REGION = ZONE[:-2]

!gcloud config set project {GCP_PROJECT_ID}

## 1. Create a VM instance for the SFTP server

In [None]:
from ga4_data_import.compute import create_instance, create_static_address
from ga4_data_import.storage import add_bucket_read_access, create_bucket

# Reserve the static IP first: it is passed to the instance created below
INSTANCE_IP = create_static_address(
    GCP_PROJECT_ID,
    REGION,
    instance_name=INSTANCE_NAME,
)
print(
    f"Instance IP `{INSTANCE_IP}` is reserved in your project: "
    f"https://console.cloud.google.com/networking/addresses/list?project={GCP_PROJECT_ID}"
)

# Create the VM that hosts the SFTP server; `instance` is reused in the next step
instance = create_instance(
    project_id=GCP_PROJECT_ID,
    zone=ZONE,
    instance_name=INSTANCE_NAME,
    static_address=INSTANCE_IP,
    bucket_name=BUCKET_NAME,
    sftp_username=SFTP_USERNAME,
)
print(
    f"SFTP server on a VM instance `{INSTANCE_NAME}` is available in your project: "
    f"https://console.cloud.google.com/compute/instancesDetail/zones/{ZONE}/instances/{INSTANCE_NAME}?project={GCP_PROJECT_ID}"
)

# The user name is already known at this point; the server URL is printed in step 4
print(
    "Now you can enter connection settings to Data Import UI:\n"
    f"    Server Username: {SFTP_USERNAME}"
)

## 2. Mount a bucket as read-only on the SFTP server

In [None]:
# Use the first service account listed on the instance created in step 1
vm_service_account = instance.service_accounts[0]
SERVICE_ACCOUNT_EMAIL = vm_service_account.email

# NOTE(review): the instance was given BUCKET_NAME before the bucket exists;
# presumably the server tolerates a bucket that appears later - confirm.
create_bucket(BUCKET_NAME, REGION)
add_bucket_read_access(BUCKET_NAME, SERVICE_ACCOUNT_EMAIL)

print(
    f"Bucket `{BUCKET_NAME}` linked to an SFTP server is available in your project: "
    f"https://console.cloud.google.com/storage/browser/{BUCKET_NAME}?project={GCP_PROJECT_ID}"
)

## 3. Automate and schedule data export from BigQuery to GCS

In [None]:
from ga4_data_import.workflow import deploy_workflow, deploy_scheduler

WORKFLOW_ID = "BQ-to-GCS"
SCHEDULER_ID = "Cost-Data-Export_at_8amUTC"
# Cron fields: minute hour day-of-month month day-of-week.
# The minute must be pinned to 0: "* 8 * * *" would fire every minute from
# 08:00 to 08:59 (60 exports a day) instead of once at 8 am.
SCHEDULE = "0 8 * * *"
# Query whose result is exported
QUERY = "SELECT * FROM `max-ostapenko.Public.cost_data`"
# Object name inside the bucket; also used in the SFTP URL printed in step 4
STORAGE_OBJECT = "cost_data.csv"

# Deploy the workflow under the SFTP server's service account
deploy_workflow(
    GCP_PROJECT_ID,
    REGION,
    workflow_id=WORKFLOW_ID,
    service_account_email=SERVICE_ACCOUNT_EMAIL,
)

# Deploy the scheduler job, passing it the workflow id, query and destination path
deploy_scheduler(
    GCP_PROJECT_ID,
    REGION,
    scheduler_id=SCHEDULER_ID,
    service_account_email=SERVICE_ACCOUNT_EMAIL,
    schedule=SCHEDULE,
    workflow_id=WORKFLOW_ID,
    query=QUERY,
    storage_path=f"gs://{BUCKET_NAME}/{STORAGE_OBJECT}",
)

print(
    f"""Workflow and trigger are deployed. Now you can test your data export: https://console.cloud.google.com/cloudscheduler?project={GCP_PROJECT_ID}"""
)

## 4. Configure a data source in GA4

In [None]:
# Connection settings to enter in the GA4 Data Import UI
print(
    "Now you can enter connection settings to Data Import UI:\n"
    f"    Server Username: {SFTP_USERNAME}\n"
    f"    Server url: sftp://{INSTANCE_IP}/{STORAGE_OBJECT}"
)

## 5. Paste the public key from GA4 and push it to the SFTP server for authorisation

In [None]:
from ga4_data_import.compute import add_server_pub_key

# Replace this placeholder with the public key copied from GA4
KEY_VALUE = "ssh-rsa AAA...ffE= Google Analytics Data Import Key"

# Base64 key material never contains "...", so finding it means the placeholder
# above was not replaced. Stop here instead of pushing an unusable key and
# reporting success.
if "..." in KEY_VALUE:
    raise ValueError(
        "KEY_VALUE still holds the placeholder: paste the public key from GA4 first."
    )

add_server_pub_key(
    GCP_PROJECT_ID,
    ZONE,
    instance_name=INSTANCE_NAME,
    pub_key=KEY_VALUE,
    username=SFTP_USERNAME,
)
print(
    "Public Key is added to your SFTP server. Now you can connect to your SFTP server with your private key."
)