In [14]:
import logging
import os
import json
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google.cloud import bigquery
import numpy as np
import pandas as pd

# Google Cloud project the BigQuery client is created for.
project_id = "logistics-customer-staging"  # Replace with your actual project ID

# NOTE(review): no logging configuration is visible in this cell; under Python's
# default root level (WARNING) the info messages below are not emitted. Confirm
# logging is configured elsewhere if they are expected to appear.
logging.info(f"Initializing BigQuery client for project: {project_id}")

# Locate the credentials JSON file.
#   * When the GITHUB_ACTIONS environment variable is set: a fixed path.
#   * Otherwise: the BQ_CREDENTIALS_PATH environment variable if it is set and
#     non-empty, else the original local default. The override keeps the
#     notebook usable on machines other than the one this path belongs to.
if os.getenv("GITHUB_ACTIONS"):
    credentials_path = "/tmp/credentials.json"  # GitHub Actions path
else:
    credentials_path = (
        os.getenv("BQ_CREDENTIALS_PATH")
        or "/Users/shazeb.asad/global_pricing/config/credentials.json"  # Local machine path
    )

# Fail early with an actionable message rather than a bare open() error.
if not os.path.exists(credentials_path):
    raise FileNotFoundError(f"Credentials file not found at {credentials_path}. Make sure to set up authentication.")

# Read the stored credential payload (JSON) into a dict.
with open(credentials_path, "r") as f:
    creds_data = json.load(f)

# Build the credentials object from the loaded dict.
credentials = Credentials.from_authorized_user_info(creds_data)

# Refresh only when the token is flagged as expired and a refresh token exists.
if credentials.expired and credentials.refresh_token:
    credentials.refresh(Request())

# Create the BigQuery client with the explicit credentials and project.
try:
    client = bigquery.Client(credentials=credentials, project=project_id)
    logging.info(f"BigQuery client initialized successfully for project: {project_id}")
except Exception as e:
    logging.error(f"Failed to initialize BigQuery client: {e}")
    # Bare `raise` re-raises the active exception as-is.
    raise

# Test Query: distinct customer counts (holdout, non-holdout, all) from
# dps_holdout_users for created_date 2025-02-01, skipping NULL customer_ids
# and any id listed in _bad_dps_logs_ids.
test_query = """
  SELECT
        COUNT(DISTINCT CASE WHEN is_customer_holdout IS True THEN customer_id END) AS holdout_customers
        ,COUNT(DISTINCT CASE WHEN is_customer_holdout IS False THEN customer_id END) AS non_holdout_customers
        ,COUNT(DISTINCT customer_id) AS all_customers
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  WHERE d.created_date = DATE('2025-02-01')
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`))
    AND customer_id IS NOT NULL

"""

# Run the query and load the result into a DataFrame. Any failure is logged
# and then re-raised so the cell still stops with the underlying error.
try:
    test_df = client.query(test_query).to_dataframe()
    print(test_df)
except Exception as e:
    logging.error(f"Query execution failed: {e}")
    # Bare `raise` re-raises the active exception as-is.
    raise


   holdout_customers  non_holdout_customers  all_customers
0             329114               16077234       16406348


In [19]:
import logging
import os
import json
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google.cloud import bigquery
import numpy as np
import pandas as pd
from google.auth import default  # To handle default credentials locally

# Fallback Google Cloud project ID; replaced below when the local default
# credentials report a project of their own.
project_id = "logistics-customer-staging"  # Replace with your actual project ID

# Logged before credentials are resolved, so this shows the fallback project;
# the success message further down reports the project actually used.
logging.info(f"Initializing BigQuery client for project: {project_id}")

# Resolve credentials in a single branch (GitHub Actions vs. local) so that
# `credentials_path` is only defined and used where it is relevant.
if os.getenv("GITHUB_ACTIONS"):
    # GitHub Actions: credentials are read from a JSON file at a fixed path.
    credentials_path = "/tmp/credentials.json"  # GitHub Actions path

    # Fail early with an actionable message rather than a bare open() error.
    if not os.path.exists(credentials_path):
        raise FileNotFoundError(f"Credentials file not found at {credentials_path}. Make sure to set up authentication in GitHub Actions.")

    with open(credentials_path, "r") as f:
        creds_data = json.load(f)

    # Build the credentials object from the loaded dict.
    credentials = Credentials.from_authorized_user_info(creds_data)

    # Refresh only when the token is flagged as expired and a refresh token exists.
    if credentials.expired and credentials.refresh_token:
        credentials.refresh(Request())
else:
    # Local machine: application default credentials
    # (e.g. from `gcloud auth application-default login`).
    credentials, project = default()
    # Prefer the project reported by default(); keep the fallback otherwise.
    project_id = project or project_id

# Create the BigQuery client with the resolved credentials and project.
try:
    client = bigquery.Client(credentials=credentials, project=project_id)
    logging.info(f"BigQuery client initialized successfully for project: {project_id}")
except Exception as e:
    logging.error(f"Failed to initialize BigQuery client: {e}")
    # Bare `raise` re-raises the active exception as-is.
    raise

# Test Query: distinct customer counts (holdout, non-holdout, all) from
# dps_holdout_users for created_date 2025-02-01, skipping NULL customer_ids
# and any id listed in _bad_dps_logs_ids.
test_query = """
  SELECT
        COUNT(DISTINCT CASE WHEN is_customer_holdout IS True THEN customer_id END) AS holdout_customers
        ,COUNT(DISTINCT CASE WHEN is_customer_holdout IS False THEN customer_id END) AS non_holdout_customers
        ,COUNT(DISTINCT customer_id) AS all_customers
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  WHERE d.created_date = DATE('2025-02-01')
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`))
    AND customer_id IS NOT NULL
"""

# Run the query and load the result into a DataFrame. Any failure is logged
# and then re-raised so the cell still stops with the underlying error.
try:
    test_df = client.query(test_query).to_dataframe()
    print(test_df)
except Exception as e:
    logging.error(f"Query execution failed: {e}")
    # Bare `raise` re-raises the active exception as-is.
    raise




   holdout_customers  non_holdout_customers  all_customers
0             329114               16077234       16406348
