<a href="https://colab.research.google.com/github/larry-tableau/tableau/blob/main/Querying_Tableau's_Metadata_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Example GraphQL Query using Python to query Tableau's Metadata API

---

This document discusses the benefits of implementing certain calculations in Tableau to complement BigQuery/DBT, emphasizing a balanced approach to data analysis, reporting, visual creativity and impactful storytelling.

Key points include:

1.   Enhanced user accessibility and flexibility for self-service analytics
2.   Improved visibility and governance through certified metrics and data sources
3.   Flexibility by leveraging Tableau Extracts vs Live Connections
4.   Context-aware calculations for dynamic, interactive visualizations
5.   Managing responsibilities between data engineering and business logic vs use cases
6.   Potential cost savings in development and maintenance

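The following Python code demonstrates how BigQuery and Tableau can work together: it queries Tableau's Metadata API for published data sources and their calculated fields, flattens the result into a table, and saves it as CSV and Hyper files (with an optional load into BigQuery), showcasing the synergy between backend data processing and frontend visualization capabilities.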




In [None]:
!pip install requests
!pip install pantab==4.1.0
!pip install tableauserverclient

In [2]:
from google.colab import userdata

# Tableau Cloud URL and authentication details
# The personal access token name and secret are fetched with google.colab's
# userdata.get() under the keys 'PAT_NAME' and 'PAT_SECRET', so neither value
# is hard-coded in the notebook.
tableau_cloud_url = 'https://prod-apsoutheast-a.online.tableau.com'
pat_name = userdata.get('PAT_NAME')
pat_secret = userdata.get('PAT_SECRET')
site_content_url = 'tableauanzpresalesdemositesydney'  # e.g., 'my_site'

In [None]:
import requests
import pandas as pd
import pantab
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

# Authenticate with Google Cloud
# (runs at cell execution time, before any of the functions below are called)
auth.authenticate_user()

# Constants
# AUTH_URL: sign-in endpoint of the Tableau REST API (version 3.22), built from
# tableau_cloud_url defined in the earlier cell.
AUTH_URL = f"{tableau_cloud_url}/api/3.22/auth/signin"
# GRAPHQL_URL: Metadata API GraphQL endpoint on the same host.
GRAPHQL_URL = f"{tableau_cloud_url}/api/metadata/graphql"
# QUERY: GraphQL document requesting the published datasources whose project
# name is "Data Sources (LD)" or "Larry". For each datasource it asks for
# identity, extract, ownership and certification attributes, plus every field's
# name and, for CalculatedField entries only, the formula.
QUERY = """
query published_datasources {
  publishedDatasources(filter: {projectNameWithin: ["Data Sources (LD)","Larry"]})  {
    id
    luid
    name
    hasUserReference
    hasExtracts
    extractLastRefreshTime
    site {
      luid
    }
    fields {
      name
      ... on CalculatedField {
        formula
      }
    }
    projectName
    projectVizportalUrlId
    owner {
      luid
    }
    isCertified
    certifier {
      luid
    }
    certificationNote
    certifierDisplayName
    description
  }
}
"""

def get_auth_token(pat_name, pat_secret, site_content_url, timeout=30):
    """Sign in to Tableau with a personal access token and return the session token.

    Args:
        pat_name: Name of the personal access token.
        pat_secret: Secret value of the personal access token.
        site_content_url: Content URL of the site to sign in to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``credentials.token`` string from the sign-in response.

    Raises:
        requests.exceptions.HTTPError: If the sign-in endpoint returns an
            error status.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        KeyError: If the response body lacks ``credentials.token``.
    """
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    payload = {
        "credentials": {
            "personalAccessTokenName": pat_name,
            "personalAccessTokenSecret": pat_secret,
            "site": {"contentUrl": site_content_url}
        }
    }
    response = requests.post(AUTH_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()  # Will raise an HTTPError for bad responses
    return response.json()['credentials']['token']

def query_metadata_api(token, timeout=60):
    """Post the module-level QUERY to the Metadata API and return its ``data`` payload.

    Args:
        token: Session token sent in the ``X-Tableau-Auth`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The value of the ``data`` key of the GraphQL response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint returns an error status
            (the error and response body are printed first).
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        RuntimeError: If the response contains GraphQL ``errors`` or has no
            ``data``.
    """
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'X-Tableau-Auth': token
    }
    payload = {"query": QUERY}
    response = requests.post(GRAPHQL_URL, headers=headers, json=payload, timeout=timeout)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Surface the body before re-raising: it usually explains the failure.
        print(f"HTTP Error: {e}")
        print(f"Response content: {response.content}")
        raise

    result = response.json()

    if 'errors' in result:
        # `or {}` guards against an explicit `"extensions": null` in an error entry.
        errors = "\n".join(
            f"Error: {error.get('message')}, Path: {error.get('path')}, "
            f"Classification: {(error.get('extensions') or {}).get('classification')}"
            for error in result['errors']
        )
        raise RuntimeError(f"GraphQL query returned errors:\n{errors}")

    if result.get('data') is None:
        print(f"Full API response: {result}")
        raise RuntimeError("No data returned from GraphQL API.")

    return result['data']

def flatten_json(nested_json):
    """Flatten the Metadata API ``data`` payload into one row per calculated field.

    Each datasource under ``publishedDatasources`` becomes a base row: scalar
    attributes are copied as-is, one-level nested dicts are expanded to
    ``<key>_<nested_key>`` columns, and list values are dropped. The base row
    is then repeated for every entry of ``fields`` that carries a ``formula``
    key, adding ``field_name`` and ``formula``. A datasource without any such
    field yields a single row with both set to ``None``.

    Args:
        nested_json: The ``data`` dict of the GraphQL response, or ``None``.

    Returns:
        A list of flat dicts; empty when the input is ``None``, has no
        ``publishedDatasources`` key, or that list is empty (a diagnostic
        message is printed in each of those cases).
    """
    flattened_data = []

    if nested_json is None:
        print("Error: nested_json is None")
        return flattened_data

    published_datasources = nested_json.get('publishedDatasources')

    if published_datasources is None:
        print("Error: 'publishedDatasources' key not found in the response")
        print(f"Keys in nested_json: {nested_json.keys()}")
        return flattened_data

    if not published_datasources:
        print("No published datasources found in the response.")
        return flattened_data

    for datasource in published_datasources:
        base_datasource = {}
        for key, value in datasource.items():
            if key == 'fields':
                # Expanded into rows below; never a column of its own, even
                # when the API returns it as null instead of a list.
                continue
            if isinstance(value, dict):
                # Handle nested dictionaries
                for nested_key, nested_value in value.items():
                    base_datasource[f"{key}_{nested_key}"] = nested_value
            elif not isinstance(value, list):
                # Directly add non-list values
                base_datasource[key] = value

        # `or []` covers both a missing key and an explicit null value.
        fields = datasource.get('fields') or []
        calculated_rows = [
            {
                **base_datasource,
                'field_name': field.get('name'),
                'formula': field.get('formula'),
            }
            for field in fields
            if isinstance(field, dict) and 'formula' in field
        ]

        if not calculated_rows:
            # Keep the datasource visible even when it has no calculated fields.
            calculated_rows = [{
                **base_datasource,
                'field_name': None,
                'formula': None,
            }]

        flattened_data.extend(calculated_rows)

    return flattened_data

def display_dataframe(df, title):
    """Print a heading line for ``title`` and render ``df`` as a Colab DataTable without its index."""
    heading = f"\n{title}:"
    print(heading)
    table = data_table.DataTable(df, include_index=False)
    display(table)

def write_to_bigquery(df, project_id, dataset_id, table_id):
    """Load ``df`` into the BigQuery table ``project_id.dataset_id.table_id``.

    The load job is configured with ``autodetect=True`` and the
    ``WRITE_TRUNCATE`` disposition. The call blocks until the job finishes,
    then prints how many rows were loaded.
    """
    bq_client = bigquery.Client(project=project_id)
    destination = f"{project_id}.{dataset_id}.{table_id}"

    load_options = bigquery.LoadJobConfig(
        autodetect=True,
        write_disposition="WRITE_TRUNCATE",
    )

    # .result() blocks until the load job completes (and raises if it failed).
    bq_client.load_table_from_dataframe(
        df, destination, job_config=load_options
    ).result()

    row_count = len(df)
    print(f"Loaded {row_count} rows into {destination}")

def main():
    """Run the export: sign in, query the Metadata API, flatten, display and save.

    Reads the module-level ``pat_name``, ``pat_secret`` and
    ``site_content_url`` for sign-in. Writes ``tableau_metadata_output.csv``
    to the working directory and ``/content/tableau_metadata_output.hyper``.
    The BigQuery load is left commented out until the placeholder IDs are
    replaced. Any exception is caught and printed rather than propagated.
    """
    try:
        # Authenticate and get token
        auth_token = get_auth_token(pat_name, pat_secret, site_content_url)

        # Query the Metadata API
        result = query_metadata_api(auth_token)
        print(f"Raw API result: {result}")

        # Flatten the JSON structure
        flattened_data = flatten_json(result)

        if not flattened_data:
            print("No data to process. Exiting.")
            return

        # Create and display DataFrame
        df = pd.DataFrame(flattened_data)

        # Convert problematic columns to strings
        # NOTE(review): presumably these columns can hold None in some rows,
        # which upsets the writers below — confirm before changing.
        problematic_columns = ['certifier', 'certificationNote', 'certifierDisplayName']
        for col in problematic_columns:
            if col in df.columns:
                df[col] = df[col].astype(str)

        display_dataframe(df, "Flattened Datasources and Calculated Fields")

        # Save to CSV
        df.to_csv('tableau_metadata_output.csv', index=False)
        print("\nData saved to 'tableau_metadata_output.csv'")

        # Write to BigQuery
        project_id = '<Google_Cloud_Project_ID>'  # Replace with your Google Cloud project ID
        dataset_id = '<Google_BQ_Dataset_ID>'  # Replace with your BigQuery dataset ID
        table_id = '<Google_BQ_Table_ID>'  # Replace with your desired table name

        # Uncomment once the three placeholders above are filled in.
        # write_to_bigquery(df, project_id, dataset_id, table_id)

        # Save as Hyper file
        pantab.frame_to_hyper(df, '/content/tableau_metadata_output.hyper', table='Extract')
        print("\nData saved to 'tableau_metadata_output.hyper'")

    except Exception as e:
        # Notebook-level boundary: report the failure instead of a traceback.
        print(f"An error occurred: {str(e)}")

# Execute the main function when this cell runs; main() catches exceptions
# itself and prints them instead of raising.
main()
