In [None]:
# Asked ChatGPT4 to define calculated fields and Data Sources

In [None]:
# First, let's check the file type and its basic properties to ensure we can process it correctly.
import os

# Location of the uploaded Tableau workbook
file_path = '/mnt/data/Superstore Performance Dashboard.twb'

# Summarise the file: base name, size in kilobytes, and extension
file_info = dict(zip(
    ("File Name", "File Size (KB)", "File Type"),
    (
        os.path.basename(file_path),
        os.path.getsize(file_path) / 1024,
        os.path.splitext(file_path)[1],
    ),
))

file_info

In [None]:
# Initially it only returned the first 3 it found

In [None]:
# Tableau Workbook files (.twb) are essentially XML files. We can parse this file to extract information about calculated fields and data sources.

import xml.etree.ElementTree as ET

# Function to parse the Tableau Workbook (TWB) file
def parse_tableau_workbook(file_path):
    """Extract data sources and their calculated fields from a Tableau workbook.

    A column counts as a calculated field when it has a direct
    ``<calculation>`` child whose ``class`` attribute is ``'tableau'``.

    Args:
        file_path: Path of the ``.twb`` XML file (anything ``ET.parse``
            accepts, including an open file object).

    Returns:
        A ``(data_sources, calculated_fields)`` tuple.

        * ``data_sources`` maps each ``<datasource>`` ``name`` attribute
          (``'Unknown'`` when the attribute is missing) to the list of
          calculated fields found beneath it.
        * ``calculated_fields`` maps a field label to
          ``{"Caption": label, "Formula": formula}``. The label is the
          column's ``caption`` attribute, or its ``name`` attribute when no
          caption is set. Labels repeated across data sources overwrite
          each other in this dictionary (last one wins).

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    data_sources = {}
    calculated_fields = {}

    for datasource in root.iter('datasource'):
        ds_name = datasource.get('name', 'Unknown')

        # root.iter() also yields <datasource> elements nested deeper in the
        # document that repeat an already-seen name. setdefault keeps the
        # fields collected so far instead of resetting them to [].
        ds_fields = data_sources.setdefault(ds_name, [])

        for column in datasource.iter('column'):
            formula = column.find('calculation')
            if formula is None or formula.get('class') != 'tableau':
                continue

            # Fall back to the internal name so caption-less calculated
            # fields do not all collapse onto the single key None.
            caption = column.get('caption') or column.get('name')

            calc_field = {
                "Caption": caption,
                "Formula": formula.get('formula')
            }
            calculated_fields[caption] = calc_field
            ds_fields.append(calc_field)

    return data_sources, calculated_fields

# Run the parser once and keep both result dictionaries
data_sources_info, calculated_fields_info = parse_tableau_workbook(file_path)

# Preview only: at most three data sources (three fields each) and three calculated fields
limited_data_sources_info = {
    ds_name: ds_fields[:3]
    for ds_name, ds_fields in list(data_sources_info.items())[:3]
}
limited_calculated_fields_info = dict(list(calculated_fields_info.items())[:3])

limited_data_sources_info, limited_calculated_fields_info

In [None]:
# Asked it to return all calculated fields

In [None]:
# Show every calculated field parsed from the workbook, not just the first three previewed earlier.
# Note: this binds a second name to the same dict object; no copy is made.
all_calculated_fields_info = calculated_fields_info

all_calculated_fields_info

In [None]:
# provide a list of calculated fields

In [None]:
# Extracting and listing distinct names of the calculated fields from the Tableau Workbook

# Function to extract distinct names of calculated fields
def get_distinct_calculated_field_names(calculated_fields):
    """Return the distinct captions of the given calculated fields.

    Args:
        calculated_fields: Mapping whose values are dicts containing a
            ``'Caption'`` entry (the dictionary keys are not used).

    Returns:
        A list of captions in first-seen order with duplicates removed.

    Raises:
        KeyError: If a value has no ``'Caption'`` entry.
    """
    captions = (field_info['Caption'] for field_info in calculated_fields.values())
    # dict.fromkeys drops repeats while preserving insertion order.
    return list(dict.fromkeys(captions))

# Collect the captions of the calculated fields parsed from the workbook
distinct_calculated_field_names = get_distinct_calculated_field_names(calculated_fields_info)

# Display the resulting list as the cell output
distinct_calculated_field_names

In [None]:
# Asked what Views are Associated

In [None]:
# Parsing the Tableau Workbook file to find views (sheets, dashboards, etc.)

def parse_tableau_views(file_path):
    """List the worksheet and dashboard names defined in a Tableau workbook.

    Args:
        file_path: Path of the ``.twb`` XML file (anything ``ET.parse``
            accepts, including an open file object).

    Returns:
        A dict with two keys: ``"sheets"`` and ``"dashboards"``, each a list
        of ``name`` attribute values in document order. An entry is ``None``
        when an element has no ``name`` attribute.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    # (container tag, item tag) pairs that may hold sheets. Tableau workbooks
    # store worksheets as <worksheets>/<worksheet>; the <sheets>/<sheet>
    # lookup is kept so any input the previous version handled still works.
    sheet_layouts = (('sheets', 'sheet'), ('worksheets', 'worksheet'))

    views = {
        "sheets": [],
        "dashboards": []
    }

    # root.iter() includes the root element itself, so this matches a
    # document whose root is <workbook>.
    for workbook in root.iter('workbook'):
        for container_tag, item_tag in sheet_layouts:
            for container in workbook.iter(container_tag):
                for item in container.iter(item_tag):
                    views['sheets'].append(item.get('name'))

        for dashboards in workbook.iter('dashboards'):
            for dashboard in dashboards.iter('dashboard'):
                views['dashboards'].append(dashboard.get('name'))

    return views

# Parse the workbook (re-reading the file from disk) to collect sheet and dashboard names
tableau_views = parse_tableau_views(file_path)

# Display the {"sheets": [...], "dashboards": [...]} dict as the cell output
tableau_views

In [None]:
# Asked for the sheets within the dashboards

In [None]:
# Parsing the Tableau Workbook file to find sheets within dashboards

def parse_sheets_within_dashboards(file_path):
    """Map each dashboard in a Tableau workbook to the sheets it references.

    A sheet is recognised through the ``name`` attribute of any ``<zone>``
    element nested inside a ``<dashboard>``.

    Args:
        file_path: Path of the ``.twb`` XML file (anything ``ET.parse``
            accepts, including an open file object).

    Returns:
        A dict mapping each dashboard's ``name`` attribute to a list of
        distinct zone names, in first-seen order. Dashboards without named
        zones map to an empty list.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    dashboard_sheets = {}

    for workbook in root.iter('workbook'):
        for dashboards in workbook.iter('dashboards'):
            for dashboard in dashboards.iter('dashboard'):
                zone_names = (
                    zone.get('name')
                    for zone in dashboard.iter('zone')
                    if 'name' in zone.attrib
                )
                # Several zones can carry the same name (presumably a view
                # plus its legend/filter zones - verify against the
                # workbook), so list each name only once.
                dashboard_sheets[dashboard.get('name')] = list(dict.fromkeys(zone_names))

    return dashboard_sheets

# Parse the workbook (re-reading the file from disk) to map each dashboard to its named zones
sheets_within_dashboards = parse_sheets_within_dashboards(file_path)

# Display the {dashboard name: [zone names]} dict as the cell output
sheets_within_dashboards