In [0]:
# Catalog and schema that hold every converter configuration table.
CATALOG, SCHEMA = "ds_training_1", "thoughtspot_inventory_ak"

# Fully qualified (catalog.schema.table) names of the four lookup tables.
(
    CHART_TYPE_MAPPING_TABLE,
    WIDGET_SIZE_CONFIG_TABLE,
    EXPRESSION_TRANSFORM_TABLE,
    SCALE_TYPE_DETECTION_TABLE,
) = (
    f"{CATALOG}.{SCHEMA}.{table_name}"
    for table_name in (
        "chart_type_mappings",
        "widget_size_config",
        "expression_transformations",
        "scale_type_detection",
    )
)

# The schema must exist before any of the tables above can be created.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}")

# Create chart_type_mappings: maps a TML chart type to a target widget type
# and a default size category.
# CREATE OR REPLACE swaps the definition in one statement, so the table is
# never missing between a DROP and a CREATE.
spark.sql(f"""
    CREATE OR REPLACE TABLE {CHART_TYPE_MAPPING_TABLE} (
        tml_chart_type STRING,
        widget_type STRING,
        default_size STRING
    ) USING DELTA
""")

# Insert data
# NOTE(review): RADAR, PARETO, TREEMAP and CANDLESTICK carry a NULL
# widget_type - presumably no equivalent widget exists; confirm how the
# converter treats NULL.
spark.sql(f"""
    INSERT INTO {CHART_TYPE_MAPPING_TABLE} VALUES
    ('COLUMN', 'bar', 'MEDIUM'),
    ('STACKED_COLUMN', 'bar', 'MEDIUM'),
    ('LINE_COLUMN', 'line', 'LARGE'),
    ('PIVOT_TABLE', 'pivot', 'LARGE'),
    ('LINE', 'line', 'LARGE'),
    ('KPI', 'counter', 'EXTRA_SMALL'),
    ('DONUT', 'pie', 'MEDIUM'),
    ('PIE', 'pie', 'MEDIUM'),
    ('BAR', 'bar', 'MEDIUM'),
    ('STACKED_BAR', 'bar', 'MEDIUM'),
    ('AREA', 'area', 'LARGE'),
    ('STACKED_AREA', 'area', 'LARGE'),
    ('SCATTER', 'scatter', 'LARGE'),
    ('WATERFALL', 'bar', 'MEDIUM'),
    ('FUNNEL', 'funnel', 'MEDIUM'),
    ('GEO_BUBBLE', 'choropleth map', 'LARGE'),
    ('GEO_HEATMAP', 'choropleth map', 'LARGE'),
    ('CHOROPLETH', 'choropleth map', 'LARGE'),
    ('GEO_AREA', 'choropleth map', 'LARGE'),
    ('RADAR', NULL, 'MEDIUM'),
    ('PARETO', NULL, 'MEDIUM'),
    ('COMBO_LINE_COLUMN', 'combo', 'LARGE'),
    ('COMBO_LINE_STACKED_COLUMN', 'combo', 'LARGE'),
    ('BUBBLE', 'scatter', 'LARGE'),
    ('HEATMAP', 'heatmap', 'LARGE'),
    ('TREEMAP', NULL, 'MEDIUM'),
    ('SANKEY', 'sankey', 'LARGE'),
    ('CANDLESTICK', NULL, 'LARGE'),
    ('TABLE', 'table', 'LARGE'),
    ('TABLE_MODE', 'table', 'LARGE'),
    ('PIVOT_TABLE_CLASSIC', 'pivot', 'LARGE')
""")

# Create widget_size_config: width/height for each named size category.
# CREATE OR REPLACE swaps the definition in one statement, so the table is
# never missing between a DROP and a CREATE.
spark.sql(f"""
    CREATE OR REPLACE TABLE {WIDGET_SIZE_CONFIG_TABLE} (
        size_category STRING,
        width INT,
        height INT,
        description STRING
    ) USING DELTA
""")

# Rows are listed from smallest to largest footprint.
spark.sql(f"""
    INSERT INTO {WIDGET_SIZE_CONFIG_TABLE} VALUES
    ('EXTRA_SMALL', 2, 2, 'Smallest widget size'),
    ('SMALL', 2, 3, 'Small widget size'),
    ('MEDIUM_SMALL', 3, 3, 'Medium-small widget size'),
    ('MEDIUM', 3, 4, 'Medium widget size'),
    ('LARGE_SMALL', 4, 4, 'Large-small widget size'),
    ('LARGE', 4, 5, 'Large widget size'),
    ('EXTRA_LARGE', 6, 6, 'Largest widget size')
""")

# Create expression_transformations: TML expression patterns and the SQL
# expression each one is rewritten to.
# CREATE OR REPLACE swaps the definition in one statement, so the table is
# never missing between a DROP and a CREATE.
spark.sql(f"""
    CREATE OR REPLACE TABLE {EXPRESSION_TRANSFORM_TABLE} (
        tml_pattern STRING,
        target_expression STRING,
        expression_type STRING,
        priority INT
    ) USING DELTA
""")

# NOTE(review): priority 1 is shared by sum/count(distinct) and priority 5 by
# max/Day - confirm that the consumer tolerates ties.
# The DATE_TRUNC targets are double-quoted SQL string literals because they
# contain single quotes themselves.
spark.sql(f"""
    INSERT INTO {EXPRESSION_TRANSFORM_TABLE} VALUES
    ('sum(field)', 'SUM(`field`)', 'AGGREGATION', 1),
    ('count(distinct field)', 'COUNT(DISTINCT `field`)', 'AGGREGATION', 1),
    ('count(field)', 'COUNT(`field`)', 'AGGREGATION', 2),
    ('avg(field)', 'AVG(`field`)', 'AGGREGATION', 3),
    ('min(field)', 'MIN(`field`)', 'AGGREGATION', 4),
    ('max(field)', 'MAX(`field`)', 'AGGREGATION', 5),
    ('Day(field)', "DATE_TRUNC('DAY', `field`)", 'DATE_FUNCTION', 5),
    ('Week(field)', "DATE_TRUNC('WEEK', `field`)", 'DATE_FUNCTION', 6),
    ('Month(field)', "DATE_TRUNC('MONTH', `field`)", 'DATE_FUNCTION', 7),
    ('Year(field)', "DATE_TRUNC('YEAR', `field`)", 'DATE_FUNCTION', 8)
""")

# Create scale_type_detection: field-name wildcard patterns and the scale
# type (temporal / quantitative / categorical) each one implies.
# CREATE OR REPLACE swaps the definition in one statement, so the table is
# never missing between a DROP and a CREATE.
spark.sql(f"""
    CREATE OR REPLACE TABLE {SCALE_TYPE_DETECTION_TABLE} (
        field_pattern STRING,
        scale_type STRING,
        priority INT
    ) USING DELTA
""")

# Each field_pattern cell holds a comma-separated list of wildcard patterns.
# Priorities are banded by scale type: 1-4 temporal, 10-14 quantitative,
# 20-22 categorical.
spark.sql(f"""
    INSERT INTO {SCALE_TYPE_DETECTION_TABLE} VALUES
    ('*_date, *_time', 'temporal', 1),
    ('*year*, *month*, *day*', 'temporal', 2),
    ('*timestamp*, *datetime*', 'temporal', 3),
    ('*week*', 'temporal', 4),
    ('sum_*, count_*, avg_*', 'quantitative', 10),
    ('*total*, *revenue*, *price*', 'quantitative', 11),
    ('*quantity*, *amount*, *sales*', 'quantitative', 12),
    ('*number*, *num_*', 'quantitative', 13),
    ('*min*, *max*', 'quantitative', 14),
    ('*_category, *_name', 'categorical', 20),
    ('*status*, *type*', 'categorical', 21),
    ('*id, *_id', 'categorical', 22)
""")

print("✓ All configuration tables created successfully!")
print("You can now run the converter.")

In [0]:
%sql
-- Ad-hoc preview of every row and column in tml_metadata_mapping.
SELECT *
FROM ds_training_1.thoughtspot_inventory_ak.tml_metadata_mapping

In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC # Dashboard Configuration Setup
# MAGIC 
# MAGIC This notebook:
# MAGIC 1. Reads TML files from a volume folder
# MAGIC 2. Identifies all liveboards
# MAGIC 3. Creates the configuration table if needed and appends any newly discovered liveboards with their name, GUID, and process flag (default: N); existing rows are left untouched

# COMMAND ----------

import json
import yaml
from pathlib import Path
from datetime import datetime

# COMMAND ----------

# Configuration: where the TML exports live and where the result is stored.
CATALOG, SCHEMA = "ts_migration", "thoughtspot_inventory_ak"
VOLUME, FOLDER = "tml_files_ak/tml_hp", "liveboard"

# Full path to TML files (always ends with a trailing slash)
TML_PATH = "/" + "/".join(("Volumes", CATALOG, SCHEMA, VOLUME, FOLDER)) + "/"

# Configuration table (catalog.schema.table)
CONFIG_TABLE = ".".join((CATALOG, SCHEMA, "liveboard_migration_config"))

# Echo both locations so a mis-configured run is obvious in the cell output.
print(f"Reading from: {TML_PATH}")
print(f"Config table: {CONFIG_TABLE}")

# COMMAND ----------

def parse_tml_file(file_path):
    """Parse a TML file (YAML or JSON) into a dict.

    The file is read through ``dbutils.fs.head`` with a 10 MiB byte limit and
    parsed as YAML first; if the YAML parser rejects the text it is retried
    as JSON.

    Args:
        file_path: Path of the file, in a form ``dbutils.fs.head`` accepts.

    Returns:
        The parsed document as a ``dict``, or ``None`` when the file cannot be
        read or parsed, or when its top level is not a mapping (empty file,
        bare string, list, ...). A message is printed in each failure case.
    """
    try:
        # NOTE(review): content beyond the 10 MiB limit is not read, so a
        # larger file would be parsed from a truncated string.
        content = dbutils.fs.head(file_path, 10 * 1024 * 1024)
        try:
            parsed = yaml.safe_load(content)
        except yaml.YAMLError:
            parsed = json.loads(content)
    except Exception as e:
        # Best-effort: one unreadable file must not abort the whole scan.
        print(f"  Error parsing file: {e}")
        return None

    # safe_load/loads can legally yield a str, list, number or None. Callers
    # index the result by key, so anything but a mapping is a parse failure.
    if not isinstance(parsed, dict):
        print(f"  Error parsing file: expected a mapping at the top level, got {type(parsed).__name__}")
        return None

    return parsed

# COMMAND ----------

# MAGIC %md
# MAGIC ## Step 1: Read files from volume

# COMMAND ----------

# List the volume folder and keep only paths with a TML-like extension.
# A listing failure is reported and then re-raised, since nothing downstream
# can work without the file list.
try:
    all_files = dbutils.fs.ls(TML_PATH)
    listed_paths = (entry.path for entry in all_files)
    tml_files = [
        candidate
        for candidate in listed_paths
        if candidate.endswith(('.tml', '.yaml', '.json'))
    ]
    print(f"Found {len(tml_files)} TML files")
except Exception as e:
    print(f"ERROR: Cannot read from {TML_PATH}")
    print(f"Error: {e}")
    raise

# COMMAND ----------

# MAGIC %md
# MAGIC ## Step 2: Identify liveboards

# COMMAND ----------

# Collect one record per TML file that contains a top-level ``liveboard``
# mapping. Every record starts with process_flag 'N'.
liveboards = []

for file_path in tml_files:
    filename = Path(file_path).name
    print(f"Processing: {filename}")

    tml_data = parse_tml_file(file_path)

    # The parsed document may be None or a non-mapping value, and the
    # ``liveboard`` key may be present but null. Treat all of these as
    # "not a liveboard" instead of failing on .get()/indexing.
    liveboard = tml_data.get('liveboard') if isinstance(tml_data, dict) else None
    if not isinstance(liveboard, dict):
        print(f"  ✗ Not a liveboard or parse error")
        continue

    # Fall back to the filename without its final extension. Path.stem only
    # strips the suffix, whereas chained str.replace would also remove
    # '.tml'/'.yaml'/'.json' occurring in the middle of a name.
    name = liveboard.get('name', Path(filename).stem)
    # NOTE(review): every liveboard lacking a guid shares the 'NO_GUID'
    # placeholder, so guid-based de-duplication cannot tell them apart.
    guid = tml_data.get('guid', 'NO_GUID')

    liveboards.append({
        'name': name,
        'guid': guid,
        'process_flag': 'N',
        'last_updated': datetime.now()
    })

    print(f"  ✓ Found liveboard: {name} ({guid})")

print(f"\nTotal liveboards found: {len(liveboards)}")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Step 3: Create table and insert entries

# COMMAND ----------

# DDL for the migration config table; IF NOT EXISTS keeps existing rows.
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {CONFIG_TABLE} (
    name STRING,
    guid STRING,
    process_flag STRING,
    last_updated TIMESTAMP
) USING DELTA
"""

# Run schema creation first, then the table DDL.
for ddl_statement in (
    f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}",
    create_table_sql,
):
    spark.sql(ddl_statement)

print(f"✓ Table {CONFIG_TABLE} ready")

# COMMAND ----------

# Get existing GUIDs so that only unseen liveboards are appended later.
try:
    existing_df = spark.sql(f"SELECT guid FROM {CONFIG_TABLE}").toPandas()
    existing_guids = set(existing_df['guid'].tolist())
    print(f"Found {len(existing_guids)} existing entries")
except Exception as e:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt/SystemExit still propagate. The fallback stays
    # best-effort, but the reason is printed: an unreadable table would
    # otherwise look like an empty one, and a re-run would add duplicates.
    existing_guids = set()
    print("No existing entries")
    print(f"  (could not read {CONFIG_TABLE}: {e})")

# COMMAND ----------

# Append liveboards whose guid is not yet present in the config table.
import pandas as pd

new_liveboards = list(
    filter(lambda candidate: candidate['guid'] not in existing_guids, liveboards)
)

if not new_liveboards:
    print("✓ No new liveboards to add")
else:
    # Go through pandas so that last_updated is a proper datetime column
    # before it is handed to Spark.
    df = pd.DataFrame(new_liveboards)
    df['last_updated'] = pd.to_datetime(df['last_updated'])
    spark_df = spark.createDataFrame(df)
    writer = spark_df.write.mode("append")
    writer.saveAsTable(CONFIG_TABLE)
    print(f"✓ Added {len(new_liveboards)} new liveboards")

# COMMAND ----------

# MAGIC %md
# MAGIC ## View Configuration Table

# COMMAND ----------

# Show the whole config table, most recently touched rows first.
config_rows = spark.sql(f"SELECT * FROM {CONFIG_TABLE} ORDER BY last_updated DESC")
display(config_rows)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Summary

# COMMAND ----------

# Row count per process_flag value.
summary_query = f"""
SELECT 
    process_flag,
    COUNT(*) as count
FROM {CONFIG_TABLE}
GROUP BY process_flag
"""
summary = spark.sql(summary_query)

print("=== Summary ===")
display(summary)

# Copy-paste SQL for switching dashboards on; printed only, never executed.
usage_hint = f"""
Configuration table: {CONFIG_TABLE}

To enable a dashboard:
UPDATE {CONFIG_TABLE} 
SET process_flag = 'Y', last_updated = CURRENT_TIMESTAMP()
WHERE name = 'Your Dashboard Name';

To enable all:
UPDATE {CONFIG_TABLE} 
SET process_flag = 'Y', last_updated = CURRENT_TIMESTAMP();
"""
print(usage_hint)

In [0]:
%sql
-- (Re)creates the paas_tracking_card Delta table with 45 columns.
-- CREATE OR REPLACE discards any rows already in the table.
-- Column ORDER matters: the data-generation cell in this notebook builds
-- plain tuples and applies this table's schema to them positionally.
-- NOTE: the spelling "accordian" is part of the column names as defined here;
-- renaming would presumably require migrating consumers (confirm first).
CREATE OR REPLACE TABLE ts_migration.thoughtspot_inventory_ak.paas_tracking_card (
    -- Session and application identifiers
    session_id STRING,
    app_package_deployed_uuid STRING,
    os_platform STRING,
    app_name STRING,
    app_package_id STRING,
    app_version STRING,
    session_start_date_time TIMESTAMP,
    geo_country_code STRING,
    is_hpid_signed_in BOOLEAN,
    -- Per-session counts
    total_printer_count INT,
    total_device_count INT,
    total_accessory_count INT,
    total_pc_count INT,
    -- "viewed" flags
    is_viewed_aip_tracking_card BOOLEAN,
    is_viewed_aip_tracking_card_order_confirmed BOOLEAN,
    is_viewed_aip_tracking_card_order_processing BOOLEAN,
    is_viewed_aip_tracking_card_order_shipped BOOLEAN,
    is_viewed_aip_tracking_card_order_delivered BOOLEAN,
    -- "clicked" flags
    is_clicked_aip_order_accordian BOOLEAN,
    is_clicked_order_confirmation BOOLEAN,
    is_clicked_order_processing BOOLEAN,
    is_clicked_track_delivery BOOLEAN,
    is_clicked_complete_setup BOOLEAN,
    is_clicked_order_confirmation_pill BOOLEAN,
    is_clicked_order_processing_pill BOOLEAN,
    is_clicked_order_shipped_pill BOOLEAN,
    is_clicked_order_delivered_pill BOOLEAN,
    -- Device-level identifiers and max_* counts
    device_app_package_deployed_uuid STRING,
    max_total_printer_count INT,
    max_total_device_count INT,
    max_total_accessory_count INT,
    max_total_pc_count INT,
    associated_device_session_id STRING,
    aip_device_uuid STRING,
    -- Remaining boolean flags (association, accordion clicks, setup, support)
    is_associated_device BOOLEAN,
    is_clicked_aip_order_accordian_order_processing BOOLEAN,
    is_clicked_aip_order_accord BOOLEAN,
    is_aip_setup_start BOOLEAN,
    is_clicked_aip_order_accordian_order_confirmed BOOLEAN,
    is_ows_start BOOLEAN,
    is_clicked_aip_order_accordian_order_shipped BOOLEAN,
    is_oobe_complete BOOLEAN,
    is_aip_setup_complete BOOLEAN,
    is_clicked_support BOOLEAN,
    is_oobe_support_session BOOLEAN
)
USING DELTA;



In [0]:
from pyspark.sql import SparkSession
from datetime import datetime, timedelta
import random

# Obtain a SparkSession via the builder (getOrCreate returns an existing
# session when there is one).
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

def generate_row(i):
    """Return one randomly generated 45-value tuple for paas_tracking_card.

    Values are positional and follow the table's column order. Only the
    first value depends on ``i`` (``"sess_<i>"``); everything else is drawn
    from the module-level ``random`` generator, and the timestamp is
    "now" minus 0-120 whole days.

    Random draws are made strictly in column order, so seeding ``random``
    beforehand makes every value except the timestamp reproducible.
    """
    def coin_flip():
        # One BOOLEAN column value.
        return random.choice([True, False])

    def up_to(limit):
        # One INT column value in [0, limit].
        return random.randint(0, limit)

    # session_id .. is_hpid_signed_in
    session_part = (
        f"sess_{i}",
        f"pkg_deployed_{random.randint(1000,2000)}",
        random.choice(["Windows", "MacOS", "Linux"]),
        random.choice(["HP Smart", "Instant Ink", "HP App"]),
        f"app_{random.randint(1,500)}",
        f"v{random.randint(1,10)}.{random.randint(0,9)}",
        datetime.now() - timedelta(days=random.randint(0, 120)),
        random.choice(["US", "IN", "UK", "DE", "FR", "CN"]),
        coin_flip(),
    )

    # total_printer_count, total_device_count, total_accessory_count,
    # total_pc_count
    totals_part = (up_to(5), up_to(10), up_to(3), up_to(7))

    # 5 is_viewed_* flags followed by 9 is_clicked_* flags
    # (is_viewed_aip_tracking_card .. is_clicked_order_delivered_pill)
    interaction_part = tuple(coin_flip() for _ in range(14))

    # device_app_package_deployed_uuid
    device_part = (f"device_pkg_{random.randint(100,999)}",)

    # max_total_printer_count .. max_total_pc_count
    max_totals_part = (up_to(5), up_to(10), up_to(3), up_to(7))

    # associated_device_session_id, aip_device_uuid
    association_part = (
        f"assoc_session_{random.randint(100,999)}",
        f"aip_uuid_{random.randint(10000,99999)}",
    )

    # is_associated_device .. is_oobe_support_session (11 flags)
    closing_part = tuple(coin_flip() for _ in range(11))

    return (
        session_part
        + totals_part
        + interaction_part
        + device_part
        + max_totals_part
        + association_part
        + closing_part
    )
# Generate 100 synthetic sessions (sess_1 .. sess_100).
data = list(map(generate_row, range(1, 101)))

# Borrow the schema from the target table so the positional tuples receive
# the table's column names and types.
TRACKING_CARD_TABLE = "ts_migration.thoughtspot_inventory_ak.paas_tracking_card"
schema = spark.table(TRACKING_CARD_TABLE).schema

df = spark.createDataFrame(data, schema=schema)

# Append mode: re-running this cell adds another 100 rows.
df.write.mode("append").saveAsTable(TRACKING_CARD_TABLE)