### Photon Compatibility Analysis
This notebook analyzes the `sql_df` data (read here from the `<catalog>.<schema>.sql` table) to identify how much of a Spark application's workload can be accelerated by Photon.
It parses the JSON query plans, flags each plan node as Photon-compatible or not, and calculates a "Photon Compatibility Score" for each application: the fraction of its plan nodes that are not on the list of known non-compatible operations.
A higher score indicates that a greater portion of the application's SQL workload is Photon-compatible, suggesting it is likely to benefit more from Photon. A low score highlights opportunities for optimization by refactoring non-compatible operations.

In [0]:
# Notebook parameters: the Unity Catalog catalog and schema that hold the
# tables read and written by the %sql cells in this notebook.
DEFAULT_SCHEMA_NAME = "spark_observability"

dbutils.widgets.text("catalog_name", "", "Catalog (required)")
# Seed the widget with the default schema. The %sql cells bind `:schema_name`
# straight from the widget, so an empty widget default would make them target
# a different (invalid) schema than the SCHEMA_NAME fallback used here.
dbutils.widgets.text("schema_name", DEFAULT_SCHEMA_NAME, "Schema")

CATALOG_NAME = dbutils.widgets.get("catalog_name").strip()
# Fallback kept for callers that read SCHEMA_NAME after the widget was cleared.
# NOTE(review): the %sql cells still read the raw widget value, so a manually
# cleared schema widget must be refilled before running them.
SCHEMA_NAME = dbutils.widgets.get("schema_name").strip() or DEFAULT_SCHEMA_NAME

# UC Validation
if not CATALOG_NAME:
    raise ValueError("catalog widget must point to an existing catalog")

# Bind the schema name through IDENTIFIER(:param), the same mechanism the %sql
# cells use, instead of interpolating widget text into the SQL string.
spark.sql(
    "CREATE SCHEMA IF NOT EXISTS IDENTIFIER(:schema_fqn)",
    args={"schema_fqn": f"{CATALOG_NAME}.{SCHEMA_NAME}"},
)

In [0]:
%sql

-- Builds <catalog>.<schema>.photonanalysis: one row per (cluster_name, application_id)
-- with jobphotonperc = share of that application's SQL plan nodes that are not on
-- the list of known Photon-incompatible operations.
create or replace table IDENTIFIER(:catalog_name || '.' || :schema_name || '.' || 'photonanalysis')
as
with
-- One row per plan node: explode the `nodes` array parsed from sql_raw_json.
-- The lateral view is not OUTER, so rows whose JSON does not parse or that
-- have no nodes produce no output rows.
raw as (
    select
        cluster_name,
        application_id,
        nodemetrics
    from IDENTIFIER(:catalog_name || '.' || :schema_name || '.' || 'sql')
    lateral view explode(try_variant_get(try_parse_json(sql_raw_json), '$.nodes', 'array<struct<nodeId: INT, nodeName: STRING, metrics: array<struct<name:STRING, value:STRING>>>>')) as nodemetrics
),

-- photonbinary: 0 = node is on the incompatible list, 1 = everything else
-- (including nodes with a NULL name).
photoncheck as (
    select
        cluster_name,
        application_id,
        case
            when nodemetrics.nodeName in (
                'MapElements',
                'MapPartitions',
                'PythonUDF',
                'ScalaUDF',
                'FlatMapGroupsInPandas',
                'DeserializeToObject',
                'SerializeFromObject'
            ) then 0
            -- Spark names file scans 'Scan <format> <table>' (the table part can be
            -- empty, leaving a trailing space), so an exact match on 'Scan csv' /
            -- 'Scan json' misses them; accept the bare name or the name plus suffix.
            when nodemetrics.nodeName = 'Scan csv' or nodemetrics.nodeName like 'Scan csv %' then 0
            when nodemetrics.nodeName = 'Scan json' or nodemetrics.nodeName like 'Scan json %' then 0
            else 1
        end as photonbinary
    from raw
),

-- Per-application score: compatible nodes / all nodes.
jobcheck as (
    select
        cluster_name,
        application_id,
        try_divide(sum(photonbinary), count(*)) as jobphotonperc
    from photoncheck
    group by
        cluster_name,
        application_id
)

select
    cluster_name,
    application_id,
    jobphotonperc
from jobcheck

In [0]:
%sql
-- Exploratory view: how often each plan node name occurs across all SQL
-- executions in <catalog>.<schema>.sql. Useful for spotting node names that
-- should be added to the Photon-incompatible list.
with
-- One row per plan node, keeping only its name. The compatibility flag is not
-- computed here because this result never reads it.
node_names as (
    select
        nodemetrics.nodeName as nodename
    from IDENTIFIER(:catalog_name || '.' || :schema_name || '.' || 'sql')
    lateral view explode(try_variant_get(try_parse_json(sql_raw_json), '$.nodes', 'array<struct<nodeId: INT, nodeName: STRING, metrics: array<struct<name:STRING, value:STRING>>>>')) as nodemetrics
),

-- cuenta = number of plan nodes carrying that name.
pu as (
    select
        nodename,
        count(*) as cuenta
    from node_names
    group by nodename
)

select
    nodename,
    cuenta
from pu
-- Most frequent node names first; nodename breaks ties so the order is stable.
order by
    cuenta desc,
    nodename