# Compare All Grants Between HMS and Destination

Used Widgets: 
* Destination Catalog -- Type in what catalog you are migrating to

Ignored Widgets:
* Scan Catalog (always uses hive_metastore)
* Scan Database (always looks at all databases)

# Initialization
You will have to run both code cells in this section each time you reconnect to the cluster.

Note: The "from ... import" statement expects a single .py file, as included from GitHub.
If you are not using GitHub repos, create a notebook with the DbInventoryCollector.py file's contents in it, and change this line to read:

```%run ./Db-Inventory-Collector```

In [None]:
from DbInventoryCollector import InventoryCollector

In [None]:
from pyspark.sql.functions import *

#Create Widgets
InventoryCollector.CreateWidgets(dbutils, spark, reset=False)

#Instantiate and initialize collector class
collector = InventoryCollector(spark, dbutils.widgets.get("Inventory_Catalog"), dbutils.widgets.get("Inventory_Database"))
collector.initialize()

#This pulls out the widget values to a python variable.
#Paste these lines into a cell to enable automatic execution on widget change
# Surrounding whitespace is stripped so a value pasted with stray spaces is
# treated the same as the bare catalog name.
destCatalog = dbutils.widgets.get("Migration_Catalog").strip()

# Reject the "ChangeMeToDest" placeholder and an empty value. An empty catalog
# name would match no source_catalog rows in the comparison queries, so every
# hive_metastore grant would be reported as missing from the destination.
if not destCatalog or destCatalog == "ChangeMeToDest":
    raise ValueError("You must set the destination catalog using the widgets above")

# Scanning Databases

Scan both the HMS and destination catalogs for their grants, rescanning previous results to get the latest data.
The scans are commented out by default for speed. Rescanning hive is optional; rescanning the destination catalog is **required**.

In [None]:
# Uncomment the calls below to rescan grants for each catalog. As written, they
# request a grants-only rescan (scanObjects = False, scanGrants = True, rescan = True).
# collector.scan_all_databases("hive_metastore", scanObjects = False, scanGrants = True, rescan = True)
# collector.scan_all_databases(destCatalog, scanObjects = False, scanGrants = True, rescan = True)

# Compare Grants Between Hive_Metastore and Destination

In [None]:
def _latest_grants(catalog_name):
    """Return the most recently recorded grants for one catalog.

    Reads ``{collector.inventory_catdb}.grant_statements`` and keeps, for each
    (source_catalog, source_database) pair, only the rows with the newest
    ``execution_time``. RANK() is used, so rows tied on that timestamp are all
    kept.

    Args:
        catalog_name: Value matched against the ``source_catalog`` column.

    Returns:
        DataFrame with columns source_database, ObjectType, ActionType,
        ObjectKey, Principal and grant_statement.
    """
    # NOTE(review): catalog_name is interpolated directly into the SQL text. It
    # comes from a notebook widget, so this assumes a trusted operator -- confirm.
    # String literals use single quotes so they are never parsed as identifiers
    # when double-quoted identifiers are enabled.
    return spark.sql(f"""
WITH ranked_grants AS (
    SELECT *,
    RANK() OVER (PARTITION BY source_catalog,source_database ORDER BY execution_time DESC) as rank
    FROM {collector.inventory_catdb}.grant_statements)
SELECT source_database, ObjectType, ActionType, ObjectKey, Principal, grant_statement
FROM ranked_grants
WHERE source_catalog = '{catalog_name}' AND rank = 1
order by source_database, ObjectType, ObjectKey
""")


allGrants_hive = _latest_grants("hive_metastore")
allGrants_dest = _latest_grants(destCatalog)

# Columns that identify "the same grant" on both sides of the comparison.
joinColumns = ["source_database", "Principal", "ActionType", "ObjectType", "ObjectKey"]

# Translate the hive action names before joining (USAGE -> USE SCHEMA,
# CREATE_NAMED_FUNCTION -> CREATE FUNCTION, CREATE -> CREATE TABLE); all other
# actions pass through unchanged. The original name is kept as HiveAction so
# later cells can restore it.
# NOTE(review): READ_METADATA and OWN are excluded from the comparison,
# presumably because they have no comparable destination grant -- confirm.
allGrants_hive = allGrants_hive.select("source_database", "Principal", "ObjectType", "ObjectKey",
                                       col("ActionType").alias("HiveAction"),
                                       when(col("ActionType") == "USAGE", "USE SCHEMA")
                                       .when(col("ActionType") == "CREATE_NAMED_FUNCTION", "CREATE FUNCTION")
                                       .when(col("ActionType") == "CREATE", "CREATE TABLE")
                                       .otherwise(col("ActionType")).alias("ActionType")
    ).filter("ActionType != 'READ_METADATA' AND ActionType != 'OWN'")

# Grants found on both sides, only in hive_metastore, and only in the destination.
grantCompare_both = allGrants_hive.join(allGrants_dest, joinColumns, how = "inner")
grantCompare_hiveOnly = allGrants_hive.join(allGrants_dest, joinColumns, how = "left_anti")
grantCompare_destOnly = allGrants_dest.join(allGrants_hive, joinColumns, how = "left_anti")


In [None]:
# Grants that exist in both hive_metastore and the destination catalog
# (inner join on the grant key columns).
display(grantCompare_both)

In [None]:
# Grants recorded for hive_metastore that have no match in the destination
# catalog (left anti join).
display(grantCompare_hiveOnly)

In [None]:
# Grants recorded for the destination catalog that have no match in
# hive_metastore (left anti join).
display(grantCompare_destOnly)

# Generate DDL for Missing Grants

In [None]:
# Swap the translated ActionType column back out for the original hive action
# name (HiveAction) before handing the rows to the SQL generator.
hiveGrantTodo = (
    grantCompare_hiveOnly
    .drop('ActionType')
    .withColumnRenamed('HiveAction', 'ActionType')
)

# Build the grant script for the destination catalog, then split it into
# individual statements.
hiveGrantsString = collector.generate_migration_grant_sql(hiveGrantTodo, destCatalog)
hiveGrantsList = collector.split_sql_to_list(hiveGrantsString)

# Summary line (ending in a blank line) followed by the full generated script.
print(
    f"Generated {len(hiveGrantsList)} Grants to reconcile catalog {destCatalog} with hive_metastore:\n",
    hiveGrantsString,
    sep="\n",
)



In [None]:
#Execute Missing Grants
# Hands the list of generated grant statements (hiveGrantsList) to the collector
# for execution.
# NOTE(review): echo=True presumably prints each statement as it runs -- confirm
# against InventoryCollector.execute_sql_list.
collector.execute_sql_list(hiveGrantsList, echo=True)

In [None]:
#Rescan potentially changed databases to update our results
# Collect the distinct databases that had hive-only grants, then rescan just
# those databases (grants only) in the destination catalog.
distinct_db_rows = grantCompare_hiveOnly.select("source_database").distinct().collect()
missingDbs = [row.source_database for row in distinct_db_rows]
collector.scan_all_databases(
    destCatalog,
    scanObjects=False,
    scanGrants=True,
    rescan=True,
    databaseScanList=missingDbs,
)
