In [0]:
%python
# Notebook parameters: one text widget per setting, each with its default value.
for widget_name, default_value in (
    ("catalog", "mpelletier"),
    ("database", "dbdemos"),
    ("volume", "input"),
):
    dbutils.widgets.text(widget_name, default_value)

In [0]:
%python
# Read the notebook parameters from the widgets; edit the widget values to
# point the notebook at a different catalog, schema or volume.
catalog, database, volume = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalog", "database", "volume")
)

# Echo the fully qualified schema the following cells will operate on.
print(f"{catalog}.{database}")

## Create and apply tags (Optional)

If you have enabled data classification, you can skip this step and go to **Create policies (ABAC)**

In [0]:
-- Attach classification tags to the personal-data columns of the students table.
-- Every statement sets a key-only tag (no value); keys are backtick-quoted
-- because they contain a dot.
SET TAG ON COLUMN ${catalog}.${database}.students.name
  `class.name`;
SET TAG ON COLUMN ${catalog}.${database}.students.username
  `class.name`;
SET TAG ON COLUMN ${catalog}.${database}.students.address
  `class.location`;
SET TAG ON COLUMN ${catalog}.${database}.students.phone_number
  `class.phone_number`;
SET TAG ON COLUMN ${catalog}.${database}.students.email
  `class.email_address`;

## Explore existing tags

Use these tables, found under `system.information_schema`:
- catalog_tags
- table_tags
- column_tags

In [0]:
-- Show up to ten distinct table-level tag names, in alphabetical order.
SELECT DISTINCT tag_name
FROM system.information_schema.table_tags
ORDER BY tag_name
LIMIT 10

### Multiple tags

To set several tags in one statement:

`ALTER TABLE catalog.schema.table SET TAGS ('key1' = 'value1', 'key2' = 'value2');`

In [0]:
-- Tag the students table with pii = yes; the row-filter policy later in this
-- notebook selects tables by the presence of the pii tag.
SET TAG ON TABLE ${catalog}.${database}.students
  pii = yes;

## Create functions

In [0]:
-- Masks any SSN input by returning a fully masked value.
--   ssn: the value to hide (ignored; the result is always the same literal).
-- CREATE OR REPLACE keeps the cell re-runnable: a plain CREATE FUNCTION fails
-- once the function already exists.
CREATE OR REPLACE FUNCTION mask_SSN(ssn STRING)
RETURNS STRING
RETURN '***-**-****' ;



## Create policies (ABAC)

In [0]:
-- Zero-argument predicate used as the ROW FILTER function of hide_pii_rows.
-- It always evaluates to FALSE.
-- NOTE(review): a filter that is always FALSE is expected to hide every row
-- from the targeted principals; confirm that this is the intended demo behavior.
CREATE OR REPLACE FUNCTION mpelletier.dbdemos.contains_pii()
  RETURNS BOOLEAN
  RETURN FALSE;

In [0]:
-- Remove the previous version of the policy so it can be recreated.
-- The securable type (TABLE) is part of the DROP POLICY syntax, matching the
-- ON TABLE clause used when the policy is created.
-- NOTE(review): this statement fails when the policy does not exist yet
-- (for example on the very first run); skip the cell in that case.
DROP POLICY hide_pii_rows ON TABLE ${catalog}.${database}.students

In [0]:
-- Row-filter policy on the students table: for the listed principal, rows are
-- filtered through contains_pii whenever the table carries the pii tag.
-- OR REPLACE keeps the cell re-runnable. The COMMENT now describes this policy;
-- the previous text referred to European customers.
-- NOTE(review): the reference template in this notebook spells the condition
-- has_tag(...); confirm which spelling the target runtime accepts.
CREATE OR REPLACE POLICY hide_pii_rows
ON TABLE ${catalog}.${database}.students
COMMENT 'Hide rows of tables tagged pii'
ROW FILTER mpelletier.dbdemos.contains_pii
TO `0b8f5441-81fe-485b-b38f-64b98a665675`
FOR TABLES
WHEN
  hasTag('pii')




In [0]:
-- Column-mask policy on the students table: for the listed principal (except
-- ask_group), every column matched by the tag condition is passed through mask_SSN.
-- OR REPLACE keeps the cell re-runnable; a plain CREATE POLICY fails once the
-- policy exists.
-- NOTE(review): the condition matches the class.location tag while the alias and
-- comment speak of social security numbers; confirm which columns should be masked.
-- NOTE(review): confirm that the principal ask_group exists.
CREATE OR REPLACE POLICY mask_information
ON TABLE ${catalog}.${database}.students
COMMENT 'Mask social security numbers'
COLUMN MASK mask_SSN
TO `0b8f5441-81fe-485b-b38f-64b98a665675`
EXCEPT ask_group
FOR TABLES
MATCH COLUMNS
  hasTag('class.location') as ssn
ON COLUMN ssn;



In [0]:
-- Quick check of the masking function on a sample value.
SELECT mask_SSN('123-45-6789')

In [0]:
-- List the policies defined on the schema selected through the notebook widgets
-- (previously hard-coded, which ignored the catalog/database parameters).
SHOW POLICIES ON SCHEMA ${catalog}.${database};

In [0]:
-- Reference syntax only. The statements below are templates: replace every
-- <placeholder> (and the sample names in the first example) before running.
-- As written, this cell is not executable.
-- The two statements were previously fused on a single line; they are now
-- separated and terminated individually.

-- Example: row filter attached at catalog level.
SET POLICY filter_by_state
ON CATALOG sales_prod
ROW FILTER state_lookup_filter
TO all_analysts EXCEPT analyst_managers
FOR TABLES
WHEN COLUMNS col_has_tag_value('pii','state');

-- General form of CREATE POLICY.
CREATE POLICY <policy_name>
ON <securable_type> <securable_name>
COMMENT '<policy_description>'
-- One of the following:
  ROW FILTER <udf_name>
  | COLUMN MASK <udf_name> ON COLUMN <target_column>
TO <principal_name>[, <principal_name>, ...]
[EXCEPT <principal_name>[, <principal_name>, ...]]
FOR TABLES
[WHEN has_tag('<key>') OR has_tag_value('<key>', '<value>')]
MATCH COLUMNS has_tag('<key>') OR has_tag_value('<key>', '<value>') AS <alias>
USING COLUMNS <alias>[, <alias>, ...];



In [0]:
-- Template: substitute <policy_name>, <securable_type> and <securable_name>
-- with real values before running; the placeholders are not valid SQL.
DROP POLICY <policy_name> ON <securable_type> <securable_name>



In [0]:
-- Inspect the students table in the schema selected through the notebook
-- widgets (previously hard-coded, which ignored the catalog/database parameters).
SELECT * FROM ${catalog}.${database}.students

## Test

Verify the policies from a non-owner identity:

- with a service principal (SP)
- with DuckDB


In [0]:
%python
# Use the Databricks CLI (through the `!` shell escape) to create the secret
# scope named abac-demo-scope.
# NOTE(review): presumably requires the `databricks` CLI to be installed and
# authenticated where this cell runs; confirm.
!databricks secrets create-scope --scope abac-demo-scope

In [0]:
%python
import subprocess

# Create the abac-demo-scope secret scope through the Databricks CLI.
# (The command creates a scope; it does not copy any secret.)
create_scope = subprocess.run(
    ["databricks", "secrets", "create-scope", "--scope", "abac-demo-scope"],
    capture_output=True,
    text=True,
)

# Report a failing exit status (for example when the scope already exists)
# instead of silently discarding it. Do not raise, so re-runs keep working.
if create_scope.returncode != 0:
    print(
        f"create-scope exited with code {create_scope.returncode}: "
        f"{create_scope.stderr.strip()}"
    )

# Leave the CompletedProcess as the cell output, as before.
create_scope

In [0]:
%python
# Look up the databricks-token secret in the abac-demo-scope scope.
# The call is the cell's last expression, so its value becomes the cell output.
dbutils.secrets.get(
    key="databricks-token",
    scope="abac-demo-scope",
)
       

In [0]:
%python
import getpass

# Ask for a password interactively (getpass does not echo the input) and keep
# it in `password`; only a confirmation message is printed.
prompt_text = "Enter your password: "
password = getpass.getpass(prompt_text)
print("Password received.")


In [0]:
%python
import time

from databricks.sdk import WorkspaceClient, AccountClient
from databricks.sdk.service import iam

# Workspace-level and account-level SDK clients, built with default settings.
w = WorkspaceClient()
a = AccountClient()

# A nanosecond timestamp in the display name gives each run a distinct name.
sp_display_name = f"sdk-{time.time_ns()}"
spn = w.service_principals.create(display_name=sp_display_name, groups=[])

# Show the created service principal as the cell output.
spn

In [0]:
-- Let the service principal read the tables of the schema selected through the
-- notebook widgets (previously hard-coded to one schema).
-- USE SCHEMA is the Unity Catalog privilege name; USAGE is the legacy spelling.
-- NOTE(review): the principal also needs USE CATALOG on the parent catalog to
-- reach these objects; confirm it is granted elsewhere.
GRANT USE SCHEMA ON SCHEMA ${catalog}.${database} TO `0b8f5441-81fe-485b-b38f-64b98a665675`;
GRANT SELECT ON SCHEMA ${catalog}.${database} TO `0b8f5441-81fe-485b-b38f-64b98a665675`;

In [0]:
%python
# Delete the service principal held in `spn`, using the client bound to `w`.
# NOTE(review): both `w` and `spn` are assigned in other cells (and `w` is
# reassigned later in the notebook), so this depends on execution order; confirm
# the intended client is active before running.
w.service_principals.delete(id=spn.id)

In [0]:
%python
import pandas as pd
from databricks.sdk import WorkspaceClient

# Service principal used to check the ABAC policies from a non-owner identity.
client_id = "0b8f5441-81fe-485b-b38f-64b98a665675"
# Never keep the OAuth secret in the notebook source. Store it in the secret
# scope under the key below and read it at run time. The secret that used to be
# hard-coded here must be considered leaked and should be rotated.
client_secret = dbutils.secrets.get(scope="abac-demo-scope", key="sp-client-secret")
tenant_id = "9f37a392-f0ae-4280-9796-f1864a10effc"  # not passed to the client below; kept for reference
workspace_url = "https://adb-984752964297111.11.azuredatabricks.net/"
warehouse_id = "148ccb90800933a1"  # SQL warehouse that executes the statement

# Initialize the WorkspaceClient with service principal authentication.
# NOTE: this rebinds `w`, so later cells talk to the workspace as the service principal.
w = WorkspaceClient(
    host=workspace_url,
    client_id=client_id,
    client_secret=client_secret,
)

# Query the students table of the schema selected through the notebook widgets.
query = f"SELECT * FROM {catalog}.{database}.students LIMIT 10"

# Execute the query on the SQL warehouse.
statement_response = w.statement_execution.execute_statement(
    statement=query,
    warehouse_id=warehouse_id,
)

if statement_response.result:
    # Rows come back as a list of lists of strings.
    data_array = statement_response.result.data_array or []

    if data_array:
        # Label the columns from the result manifest when it is available;
        # otherwise pandas falls back to positional column labels.
        manifest = statement_response.manifest
        column_names = (
            [column.name for column in manifest.schema.columns]
            if manifest and manifest.schema and manifest.schema.columns
            else None
        )
        df = pd.DataFrame(data_array, columns=column_names)

        # Display the DataFrame
        display(df)
    else:
        print("Query returned an empty dataset.")
else:
    # No inline result (statement still running, failed, or canceled): report
    # the state instead of finishing silently.
    status = statement_response.status
    state = status.state if status else "unknown"
    print(f"Statement returned no result (state: {state}).")

## Clean-up resources

In [0]:
-- Tear down what this notebook created. The previous content of this cell was
-- a list of pseudo-commands that is not valid SQL.

-- 1. Policies first: they reference the functions dropped in step 3.
DROP POLICY hide_pii_rows ON TABLE ${catalog}.${database}.students;
DROP POLICY mask_information ON TABLE ${catalog}.${database}.students;

-- 2. Tags set on the students table and its columns.
UNSET TAG ON TABLE ${catalog}.${database}.students pii;
UNSET TAG ON COLUMN ${catalog}.${database}.students.name `class.name`;
UNSET TAG ON COLUMN ${catalog}.${database}.students.username `class.name`;
UNSET TAG ON COLUMN ${catalog}.${database}.students.address `class.location`;
UNSET TAG ON COLUMN ${catalog}.${database}.students.phone_number `class.phone_number`;
UNSET TAG ON COLUMN ${catalog}.${database}.students.email `class.email_address`;

-- 3. Functions used by the policies.
DROP FUNCTION IF EXISTS mask_SSN;
DROP FUNCTION IF EXISTS mpelletier.dbdemos.contains_pii;

-- TODO: delete the service principals created for the test (see the Python
--       cell that calls service_principals.delete).
-- TODO: DROP TABLE / DROP SCHEMA are left as manual steps because they
--       destroy data; run them deliberately once the demo is over.
