**Helper notebook**

In [None]:
%run nb_helper

**Define a logging dataframe**

In [None]:
# One row is appended to this frame by each logged step of the notebook (outcome plus any error text).
dfLogging = pd.DataFrame(
    columns=[
        "LoadId",
        "NotebookId",
        "NotebookName",
        "WorkspaceId",
        "CellId",
        "Timestamp",
        "ElapsedTime",
        "Message",
        "ErrorMessage",
    ]
)

# Identity of the notebook that is currently executing, read from the runtime context.
vContext = mssparkutils.runtime.context
vNotebookId, vLogNotebookName, vWorkspaceId = (
    vContext[context_key]
    for context_key in ("currentNotebookId", "currentNotebookName", "currentWorkspaceId")
)
# vWorkspaceId is the workspace where this notebook runs; do not confuse it with the source and target workspaces.

**Parameters --> convert this cell to code when debugging the notebook. Otherwise, keep it commented out, as the parameters are passed from the DevOps pipelines**

In [None]:
# Access tokens handed over by the caller; they are only used when pDebugMode is not "yes".
pToken = ""
pSqlToken = ""
# Ids of the workspace to read from and of the workspace to deploy to.
pSourceWorkspaceId = ""
pTargetWorkspaceId = ""
# "yes" -> use the token of the current user and print caught exceptions.
pDebugMode = "yes"
# Optional JSON strings describing custom OneLake roles, rules and members.
# An empty pOnelakeRoles means the roles are read from the source lakehouses instead.
pOnelakeRoles = ''
pOnelakeRules = ''
pOnelakeEntraMembers = ''
pOnelakeItemMembers = ''

**Resolve source and target workspace names**

In [None]:
# Translate the workspace ids received as parameters into workspace names;
# the lakehouse listing and lakehouse table/column calls in this notebook take the name.
vSourceWorkspaceName = fabric.resolve_workspace_name(pSourceWorkspaceId)
vTargetWorkspaceName = fabric.resolve_workspace_name(pTargetWorkspaceId)

**List source and target lakehouses**

In [None]:
# One dataframe per workspace; the 'Lakehouse Name', 'Lakehouse ID' and
# 'SQL Endpoint Connection String' columns are read from them further down.
df_source_lakehouses = labs.list_lakehouses(workspace=vSourceWorkspaceName)
df_target_lakehouses = labs.list_lakehouses(workspace=vTargetWorkspaceName)

**Verify that there is at least one lakehouse in the source**

In [None]:
# Stop early (without raising) when the source workspace holds no lakehouse: there is nothing to prepare.
if df_source_lakehouses.empty:
    # the workspace name must be wrapped in braces, otherwise the literal text "vSourceWorkspaceName" is emitted
    vMessage = f"workspace <{vSourceWorkspaceName}> have 0 lakehouse. pre-update is not required."

    # Display an exit message
    display(Markdown("### ✅ Notebook execution stopped successfully!"))

    # Exit without error; vMessage is handed back as the notebook exit value
    mssparkutils.notebook.exit(vMessage)

**Variables related to the logic**

In [None]:
# Version segment of the Fabric REST url and the conflict policy used for shortcuts.
vApiVersion = "v1"
vShortcutConflictPolicy = "Abort"

# Custom roles are in play only when role definitions were passed in as a parameter.
vCustomRoles = "no" if pOnelakeRoles == "" else "yes"

**Resolve source and target workspace ids**

In [None]:
# The ids arrive as parameters; keep them under the v-prefixed names used by the rest of the notebook.
vSourceWorkspaceId, vTargetWorkspaceId = pSourceWorkspaceId, pTargetWorkspaceId

**Access token**

In [None]:
vScope = "https://analysis.windows.net/powerbi/api"

# pick the tokens used for the REST calls and for the sql endpoint
if pDebugMode != "yes":
    # run from DevOps: both tokens are passed in as parameters
    vAccessToken = pToken
    vSqlAccessToken = pSqlToken
else:
    # debug mode: request a token for the current user and reuse it for sql
    vAccessToken = mssparkutils.credentials.getToken(vScope)
    vSqlAccessToken = vAccessToken

**Base Url and Headers**

In [None]:
# Root of every Fabric REST call made below, and the bearer header sent with it.
vBaseUrl = "https://api.fabric.microsoft.com/{}/".format(vApiVersion)
vHeaders = {"Authorization": "Bearer {}".format(vAccessToken)}

**Functions**

In [None]:
def input_for_full_deployment(df_source_lakehouse_columns):
    """Build one CREATE TABLE statement per source table.

    Parameters
    ----------
    df_source_lakehouse_columns : pandas.DataFrame
        One row per column, with the columns "WorkspaceTargetName", "Lakehouse Name",
        "Table Name", "Column Name" and "Data Type". The dataframe is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (WorkspaceTargetName, Lakehouse Name, Table Name) with
        "ColumnNameDataType" (comma separated "<column> <type>" pairs) and
        "SqlStatement" (the CREATE TABLE statement).
    """

    # work on a copy: the caller frequently passes a filtered slice, and adding
    # helper columns to it would both warn and leak into the caller's data
    df = df_source_lakehouse_columns.copy()

    # if the column has space in it, replace it with an underscore (literal match, not a regex)
    df["Column Name"] = df["Column Name"].str.replace(' ', '_', regex=False)

    # concat the column name and data type
    df["ColumnNameDataType"] = df["Column Name"] + " " + df["Data Type"]

    # group the columns: one comma separated column list per table
    df_grouped = (
        df.groupby(["WorkspaceTargetName", "Lakehouse Name", "Table Name"])['ColumnNameDataType']
        .agg(','.join)
        .reset_index()
    )

    # generate the sql statement
    df_grouped["SqlStatement"] = (
        "CREATE TABLE "
        + df_grouped["Lakehouse Name"]
        + "."
        + df_grouped["Table Name"]
        + "("
        + df_grouped["ColumnNameDataType"]
        + ")"
    )

    # return the dataframe
    return df_grouped

In [None]:
def input_for_incremental_deployment(df_source_lakehouse_columns_incremental, incremental_type):
    """Build the SQL statements that bring existing target tables in line with the source.

    Parameters
    ----------
    df_source_lakehouse_columns_incremental : pandas.DataFrame
        One row per column to process, with the columns "WorkspaceTargetName",
        "Lakehouse Name", "Table Name", "Column Name" and "Data Type".
        The dataframe is not modified.
    incremental_type : str
        "alter table add column"  -> one ALTER TABLE ... ADD COLUMNS statement per table.
        "alter table drop column" -> SET TBLPROPERTIES followed by DROP COLUMNS per table.
        any other value           -> data type change, emulated with add / update / drop / rename.

    Returns
    -------
    pandas.DataFrame
        One row per (WorkspaceTargetName, Lakehouse Name, Table Name) with the statements in
        "SqlStatement" (several statements are separated by a newline). The add-column
        result additionally keeps the "ColumnNameDataType" column.
    """

    group_keys = ["WorkspaceTargetName", "Lakehouse Name", "Table Name"]

    # work on a copy: helper columns are added below and must not leak into the caller's dataframe
    df = df_source_lakehouse_columns_incremental.copy()

    # if the column has space in it, replace it with an underscore (literal match, not a regex)
    df["Column Name"] = df["Column Name"].str.replace(' ', '_', regex=False)

    # concat the column name and data type
    df["ColumnNameDataType"] = df["Column Name"] + " " + df["Data Type"]

    def _join_per_table(column, separator=','):
        # one row per table, with the values of `column` joined by `separator`
        return df.groupby(group_keys)[column].agg(separator.join).reset_index()

    def _table_name(frame):
        # "<lakehouse>.<table>" for every row of `frame`
        return frame["Lakehouse Name"] + "." + frame["Table Name"]

    def _statement_frame(column, prefix, infix, suffix):
        # one statement per table: prefix + table + infix + joined values of `column` + suffix
        grouped = _join_per_table(column)
        grouped["SqlStatement"] = prefix + _table_name(grouped) + infix + grouped[column] + suffix
        return grouped.drop(columns=column)

    def _column_mapping_frame():
        # dropping (and renaming) columns requires column mapping to be enabled on the table first
        grouped = _join_per_table("Column Name")
        grouped["SqlStatement"] = "ALTER TABLE " + _table_name(grouped) + " SET TBLPROPERTIES ('delta.columnMapping.mode'='name', 'delta.minReaderVersion'='2','delta.minWriterVersion'='5');"
        return grouped.drop(columns="Column Name")

    def _merge_statements(frames):
        # stack the per-step frames and glue the statements of each table together, in step order
        df_union = pd.concat(frames, ignore_index=True)
        return df_union.groupby(group_keys)['SqlStatement'].apply(lambda x: '\n'.join(x)).reset_index()

    if incremental_type == "alter table add column":

        # group the columns and generate the sql statement for adding columns
        df_grouped = _join_per_table("ColumnNameDataType")
        df_grouped["SqlStatement"] = "ALTER TABLE " + _table_name(df_grouped) + " ADD COLUMNS(" + df_grouped["ColumnNameDataType"] + ")"

    elif incremental_type == "alter table drop column":

        # this statement is required before dropping the columns:
        # ALTER TABLE data_types SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name', 'delta.minReaderVersion' = '2','delta.minWriterVersion' = '5')
        df_grouped = _merge_statements([
            _column_mapping_frame(),
            _statement_frame("Column Name", "ALTER TABLE ", " DROP COLUMNS(", ")"),
        ])

    else:
        # ALTER TABLE ALTER COLUMN does not work as of 02.2025
        # the alternative is the following logic (example with 2 columns changing data types):
        # ALTER TABLE data_types SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name', 'delta.minReaderVersion' = '2','delta.minWriterVersion' = '5')
        # ALTER TABLE data_types ADD COLUMNS (C_New INT, D_New INT)
        # UPDATE data_types SET C_New = C, D_New = D
        # ALTER TABLE data_types DROP COLUMNS (C,D)
        # ALTER TABLE data_types RENAME COLUMN C_New TO C;
        # ALTER TABLE data_types RENAME COLUMN D_New TO D;

        # add the required columns for the logic
        df['InputForAddingColumns'] = df['Column Name'] + '_New ' + df['Data Type']
        df['InputForUpdatingColumns'] = df['Column Name'] + '_New =' + df['Column Name']

        # one RENAME statement per column, built row-wise so that no groupby.apply over the
        # grouping columns is needed (that pattern is deprecated and fails on empty input)
        df['RenameStatement'] = "ALTER TABLE " + _table_name(df) + " RENAME COLUMN " + df['Column Name'] + '_New TO ' + df['Column Name']
        df_rename = _join_per_table('RenameStatement', ';\n')
        df_rename['SqlStatement'] = df_rename['RenameStatement'] + ';'
        df_rename = df_rename.drop(columns='RenameStatement')

        df_grouped = _merge_statements([
            _column_mapping_frame(),
            _statement_frame('InputForAddingColumns', 'ALTER TABLE ', ' ADD COLUMNS (', ');'),
            _statement_frame('InputForUpdatingColumns', 'UPDATE ', ' SET ', ';'),
            _statement_frame('Column Name', 'ALTER TABLE ', ' DROP COLUMNS (', ');'),
            df_rename,
        ])

    # return the dataframe
    return df_grouped

In [None]:
def shortcut_body(target_type, name, path, target_location, target_subpath, target_connection_id):
    """Assemble the request body for a shortcut that points to external storage.

    The body holds the shortcut `name` and `path` plus a "target" entry keyed by
    the target type, with the location, subpath and connection id filled in
    through `replace_placeholders_in_json`.

    `target_type` may be "AdlsGen2", "AmazonS3", "GoogleCloudStorage" or
    "S3Compatible"; any other value results in an empty key under "target".
    """

    # key used under "target" for each supported target type
    target_keys = {
        "AdlsGen2": "adlsGen2",
        "AmazonS3": "amazonS3",
        "GoogleCloudStorage": "googleCloudStorage",
        "S3Compatible": "s3Compatible",
    }
    target_key = target_keys.get(target_type, "")

    # target section with placeholders, and the values that go into them
    target_template = {
        "target": {
            target_key: {
                "location": "{location_}",
                "subpath": "{subpath_}",
                "connectionId": "{connectionId_}"
            }
        }
    }
    placeholder_values = {
        "location_": target_location,
        "subpath_": target_subpath,
        "connectionId_": target_connection_id
    }

    # start from the common part, then inject the resolved target section
    body = {"name": name, "path": path}
    body.update(replace_placeholders_in_json(target_template, placeholder_values))

    return body

In [None]:
def onelake_shortcut_body(target_type, name, path, target_onelake_workspace_id, target_one_lake_item_id, target_onelake_path):
    """Assemble the request body for a shortcut that points to another OneLake item.

    The body holds the shortcut `name` and `path` plus a "target" -> "oneLake" entry
    with the workspace id, item id and path filled in through
    `replace_placeholders_in_json`. `target_type` is accepted but not used here.
    """

    # target section with placeholders, and the values that go into them
    target_template = {
        "target": {
            "oneLake": {
                "workspaceId": "{workspaceId_}",
                "itemId": "{itemId_}",
                "path": "{path_}"
            }
        }
    }
    placeholder_values = {
        "workspaceId_": target_onelake_workspace_id,
        "itemId_": target_one_lake_item_id,
        "path_": target_onelake_path
    }

    # start from the common part, then inject the resolved target section
    body = {"name": name, "path": path}
    body.update(replace_placeholders_in_json(target_template, placeholder_values))

    return body

In [None]:
def create_lakehouse(lakehouse_name, url, headers, operation, workspace_target_id, item_type, sleep_in_seconds, debug_mode):
    """Request the creation of a lakehouse named `lakehouse_name`.

    Builds the item definition (display name, type "Lakehouse", description) and
    forwards it, together with the remaining arguments and the 'post' method,
    to `create_or_update_fabric_item`. Nothing is returned.
    """

    # item definition sent as the json body
    payload = dict(
        displayName=f"{lakehouse_name}",
        type="Lakehouse",
        description=f"Lakehouse {lakehouse_name} created by fabric deployment notebook",
    )

    # hand over to the shared helper
    create_or_update_fabric_item(
        url, headers, payload, 'post', operation,
        workspace_target_id, lakehouse_name, item_type, sleep_in_seconds, debug_mode,
    )

In [None]:
def create_notebook(notebook_name, url, headers, operation, workspace_target_id, item_type, sleep_in_seconds, debug_mode):
    """Request the creation of a notebook named `notebook_name`.

    Builds the item definition (display name, type "Notebook", description) and
    forwards it, together with the remaining arguments and the 'post' method,
    to `create_or_update_fabric_item`. Nothing is returned.
    """

    # item definition sent as the json body
    payload = dict(
        displayName=f"{notebook_name}",
        type="Notebook",
        description=f"Notebook {notebook_name} created by fabric deployment notebook",
    )

    # hand over to the shared helper
    create_or_update_fabric_item(
        url, headers, payload, 'post', operation,
        workspace_target_id, notebook_name, item_type, sleep_in_seconds, debug_mode,
    )

In [None]:
# create the alchemy engine
def create_sqlalchemy_engine(connection_string : str):
    """Create a SQLAlchemy engine whose connections authenticate with an access token.

    The token is read from the notebook-level `vSqlAccessToken` and handed to
    `pyodbc.connect` through the SQL_COPT_SS_ACCESS_TOKEN (1256) pre-connect attribute.

    Parameters
    ----------
    connection_string : str
        ODBC connection string passed unchanged to `pyodbc.connect`.

    Returns
    -------
    The engine returned by `sqlalchemy.create_engine`; every new connection is
    opened by calling `pyodbc.connect` with the token attribute.
    """
    SQL_COPT_SS_ACCESS_TOKEN = 1256

    # structure the token for pyodbc.connect: the token bytes, each followed by a zero byte
    # (UTF-16-LE; identical to the manual interleaving for ASCII tokens), prefixed with their length
    exptoken = vSqlAccessToken.encode("utf-16-le")
    tokenstruct = struct.pack("=i", len(exptoken)) + exptoken

    def _connect():
        # a fresh bytearray is passed on every connection attempt
        return pyodbc.connect(connection_string, attrs_before = { SQL_COPT_SS_ACCESS_TOKEN:bytearray(tokenstruct) })

    return sqlalchemy.create_engine("mssql+pyodbc://", creator=_connect)

**Identify source lakehouses**

In [None]:
# Collect the tables and the columns of every source lakehouse.
# The outcome is appended to dfLogging; an exception is logged (and printed in debug mode), not re-raised.
try:

    # define the source lakehouse dataframes so that they exist even if the loop fails
    df_source_lakehouse_columns = pd.DataFrame()
    df_source_lakehouse_tables = pd.DataFrame()

    # per-lakehouse results are collected first and concatenated once after the loop,
    # instead of re-copying the accumulated dataframe on every iteration
    source_lakehouse_tables_parts = []
    source_lakehouse_columns_parts = []

    # iterate over the source lakehouses
    for lakehouse in df_source_lakehouses['Lakehouse Name']:

        # 1. get the tables
        df = labs.lakehouse.get_lakehouse_tables(lakehouse = lakehouse, workspace = vSourceWorkspaceName)
        source_lakehouse_tables_parts.append(df)

        # 2. get the tables and columns
        df = labs.lakehouse.get_lakehouse_columns(lakehouse = lakehouse, workspace = vSourceWorkspaceName)
        source_lakehouse_columns_parts.append(df)

    # pd.concat rejects an empty list, so keep the empty dataframes when nothing was collected
    if source_lakehouse_tables_parts:
        df_source_lakehouse_tables = pd.concat(source_lakehouse_tables_parts, ignore_index=True)
    if source_lakehouse_columns_parts:
        df_source_lakehouse_columns = pd.concat(source_lakehouse_columns_parts, ignore_index=True)

    # add the target workspace to the lakehouse columns dataframe
    df_source_lakehouse_columns["WorkspaceTargetName"] = vTargetWorkspaceName

    # keep the required columns in the lakehouse tables dataframe
    columns_to_drop = ["Workspace Name", "Format", "Type", "Location"]
    df_source_lakehouse_tables = df_source_lakehouse_tables.drop(columns=columns_to_drop)

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify source lakehouses', datetime.now(), None, vMessage, '']
except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify source lakehouses', datetime.now(), None, vMessage, str(e) ]
    if pDebugMode == "yes":
        print(str(e))

**Create target lakehouses and notebooks --> at this stage, these would be empty shells**


In [None]:
# For every source lakehouse: create it in the target workspace when it is missing there,
# then create its two definition notebooks. The outcome is appended to dfLogging.
try:

    # sleep time before checking the operation's status in post requests
    vSleepInSeconds = 30

    # perform the deployment
    for lakehouse in df_source_lakehouses['Lakehouse Name']:

        # lakehouse name, and the names of the notebooks used to define the lakehouse content
        vLakehouseName = lakehouse
        vTargetNotebookName = "_".join(["nb", vLakehouseName, "definition"])
        vTargetSqlNotebookName = "_".join(["nb", vLakehouseName, "sql_definition"])

        # rows of the target lakehouse list that match the current lakehouse
        vTargetLakehouseMatch = df_target_lakehouses['Lakehouse Name'] == vLakehouseName
        df_target_lakehouses_in_scope = df_target_lakehouses[vTargetLakehouseMatch]

        # items endpoint of the target workspace
        vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/items"

        # no match in the target workspace --> the lakehouse has to be created first
        if df_target_lakehouses_in_scope.empty:
            create_lakehouse(vLakehouseName, vUrl, vHeaders, "creating", vTargetWorkspaceId, "lakehouse", vSleepInSeconds, pDebugMode)

        # the correspondent notebook and sql notebook are requested in both cases
        create_notebook(vTargetNotebookName, vUrl, vHeaders, "creating", vTargetWorkspaceId, "notebook", vSleepInSeconds, pDebugMode)
        create_notebook(vTargetSqlNotebookName, vUrl, vHeaders, "creating", vTargetWorkspaceId, "notebook", vSleepInSeconds, pDebugMode)

    # logging
    vMessage = "succeeded"
    dfLogging.loc[len(dfLogging.index)] = [
        None, vNotebookId, vLogNotebookName, vWorkspaceId,
        'create target lakehouses and notebooks', datetime.now(), None, vMessage, '',
    ]

except Exception as e:
    vMessage = "failed"
    dfLogging.loc[len(dfLogging.index)] = [
        None, vNotebookId, vLogNotebookName, vWorkspaceId,
        'create target lakehouses and notebooks', datetime.now(), None, vMessage, str(e),
    ]
    if pDebugMode == "yes":
        print(str(e))

**Source and target sql analytics endpoints**

In [None]:
# Refresh the target lakehouse list (lakehouses may have been created above), then take
# the sql endpoint connection string from the row labelled 0 of each list.
df_target_lakehouses = labs.list_lakehouses(workspace=vTargetWorkspaceName)
vSourceSqlEndpoint = df_source_lakehouses['SQL Endpoint Connection String'].loc[0]
vTargetSqlEndpoint = df_target_lakehouses['SQL Endpoint Connection String'].loc[0]

**Identify source shortcuts, folders, access roles and sql objects**

In [None]:
# For every source lakehouse, collect: (1) shortcuts, (2) folders under Files, (3) OneLake access
# roles (or the custom roles passed as parameters), (4) routines and views of the sql endpoint,
# (5) security policies of the sql endpoint.
# Steps 1-3 catch and print their own errors so that the loop continues; a failure in steps 4-5
# or in the final column renaming is caught by the outer handler and written to dfLogging.
try:

    # define the dataframes
    df_shortcuts = pd.DataFrame()
    df_folders = pd.DataFrame()
    df_onelake_roles = pd.DataFrame()
    df_sql_objects = pd.DataFrame()
    df_routines = pd.DataFrame()  # not referenced again in this cell

    # get a token for storage
    vOnelakeHeaders = {"authorization": f"bearer {mssparkutils.credentials.getToken('storage')}"}

    # iterate over the lakehouses
    for index, row in df_source_lakehouses.iterrows():

        # set the lakehouse name and id
        vLakehouseName = row['Lakehouse Name']
        vLakehouseSourceId = row['Lakehouse ID']
        vLakehouseTargetId = labs.resolve_lakehouse_id(lakehouse=vLakehouseName, workspace=vTargetWorkspaceName)


        # 1. extract shortcuts
        vExtractionType = "shortcuts"
        vShortcutUrl = f"workspaces/{vSourceWorkspaceId}/items/{vLakehouseSourceId}/shortcuts"
        vUrl = vBaseUrl + vShortcutUrl

        print(f"extracting shortcuts for lakehouse {vLakehouseName}")

        # create the api global dataframe for shortcuts
        # NOTE(review): api_call_main is defined outside this cell; presumably it fills the
        # notebook-level api_call_global_dataframe with the response rows - confirm in nb_helper
        api_call_global_dataframe = pd.DataFrame()

        try:

            # make the api call
            api_call_main(vUrl, vHeaders, pDebugMode, vExtractionType)
            api_call_global_dataframe['WorkspaceSourceId'] = vSourceWorkspaceId
            api_call_global_dataframe['WorkspaceTargetId'] = vTargetWorkspaceId
            api_call_global_dataframe['LakehouseTargetName'] = vLakehouseName
            api_call_global_dataframe['LakehouseTargetId'] = vLakehouseTargetId


            # concat to the correspondent dataframe
            df_shortcuts = pd.concat([df_shortcuts, api_call_global_dataframe], ignore_index=True)

            # logging
            vMessage = f"extracting shortcuts for lakehouse {vLakehouseName} succeeded"
            print(vMessage)

        except Exception as e:
            vMessage = f"extracting shortcuts for lakehouse {vLakehouseName} failed"
            print(vMessage)
            print(str(e))


        # 2. extract folders
        vExtractionType = "file_system"
        vUrl = f'https://onelake.dfs.fabric.microsoft.com/{vSourceWorkspaceName}/{vLakehouseName}.lakehouse/Files?recursive=True&resource=filesystem'
        print(f"extracting folders for lakehouse {vLakehouseName}")

        # create the api global dataframe
        api_call_global_dataframe = pd.DataFrame()

        try:

            # make the api call
            api_call_main(vUrl, vOnelakeHeaders, pDebugMode, vExtractionType)

            # strip the "<source lakehouse id>/" prefix from the returned names (the pattern is applied as a regex)
            api_call_global_dataframe['FolderName'] = api_call_global_dataframe['name'].replace(vLakehouseSourceId + "/", '', regex=True)

            # explicit copy: columns are added right below, which must not happen on a slice of the api dataframe
            api_call_global_dataframe_new = api_call_global_dataframe[['FolderName','isDirectory']].copy()

            api_call_global_dataframe_new['WorkspaceSourceId'] = vSourceWorkspaceId
            api_call_global_dataframe_new['WorkspaceTargetId'] = vTargetWorkspaceId
            api_call_global_dataframe_new['LakehouseTargetName'] = vLakehouseName
            api_call_global_dataframe_new['LakehouseTargetId'] = vLakehouseTargetId

            # keep directories only; isDirectory is compared with the string 'true'
            df_folders_temp = api_call_global_dataframe_new[api_call_global_dataframe_new['isDirectory'] == 'true']

            # concat to the correspondent dataframe
            df_folders = pd.concat([df_folders, df_folders_temp], ignore_index=True)

            # logging
            vMessage = f"extracting folders of lakehouse {vLakehouseName} succeeded"
            print(vMessage)

        except Exception as e:
            vMessage = f"extracting files and folders of lakehouse {vLakehouseName} failed"
            print(vMessage)
            print(str(e))

        # 3. extract onelake access
        # if no custom roles are provided for the target lakehouses, use roles defined in the source lakehouses
        if vCustomRoles == "no":

            vExtractionType = "onelake_access"
            vShortcutUrl = f"workspaces/{vSourceWorkspaceId}/items/{vLakehouseSourceId}/dataAccessRoles"
            vUrl = vBaseUrl + vShortcutUrl
            print(f"extracting onelake access for lakehouse {vLakehouseName}")

            # create the api global dataframe for the access roles
            api_call_global_dataframe = pd.DataFrame()

            try:

                # make the api call
                api_call_main(vUrl, vHeaders, pDebugMode, vExtractionType)

                api_call_global_dataframe['WorkspaceSourceId'] = vSourceWorkspaceId
                api_call_global_dataframe['WorkspaceTargetId'] = vTargetWorkspaceId
                api_call_global_dataframe['lakehouse'] = vLakehouseName
                api_call_global_dataframe['LakehouseTargetId'] = vLakehouseTargetId


                # concat to the correspondent dataframe
                df_onelake_roles = pd.concat([df_onelake_roles, api_call_global_dataframe], ignore_index=True)

                # prepare the rules, entra members and item members dataframes
                # (recomputed on every iteration from all roles collected so far)
                df_role_rules = flatten_nested_json_df(df_onelake_roles[['id', 'decisionRules']].explode('decisionRules').dropna())
                # drop the "Action" rows whose value is not "Read" ...
                condition_1 = (df_role_rules["decisionRules.permission.attributeName"] == "Action") & (df_role_rules["decisionRules.permission.attributeValueIncludedIn"] != "Read")
                df_role_rules_1 = df_role_rules[~condition_1]
                # ... and the "Path" rows whose value is "Read"
                condition_2 = (df_role_rules_1["decisionRules.permission.attributeName"] == "Path") & (df_role_rules_1["decisionRules.permission.attributeValueIncludedIn"] == "Read")
                df_role_rules_2 = df_role_rules_1[~condition_2]
                df_role_rules = df_role_rules_2
                df_entra_members = flatten_nested_json_df(df_onelake_roles[['id', 'members.microsoftEntraMembers']].explode('members.microsoftEntraMembers').dropna())
                df_item_members = flatten_nested_json_df(df_onelake_roles[['id', 'members.fabricItemMembers']].explode('members.fabricItemMembers').dropna())

                # logging
                vMessage = f"extracting onelake access for lakehouse {vLakehouseName} succeeded"
                print(vMessage)

            except Exception as e:
                vMessage = f"extracting onelake access for lakehouse {vLakehouseName} failed"
                print(vMessage)
                print(str(e))
        else: # use the parameters provided for the roles

            # onelake roles
            onelake_roles = json.loads(pOnelakeRoles)
            df_onelake_roles = pd.DataFrame(onelake_roles)

            # onelake rules
            role_rules = json.loads(pOnelakeRules)
            df_role_rules = pd.DataFrame(role_rules)

            # onelake entra members
            entra_members = json.loads(pOnelakeEntraMembers)
            df_entra_members = pd.DataFrame(entra_members)

            # onelake item members
            item_members = json.loads(pOnelakeItemMembers)
            df_item_members = pd.DataFrame(item_members)


        # 4. extraction of sql objects created in the sql endpoint
        # (not wrapped in an inner try: a failure here ends the whole loop via the outer handler)
        print(f"extracting routines and views for lakehouse {vLakehouseName}")
        vSqlStatement = """
        SELECT 
            a.ROUTINE_CATALOG AS LakehouseName, 
            a.ROUTINE_SCHEMA AS SchemaName, 
            a.ROUTINE_NAME AS ObjectName, 
            '' AS DropStatement,
            REPLACE(a.ROUTINE_DEFINITION, 'CREATE', 'CREATE OR ALTER') AS CreateStatement,
            'Routines' AS ObjectType
        FROM 
            INFORMATION_SCHEMA.ROUTINES a
        UNION
        SELECT 
            TABLE_CATALOG, 
            TABLE_SCHEMA, 
            TABLE_NAME, 
            '' AS DropStatement,
            REPLACE(VIEW_DEFINITION, 'CREATE', 'CREATE OR ALTER') AS CreateStatement, 
            'View' AS ObjectType
        FROM 
            INFORMATION_SCHEMA.VIEWS
        WHERE 
            TABLE_SCHEMA NOT IN ('sys','queryinsights')
        """

        spark_df_sql_objects = spark.read.option(Constants.WorkspaceId, vSourceWorkspaceId).option(Constants.DatabaseName, vLakehouseName).synapsesql(vSqlStatement)
        df_sql_objects_temp = spark_df_sql_objects.toPandas()
        df_sql_objects = pd.concat([df_sql_objects, df_sql_objects_temp], ignore_index=True)

        # 5. extraction of security policies
        # one row per policy; its filter predicates are aggregated into a single CREATE statement
        print(f"extracting security for lakehouse {vLakehouseName}")
        vSqlStatement = f"""
        SELECT 
            '{vLakehouseName}' AS LakehouseName,
            schema_name AS SchemaName, 
            policy_name AS ObjectName,
            'DROP SECURITY POLICY IF EXISTS ' + policy_name_new AS DropStatement,
            CONCAT(
                create_statement,
                policy_name_new,
                filter_predicate,
                CASE is_enabled
                    WHEN 0 THEN ' WITH (STATE = OFF)'
                    ELSE ' WITH (STATE = ON)'
                END 
            ) AS CreateStatement,
            'Security Policy' AS ObjectType
        FROM 
            (
            SELECT 
                schema_name, policy_name, create_statement, policy_name_new, is_enabled, STRING_AGG(filter_predicate, ',') AS filter_predicate
            FROM 
                (
                SELECT 
                    pol_schema.name AS schema_name,
                    pol.name as policy_name,
                    'CREATE SECURITY POLICY ' AS create_statement,
                    '[' + pol_schema.name + '].[' + pol.name + ']' AS policy_name_new,
                    pol.is_enabled,
                    ' ADD FILTER PREDICATE ' 
                    + RIGHT(LEFT(pre.predicate_definition, LEN(pre.predicate_definition)-1),LEN(pre.predicate_definition)-2)
                    + ' ON [' + obj_schema.name + '].[' + obj.name + ']' 
                    AS filter_predicate

                FROM 
                    sys.security_policies pol
                    INNER JOIN sys.schemas pol_schema 
                        ON pol_schema.schema_id = pol.schema_id
                    INNER JOIN sys.security_predicates pre
                        ON pre.object_id = pol.object_id
                    INNER JOIN sys.objects obj 
                        ON obj.object_id = pre.target_object_id
                    INNER JOIN sys.schemas obj_schema 
                        ON obj_schema.schema_id = obj.schema_id
                ) a
            GROUP BY 
                schema_name, policy_name, create_statement, policy_name_new, is_enabled
            ) b
        """

        spark_df_sql_objects = spark.read.option(Constants.WorkspaceId, vSourceWorkspaceId).option(Constants.DatabaseName, vLakehouseName).synapsesql(vSqlStatement)
        df_sql_objects_temp = spark_df_sql_objects.toPandas()
        df_sql_objects = pd.concat([df_sql_objects, df_sql_objects_temp], ignore_index=True)


    # format shortcuts dataframe
    # get the column names
    columns = df_shortcuts.columns.values.tolist()

    # iterate over the column names and rename each one to its processed form
    for columnName in columns:

        # NOTE(review): process_column_name is defined outside this cell; per the original comment it
        # splits the name on '.', takes the last item and upper-cases the first letter - confirm
        processed_column = process_column_name(columnName, '.')

        # replace the column name in the dataframe
        df_shortcuts.rename(columns={columnName: processed_column}, inplace=True)

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify shortcuts, folders and access roles', datetime.now(), None, vMessage, '']

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify shortcuts, folders and access roles', datetime.now(), None, vMessage, str(e) ]
    if pDebugMode == "yes":
        print(str(e))

**Exclude shortcuts from tables**

In [None]:
# Remove the tables that are really shortcuts (shortcuts located under /Tables) from the
# source table list, then restrict the source column list to the remaining tables.
# The outcome is appended to dfLogging; an exception is logged, not re-raised.
try:
    shortcuts_columns_subset = ['LakehouseTargetName', 'Name']

    # when no shortcut was collected, df_shortcuts lacks the 'Path' / 'Name' columns;
    # fall back to an empty frame so that nothing is excluded and df_shortcuts_tables
    # is still defined for the cells that use it later
    if {'Path', *shortcuts_columns_subset}.issubset(df_shortcuts.columns):
        df_shortcuts_tables = df_shortcuts.loc[df_shortcuts['Path']=='/Tables', shortcuts_columns_subset]
    else:
        df_shortcuts_tables = pd.DataFrame(columns=shortcuts_columns_subset)

    # rename into a new frame rather than renaming a slice of df_shortcuts in place
    df_shortcuts_tables = df_shortcuts_tables.rename(columns={"Name":"Table Name","LakehouseTargetName":"Lakehouse Name"})

    # anti-join: keep the source tables that have no matching table shortcut
    df_tables_exclude_shortcut = df_source_lakehouse_tables.merge(df_shortcuts_tables, on=df_source_lakehouse_tables.columns.tolist(), how='left', indicator=True)
    df_tables_exclude_shortcut = df_tables_exclude_shortcut[df_tables_exclude_shortcut['_merge'] == 'left_only'].drop(columns=['_merge'])

    # keep only the columns of the remaining tables
    df_source_lakehouse_columns = pd.merge(df_source_lakehouse_columns, df_tables_exclude_shortcut, on=["Lakehouse Name", "Table Name"], how="inner")

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'exclude shortcuts from tables', datetime.now(), None, vMessage, '']

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'exclude shortcuts from tables', datetime.now(), None, vMessage, str(e) ]
    if pDebugMode == "yes":
        print(str(e))

**Exclude shortcuts from folders**

In [None]:
# Remove the folders that are really shortcuts (shortcuts located directly under /Files)
# from the folder list. The result is df_folders_exclude_shortcut.
# The outcome is appended to dfLogging; an exception is logged, not re-raised.
try:
    shortcuts_columns_subset = ['WorkspaceSourceId','WorkspaceTargetId', 'LakehouseTargetName', 'LakehouseTargetId', 'Name', 'Path']

    # when no shortcut was collected, df_shortcuts lacks some of these columns;
    # fall back to an empty frame so that nothing is excluded
    if set(shortcuts_columns_subset).issubset(df_shortcuts.columns):
        # explicit copy: columns are added right below, which must not happen on a slice of df_shortcuts
        df_shortcuts_folders = df_shortcuts.loc[df_shortcuts['Path']=='/Files', shortcuts_columns_subset].copy()
    else:
        df_shortcuts_folders = pd.DataFrame(columns=shortcuts_columns_subset)

    # shape the shortcuts like the folder rows: "Files/<shortcut name>", flagged as a directory
    df_shortcuts_folders['FolderName'] = df_shortcuts_folders['Path'].replace("/", '', regex=True) + '/' + df_shortcuts_folders['Name']
    df_shortcuts_folders['isDirectory'] = 'true'

    if len(df_folders.columns) == 0:
        # no folder was collected at all: there is nothing to join on and nothing to exclude
        df_folders_exclude_shortcut = df_folders.copy()
    else:
        # anti-join: keep the folders that have no matching shortcut
        df_folders_exclude_shortcut = df_folders.merge(df_shortcuts_folders, on=df_folders.columns.tolist(), how='left', indicator=True)
        df_folders_exclude_shortcut = df_folders_exclude_shortcut[df_folders_exclude_shortcut['_merge'] == 'left_only'].drop(columns=['_merge'])

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'exclude shortcuts from folders', datetime.now(), None, vMessage, '']

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'exclude shortcuts from folders', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

In [None]:
# Build df_input_deployment, the list of table definitions to apply to each target lakehouse.
# - target lakehouse without tables (table shortcuts excluded): the full source definition is used
# - otherwise only the differences with the source lakehouse are kept:
#     1.   tables only in the source              --> full definition of the table
#     2.2  common tables, columns only in source  --> alter table add column
#     2.3  common tables, columns only in target  --> alter table drop column
#     2.4  common tables, data type changed       --> alter table alter column
# Unmatched rows are identified with the merge indicator rather than by looking for NaN values:
# a NaN in any unrelated column would otherwise flag a matched column as missing.

# define the dataframe containing the sql definitions for the deployment
df_input_deployment = pd.DataFrame()

try:

    # recreate the list of lakehouses
    df_target_lakehouses = labs.list_lakehouses(workspace = vTargetWorkspaceName)

    # keys identifying a table and a column, and the name of the indicator column used by the anti-joins
    vTableKeyColumns = ['Lakehouse Name', 'Table Name']
    vColumnKeyColumns = ['Lakehouse Name', 'Table Name', 'Column Name']
    vPresenceIndicator = '_column_presence'

    # iterate over the target lakehouses
    for lakehouse in df_target_lakehouses['Lakehouse Name']:

        # get the source lakehouse tables and columns
        df_source_lakehouse_columns_in_scope = df_source_lakehouse_columns[df_source_lakehouse_columns['Lakehouse Name']==lakehouse]

        # get the lakehouse tables and columns
        # this call brings also table shortcut, they need to be excluded. df_shortcuts_tables is used to this effect.
        df_target_lakehouses_columns_temp = labs.lakehouse.get_lakehouse_columns(lakehouse = lakehouse, workspace = vTargetWorkspaceName)
        df_target_lakehouses_columns = df_target_lakehouses_columns_temp.merge(df_shortcuts_tables, on=['Lakehouse Name', 'Table Name'], how='left', indicator=True)
        df_target_lakehouses_columns = df_target_lakehouses_columns[df_target_lakehouses_columns['_merge'] == 'left_only'].drop(columns=['_merge'])

        if df_target_lakehouses_columns.empty:

            vMessage = f"target lakehouse <{lakehouse}> is empty. retrieve the full list of tables from the source lakehouse as an input for table definitions."
            print(vMessage)

            # if the target lakehouse is empty, the source lakehouse definition is used for the target lakehouse
            df_target_lakehouses_columns = df_source_lakehouse_columns_in_scope

            # format the input for deployment
            df_input_deployment_temp = input_for_full_deployment(df_target_lakehouses_columns)

            # concat to the dataframe
            df_input_deployment = pd.concat([df_input_deployment, df_input_deployment_temp], ignore_index=True)

        else:

            vMessage = f"target lakehouse <{lakehouse}> is not empty. retrieve an increment list of changes from the source lakehouse as an input for table definitions."
            print(vMessage)

            # align the structure of df to df_source_lakehouse_columns
            df_target_lakehouses_columns["WorkspaceTargetName"] = vTargetWorkspaceName

            # identify the incremental

            # 1. tables in source but not in target lakehouse
            df_source_lakehouse_tables = df_source_lakehouse_columns_in_scope[vTableKeyColumns].drop_duplicates()
            df_target_lakehouse_tables = df_target_lakehouses_columns[vTableKeyColumns].drop_duplicates()
            df_tables_in_source_only_temp = pd.merge(df_source_lakehouse_tables, df_target_lakehouse_tables, on=vTableKeyColumns, how='left', indicator=True)
            df_tables_in_source_only_temp = df_tables_in_source_only_temp[df_tables_in_source_only_temp['_merge'] == 'left_only'].drop(columns=['_merge'])
            df_tables_in_source_only = pd.merge(df_source_lakehouse_columns_in_scope, df_tables_in_source_only_temp, on=vTableKeyColumns, how='inner')

            # 2. tables in both source and target lakehouses but with a structural change (added columns, deleted columns, changed Data Type)

            # 2.1 find common tables (both inputs hold unique table keys, so this is a unique key list too)
            df_tables_in_common = pd.merge(df_source_lakehouse_tables, df_target_lakehouse_tables, on=vTableKeyColumns, how='inner')

            # 2.2 tables in common, source columns
            df_tables_in_common_source_columns = pd.merge(df_source_lakehouse_columns_in_scope, df_tables_in_common[vTableKeyColumns], on=vTableKeyColumns, how='inner')

            # 2.2 tables in common, columns only in source --> ALTER TABLE ADD COLUMN
            # 'left_only' rows are the source columns without a match in the target
            df_tables_in_common_source_columns_only_temp = pd.merge(df_tables_in_common_source_columns, df_target_lakehouses_columns, on=vColumnKeyColumns, how='left', suffixes=('', '_target'), indicator=vPresenceIndicator)
            df_tables_in_common_source_columns_only_temp = df_tables_in_common_source_columns_only_temp[df_tables_in_common_source_columns_only_temp[vPresenceIndicator] == 'left_only']
            df_tables_in_common_source_columns_only = df_tables_in_common_source_columns_only_temp[df_tables_in_common_source_columns.columns]

            # 2.3 tables in common, columns only in target --> ALTER TABLE DROP COLUMN
            # dropping columns requires the table property delta.columnMapping.mode = name
            # example ALTER TABLE lake.Table Name SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name','delta.minReaderVersion' = '2','delta.minWriterVersion' = '5')
            # only the tables in common are considered: without this restriction every column of a table
            # existing only in the target would be reported as a column to drop
            df_tables_in_common_target_columns = pd.merge(df_target_lakehouses_columns, df_tables_in_common[vTableKeyColumns], on=vTableKeyColumns, how='inner')
            df_tables_in_common_target_columns_only_temp = df_tables_in_common_target_columns.merge(df_tables_in_common_source_columns, on=vColumnKeyColumns, how='left', suffixes=('', '_source'), indicator=vPresenceIndicator)
            df_tables_in_common_target_columns_only_temp = df_tables_in_common_target_columns_only_temp[df_tables_in_common_target_columns_only_temp[vPresenceIndicator] == 'left_only']
            df_tables_in_common_target_columns_only = df_tables_in_common_target_columns_only_temp[df_target_lakehouses_columns.columns]

            # 2.4, tables in common, columns in common, but data type changed --> ALTER TABLE ALTER COLUMN
            df_data_type_comparison = pd.merge(df_tables_in_common_source_columns, df_target_lakehouses_columns, on=vColumnKeyColumns, suffixes=('', '_target'))
            df_data_type_comparison["is_different"] = df_data_type_comparison["Data Type"] != df_data_type_comparison["Data Type_target"]
            df_tables_in_common_data_type_changed = df_data_type_comparison.loc[df_data_type_comparison["is_different"], df_tables_in_common_source_columns.columns]

            # format each non-empty difference as an input for the deployment
            if not df_tables_in_source_only.empty:
                df_input_deployment_1 = input_for_full_deployment(df_tables_in_source_only)
            else:
                df_input_deployment_1 = pd.DataFrame()

            if not df_tables_in_common_source_columns_only.empty:
                df_input_deployment_2 = input_for_incremental_deployment(df_tables_in_common_source_columns_only, 'alter table add column')
            else:
                df_input_deployment_2 = pd.DataFrame()

            if not df_tables_in_common_target_columns_only.empty:
                df_input_deployment_3 = input_for_incremental_deployment(df_tables_in_common_target_columns_only, 'alter table drop column')
            else:
                df_input_deployment_3 = pd.DataFrame()

            if not df_tables_in_common_data_type_changed.empty:
                df_input_deployment_4 = input_for_incremental_deployment(df_tables_in_common_data_type_changed, 'alter table alter column')
            else:
                df_input_deployment_4 = pd.DataFrame()

            # concatenate the different inputs if not empty
            dfs = [df_input_deployment_1, df_input_deployment_2, df_input_deployment_3, df_input_deployment_4]
            non_empty_dfs = [df for df in dfs if not df.empty]
            df_input_deployment_temp = pd.concat(non_empty_dfs, ignore_index=True) if non_empty_dfs else pd.DataFrame()

            # concat to the dataframe
            df_input_deployment = pd.concat([df_input_deployment, df_input_deployment_temp], ignore_index=True)

    # NOTE(review): unlike the other steps, the success logging of this step is disabled. confirm whether this is intentional.
    # # logging
    # vMessage = f"succeeded"
    # dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify tables in incremental mode', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify tables in incremental mode', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Update the notebook with the definition of the tables and run it against the target lakehouse to define the tables**

In [None]:
# For every source lakehouse with pending table definitions:
#   1. build an ipynb definition holding one cell per sql statement (plus one cell per folder to create)
#   2. push it to the target notebook "nb_<lakehouse>_definition" (updateDefinition)
#   3. run that notebook against the target lakehouse
# One log row is written per lakehouse; any exception stops the loop and is logged as failed.
try:

    # sleep time before checking the operation's status in post requests
    vSleepInSeconds = 30

    # perform the deployment
    for lakehouse in df_source_lakehouses['Lakehouse Name']:

        # set the lakehouse name
        vLakehouseName = lakehouse

        # get the create table statements related to the current lakehouse
        # when no definition was generated at all, df_input_deployment has no columns and
        # filtering on 'Lakehouse Name' would raise a KeyError: there is simply nothing to deploy
        if df_input_deployment.empty:
            df_lakehouse_table_statements_current = df_input_deployment
        else:
            df_lakehouse_table_statements_current = df_input_deployment[df_input_deployment['Lakehouse Name'] == vLakehouseName]

        # update the target notebook and run it only if there are sql statements to run
        if not df_lakehouse_table_statements_current.empty:

            # define the target notebook name
            vTargetNotebookName = "nb_" + vLakehouseName + "_definition"

            # notebook definition template
            # the {..._} placeholders are filled by replace_placeholders_in_json further down
            json_notebook_definition_temp = {
                "nbformat": 4,
                "nbformat_minor": 5,
                "cells": [],
                "metadata": {
                    "language_info": {
                        "name": "sql"
                    },
                    "dependencies": {
                        "lakehouse": {
                            "default_lakehouse": "{default_lakehouse_}",
                            "default_lakehouse_name": "{default_lakehouse_name_}",
                            "default_lakehouse_workspace_id": "{default_lakehouse_workspace_id_}"
                        }
                    }
                }
            }

            # this part of the code works for full and incremental deployment
            # resolve lakehouse and notebook id
            vLakehouseTargetId = labs.resolve_lakehouse_id(lakehouse=vLakehouseName, workspace=vTargetWorkspaceName)
            vNotebookTargetId = fabric.resolve_item_id( item_name=vTargetNotebookName, type="Notebook", workspace=vTargetWorkspaceName)

            # prepare the default inputs for the notebook definition
            default_inputs_for_notebook_definition = {
                "default_lakehouse_" : vLakehouseTargetId,
                "default_lakehouse_name_" : vLakehouseName,
                "default_lakehouse_workspace_id_" : vTargetWorkspaceId
            }

            # add a new cell in the notebook definition for each sql statement
            for sql_statement in df_lakehouse_table_statements_current['SqlStatement']:
                new_cell = {
                    "cell_type": "code",
                    "source": [sql_statement]
                }
                json_notebook_definition_temp["cells"].append(new_cell)

            # get the folders of the current lakehouse
            df_folders_current = df_folders_exclude_shortcut[df_folders_exclude_shortcut['LakehouseTargetName'] == vLakehouseName]

            # add one pyspark cell per folder to create
            if not df_folders_current.empty:
                for folder in df_folders_current['FolderName']:
                    new_cell = {
                        "cell_type": "code",
                        "source": [
        f"""%%pyspark
        mssparkutils.fs.mkdirs('{folder}')"""
                                ]
                    }
                    json_notebook_definition_temp["cells"].append(new_cell)

            # replace the placeholders
            json_notebook_definition = replace_placeholders_in_json(json_notebook_definition_temp, default_inputs_for_notebook_definition)

            # final json definition
            json_notebook_definition_new = json.loads(json.dumps(json_notebook_definition, indent=4))

            # base64 encoding for the api call
            json_notebook_definition_new_encoded = base64.b64encode(json.dumps(json_notebook_definition_new, indent=4).encode('utf-8')).decode('utf-8')

            # 3. update the notebook definition

            # set the url for the update
            vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/notebooks/{vNotebookTargetId}/updateDefinition"

            # set the body
            vJsonBody = {
                "definition": {
                    "format": "ipynb",
                    "parts": [
                        {
                            "path": "notebook-content.py",
                            "payload": f"{json_notebook_definition_new_encoded}",
                            "payloadType": "InlineBase64"
                        }
                    ]
                }
            }

            # update the notebook definition
            # the update notebook definition as of 19.11.2024 has an issue when executing the operation url when the response status code is 202
            # it returns an error although the update is successful
            create_or_update_fabric_item(vUrl, vHeaders, vJsonBody, 'post', "updating", vTargetWorkspaceId, vTargetNotebookName, "Notebook", vSleepInSeconds, pDebugMode) 

            # 4. run the notebook

            # set the url
            vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/items/{vNotebookTargetId}/jobs/instances?jobType=RunNotebook"

            # run the notebook
            create_or_update_fabric_item(vUrl, vHeaders, None, 'post', "executing", vTargetWorkspaceId, vTargetNotebookName, "Notebook", vSleepInSeconds, pDebugMode) 

        # logging (one row per source lakehouse, also when there was nothing to deploy)
        vMessage = f"succeeded"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create tables and folders in target lakehouses', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create tables and folders in target lakehouses', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Use the commented cell to check a notebook definition if required**

nb = json.loads(
    notebookutils.notebook.getDefinition(
        "nb_saleslake_definition_", # name of the notebook to inspect
        workspaceId=vTargetWorkspaceId
    )
)
print(json.dumps(nb, indent=4))

**Create shortcuts in target lakehouses**

In [None]:
# Create every shortcut listed in df_shortcuts in its target lakehouse.
# The request body depends on the shortcut target type:
#   - AdlsGen2, AmazonS3, GoogleCloudStorage, S3Compatible --> shortcut_body (fed with the AdlsGen2 columns)
#   - OneLake                                              --> onelake_shortcut_body
#   - any other type is not implemented: the shortcut is skipped and logged instead of posting an empty body
# One log row is written per shortcut; any exception stops the loop and is logged as failed.
try:
    vSleepInSeconds = 30
    for index, row in df_shortcuts.iterrows():

        # set the variables
        # common inputs 
        vName = row['Name']
        vPath = row['Path']
        vTargetType = row['TargetType']

        # specific to onelake
        vTargetOneLakeWorkspaceId = row['TargetOneLakeWorkspaceId']
        vTargetOneLakeItemId = row['TargetOneLakeItemId']
        vTargetOneLakePath = row['TargetOneLakePath']

        # specific to adls gen2
        vTargetAdlsGen2Location = row['TargetAdlsGen2Location']
        vTargetAdlsGen2Subpath = row['TargetAdlsGen2Subpath']
        vTargetAdlsGen2ConnectionId = row['TargetAdlsGen2ConnectionId']

        # todo
        # specific to AmazonS3
        # specific to GoogleCloudStorage
        # specific to S3Compatible

        # target lakehouse id
        vLakehouseTargetId = row['LakehouseTargetId']

        # shortcut url
        vShortcutUrl = f"workspaces/{vTargetWorkspaceId}/items/{vLakehouseTargetId}/shortcuts?shortcutConflictPolicy={vShortcutConflictPolicy}"
        vUrl = vBaseUrl + vShortcutUrl

        # request body
        if vTargetType in ["AdlsGen2", "AmazonS3", "GoogleCloudStorage", "S3Compatible"]:
            vJsonBody = shortcut_body(vTargetType, vName, vPath, vTargetAdlsGen2Location, vTargetAdlsGen2Subpath, vTargetAdlsGen2ConnectionId)
        elif vTargetType == "OneLake":
            vJsonBody = onelake_shortcut_body(vTargetType, vName, vPath, vTargetOneLakeWorkspaceId, vTargetOneLakeItemId, vTargetOneLakePath)
        else:
            # use case to be implemented
            # there is no valid body for this target type: skip the shortcut and make it visible in the log
            vMessage = f"skipped"
            vSkipReason = f"shortcut <{vName}> has an unsupported target type <{vTargetType}>."
            dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create shortcuts in target lakehouses', datetime.now(), None, vMessage, vSkipReason]
            if pDebugMode == "yes":
                print(vSkipReason)
            continue

        # create the shortcut
        create_or_update_fabric_item(vUrl, vHeaders, vJsonBody, 'post', "creating", vTargetWorkspaceId, vName, "Shortcut", vSleepInSeconds, pDebugMode)   

        # logging
        vMessage = f"succeeded"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create shortcuts in target lakehouses', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create shortcuts in target lakehouses', datetime.now(), None, vMessage, str(e)] 
    if pDebugMode == "yes":
        print(str(e))

**Enable onelake security on target lakehouses**

In [None]:
# Enable onelake security on every target lakehouse by posting to the onelake "security/enable" endpoint.
# All lakehouses are attempted; the responses are then checked so that a rejected request
# (non 2xx status code) is reported as a failure instead of being silently logged as succeeded.
try:
    # get a onelake token
    vOnelakeHeaders = {"authorization": f"bearer {mssparkutils.credentials.getToken('storage')}"}

    # lakehouses for which the request was rejected
    vFailedLakehouses = []

    # iterate over the target lakehouses
    for index, row in df_target_lakehouses.iterrows():

        vLakehouseTargetId = row['Lakehouse ID']

        vUrl = f'https://onelake.dfs.fabric.microsoft.com/v1.0/workspaces/{vTargetWorkspaceId}/artifacts/{vLakehouseTargetId}/security/enable'

        vJsonBody = {
            "enableOneSecurity":"true"
        }

        response = requests.post(vUrl, headers=vOnelakeHeaders, json=vJsonBody)

        # keep track of the rejected requests, the remaining lakehouses are still processed
        if not response.ok:
            vFailedLakehouses.append(f"lakehouse <{row['Lakehouse Name']}> returned {response.status_code}: {response.text}")

    # surface the rejected requests through the failure logging below
    if vFailedLakehouses:
        raise RuntimeError("; ".join(vFailedLakehouses))

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'enable onelake security on target lakehouses', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'enable onelake security on target lakehouses', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Identify source lakehouses onelake roles**

In [None]:
# Prepare the inputs used to create the onelake roles: df_role_rules, df_entra_members and df_item_members.
#   - custom roles provided (vCustomRoles == "yes"): the inputs are loaded from csv files
#   - otherwise: they are derived from df_onelake_access (decision rules and members flattened, one row per value)
# The success is logged for both paths.
try:

    # if custom roles are provided
    if vCustomRoles == "yes":

        # load the csv files
        df_onelake_access = pd.read_csv(vOnelakeRolesCsvPath)
        df_role_rules = pd.read_csv(vRoleRulesCsvPath)
        df_item_members = pd.read_csv(vItemMembersCsvPath)
        df_entra_members = pd.read_csv(vEntraMembersCsvPath)
    else:
        # 2. prepare the inputs for the creation
        df_role_rules = flatten_nested_json_df(df_onelake_access[['id', 'decisionRules']].explode('decisionRules').dropna())

        # drop the "Action" rows whose value is not "Read"
        condition_1 = (df_role_rules["decisionRules.permission.attributeName"] == "Action") & (df_role_rules["decisionRules.permission.attributeValueIncludedIn"] != "Read")
        df_role_rules_1 = df_role_rules[~condition_1]

        # drop the "Path" rows whose value is "Read"
        condition_2 = (df_role_rules_1["decisionRules.permission.attributeName"] == "Path") & (df_role_rules_1["decisionRules.permission.attributeValueIncludedIn"] == "Read")
        df_role_rules_2 = df_role_rules_1[~condition_2]
        df_role_rules = df_role_rules_2

        # members of the roles
        df_entra_members = flatten_nested_json_df(df_onelake_access[['id', 'members.microsoftEntraMembers']].explode('members.microsoftEntraMembers').dropna())
        df_item_members = flatten_nested_json_df(df_onelake_access[['id', 'members.fabricItemMembers']].explode('members.fabricItemMembers').dropna())

    # logging (written for the custom roles as well as for the roles derived from the source)
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify source lakehouses onelake roles', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'identify source lakehouses onelake roles', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Create onelake roles in target lakehouses**

In [None]:
# Rebuild every role of df_onelake_roles as a json payload and send it to its target lakehouse.
# For each role (the DefaultReader role is skipped) the payload is assembled from:
#   - df_role_rules    --> "decisionRules": one entry per effect, one permission per attribute name
#   - df_entra_members --> "members.microsoftEntraMembers": tenantId / objectId pairs
#   - df_item_members  --> "members.fabricItemMembers": one entry per source path, with its item access values
# The payload is sent with a 'put' on workspaces/<target workspace>/items/<target lakehouse>/dataAccessRoles.
# A single log row is written once all roles are processed; any exception stops the loop and is logged as failed.
# NOTE(review): each role is sent on its own, in a one-element "value" list. confirm that the endpoint
# does not replace the roles sent by previous iterations for the same lakehouse.
try:

    vSleepInSeconds = 30
    for index, row in df_onelake_roles.iterrows():

        # copy the role attributes right away: index and row are reused by an inner loop further down
        vRoleId = row['id']
        vRoleName = row['name']
        vLakehouseTargetName = row['lakehouse']
        vLakehouseTargetId = labs.resolve_lakehouse_id(lakehouse=vLakehouseTargetName, workspace=vTargetWorkspaceName)
        # print(vRoleId, vRoleName, vLakehouseTargetName, vLakehouseTargetId)

        # updating the DefaultReader role via the API deletes it
        if vRoleName != "DefaultReader":
            # role template with decision rules, entra members and item members
            role_template = {
                "value": [
                    {
                        "name": "{name_}",
                        "decisionRules": [
                        ],
                        "members": {
                            "microsoftEntraMembers": [
                            ],
                            "fabricItemMembers": [
                            ]
                        }
                    }
                ]
            }

            # replace the role name
            # presumably replace_placeholders_in_json substitutes the {name_} placeholder; it is defined outside this cell, verify there
            role_input = {
                "name_": vRoleName
            }
            role_template = replace_placeholders_in_json(role_template, role_input)

            # handle decision rules: one rules entry per distinct effect of the role
            df_rules_current = df_role_rules[df_role_rules['id']==vRoleId]
            for effect in df_rules_current['decisionRules.effect'].drop_duplicates():

                vEffectName = effect

                # replace the effect name
                effect_input = {
                    "effect_": vEffectName
                }
                rules_template = {
                    "effect": "{effect_}",
                    "permission": [
                    ]
                }
                rules_template = replace_placeholders_in_json(rules_template, effect_input)

                # handle effects: rules of the current role and effect
                df_effect_current = df_rules_current[(df_rules_current['id']==vRoleId) & (df_rules_current['decisionRules.effect']==vEffectName)]

                # one permission entry per distinct attribute name of the effect
                for attribute in df_effect_current['decisionRules.permission.attributeName'].drop_duplicates():

                    vAttributeName = attribute

                    # replace the attribute
                    permission_input = {
                        "attributeName_": attribute
                    }
                    permissions_template = {
                        "attributeName": "{attributeName_}",
                        "attributeValueIncludedIn": [
                        ]   
                    }
                    permissions_template = replace_placeholders_in_json(permissions_template, permission_input)

                    # handle attributes: rows of the current role, effect and attribute name
                    df_attribute_current = df_effect_current[(df_effect_current['id']==vRoleId) & (df_effect_current['decisionRules.effect']==vEffectName) & (df_effect_current['decisionRules.permission.attributeName']==vAttributeName)] 

                    # collect every value of the attribute (duplicates are not removed here)
                    for attribute_included_in in df_attribute_current['decisionRules.permission.attributeValueIncludedIn']:
                        vAttributeIncludedIn = attribute_included_in
                        permissions_template["attributeValueIncludedIn"].append(vAttributeIncludedIn)

                    # append the attributes included in to the permission template
                    rules_template['permission'].append(permissions_template)

                # append the rules template to the decision rules in the role template
                role_template["value"][0]["decisionRules"].append(rules_template)


            # handle the entra members
            df_entra_member_current = df_entra_members[df_entra_members['id']==vRoleId]
            # index and row shadow the variables of the outer loop; the outer values were already copied to vRoleId, vRoleName and vLakehouseTargetName
            for index, row in df_entra_member_current.iterrows():

                vTenantId = row['members.microsoftEntraMembers.tenantId']
                vObjectId = row['members.microsoftEntraMembers.objectId']

                # set the member template
                entra_members_template = {
                    "tenantId": vTenantId,
                    "objectId": vObjectId
                }

                # append the member template to the role template
                role_template["value"][0]["members"]["microsoftEntraMembers"].append(entra_members_template)


            # handle the fabric item members: one entry per distinct source path of the role
            df_item_members_current = df_item_members[df_item_members['id']==vRoleId]
            for item_member in df_item_members_current['members.fabricItemMembers.sourcePath'].drop_duplicates():
                
                # the source path is only used to select the item access rows below;
                # the path written to the payload always points to the target workspace and lakehouse
                vSourcePath = item_member # row['members.fabricItemMembers.sourcePath']
                vTargetPath = vTargetWorkspaceId + "/" + vLakehouseTargetId 

                # replace the source path
                items_members_template = {
                    "sourcePath": vTargetPath,
                    "itemAccess": [
                    ]
                }

                # handle the item access
                df_item_access_current = df_item_members_current[df_item_members_current['members.fabricItemMembers.sourcePath']==vSourcePath] 
                for item_access in df_item_access_current['members.fabricItemMembers.itemAccess'].drop_duplicates():

                    vItemAccess = item_access

                    # append the item access to the member template
                    items_members_template["itemAccess"].append(vItemAccess) 

                # append the fabric item template to the role template
                role_template["value"][0]["members"]["fabricItemMembers"].append(items_members_template)

            # print(json.dumps(role_template, indent=4))
            
            vJsonBody = role_template

            # url
            vRoleUrl = f"workspaces/{vTargetWorkspaceId}/items/{vLakehouseTargetId}/dataAccessRoles"
            vUrl = vBaseUrl + vRoleUrl

            # send the role to the target lakehouse
            create_or_update_fabric_item(vUrl, vHeaders, vJsonBody, 'put', "creating/updating", vTargetWorkspaceId, vRoleName, "onelake role", vSleepInSeconds, pDebugMode)    

    # logging
    vMessage = f"succeeded"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create onelake roles in target lakehouses', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create onelake roles in target lakehouses', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))


**Create the sql objects in the target SQL endpoint**

In [None]:
try:


    # sleep time before checking the operation's status in post requests
    vSleepInSeconds = 30


    # perform the deployment
    for lakehouse in df_source_lakehouses['Lakehouse Name']:

        # set the lakehouse name
        vLakehouseName = lakehouse

        # get the lakewarehouse id --> this is different than the lakehouse id
        vLakehouseWarehouseTargetId = df_target_lakehouses[df_target_lakehouses['Lakehouse Name']== vLakehouseName].loc[0, 'SQL Endpoint ID']
     
        # define the target notebook name
        vTargetSqlNotebookName = "nb_" + vLakehouseName + "_sql_definition"

        # get the create table statements related to the current lakehouse
        df_sql_objects_current = df_sql_objects[df_sql_objects['LakehouseName'] == vLakehouseName]


        # update the target notebook and run it only if there are sql statements to run
        if not df_sql_objects_current.empty:


            # notebook definition template --> this will be a TSQL notebook: add to it the lakewarehouse id as a default warehouse
            json_notebook_definition_temp = {
                "nbformat": 4,
                "nbformat_minor": 5,
                "cells": [],
                "metadata": {
                    "kernel_info": {
                        "name": "sqldatawarehouse"
                    },
                    "kernelspec": {
                        "name": "sqldatawarehouse",
                        "language": "sqldatawarehouse",
                        "display_name": "sqldatawarehouse"
                    },
                    "language_info": {
                        "name": "sql"
                    },
                    "dependencies": {
                        "warehouse": {
                            "known_warehouses": [
                                {
                                    "id": "{default_lakewarehouse_}",
                                    "type": "Lakewarehouse"
                                }
                            ],
                            "default_warehouse": "{default_lakewarehouse_}"
                        },
                        "lakehouse": {}
                    }
                }
            }

            # set the url
            vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/items"

            # this part of the code works for full and incremental deployment
            # resolve notebook id
            vTargetSqlNotebookId = fabric.resolve_item_id( item_name=vTargetSqlNotebookName, type="Notebook", workspace=vTargetWorkspaceName)

            # prepare the default inputs for the notebook definition
            default_inputs_for_notebook_definition = {
                "default_lakewarehouse_" : vLakehouseWarehouseTargetId
            }

            # check if there are security policies defined
            # if yes:
            # 1. add a cell to drop the security policy --> this will allow altering the predicate function
            # 2. add all other cells to create view, functions, etc..
            # 3. add a cell to create the security policy

            # create cells for droping security policies
            sql_objects_contain_security_policies = (df_sql_objects_current['ObjectType'] == 'Security Policy').any()
            if sql_objects_contain_security_policies:
                df_sql_objects_current_policies = df_sql_objects_current[df_sql_objects_current['ObjectType'] == 'Security Policy']

                # iterate over the sql objects of the current lakehouse
                for index, row in df_sql_objects_current_policies.iterrows():

                    # get the ddl statement
                    vSchemaName = row['SchemaName']
                    vObjectName = row['ObjectName']
                    vDropStatement = row['DropStatement']

                    print(f"adding a drop security cell for <{vSchemaName}.{vObjectName}>.")

                    new_cell = {
                        "cell_type": "code",
                        "source": [vDropStatement]
                    }
                    json_notebook_definition_temp["cells"].append(new_cell)
                

            # iterate over the sql objects of the current lakehouse
            for index, row in df_sql_objects_current.iterrows():

                # get the ddl statement
                vSchemaName = row['SchemaName']
                vObjectName = row['ObjectName']
                vDropStatement = row['DropStatement']
                vCreateStatement = row['CreateStatement']
                vObjectType = row['ObjectType']

                print(f"adding a create cell for <{vSchemaName}.{vObjectName}>.")

                # add the create statement
                new_cell = {
                    "cell_type": "code",
                    "source": [vCreateStatement]
                }
                json_notebook_definition_temp["cells"].append(new_cell)                    



            # replace the placeholders
            json_notebook_definition = replace_placeholders_in_json(json_notebook_definition_temp, default_inputs_for_notebook_definition)

            # final json definition
            json_notebook_definition_new = json.loads(json.dumps(json_notebook_definition, indent=4))
            # print(json.dumps(json_notebook_definition, indent=4))

            # base64 encoding for the api call
            json_notebook_definition_new_encoded = base64.b64encode(json.dumps(json_notebook_definition_new, indent=4).encode('utf-8')).decode('utf-8')


            # 3. update the notebook definition

            # set the url for the update
            vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/notebooks/{vTargetSqlNotebookId}/updateDefinition"

            # set the body
            vJsonBody = {
                "definition": {
                    "format": "ipynb",
                    "parts": [
                        {
                            "path": "notebook-content.py",
                            "payload": f"{json_notebook_definition_new_encoded}",
                            "payloadType": "InlineBase64"
                        }
                    ]
                }
            }


            # update the notebook definition
            # as of 02.2025, the update notebook definition api has an issue when executing the operation url after a response with status code 202:
            # it returns an error although the update is successful
            create_or_update_fabric_item(vUrl, vHeaders, vJsonBody, 'post', "updating", vTargetWorkspaceId, vTargetSqlNotebookName, "Notebook", vSleepInSeconds, pDebugMode) 

            # 4. run the notebook

            # set the url
            vUrl = vBaseUrl + f"workspaces/{vTargetWorkspaceId}/items/{vTargetSqlNotebookId}/jobs/instances?jobType=RunNotebook"

            # run the notebook
            create_or_update_fabric_item(vUrl, vHeaders, None, 'post', "executing", vTargetWorkspaceId, vTargetSqlNotebookName, "Notebook", vSleepInSeconds, pDebugMode) 

        # logging
        vMessage = f"succeeded"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create sql objects in target lakehouses', datetime.now(), None, vMessage, ''] 

except Exception as e:
    vMessage = f"failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'create sql objects in target lakehouses', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Delete notebooks created in previous steps**

In [None]:
# Delete the temporary definition notebooks ("nb_<lakehouse>_definition" and
# "nb_<lakehouse>_sql_definition") from the target workspace, for every source lakehouse.
# Each deletion is attempted independently: a deletion that raises is logged and the
# cleanup continues, so one failure does not leave the remaining notebooks behind.
try:
    for lakehouse in df_source_lakehouses['Lakehouse Name']:

        # set the lakehouse name
        vLakehouseName = lakehouse

        # define the target notebook names
        vTargetNotebookName = "nb_" + vLakehouseName + "_definition"
        vTargetSqlNotebookName = "nb_" + vLakehouseName + "_sql_definition"

        # delete the notebooks one by one; a failure is logged but does not stop the cleanup
        vAllDeleted = True
        for vNotebookToDelete in (vTargetNotebookName, vTargetSqlNotebookName):
            try:
                notebookutils.notebook.delete(vNotebookToDelete, workspaceId=vTargetWorkspaceId)
            except Exception as e:
                vAllDeleted = False
                vMessage = "failed"
                # include the notebook name so the log entry identifies which deletion failed
                vErrorMessage = f"<{vNotebookToDelete}>: {e}"
                dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'delete temporary notebooks', datetime.now(), None, vMessage, vErrorMessage]
                if pDebugMode == "yes":
                    print(vErrorMessage)

        # logging: one success entry per lakehouse, only when both notebooks were deleted
        if vAllDeleted:
            vMessage = "succeeded"
            dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'delete temporary notebooks', datetime.now(), None, vMessage, '']

except Exception as e:
    # raised outside the individual deletions (e.g. while reading the 'Lakehouse Name' column)
    vMessage = "failed"
    dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, 'delete temporary notebooks', datetime.now(), None, vMessage, str(e)]
    if pDebugMode == "yes":
        print(str(e))

**Logging**

In [None]:
# Persist the log entries collected in dfLogging to the delta table
# staging.notebook_logging_cicd. Saving the logs is best effort: a failure is
# reported on the cell output but never raised.
try:
    # cast every column to an explicit dtype before handing the dataframe to spark
    dfLogging = dfLogging.astype({
            "LoadId": "string",
            "NotebookId": "string",
            "NotebookName": "string",
            "WorkspaceId": "string",
            "CellId": "string",
            "Timestamp": "datetime64[ns]",
            "ElapsedTime": "string",
            "Message": "string",
            "ErrorMessage" : "string"
        })

    # convert the pandas dataframe to a spark dataframe
    sparkDF_Logging = spark.createDataFrame(dfLogging)

    # append to the lakehouse table, allowing new columns to be merged into the schema
    sparkDF_Logging.write.mode("append").format("delta").option("mergeSchema", "true").saveAsTable("staging.notebook_logging_cicd")

except Exception as e:
    # always surface the failure so lost logs do not go unnoticed;
    # the exception details are only printed in debug mode
    vMessage = "saving logs to the lakehouse failed"
    print(vMessage)
    if pDebugMode == "yes":
        print(str(e))