
#### Run the cell below to install the required packages for Copilot


**Please ensure all capacities in your Fabric tenant are running before running this script**

In [None]:
# Lakehouse and workspace names used to build the output table location
myWorkspace = 'WS_SagarFabric03'
myLakehouse = 'lakehouse03'

# abfss:// path of the ShortcutsMetadata table under the lakehouse's Tables folder
myTablePath = (
    f"abfss://{myWorkspace}@onelake.dfs.fabric.microsoft.com/"
    f"{myLakehouse}.Lakehouse/Tables/ShortcutsMetadata"
)

In [None]:
# Notebook command (not Python syntax): installs the jsonmerge package, which is imported and used by later cells
pip install jsonmerge

In [None]:
import json, requests, pandas as pd, jsonmerge
import datetime


In [None]:
# Generate token for Fabric access.
# mssparkutils is not imported anywhere in this file - presumably it is provided
# by the notebook runtime (TODO confirm). 'pbi' is the audience key passed to getToken.
# The token is passed to the REST helper functions, which send it as a Bearer token.
access_token = mssparkutils.credentials.getToken('pbi')

In [None]:
# Function definitions to query Fabric REST API

# List workspaces in current tenant
def listWorkspaces(access_token):
    """Return the raw JSON text listing the tenant's active workspaces.

    Sends a GET to the Fabric admin ``workspaces`` endpoint with the filters
    ``state=Active`` and ``type=Workspace``.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.

    Returns:
        The response body as an unparsed ``str``; the calling cell feeds it
        to ``pd.read_json``.

    Raises:
        Whatever ``response.json()`` raises when the body is not valid JSON.
    """
    base_url = 'https://api.fabric.microsoft.com/v1/admin/workspaces?state=Active&type=Workspace'
    header = {"Content-Type": "application/json", "Authorization": f'Bearer {access_token}'}
    response = requests.get(base_url, headers=header)
    # Parse only to fail fast on a non-JSON body; the parsed value is not needed.
    response.json()
    # NOTE(review): only this single response is read. If the API splits the
    # result over several pages, the remaining pages are not fetched - confirm.
    return response.text

# List items of specific type inside of a workspace
def listWorkspaceItems(access_token,workspaceId,itemType):
    """Return the raw JSON text listing a workspace's items of one type.

    Sends a GET to the Fabric admin ``items`` endpoint, filtered by
    ``workspaceId`` and ``type``.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.
        workspaceId: Id of the workspace whose items are listed.
        itemType: Item type filter placed in the query string (the calling
            cell passes ``"Lakehouse"``).

    Returns:
        The response body as an unparsed ``str``; the calling cell feeds it
        to ``pd.read_json``.

    Raises:
        Whatever ``response.json()`` raises when the body is not valid JSON.
    """
    base_url = f"https://api.fabric.microsoft.com/v1/admin/items?workspaceId={workspaceId}&type={itemType}"
    header = {"Content-Type": "application/json", "Authorization": f'Bearer {access_token}'}
    response = requests.get(base_url, headers=header)
    # Parse only to fail fast on a non-JSON body; the parsed value is not needed.
    response.json()
    # NOTE(review): only this single response is read. If the API splits the
    # result over several pages, the remaining pages are not fetched - confirm.
    return response.text
    
# Fetch the details of one named shortcut under a Lakehouse's Tables path
def getShortcutInfo(access_token, workspaceId, lakehouseId, shortcutName):
    """Return the parsed JSON response for a single ``Tables`` shortcut.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.
        workspaceId: Id of the workspace that holds the lakehouse.
        lakehouseId: Id of the lakehouse (used as the item id in the URL).
        shortcutName: Name of the shortcut, appended after ``shortcuts/Tables/``.

    Returns:
        The value of ``response.json()`` for the GET request.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f'Bearer {access_token}',
    }
    endpoint = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/{lakehouseId}/shortcuts/Tables/{shortcutName}"
    return requests.get(endpoint, headers=request_headers).json()

# Fetch the table listing of one Lakehouse
def listLakeHouseTables(access_token, workspaceId, lakehouseId):
    """Return the parsed JSON response of a lakehouse's ``tables`` endpoint.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.
        workspaceId: Id of the workspace that holds the lakehouse.
        lakehouseId: Id of the lakehouse whose tables are listed.

    Returns:
        The value of ``response.json()`` for the GET request.
    """
    request_headers = {"Authorization": f"Bearer {access_token}"}
    endpoint = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/tables"
    return requests.get(endpoint, headers=request_headers).json()

# Fetch the shortcuts of one Lakehouse as parsed JSON
def listFileShortcuts(access_token, workspaceId, lakehouseId):
    """Return the parsed JSON response of a lakehouse's ``shortcuts`` endpoint.

    NOTE(review): despite the function name, the request URL carries no
    file-specific filter; it is the same URL that ``listallShortcuts`` calls.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.
        workspaceId: Id of the workspace that holds the lakehouse.
        lakehouseId: Id of the lakehouse (used as the item id in the URL).

    Returns:
        The value of ``response.json()`` for the GET request.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f'Bearer {access_token}',
    }
    endpoint = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/{lakehouseId}/shortcuts"
    return requests.get(endpoint, headers=request_headers).json()

# List all shortcuts inside of a specific Lakehouse
def listallShortcuts(access_token,workspaceId,lakehouseId):
    """Return the raw JSON text listing the shortcuts of one lakehouse.

    Args:
        access_token: Token sent as ``Bearer`` in the ``Authorization`` header.
        workspaceId: Id of the workspace that holds the lakehouse.
        lakehouseId: Id of the lakehouse (used as the item id in the URL).

    Returns:
        The response body as an unparsed ``str``; the calling cell feeds it
        to ``pd.read_json``.

    Raises:
        Whatever ``response.json()`` raises when the body is not valid JSON.
    """
    base_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/{lakehouseId}/shortcuts"
    header = {"Content-Type": "application/json", "Authorization": f'Bearer {access_token}'}
    response = requests.get(base_url, headers=header)
    # Parse only to fail fast on a non-JSON body; the parsed value is not needed.
    response.json()
    # NOTE(review): only this single response is read. If the API splits the
    # result over several pages, the remaining pages are not fetched - confirm.
    return response.text

**The cell below uses listWorkspaces, listWorkspaceItems and listallShortcuts functions defined above to get all shortcuts information. listWorkspaceItems gets all Lakehouses within a given workspace. listallShortcuts then iterates through all the lakehouses and gets all shortcuts within each lakehouse**

**The code below uses jsonmerge rather than dataframe joins because the shortcut details do not include the lakehouse or workspace id needed to join the three dataframes (workspaces, lakehouses and shortcuts). Hence we have to loop through each workspace, lakehouse and shortcut and append the relevant content together.**

In [None]:
from io import StringIO

# Collect one record per shortcut (or one placeholder record for a workspace
# without lakehouses / a lakehouse without shortcuts), each carrying the
# workspace and lakehouse details. Records are gathered in a plain list and
# turned into the df_sh_all dataframe once at the end, instead of growing a
# dataframe row by row.
shortcut_records = []

# Get all workspaces and iterate through them
df_ws = pd.read_json(StringIO(listWorkspaces(access_token)))
for _, ws_row in df_ws.iterrows():

    # Workspace details to add to the shortcuts info
    workspace_id = ws_row['workspaces']['id']
    workspace_name = ws_row['workspaces']['name']
    workspace_capacity_id = ws_row['workspaces']['capacityId']

    # Get all lakehouses in the workspace
    df_lh = pd.read_json(StringIO(listWorkspaceItems(access_token, workspace_id, "Lakehouse")))

    # No lakehouses in the workspace: keep one record with no lakehouse and shortcut data
    if df_lh.empty:
        shortcut_records.append({'value': None,
                                 'workspace_id': workspace_id,
                                 'workspace_name': workspace_name,
                                 'workspace_capacity_id': workspace_capacity_id,
                                 'lakehouse_id': None,
                                 'lakehouse_name': None,
                                 })
        continue

    # Separate loop variable names so the workspace row is not shadowed
    for _, lh_row in df_lh.iterrows():

        # Lakehouse details to add to the shortcuts info
        lakehouse_id = lh_row['itemEntities']['id']
        lakehouse_name = lh_row['itemEntities']['name']

        newrow = {'workspace_id': workspace_id,
                  'workspace_name': workspace_name,
                  'workspace_capacity_id': workspace_capacity_id,
                  'lakehouse_id': lakehouse_id,
                  'lakehouse_name': lakehouse_name,
                  }

        # Get all shortcuts in the lakehouse
        df_sh = pd.read_json(StringIO(listallShortcuts(access_token, workspace_id, lakehouse_id)))

        # No shortcuts in the lakehouse: keep one record with no shortcut data
        if df_sh.empty:
            shortcut_records.append({'value': None, **newrow})
            continue

        # iterrows yields each row directly, so no index label is used as a position
        for _, sh_row in df_sh.iterrows():

            # All details of one shortcut, converted to a json dict
            js_sh = json.loads(sh_row.to_json())

            # Merge the shortcut details with the workspace/lakehouse details
            shortcut_records.append(jsonmerge.merge(js_sh, newrow))

# Final dataframe with all content
df_sh_all = pd.DataFrame(shortcut_records)


In [None]:
# Convert the pandas dataframe df_sh_all to a spark dataframe and register it as
# temp view vw_allShortcuts_01, which the following SQL cell selects from
df_sh_all_spark = spark.createDataFrame(df_sh_all)
df_sh_all_spark.createOrReplaceTempView("vw_allShortcuts_01")

In [None]:
%%sql
-- Flatten vw_allShortcuts_01: keep the workspace/lakehouse columns and expand
-- every field of the `value` column into its own top-level column.
CREATE OR REPLACE TEMP VIEW vw_allShortcuts_02 AS
SELECT workspace_id, workspace_name, workspace_capacity_id, lakehouse_id, lakehouse_name, value.*
FROM vw_allShortcuts_01

In [None]:
%%sql
-- Shape the shortcut columns of vw_allShortcuts_02:
--   name / path            -> shortcut_name / shortcut_path
--   target.type            -> shortcut_type
--   target_workspace_id    -> target.oneLake.workspaceId, only when target.type = 'OneLake' (NULL otherwise)
--   shortcut_details       -> the whole `target` value serialized with to_json
CREATE OR REPLACE TEMP VIEW vw_allShortcuts_03 AS
SELECT workspace_id, workspace_name, workspace_capacity_id, lakehouse_id, lakehouse_name,
       name AS shortcut_name, path AS shortcut_path, target.type AS shortcut_type,
       CASE WHEN target.type = 'OneLake'
            THEN target.oneLake.workspaceId
            ELSE NULL
       END AS target_workspace_id,
       to_json(target) AS shortcut_details
FROM vw_allShortcuts_02

In [None]:
# Convert the workspaces pandas dataframe (df_ws) to a spark dataframe and register
# it as temp view vw_workspaces. The view is used to look up the name of the
# 'target' workspace, i.e. the workspace holding the table/file a shortcut points to.
df_ws_spark = spark.createDataFrame(df_ws)
df_ws_spark.createOrReplaceTempView("vw_workspaces")

In [None]:
# Build the final result: every column of vw_allShortcuts_03 plus the name of the
# target workspace, looked up in vw_workspaces by target_workspace_id.
# The LEFT JOIN keeps rows without a matching workspace (target_workspace_name is NULL there).
# NOTE(review): LoadDatatime is filled from current_date, so it holds a date without a time part.
df_final = spark.sql("""SELECT a.*, b.workspaces.name AS target_workspace_name, current_date AS LoadDatatime
                      FROM vw_allShortcuts_03 a
                      LEFT JOIN vw_workspaces b
                      ON a.target_workspace_id = b.workspaces.id""")


In [None]:
# Drop and recreate the final table.
# The drop only happens when the table already exists; the write runs on every
# execution so the table is also created on the very first run.
# NOTE(review): the existence check uses the unqualified table name while the
# DROP qualifies it with myLakehouse - confirm both resolve to the same table.
if spark.catalog.tableExists("ShortcutsMetadata"):
    drop_stmt = 'DROP TABLE '+myLakehouse+'.ShortcutsMetadata'
    spark.sql(drop_stmt)
df_final.write.format("delta").mode("overwrite").save(myTablePath)

%%sql
-- Show the contents of the ShortcutsMetadata table
select * from ShortcutsMetadata