## Fabric Gateway Admin
This notebook calls the Power Platform Admin Center and Graph APIs to create the following Delta tables in a Lakehouse:
- **GatewayClusters**: A list of Gateway Clusters for which the authenticated user is an admin
- **GatewayClusterMembers**: A list of all of the machines in those Gateway Clusters
- **GatewayClusterPermissions**: A list of all admins/connection creators for those Gateway Clusters and some information about them from the Graph API 
- **GatewayClusterDatasources**: A list of all the Datasources on those Gateway Clusters
- **GatewayClusterDatasourceUsers**: A list of all users of those Datasources and some information about them from the Graph API

**Important Notes:**
- _This notebook uses undocumented APIs_ - please be aware that Microsoft provides no support for these APIs. This notebook uses them because the supported APIs do not provide this level of detail about Gateway configuration.
- The Gateway APIs cannot be called using service principal authentication alone - they require the username/password of a user or a service account that is an admin on at least 1 Gateway in your Fabric tenant.
- Multi-Factor Authentication (MFA) must be disabled on the service account/user used to acquire the API bearer tokens
- It is recommended to use Key Vault to store all secrets required to make these API calls - avoid hardcoding secrets in the "Define variables" cell below
- Fabric admins see a comprehensive list of Gateway Clusters, but they do not see Gateway Datasources _unless the Fabric admin is also a Gateway admin on at least 1 machine_. Datasource details are not available to Fabric admins who are not Gateway admins. If you want comprehensive details about every Gateway Cluster, ensure you are authenticating as a user or service account that is a Gateway admin on every Gateway in your Fabric tenant.

**Future Work:**
- Incorporate Datasource Status API calls to GatewayClusterDatasources

### Import libraries

In [None]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
import requests 
import json

### Define variables

In [None]:
# Key Vault that stores every secret referenced in this notebook
nameOfKeyVault = "xxxxxxxx"

# Base URLs for the Key Vault and for the Power BI REST API
keyvault = "https://" + nameOfKeyVault + ".vault.azure.net/"
pbi_Uri = "https://api.powerbi.com/v2.0/myorg/"

# Names (not values) of the secrets saved in Key Vault.
# Service Principal: tenant ID, client ID and client secret.
tenantId_SecretName = "xxxxxxx"
clientId_SecretName = "xxxxxxx"
clientSecret_SecretName = "xxxxxxxx"
# Gateway admin: username and password. The gateway APIs cannot use
# service-principal-only authentication, so they also need the credentials
# of a user or service account that is a gateway admin.
gatewayAdminUserName_SecretName = "xxxxxxxx"
gatewayAdminPassword_SecretName = "xxxxxxxxx"

### Define functions to get key vault secrets, API bearer token and JSON responses

In [None]:
def get_pbi_bearer_token():
    """Request a bearer token for the Power BI API using the password grant.

    The client ID, client secret and the gateway admin's username/password
    are read from Key Vault on every call.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        KeyError: if the response JSON has no ``access_token`` entry
            (for example when the token endpoint rejected the credentials).
    """
    client_id = mssparkutils.credentials.getSecret(keyvault,clientId_SecretName)
    gateway_admin_username = mssparkutils.credentials.getSecret(keyvault,gatewayAdminUserName_SecretName)
    gateway_admin_password = mssparkutils.credentials.getSecret(keyvault,gatewayAdminPassword_SecretName)
    client_secret = mssparkutils.credentials.getSecret(keyvault,clientSecret_SecretName)
    url = "https://login.microsoftonline.com/common/oauth2/token"
    # Pass the fields as a dict so requests form-encodes each value; a
    # hand-concatenated body breaks as soon as a secret or password contains
    # characters such as '&', '+', '=' or '%'.
    form_fields = {
        "grant_type": "password",
        "client_id": client_id,
        "client_secret": client_secret,
        "username": gateway_admin_username,
        "password": gateway_admin_password,
        "resource": "https://analysis.windows.net/powerbi/api",
    }
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    response = requests.post(url, headers=headers, data=form_fields)
    return response.json()["access_token"]

def get_graph_bearer_token():
    """Request a bearer token for the Microsoft Graph API using the password grant.

    The tenant ID, client ID, client secret and the gateway admin's
    username/password are read from Key Vault on every call.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        KeyError: if the response JSON has no ``access_token`` entry
            (for example when the token endpoint rejected the credentials).
    """
    tenant_id = mssparkutils.credentials.getSecret(keyvault,tenantId_SecretName)
    client_id = mssparkutils.credentials.getSecret(keyvault,clientId_SecretName)
    gateway_admin_username = mssparkutils.credentials.getSecret(keyvault,gatewayAdminUserName_SecretName)
    gateway_admin_password = mssparkutils.credentials.getSecret(keyvault,gatewayAdminPassword_SecretName)
    client_secret = mssparkutils.credentials.getSecret(keyvault,clientSecret_SecretName)
    url = "https://login.microsoftonline.com/" + tenant_id + "/oauth2/v2.0/token"
    # Pass the fields as a dict so requests form-encodes each value; a
    # hand-concatenated body breaks as soon as a secret or password contains
    # characters such as '&', '+', '=' or '%' (and the original string also
    # carried a stray '&&' before the scope parameter).
    form_fields = {
        "grant_type": "password",
        "client_id": client_id,
        "client_secret": client_secret,
        "username": gateway_admin_username,
        "password": gateway_admin_password,
        "scope": "https://graph.microsoft.com/.default",
        "redirect_uri": "https://oauth.powerbi.com/views/oauthredirect.html",
    }
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    response = requests.post(url, headers=headers, data=form_fields)
    return response.json()["access_token"]
    
def get_pbi_response_json(fullurl, method, data, payload_object):
    """Call a Power BI API endpoint and return its parsed JSON payload.

    Args:
        fullurl: Complete URL of the endpoint to call.
        method: HTTP method name, e.g. "GET" or "POST".
        data: Request body passed through to ``requests.request``.
        payload_object: Key of the response JSON to return, or "" to return
            the whole parsed response.

    Returns:
        The parsed JSON (or the value under ``payload_object``) when the
        response status is 200; ``None`` for any other status code.

    Raises:
        requests.RequestException: if the request itself fails. The error is
            propagated instead of being returned as if it were response
            data, which only deferred the failure to the caller's first use
            of the result.
        KeyError: if ``payload_object`` is not present in the response JSON.
    """
    bearer_token = get_pbi_bearer_token()
    headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {bearer_token}'}
    response = requests.request(method, fullurl, headers=headers, data=data)
    if response.status_code != 200:
        return None
    body = response.json()
    if payload_object == "":
        return body
    return body[payload_object]

def get_graph_response_json(fullurl, method, data, payload_object ):
    """Call a Microsoft Graph API endpoint and return its parsed JSON payload.

    Args:
        fullurl: Complete URL of the endpoint to call.
        method: HTTP method name, e.g. "GET" or "POST".
        data: Request body passed through to ``requests.request``.
        payload_object: Key of the response JSON to return, or "" to return
            the whole parsed response.

    Returns:
        The parsed JSON (or the value under ``payload_object``) when the
        response status is 200; ``None`` for any other status code.

    Raises:
        requests.RequestException: if the request itself fails. The error is
            propagated instead of being returned as if it were response
            data, which only deferred the failure to the caller's first use
            of the result.
        KeyError: if ``payload_object`` is not present in the response JSON.
    """
    bearer_token = get_graph_bearer_token()
    headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {bearer_token}'}
    response = requests.request(method, fullurl, headers=headers, data=data)
    if response.status_code != 200:
        return None
    body = response.json()
    if payload_object == "":
        return body
    return body[payload_object]

#### Get Gateway Clusters

In [None]:
## Get Gateway Clusters in one API call, expanding the permissions and memberGateways arrays
data = get_pbi_response_json('https://api.powerbi.com/v2.0/myorg/gatewayclusters?$expand=permissions,memberGateways',"GET","","value")
## Fail early with a clear message: the code below, and the cells that read `data`, expect a list of cluster objects
if not isinstance(data, list):
    raise RuntimeError(f"Gateway Cluster request did not return a list of clusters: {data!r}")

## Get Gateway Cluster Status by looping through each Gateway Cluster ID; add clusterId as additional key:value pair in object returned by API
clusterstatus = []
for cluster in data:
    cluster_id = cluster['id']
    url = 'https://api.powerbi.com/v2.0/myorg/gatewayClusters/' + cluster_id + '/status'
    status = get_pbi_response_json(url,"GET","","")
    ## A failed status call does not return a dict; keep the cluster with an
    ## empty status instead of crashing on the item assignment below
    if not isinstance(status, dict):
        status = {}
    status["clusterId"] = cluster_id
    clusterstatus.append(status)

## Schema of the Gateway Cluster Status objects collected in clusterstatus
status_schema = StructType(
    [
        StructField(field_name, StringType(), True)
        for field_name in ('clusterId', 'clusterStatus', 'gatewayStaticCapabilities', 'gatewayVersion', 'gatewayUpgradeState')
    ]
    + [
        StructField(
            'memberGatewayErrorMessages',
            ArrayType(StructType([
                StructField(field_name, StringType(), True)
                for field_name in ('errorCode', 'errorMessage')
            ])),
            True,
        )
    ]
)
## Build the status Data Frame and explode the error messages array (one row per message, clusters without messages are kept)
status_df = (
    spark.createDataFrame(spark.sparkContext.parallelize(clusterstatus), status_schema)
    .withColumn("memberGatewayErrorMessagesExploded", explode_outer("memberGatewayErrorMessages"))
)

## Schema of the Gateway Cluster objects returned by the initial API call
cluster_schema = StructType(
    [
        StructField(field_name, StringType(), True)
        for field_name in ('id', 'name', 'description', 'type', 'loadBalancingSettings')
    ]
    + [
        StructField('options', StructType([
            StructField(field_name, BooleanType(), True)
            for field_name in ('CloudDatasourceRefresh', 'CustomConnectors')
        ]))
    ]
)

## Schema of the JSON document held in loadBalancingSettings
loadbalancing_schema = StructType([StructField("selector", IntegerType(), True)])

## Build the Gateway Cluster Data Frame
clusters_df = spark.createDataFrame(spark.sparkContext.parallelize(data), cluster_schema)
clusters_df = (
    clusters_df
    ## attach the status columns to each cluster
    .join(status_df, clusters_df.id == status_df.clusterId, 'leftouter')
    ## parse the load balancing settings JSON using the schema above
    .withColumn("selector", from_json(col("loadBalancingSettings"), loadbalancing_schema))
    ## keep and rename the output columns
    .select(
        col("id").alias("GatewayClusterID"),
        col("name").alias("GatewayClusterName"),
        col("description").alias("GatewayClusterDescription"),
        col("type").alias("GatewayClusterType"),
        col("selector.*"),
        col("options.CloudDatasourceRefresh").alias("CloudDatasourceRefreshEnabled"),
        col("options.CustomConnectors").alias("CustomConnectorsEnabled"),
        col("clusterStatus").alias("GatewayClusterStatus"),
        col("gatewayStaticCapabilities").alias("GatewayClusterStaticCapabilities"),
        col("gatewayUpgradeState").alias("GatewayClusterUpgradeState"),
        col("memberGatewayErrorMessagesExploded.errorCode").alias("GatewayClusterErrorCode"),
        col("memberGatewayErrorMessagesExploded.errorMessage").alias("GatewayClusterErrorMessage"),
    )
    ## turn the load balancing selector into a boolean column
    .withColumn("LoadBalancingSelection", when(col("selector") != 1, False).otherwise(True))
    ## the raw selector value is no longer needed
    .drop("selector")
)

## write to lakehouse
clusters_writeToLake = (
    clusters_df.write
    .format("delta")
    .mode("overwrite")
    .save("Tables/GatewayClusters")
)

#### Get Gateway Cluster Members

In [None]:
## Define initial Gateway Cluster Members Data Frame schema
members_schema = StructType([
    StructField('id', StringType(), True),
    StructField('memberGateways',ArrayType(StructType([
        StructField('id', StringType(),True),
        StructField('name', StringType(),True),
        StructField('status', StringType(),True),
        StructField('state', StringType(),True),
        StructField('version', StringType(),True),
        StructField('versionStatus', StringType(),True),
        StructField('annotation', StringType(),True)
        ])),True)
  ])

## Define Gateway Cluster Members annotation Data Frame schema
annotation_schema = StructType([ 
    StructField("gatewayContactInformation",ArrayType(StringType()),True),
    StructField("gatewayMachine",StringType(),True),
    StructField("gatewayDepartment",StringType(),True),
    StructField("gatewayVirtualNetworkSubnetId",StringType(),True)
    ])

## populate initial Gateway Cluster Members Data Frame
members_df = spark.createDataFrame(spark.sparkContext.parallelize(data), members_schema)
## explode Members array and rename columns
members_df = members_df.withColumn("membersExploded", explode_outer("memberGateways"))\
            .select(col("id").alias("GatewayClusterID"),
                    col("membersExploded.id").alias("GatewayClusterMemberID"),
                    col("membersExploded.name").alias("GatewayClusterMemberName"),
                    col("membersExploded.status").alias("GatewayClusterMemberStatus"),
                    col("membersExploded.state").alias("GatewayClusterMemberState"),
                    col("membersExploded.version").alias("GatewayClusterMemberVersion"),
                    col("membersExploded.versionStatus").alias("GatewayClusterMemberVersionStatus"),
                    col("membersExploded.annotation").alias("annotation"), 
                    )
## parse annotations JSON with defined annotation schema 
members_df = members_df.withColumn("annotation", from_json(col("annotation"), annotation_schema))
members_df = members_df.select("GatewayClusterID",
                    "GatewayClusterMemberID",
                    "GatewayClusterMemberName",
                    "GatewayClusterMemberStatus",
                    "GatewayClusterMemberState",
                    "GatewayClusterMemberVersion",
                    "GatewayClusterMemberVersionStatus",
                    "annotation.gatewayContactInformation",
                    col("annotation.gatewayMachine").alias("GatewayClusterMemberMachineName"),
                    col("annotation.gatewayDepartment").alias("GatewayClusterMemberDepartment"),
                    col("annotation.gatewayVirtualNetworkSubnetId").alias("GatewayClusterMemberVNetSubnetId")
                )
                
## explode Gateway contact information into one row per contact.
## explode_outer (not explode) keeps members whose contact list is null or
## empty; plain explode silently removed those machines from the table
members_df = members_df.withColumn("GatewayClusterMemberContactInformation", explode_outer("gatewayContactInformation"))
members_df = members_df.drop("gatewayContactInformation")

## write to lakehouse
members_writeToLake = members_df.write.mode("overwrite").format("delta").save("Tables/GatewayClusterMembers")

#### Get Gateway Cluster Permissions

In [None]:
## Create list of Graph IDs of Gateway Cluster Users (clusters without a permissions array contribute nothing)
graphIds = [p['id'] for i in data for p in (i.get('permissions') or [])]

## Create unique list of Graph IDs of Gateway Cluster Users (order preserved)
unique_graphIds = list(dict.fromkeys(graphIds))

## Flat list of directory objects returned by the Graph API across ALL batches
clusterusersgraph = []
## Batch Graph IDs into batches of 1000 since the API has a payload limitation
batches = [unique_graphIds[i:i + 1000] for i in range(0, len(unique_graphIds), 1000)]
for batch in batches:
    ## Strip the 'app-' marker before the lookup (presumably the gateway API's
    ## prefix for application principals - TODO confirm)
    payload = json.dumps({
        "ids": [graph_id.replace('app-', '') for graph_id in batch],
        "types": ["user", "group", "application"]
    })
    users = get_graph_response_json('https://graph.microsoft.com/v1.0/directoryObjects/getByIds',"POST",payload,"value")
    if isinstance(users, list):
        ## extend (not append) so objects from every batch end up in the Data Frame,
        ## rather than only those of the first batch
        clusterusersgraph.extend(users)
    else:
        print(f"Warning: Graph lookup failed for a batch of {len(batch)} IDs; those users will have no Graph details")

## Define Gateway Cluster Users Graph Data Frame schema
clusterusergraph_schema = StructType([
    StructField('id', StringType(), True),
    StructField('displayName', StringType(), True),
    StructField('mail', StringType(), True),
    StructField('userPrincipalName', StringType(), True)
  ])

## An explicit schema lets this work even when no directory objects were returned
clusterusergraph_df = spark.createDataFrame(spark.sparkContext.parallelize(clusterusersgraph), clusterusergraph_schema)
clusterusergraph_df = clusterusergraph_df.withColumnRenamed("displayName", "GatewayClusterUserDisplayName")
## Prefer the mail address and fall back to the user principal name
clusterusergraph_df = clusterusergraph_df.withColumn('GatewayClusterUserEmailAddress', coalesce(clusterusergraph_df['mail'],clusterusergraph_df['userPrincipalName']).cast(StringType()))

## Define Gateway Cluster Permissions Data Frame schema
permissions_schema = StructType([
    StructField('id', StringType(), True),
    StructField('permissions',ArrayType(StructType([
        StructField('id', StringType(),True),
        StructField('principalType', StringType(),True),
        StructField('role', StringType(),True),
        StructField("allowedDataSources", StringType(),True)
        ])),True)
  ])

## explode Permissions array and rename columns
permissions_df = spark.createDataFrame(spark.sparkContext.parallelize(data), permissions_schema)
permissions_df = permissions_df.withColumn("permissionsExploded", explode_outer("permissions"))\
            .select(col("id").alias("GatewayClusterID"),
                    col("permissionsExploded.id").alias("GatewayClusterUserGraphID"),
                    col("permissionsExploded.principalType").alias("GatewayClusterUserPrincipalType"),
                    col("permissionsExploded.role").alias("GatewayClusterUserRole"),
                    translate(col("permissionsExploded.allowedDataSources"),"[]",'').alias("GatewayClusterUserAllowedDatasources")
                    )
## join to Gateway Cluster Users Graph Data Frame; the lookup was made with
## 'app-' stripped, so strip it on the join key too, otherwise those
## principals can never match
permissions_df = permissions_df.join(
    clusterusergraph_df,
    regexp_replace(permissions_df.GatewayClusterUserGraphID, 'app-', '') == clusterusergraph_df.id,
    'leftouter'
)
permissions_df = permissions_df.drop("id","mail","userPrincipalName")

## write to lakehouse
permissions_writeToLake = permissions_df.write.mode("overwrite").format("delta").save("Tables/GatewayClusterPermissions")

#### Get Gateway Cluster Datasources

In [None]:
## Get every Gateway Cluster Datasource visible to the authenticated user, with the users array expanded
datasources = get_pbi_response_json(
    'https://api.powerbi.com/v2.0/myorg/me/gatewayClusterDatasources?$expand=users',
    "GET",
    "",
    "value",
)

## Schema of the Datasource objects returned by the API
datasources_schema = StructType(
    [
        StructField(field_name, StringType(), True)
        for field_name in ('id', 'clusterId', 'datasourceName', 'datasourceType', 'datasourceReference')
    ]
    + [
        StructField('credentialDetails', StructType(
            [
                StructField(field_name, StringType(), True)
                for field_name in ("credentialType", "encryptedConnection", "privacyLevel", "encryptionAlgorithm")
            ]
            + [
                StructField(field_name, BooleanType(), True)
                for field_name in ("useCustomOAuthApp", "skipTestConnection", "isCredentialEncrypted")
            ]
        )),
        StructField('singleSignOnEnabled', BooleanType(), True),
        StructField('onPremGatewayRequired', BooleanType(), True),
        StructField('singleSignOnType', StringType(), True),
        StructField('supportedAuthenticationTypes', StringType(), True),
        StructField('gatewayType', StringType(), True),
        StructField('allowDatasourceThroughGateway', BooleanType(), True),
    ]
)

## Schema of the JSON document held in datasourceReference
datasourcereference_schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in ("kind", "path", "maxPermissionRole", "gatewayType", "annotation")
])

## Output columns, in table order
datasource_columns = [
    col("clusterId").alias("GatewayClusterID"),
    col("id").alias("GatewayDatasourceID"),
    col("gatewayType").alias("GatewayDatasourceGatewayType"),
    col("datasourceName").alias("GatewayDatasourceName"),
    col("datasourceType").alias("GatewayDatasourceType"),
    col("datasource.path").alias("GatewayDatasourceConnectionString"),
    col("singleSignOnEnabled").alias("GatewayDatasourceSingleSignOnEnabled"),
    col("singleSignOnType").alias("GatewayDatasourceSingleSignOnType"),
    col("onPremGatewayRequired").alias("GatewayDatasourceOnPremGatewayRequired"),
    col("allowDatasourceThroughGateway").alias("GatewayDatasourceAllowThroughGateway"),
    ## remove the square brackets from the authentication types text
    translate(col("supportedAuthenticationTypes"),"[]",'').alias("GatewayDatasourceSupportedAuthenticationTypes"),
    col("credentialDetails.credentialType").alias("GatewayDatasourceCredentialType"),
    col("credentialDetails.encryptedConnection").alias("GatewayDatasourceEncryptedConnection"),
    col("credentialDetails.privacyLevel").alias("GatewayDatasourcePrivacyLevel"),
    col("credentialDetails.encryptionAlgorithm").alias("GatewayDatasourceEncrytionAlgorithm"),
    col("credentialDetails.useCustomOAuthApp").alias("GatewayDatasourceUseCustomOAuthApp"),
    col("credentialDetails.skipTestConnection").alias("GatewayDatasourceSkipTestConnection"),
    col("credentialDetails.isCredentialEncrypted").alias("GatewayDatasourceIsCredentialEncrypted"),
]

## Build the Data Frame, parse the datasource reference JSON, then keep and rename the output columns
datasources_df = (
    spark.createDataFrame(spark.sparkContext.parallelize(datasources), datasources_schema)
    .withColumn("datasource", from_json(col("datasourceReference"), datasourcereference_schema))
    .select(*datasource_columns)
    .drop("datasource")
)

## write to lakehouse
datasources_writeToLake = (
    datasources_df.write
    .format("delta")
    .mode("overwrite")
    .save("Tables/GatewayClusterDatasources")
)

#### Get Gateway Cluster Datasource Users

In [None]:
## Create list of Graph IDs of Gateway Datasource Users (datasources without a users array contribute nothing)
graphIds = [p['identifier'] for i in datasources for p in (i.get('users') or [])]

## Create unique list of Graph IDs of Gateway Datasource Users (order preserved)
unique_graphIds = list(dict.fromkeys(graphIds))

## Flat list of directory objects returned by the Graph API across ALL batches
datasourceusersgraph = []
## Batch Graph IDs into batches of 1000 since the API has a payload limitation
batches = [unique_graphIds[i:i + 1000] for i in range(0, len(unique_graphIds), 1000)]
for batch in batches:
    ## Strip the 'app-' marker before the lookup (presumably the gateway API's
    ## prefix for application principals - TODO confirm)
    payload = json.dumps({
        "ids": [graph_id.replace('app-', '') for graph_id in batch],
        "types": ["user", "group", "application"]
    })
    users = get_graph_response_json('https://graph.microsoft.com/v1.0/directoryObjects/getByIds',"POST",payload,"value")
    if isinstance(users, list):
        ## extend (not append) so objects from every batch end up in the Data Frame,
        ## rather than only those of the first batch
        datasourceusersgraph.extend(users)
    else:
        print(f"Warning: Graph lookup failed for a batch of {len(batch)} IDs; those users will have no Graph details")

## Define Gateway Datasource Users Graph Data Frame schema
datasourceusersgraph_schema = StructType([
    StructField('id', StringType(), True),
    StructField('displayName', StringType(), True),
    StructField('mail', StringType(), True),
    StructField('userPrincipalName', StringType(), True)
  ])

## An explicit schema lets this work even when no directory objects were returned
datasourceusersgraph_df = spark.createDataFrame(spark.sparkContext.parallelize(datasourceusersgraph), datasourceusersgraph_schema)
datasourceusersgraph_df = datasourceusersgraph_df.withColumnRenamed("displayName", "GatewayDatasourceUserDisplayName")
## Prefer the mail address and fall back to the user principal name
datasourceusersgraph_df = datasourceusersgraph_df.withColumn('GatewayDatasourceUserEmailAddress', coalesce(datasourceusersgraph_df['mail'],datasourceusersgraph_df['userPrincipalName']).cast(StringType()))

## Define Gateway Datasource Users Data Frame schema
users_schema = StructType([
    StructField('id', StringType(), True),
    StructField('users',ArrayType(StructType([
        StructField('identifier', StringType(),True),
        StructField('principalType', StringType(),True),
        StructField('role', StringType(),True),
        StructField("datasourceAccessRight", StringType(),True)
        ])),True)
  ])

## explode Users array and rename columns
users_df = spark.createDataFrame(spark.sparkContext.parallelize(datasources), users_schema)
users_df = users_df.withColumn("usersExploded", explode_outer("users"))\
            .select(col("id").alias("GatewayDatasourceID"),
                    col("usersExploded.identifier").alias("GatewayDatasourceUserGraphID"),
                    col("usersExploded.principalType").alias("GatewayDatasourceUserPrincipalType"),
                    col("usersExploded.datasourceAccessRight").alias("GatewayClusterUserAccess")
                    )
## join to Gateway Datasource Users Graph Data Frame; the lookup was made with
## 'app-' stripped, so strip it on the join key too, otherwise those
## principals can never match
users_df = users_df.join(
    datasourceusersgraph_df,
    regexp_replace(users_df.GatewayDatasourceUserGraphID, 'app-', '') == datasourceusersgraph_df.id,
    'leftouter'
)
## a null Graph id after the left join means the Graph API returned nothing for this user
users_df = users_df.withColumn("GatewayDatasourceUserNotFoundInGraphAPI", when(users_df.id.isNull() ,True).otherwise(False))
users_df = users_df.drop("id","mail","userPrincipalName")

## write to lakehouse
users_writeToLake = users_df.write.mode("overwrite").format("delta").save("Tables/GatewayClusterDatasourceUsers")