## Fabric Activities
This notebook calls the [Power BI Activity Events API](https://learn.microsoft.com/en-us/rest/api/power-bi/admin/get-activity-events) to create the following items in a Lakehouse:
- A fact table in Delta format containing a subset of fields from each activity record, appended to each time the notebook is run
- A .json file containing a complete record of all the results from the Activity Events API for each day the notebook is run

**Important Notes:**
- **Schedule this notebook to run nightly after midnight UTC** to capture all activities every day
- A Power BI Admin must turn on the "[Allow Service Principals to use read-only Admin APIs](https://learn.microsoft.com/en-us/power-bi/enterprise/read-only-apis-service-principal-authentication)" feature. 
- If the "Allow Service Principals to use read-only Admin APIs" setting is turned on _and limited to specific security groups_, the service principal used to acquire the bearer token must be a member of one of the groups allowed to use the read-only Admin APIs
- The service principal used to acquire the bearer token must have Tenant.Read.All or Tenant.ReadWrite.All permissions
- Multi-Factor Authentication (MFA) must be disabled on the service account/user used to acquire the API bearer tokens

**Future Work:**
- Add workarounds for known issues with the Activity Events API, including:
    - Workspace IDs are sometimes shown as "FolderObjectId"
    - Analyzed By External Application activities don't show the Workspace ID

### Import libraries

In [None]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import date, timedelta
import requests 
import json

### Define variables

In [None]:
# Key Vault that holds the service principal credentials
nameOfKeyVault = "powerbi-admin-keyvault"

# Secret names inside the Key Vault (the NAMES of the secrets, not their values)
tenantIdSecretName = "xxxxxxxxxxxxx"      # secret name for the Tenant ID
clientIdSecretName = "xxxxxxxxxxxxx"      # secret name for the Client ID of the Service Principal
clientSecretSecretName = "xxxxxxxxxxxxx"  # secret name for the Client Secret of the Service Principal

# Base URLs for the Power BI REST API and for the Key Vault
pbiUri = "https://api.powerbi.com/v1.0/myorg/"
keyVaultUri = "https://" + nameOfKeyVault + ".vault.azure.net/"

# 1 = keep a raw .json file of the activities in the lakehouse, 0 = skip that step
saveJsonFile = 1

# Activities to exclude when building the Delta table - add/remove entries to suit your requirements
ignoreActivities = [
    "GenerateCustomVisualAADAccessToken",
    "GenerateCustomVisualWACAccessToken",
    "GenerateDataflowSasToken",
]

# Folder (under Files) for the raw .json files and name of the Delta table (under Tables)
nameOfFileFolder = "Activities"
nameofDeltaTable = "Activities"

### Define functions to get key vault secrets, API bearer token and JSON responses

In [None]:
def get_bearer_token():
    """Acquire an access token for the Power BI REST API.

    Reads the tenant ID, client ID and client secret from the Key Vault at
    ``keyVaultUri`` (using the secret names defined in the variables cell)
    and posts a client-credentials request to the tenant's
    ``/oauth2/token`` endpoint.

    Returns:
        The ``access_token`` value from the token endpoint's JSON response.

    Raises:
        requests.HTTPError: if the token endpoint answers with a 4xx/5xx
            status (for example wrong credentials).
    """
    tenant_id = mssparkutils.credentials.getSecret(keyVaultUri, tenantIdSecretName)
    client_id = mssparkutils.credentials.getSecret(keyVaultUri, clientIdSecretName)
    client_secret = mssparkutils.credentials.getSecret(keyVaultUri, clientSecretSecretName)

    url = "https://login.microsoftonline.com/" + tenant_id + "/oauth2/token"

    # Pass the form fields as a dict so requests URL-encodes every value.
    # Client secrets routinely contain characters such as '&', '+', '=' or '%',
    # which would corrupt a hand-concatenated form body.
    form_fields = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "resource": "https://analysis.windows.net/powerbi/api",
    }
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}

    response = requests.post(url, headers=headers, data=form_fields)
    # Fail with the HTTP status instead of an opaque KeyError on "access_token"
    response.raise_for_status()
    return response.json()["access_token"]

def get_response_json(fullurl, method, data, payload_object):
    """Call a REST endpoint with a bearer token and return the parsed JSON.

    A fresh bearer token is requested through ``get_bearer_token()`` on
    every call.

    Args:
        fullurl: Complete URL of the endpoint to call.
        method: HTTP method name passed to ``requests.request`` (e.g. "GET").
        data: Request body passed to ``requests.request``; "" for no body.
        payload_object: Key of the top-level JSON member to return, or ""
            (or None) to return the whole JSON document.

    Returns:
        The parsed JSON response, or only its ``payload_object`` member.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        KeyError: if ``payload_object`` is not present in the response.
    """
    bearer_token = get_bearer_token()
    headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {bearer_token}'}
    response = requests.request(method, fullurl, headers=headers, data=data)

    # Surface throttling / authorization failures here rather than letting an
    # error body be handed to the caller as if it were a valid payload.
    response.raise_for_status()

    body = response.json()
    if payload_object is None or payload_object == "":
        return body
    return body[payload_object]

#### Get yesterday's activity, run until continuation token is empty, save raw json file and insert into delta table

In [None]:
## imported here so this cell does not depend on edits to the import cell
import os
from datetime import datetime, timezone

## define yesterday - computed explicitly in UTC so the result does not depend on the
## timezone of the Spark driver (the notebook is meant to run after midnight UTC)
yesterday = datetime.now(timezone.utc).date() - timedelta(days = 1)
## create string for yesterday in format Activity Events API expects
yesterday_uri = yesterday.strftime("%Y-%m-%d")
## create string for yesterday in format for file name
yesterday_file = yesterday.strftime("%Y%m%d")
## create full Uri for Activity Events API
activites_Uri = pbiUri + "admin/activityevents?startDateTime='" + yesterday_uri + "T00:00:00'&endDateTime='" + yesterday_uri + "T23:59:59'"

## define dataframe schema - only these fields of each activity record are kept in the delta table
schema = StructType([
    StructField("Id",StringType(),True),
    StructField("RequestId",StringType(),True),
    StructField("CreationTime",StringType(),True),
    StructField("UserId",StringType(),True),
    StructField("UserAgent",StringType(),True),
    StructField("ClientIP",StringType(),True),
    StructField("Activity",StringType(),True),
    StructField("DistributionMethod",StringType(),True),
    StructField("ConsumptionMethod",StringType(),True),
    StructField("ItemName",StringType(),True),
    StructField("CapacityId",StringType(),True),
    StructField("WorkspaceId",StringType(),True),
    StructField("FolderObjectId",StringType(),True),
    StructField("DatasetId",StringType(),True),
    StructField("ReportId",StringType(),True),
    StructField("ArtifactId",StringType(),True),
    StructField("AppId",StringType(),True),
    StructField("AppReportId",StringType(),True),
    StructField("DataflowId",StringType(),True),
    StructField("DashboardId",StringType(),True),
    StructField("TileId",StringType(),True)
  ])

## batch counter (Activity Events API chunks responses)
batch_number = 0
## list collecting the complete activity records of every batch
j = []
## uri of the next batch to request - starts with the initial request
contUri = activites_Uri

## do until last batch is completed (the API returns no continuation uri after the last batch)
while contUri is not None:

    ## increment batch number
    batch_number = batch_number + 1

    ## get next batch of activity data
    data = get_response_json(contUri,"GET", "", "")

    ## set the continuation uri for next batch
    contUri = data['continuationUri']

    ## append json
    batch_events = data['activityEventEntities']
    j.extend(batch_events)

    ## print success
    print("Batch " + str(batch_number) + " completed - " + str(len(batch_events)) + ' activities found' )

## create data frame for delta table - built once from all collected records instead of
## unioning one dataframe per batch, which keeps the query plan small on busy tenants
df = spark.createDataFrame(j, schema)

## write to json file in default lakehouse
if saveJsonFile == 1:
    json_folder = "/lakehouse/default/Files/" + nameOfFileFolder
    json_path = json_folder + "/Activities" + "_" + yesterday_file + ".json"
    ## make sure the target folder exists - open() fails on the first run otherwise
    os.makedirs(json_folder, exist_ok=True)
    with open(json_path, "w") as f:
        f.write(json.dumps(j))
    print(json_path + " created")

## filter out some activities before writing to delta table
df = df.filter(~df["Activity"].isin(ignoreActivities))

## write to delta table - append when the table already exists, otherwise create it
## NOTE: running the notebook more than once per day appends the same day's activities again
write_mode = "append" if spark.catalog.tableExists(nameofDeltaTable) else "overwrite"
df.write.mode(write_mode).format("delta").save("Tables/" + nameofDeltaTable)