#### Set up Azure Credentials for use in API Calls
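`DefaultAzureCredential` picks up the service principal from the `AZURE_CLIENT_ID`, `AZURE_TENANT_ID` and `AZURE_CLIENT_SECRET` environment variables set in the cluster configuration (see the cluster setup reference at the end of this notebook).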

In [None]:
from azure.identity import DefaultAzureCredential

# Scope requested for the token: the Azure Resource Manager endpoint,
# which is the host the Resource Graph calls in this notebook go to.
scope = "https://management.azure.com/.default"

# Build the credential, then fetch one access token up front;
# later cells read the bearer string from `token.token`.
credential = DefaultAzureCredential()
token = credential.get_token(scope)

#### Set Variables and Import Modules for use throughout Notebook

In [None]:
import requests
import json

# Endpoint that Resource Graph queries are POSTed to.
# Kept in a variable because the api-version in the query string may change over time.
resourceGraphURL = 'https://management.azure.com/providers/Microsoft.ResourceGraph/resources?api-version=2020-04-01-preview'

# The Resource Graph KQL query sent to the API.
# It reads the `resourcechanges` table, pulls the change type and change
# timestamp out of `properties`, and keeps one row per resource id: the row
# with the greatest changeTime (arg_max), i.e. the most recent change.
# Resulting columns, in order: resourceId, changeTime, changeType.
resourceGraphQuery = """
resourcechanges
| extend changeType = tostring(properties.changeType), changeTime = todatetime(properties.changeAttributes.timestamp)
| summarize arg_max(changeTime, changeType) by resourceId=id
"""

# Spark SQL that turns the JSON response into named columns.
# It expects a temp view called `v_tempJSON` that exposes `data.rows`,
# explodes that array into one record per row, and then picks the values
# by position. The positions [0], [1], [2] must line up with the column
# order produced by resourceGraphQuery above; change both together.
parseQuery = """
    WITH baseExplode AS
    (
    SELECT explode(data.rows) rowData
    FROM v_tempJSON
    )
    SELECT  rowData[0] resourceId
            ,rowData[1] changeTime
            ,rowData[2] changeType
    FROM baseExplode
"""

# Destination path on Azure Data Lake Gen 2 where the Parquet output is written.
# Needs to be accessible through Unity Catalog or Cluster Setup.
# Left empty here on purpose: fill it in before running the write cells.
saveDestination = ""

#### Create a Python Function to POST the query to the Resource Graph REST API

In [None]:

def callAPI(skipToken = "", timeout = 120):
    """POST ``resourceGraphQuery`` to the Resource Graph endpoint and return the body.

    Parameters
    ----------
    skipToken : str, optional
        Continuation token taken from the ``$skipToken`` field of a previous
        response. When empty (the default) or ``None``, the request carries
        only the query, so the first page is returned.
    timeout : float or tuple, optional
        Passed straight to ``requests.post``; number of seconds to wait for
        the service before giving up. Defaults to 120 so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    str
        The raw response text (JSON), not parsed.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status code.
    requests.Timeout
        If no response arrives within ``timeout``.
    """
    # NOTE(review): `token` is fetched once in an earlier cell; a very long
    # paged run could outlive it -- confirm whether a refresh is needed.
    auth_headers = {'Authorization': 'Bearer ' + token.token}

    # Only send the paging option when a continuation token was supplied.
    data = {"query": resourceGraphQuery}
    if skipToken:
        data["options"] = {"$skipToken": skipToken}

    resourceGraphResponse = requests.post(
        resourceGraphURL,
        headers=auth_headers,
        json=data,
        timeout=timeout,
    )

    # Fail here on an error status instead of handing an error payload to
    # the parser, where it would surface as an unrelated Spark SQL failure.
    resourceGraphResponse.raise_for_status()

    return resourceGraphResponse.text

#### Create a Python Function to parse API Response into a usable Data Frame

In [None]:
def parseResponse(responseJSONText):
    """Convert one API response (JSON text) into a parsed Spark Data Frame.

    The text is wrapped in a single-element RDD and read as JSON, the result
    is registered as the temp view ``v_tempJSON`` (replacing any existing
    view of that name), and ``parseQuery`` is run against that view.

    Parameters
    ----------
    responseJSONText : str
        Response body as returned by the API call.

    Returns
    -------
    DataFrame
        The result of executing ``parseQuery`` with Spark SQL.
    """
    # One-element RDD so spark.read.json can infer the schema from the string
    json_rdd = sc.parallelize([responseJSONText])

    # Expose the raw response to SQL under the view name parseQuery selects from
    spark.read.json(json_rdd).createOrReplaceTempView("v_tempJSON")

    return spark.sql(parseQuery)

#### Coordinate functions and create Parquet table on Storage

In [None]:
# Fetch the first page; no skip token is passed on the initial call
responseText = callAPI()

# Shape the raw response text into a Data Frame
dfParsedResponse = parseResponse(responseText)

# Start the Parquet output fresh: anything already at the destination is replaced
dfParsedResponse.write.parquet(saveDestination, mode="overwrite")

#### Check for paged API response and append to Parquet table

In [None]:
# Decode the most recent response so its paging marker can be inspected
responseJSON = json.loads(responseText)

while True:
    # No "$skipToken" key means there are no further pages to fetch
    if "$skipToken" not in responseJSON:
        break

    # Continuation token handed back with the previous page
    skiptoken = responseJSON["$skipToken"]

    # Request the next page
    responseText = callAPI(skiptoken)

    # Decode it now so the next pass of the loop checks this page's marker
    responseJSON = json.loads(responseText)

    # Parse the new page into a Data Frame
    dfParsedResponse = parseResponse(responseText)

    # Add this page to the Parquet output written by the earlier cell
    dfParsedResponse.write.parquet(saveDestination, mode="append")

#### Optional: Check output

In [None]:
# df = spark.read.parquet(saveDestination)
# display(df)

#### For Reference. Cluster Setup

In [None]:
# {
#     "num_workers": 0,
#     "cluster_name": "singleNodeCluster",
#     "spark_version": "12.2.x-scala2.12",
#     "spark_conf": {
#         "spark.databricks.cluster.profile": "singleNode",
#         "spark.master": "local[*, 4]",
#         "spark.databricks.delta.preview.enabled": "true"
#     },
#     "azure_attributes": {
#         "first_on_demand": 1,
#         "availability": "ON_DEMAND_AZURE",
#         "spot_bid_max_price": -1
#     },
#     "node_type_id": "Standard_DS3_v2",
#     "driver_node_type_id": "Standard_DS3_v2",
#     "ssh_public_keys": [],
#     "custom_tags": {
#         "ResourceClass": "SingleNode"
#     },
#     "spark_env_vars": {
#         "AZURE_CLIENT_SECRET": "{{secrets/<keyvault name>/<client secret secret name>}}",
#         "AZURE_TENANT_ID": "{{secrets/<keyvault name>/<tenant id secret name>}}",
#         "AZURE_CLIENT_ID": "{{secrets/<keyvault name>/<client id secret name>}}",
#         "PYSPARK_PYTHON": "/databricks/python3/bin/python3"
#     },
#     "autotermination_minutes": 30,
#     "enable_elastic_disk": true,
#     "cluster_source": "UI",
#     "init_scripts": [],
#     "single_user_name": "",
#     "enable_local_disk_encryption": false,
#     "data_security_mode": "SINGLE_USER",
#     "runtime_engine": "PHOTON",
#     "cluster_id": ""
# }