# Function

## Get Power BI semantic models to refresh

In [None]:
def fn_pbi_get_semantic_models_to_refresh(list_frequency):
    """Read the active Power BI semantic models configured for the given frequencies.

    Queries the `power_bi_refresh` Delta table located under
    `global_parameter.abfs_path_lh_cfg` and keeps the rows whose `frequency`
    is one of the requested values, whose `environment` equals
    `global_parameter.environment` and whose `is_active` is 1.

    Args:
        list_frequency: Frequency value(s) to select. A list is expected; any
            other iterable (tuple, set, ...) or a single string is accepted too.

    Returns:
        tuple: `(alert, alert_description, sdf, sdf_count)`. On success `alert`
        is "Success", `sdf` is the DataFrame taken from `fn_execute_spark_sql`
        and `sdf_count` is its row count. On failure `alert` is "Danger",
        `alert_description` is the error text and the last two items are None.

    The outcome is always written to the local log through `fn_local_log_insert`.
    """
    fn_name        = stk()[0][3]
    caller_fn_name = stk()[1].function.replace("<module>", "")
    if is_debug: par = {}

    try:
        # A bare string would otherwise be iterated character by character.
        frequencies = [list_frequency] if isinstance(list_frequency, str) else list(list_frequency)
        if not frequencies:
            raise ValueError("list_frequency is empty: no frequency to select")

        # repr() produces the same literals that str(list) produced ('daily', 1, ...)
        # without stripping "[" / "]" that belong to a value, and it works for
        # tuples and sets as well.
        frequency_in_list = ", ".join(repr(frequency) for frequency in frequencies)

        sql_query = f"""SELECT DISTINCT
            `frequency`
            , `environment`
            , IFNULL(`workspace_id`     , \"\") AS `workspace_id`
            , IFNULL(`semantic_model_id`, \"\") AS `semantic_model_id`
            , `semantic_model_name`
            , `request_timeout`
            , `refresh_timeout`
            , `loop_wait_time_seconds`
        FROM delta.`{global_parameter.abfs_path_lh_cfg}/Tables/power_bi_refresh`
        WHERE
            `frequency`       IN ({frequency_in_list})
            AND `environment` = \"{global_parameter.environment}\"
            AND `is_active`   = 1
        ORDER BY
            `frequency`
            , IFNULL(`workspace_id`, '')
            , IFNULL(`semantic_model_id`, '');"""

        sdf = fn_execute_spark_sql(sql_query)[2]
        # NOTE(review): presumably fn_execute_spark_sql yields None at index 2 when
        # the query fails (same convention as the functions in this file) - confirm.
        if sdf is None:
            raise RuntimeError("fn_execute_spark_sql returned no DataFrame for the power_bi_refresh query")
        sdf_count = sdf.count()

        alert             = "Success"
        alert_description = f"Rows (count): {sdf_count}"

        return (alert, alert_description, sdf, sdf_count)
    except Exception as ex:
        alert             = "Danger"
        alert_description = str(ex)

        return (alert, alert_description, None, None)
    finally:
        if is_debug:
            par["locals"] = locals()
            fn_print_debug_info(alert, fn_name, par)
            del par

        fn_local_log_insert(global_parameter.process_timestamp, medallion_name, fn_name, fn_locals_to_json(locals())[2], alert, alert_description)

## Build async dataframe

In [None]:
def fn_pbi_set_sdf_async(sdf):
    """Build the DataFrame of REST call parameters, one row per semantic model.

    For every row of `sdf` (sorted by frequency, workspace_id and
    semantic_model_id) the refresh, status and cancel URLs of the Power BI
    REST API are derived, the time parameters get their defaults, and the
    request headers (including the bearer token) are attached.

    Args:
        sdf: DataFrame with the columns `frequency`, `workspace_id`,
            `semantic_model_id`, `semantic_model_name`, `request_timeout`,
            `refresh_timeout` and `loop_wait_time_seconds`.

    Returns:
        tuple: `(alert, alert_description, sdf, sdf_count)`. On success `sdf`
        is the new parameter DataFrame and `sdf_count` its row count; on
        failure `alert` is "Danger" and the last two items are None.

    The bearer token is masked before anything is printed or written to the
    local log.
    """
    fn_name        = stk()[0][3]
    caller_fn_name = stk()[1].function.replace("<module>", "")
    if is_debug: par = {}

    # Bind everything the finally block reads up front, so that an early failure
    # or an input without rows cannot raise NameError while masking/logging.
    list_parameter = []
    bearer_token   = None
    headers        = None

    try:
        bearer_token = mssparkutils.credentials.getToken(global_parameter.bearer_token_url)

        # Attributes that do not depend on the row
        power_bi_refresh_base_url = "https://api.powerbi.com/v1.0/myorg"
        # TODO: move power_bi_refresh_base_url to lh_cfg.dbo.global_parameter
        headers = {"Content-Type": "application/json", "User-Agent": "zPL Concept", "Authorization": f"Bearer {bearer_token}"}

        for row in sdf \
            .sort(
                sdf.frequency.asc()
                , sdf.workspace_id.asc()
                , sdf.semantic_model_id.asc()
            ).collect():

            # Time parameters: both 0 and NULL (None) mean "use the default"
            request_timeout        = row.request_timeout        or 60   # 60 seconds
            refresh_timeout        = row.refresh_timeout        or 720  # 720 minutes = 12 hours
            loop_wait_time_seconds = row.loop_wait_time_seconds or 15   # 15 seconds

            # URLs; "^req_id^" is a placeholder that is replaced when a refresh is cancelled
            url_refresh    = f"{power_bi_refresh_base_url}/groups/{row.workspace_id}/datasets/{row.semantic_model_id}/refreshes"
            url_get_status = f"{url_refresh}?$top=1"
            url_delete     = f"{url_refresh}/^req_id^"

            list_parameter.append(Row(
                request_timeout
                , refresh_timeout
                , loop_wait_time_seconds
                , row.semantic_model_name
                , url_refresh
                , url_get_status
                , url_delete
                , dict(headers)  # own copy per row
            ))

        sch = st.StructType([
            st.StructField("request_timeout"         , st.IntegerType())
            , st.StructField("refresh_timeout"       , st.IntegerType())
            , st.StructField("loop_wait_time_seconds", st.IntegerType())
            , st.StructField("semantic_model_name"   , st.StringType())
            , st.StructField("url_refresh"           , st.StringType())
            , st.StructField("url_get_status"        , st.StringType())
            , st.StructField("url_delete"            , st.StringType())
            , st.StructField("headers"               , st.MapType(st.StringType(), st.StringType()))
        ])
        sdf       = spark.createDataFrame(list_parameter, sch)
        sdf_count = sdf.count()

        alert             = "Success"
        alert_description = f"Rows (count): {sdf_count}"

        return (alert, alert_description, sdf, sdf_count)
    except Exception as ex:
        alert             = "Danger"
        alert_description = str(ex)

        return (alert, alert_description, None, None)
    finally:
        # Mask bearer_token first, so neither the debug print nor the log can expose it
        list_parameter = str(list_parameter)
        if bearer_token:
            list_parameter = list_parameter.replace(bearer_token, "[REDACTED]")
        if headers is not None:
            headers = fn_mask_bearer_token(headers)
        bearer_token = "[REDACTED]"

        if is_debug:
            par["locals"] = locals()
            fn_print_debug_info(alert, fn_name, par)
            del par

        fn_local_log_insert(global_parameter.process_timestamp, medallion_name, fn_name, fn_locals_to_json(locals())[2], alert, alert_description)

## User-defined function "API Request"

In [None]:
def fn_pbi_refresh_api_request(method, url, headers, request_timeout):
    """Send a single HTTP request and hand back the response.

    Args:
        method: HTTP verb; "GET", "POST" and "DELETE" are supported.
        url: Target URL.
        headers: Request headers.
        request_timeout: Value passed as `timeout` to the request call.

    Returns:
        The object returned by `req.get` / `req.post` / `req.delete`, or - on
        any failure, including an unsupported `method` - the error message as
        a string. Callers therefore have to check for `str` before using the
        result as a response.
    """
    try:
        if method == "GET":
            return req.get(url = url, headers = headers, timeout = request_timeout)
        if method == "POST":
            return req.post(url = url, headers = headers, timeout = request_timeout)
        if method == "DELETE":
            return req.delete(url = url, headers = headers, timeout = request_timeout)

        # Name the real problem instead of failing later on an unbound variable.
        raise ValueError(f"Unsupported HTTP method: {method}")
    except Exception as ex:
        return str(ex)

# Expose fn_pbi_refresh_api_request as a UDF via `sf.udf`; no return type is passed here.
# NOTE(review): the wrapped function returns a response object or an error string - confirm
# that this is compatible with how the UDF is consumed (no usage is visible in this section).
udf_pbi_refresh_api_request = sf.udf(fn_pbi_refresh_api_request)

## User-defined function "Power BI Refresh"

In [None]:
def fn_pbi_refresh_async(
    request_timeout
    , refresh_timeout
    , loop_wait_time_seconds
    , semantic_model_name
    , url_refresh
    , url_get_status
    , url_delete
    , headers
):
    """Trigger one semantic model refresh and poll it until it ends or times out.

    A POST to `url_refresh` starts the refresh. `url_get_status` is then polled
    every `loop_wait_time_seconds` seconds until the latest refresh reports
    "Completed", reports a terminal failure status, or `refresh_timeout`
    minutes have elapsed. On timeout a DELETE is sent to `url_delete` with the
    "^req_id^" placeholder replaced by the request id.

    Args:
        request_timeout: Timeout handed to every HTTP request.
        refresh_timeout: Maximum duration of the refresh, in minutes.
        loop_wait_time_seconds: Pause between two status polls, in seconds.
        semantic_model_name: Name of the model; only echoed in the result.
        url_refresh: URL used to start the refresh.
        url_get_status: URL used to read the latest refresh status.
        url_delete: URL used to cancel the refresh; contains "^req_id^".
        headers: Request headers, including the Authorization header.

    Returns:
        str: JSON document that always contains "alert" ("Success" or "Danger")
        and "alert_description", plus the timings, the request id, the echoed
        parameters and the masked headers as far as they were collected.
        Exceptions derived from `Exception` are reported in the document
        instead of being raised.
    """
    fn_name = "fn_pbi_refresh_async"
    if is_debug: par = {}

    # Created before the try block so that a failure keeps what was collected so far.
    return_value = {}

    try:
        time_start                             = dt.datetime.now()
        return_value["time_start"]             = time_start.strftime('%Y-%m-%d %H:%M:%S.%f')
        return_value["semantic_model_name"]    = semantic_model_name
        return_value["request_timeout"]        = request_timeout
        return_value["refresh_timeout"]        = refresh_timeout
        return_value["loop_wait_time_seconds"] = loop_wait_time_seconds
        return_value["url_refresh"]            = url_refresh
        return_value["url_get_status"]         = url_get_status
        return_value["url_delete"]             = url_delete

        # Power BI Refresh
        rsp_refresh = fn_pbi_refresh_api_request("POST", url_refresh, headers, request_timeout)
        # The request helper returns the error text (a str) instead of raising.
        if isinstance(rsp_refresh, str):
            raise RuntimeError(f"Refresh request failed: {rsp_refresh}")
        # A rejected request must not be polled: the latest history entry would
        # belong to an earlier refresh and could wrongly report "Completed".
        if not rsp_refresh.ok:
            raise RuntimeError(f"Refresh request rejected (HTTP {rsp_refresh.status_code}): {rsp_refresh.text}")
        req_id                 = rsp_refresh.headers["RequestId"]
        return_value["req_id"] = req_id

        # Loop until:
        #   * Status is "Completed"
        #   * Status is a terminal failure ("Failed", "Disabled", "Cancelled")
        #   * Refresh Timeout is reached (Refresh Timeout default = 0 = 720 min = 12 hours)
        while True:
            # Get status
            rsp_status = fn_pbi_refresh_api_request("GET", url_get_status, headers, request_timeout)
            if isinstance(rsp_status, str):
                raise RuntimeError(f"Status request failed: {rsp_status}")
            latest_refresh = rsp_status.json()['value'][0]
            status         = latest_refresh['status']

            # Elapsed time is computed on every pass, so it is also available
            # when the very first poll already ends the loop.
            now      = dt.datetime.now()
            diff_sec = (now - time_start).total_seconds()
            diff_min = divmod(diff_sec, 60)[0]

            if status == "Completed":
                return_value["alert"]             = "Success"
                return_value["alert_description"] = "Completed"
                break

            if status in ("Failed", "Disabled", "Cancelled"):
                # The refresh will not progress any further; report it right away.
                detail                            = latest_refresh.get("serviceExceptionJson")
                return_value["alert"]             = "Danger"
                return_value["alert_description"] = f"Refresh ended with status {status}" + (f": {detail}" if detail else "")
                break

            # Timeout reached: cancel the request
            if diff_min >= refresh_timeout:
                url_delete                        = url_delete.replace("^req_id^", req_id)
                rsp_delete                        = fn_pbi_refresh_api_request("DELETE", url_delete, headers, request_timeout)
                return_value["alert"]             = "Danger"
                return_value["alert_description"] = f"Timeout {refresh_timeout} minutes reached ({rsp_delete})"
                break

            t.sleep(loop_wait_time_seconds)

        return_value["status"]       = status
        return_value["time_end"]     = now.strftime('%Y-%m-%d %H:%M:%S.%f')
        return_value["duration_sec"] = diff_sec
        return_value["duration_min"] = diff_min
        return_value["url_delete"]   = url_delete
        return_value["headers"]      = fn_mask_bearer_token(headers)
    except Exception as ex:
        return_value["alert"]             = "Danger"
        return_value["alert_description"] = str(ex)
    finally:
        if is_debug:
            # Never dump the Authorization header together with the locals.
            debug_locals            = dict(locals())
            debug_locals["headers"] = "[REDACTED]"
            par["locals"]           = debug_locals
            fn_print_debug_info(return_value.get("alert"), fn_name, par)
            del par

    return j.dumps(return_value)

# Expose fn_pbi_refresh_async as a UDF via `sf.udf`; no return type is passed here.
# The wrapped function returns a JSON string; fn_pbi_refresh applies this UDF to the parameter columns.
udf_pbi_refresh_async = sf.udf(fn_pbi_refresh_async)

## Execute async Power BI Refresh

In [None]:
def fn_pbi_refresh(sdf):
    """Run the refresh UDF for every row of the parameter DataFrame.

    Adds the column `return_value`, filled by `udf_pbi_refresh_async` from the
    columns `request_timeout`, `refresh_timeout`, `loop_wait_time_seconds`,
    `semantic_model_name`, `url_refresh`, `url_get_status`, `url_delete` and
    `headers`.

    Args:
        sdf: Parameter DataFrame holding the columns listed above.

    Returns:
        tuple: `(alert, alert_description, sdf, sdf_count)`. On success `sdf`
        is the cached DataFrame including `return_value` and `sdf_count` its
        row count; on failure `alert` is "Danger" and the last two items are
        None.

    The outcome is always written to the local log through `fn_local_log_insert`.
    """
    fn_name        = stk()[0][3]
    caller_fn_name = stk()[1].function.replace("<module>", "")
    if is_debug: par = {}

    try:
        if sdf is None:
            raise ValueError("No parameter DataFrame to refresh (sdf is None)")

        sdf               = sdf.withColumn(
            "return_value"
            , udf_pbi_refresh_async(
                sf.col("request_timeout")
                , sf.col("refresh_timeout")
                , sf.col("loop_wait_time_seconds")
                , sf.col("semantic_model_name")
                , sf.col("url_refresh")
                , sf.col("url_get_status")
                , sf.col("url_delete")
                , sf.col("headers")
            )
        # The UDF has side effects (it starts refreshes). Without caching, every
        # later action on the DataFrame (display, collect, ...) would evaluate
        # the UDF again and start the refreshes once more.
        ).cache()
        # count() materializes the cache, i.e. this is where the refreshes run.
        sdf_count         = sdf.count()
        alert             = "Success"
        alert_description = f"Rows (count): {sdf_count}"

        return (alert, alert_description, sdf, sdf_count)
    except Exception as ex:
        alert             = "Danger"
        alert_description = str(ex)

        return (alert, alert_description, None, None)
    finally:
        if is_debug:
            par["locals"] = locals()
            fn_print_debug_info(alert, fn_name, par)
            del par

        fn_local_log_insert(global_parameter.process_timestamp, medallion_name, fn_name, fn_locals_to_json(locals())[2], alert, alert_description)
    

## Insert into local log

In [None]:
def fn_pbi_local_log_insert(sdf):
    """Write one local-log entry per row of the refresh result DataFrame.

    Each row's `return_value` (a JSON string) is parsed and its "alert" and
    "alert_description" are passed, together with the raw JSON, to
    `fn_local_log_insert` under the function name "fn_pbi_refresh".

    Args:
        sdf: DataFrame with a `return_value` column holding JSON strings.

    Returns:
        tuple: `(alert, alert_description, row_count)`. On success `row_count`
        is the number of log entries written; on failure `alert` is "Danger",
        `alert_description` is the error text and `row_count` is None.
    """
    fn_name        = stk()[0][3]
    caller_fn_name = stk()[1].function.replace("<module>", "")
    if is_debug: par = {}

    try:
        if sdf is None:
            raise ValueError("No refresh results to log (sdf is None)")

        row_count = 0
        for row in sdf.collect():
            # Parse the JSON payload once per row
            refresh_result = j.loads(row.return_value)
            fn_local_log_insert(
                global_parameter.process_timestamp
                , medallion_name
                , "fn_pbi_refresh"
                , row.return_value
                , refresh_result["alert"]
                , refresh_result["alert_description"]
            )
            row_count += 1

        alert             = "Success"
        alert_description = f"Rows (count): {row_count}"

        return (alert, alert_description, row_count)
    except Exception as ex:
        alert             = "Danger"
        alert_description = str(ex)

        return (alert, alert_description, None)
    finally:
        if is_debug:
            par["locals"] = locals()
            fn_print_debug_info(alert, fn_name, par)
            del par

# Operation

## Execute async Power BI Refresh

In [None]:
def fn_exec_async_pbi_refresh(list_frequency):
    """Run the complete Power BI refresh pipeline for the given frequencies.

    Steps: read the semantic models to refresh, build the DataFrame of REST
    call parameters, execute the refreshes, and write the per-model results to
    the local log. The pipeline stops as soon as a step does not report
    "Success" or when there is no semantic model to refresh; the first three
    steps write their own outcome to the local log, so nothing is lost by
    stopping early.

    Args:
        list_frequency: Frequencies handed to
            `fn_pbi_get_semantic_models_to_refresh`.

    Returns:
        None
    """
    # Create the list of semantic models to refresh
    alert, _, sdf, sdf_count = fn_pbi_get_semantic_models_to_refresh(list_frequency)
    if is_debug:
        print("sdf (semantic models to refresh)")
        display(sdf)
    if alert != "Success" or not sdf_count:
        return

    # Create a list of async APIs
    alert, _, sdf, _ = fn_pbi_set_sdf_async(sdf)
    if is_debug:
        print("sdf (async list before refresh)")
        display(sdf)
    if alert != "Success":
        return

    # Execute async APIs
    alert, _, sdf, _ = fn_pbi_refresh(sdf)
    if is_debug:
        print("sdf (async list after refresh)")
        display(sdf)
    if alert != "Success":
        return

    # Insert in local log
    fn_pbi_local_log_insert(sdf)