In [0]:
#0 Uncomment the line below to remove all existing widgets (old filters etc.)
#dbutils.widgets.removeAll()

The ADAL package must be installed in order to connect to the qpat data mart &darr;

In [0]:
#1 Install ADAL (Azure Active Directory Authentication Library), which the authentication cell imports
# NOTE(review): no version is pinned, so the installed release can change between runs - consider pinning

%pip install adal

In [0]:
#2 Import the tools  (from env import env, not used for this notebook)

from src import utils, excel

import openpyxl
import pandas as pd

from pyspark.sql import functions as F
from datetime import datetime
from openpyxl.styles import NamedStyle

The next cell prints a message containing a link and a one-time code. Open the link (shown to the left of the code) and enter the code when prompted. This is used to authenticate access to qpat &darr;

In [0]:
# 3 Device-code sign-in: the printed message explains where to enter the key code

# Mart authentication
# Note: 'adal' is deprecated; consider using 'msal' for long-term use.

import adal  # imported here, after the %pip install cell has run

# Application and target resource for the token request
# Client ID for Azure Cloud Shell (per the original note - NOTE(review): confirm which app this ID belongs to)
client_id = "04b07795-8ddb-461a-bbee-02f9e1bf7b46"
resource_app_id_url = "https://database.windows.net/"

# Microsoft sign-in authority for the chosen tenant
tenant = "common"
authority = f"https://login.microsoftonline.com/{tenant}"
context = adal.AuthenticationContext(authority)

# Step 1: request a device code and show the sign-in instructions to the user
code = context.acquire_user_code(resource_app_id_url, client_id)
print(code['message'])

# Step 2: exchange the device code for a token and keep the access token for the JDBC read
token = context.acquire_token_with_device_code(resource_app_id_url, code, client_id)
access_token = token["accessToken"]



In [0]:
#4 Read from the mart. The table name takes the form "Schema.TableName", e.g. "EROC.PEROC_Initial_Validation"

# Connection details for the source table
driver = "com.microsoft.sqlserver.jdbc.spark"  # assigned here but not passed to the reader below
url = "jdbc:sqlserver://udalsqlmartprod.database.windows.net;databaseName=udal-sql-mart-qpat"
table_name = "EROC.PEROC_Initial_Validation"

# Load the table into a Spark DataFrame over JDBC, authenticating with the access token
df_raw = (
    spark.read
    .format("jdbc")
    .options(
        url=url,
        dbtable=table_name,
        accessToken=access_token,
        encrypt="true",
        hostNameInCertificate="*.database.windows.net",
    )
    .load()
)

In [0]:
#df_raw.display()

In [0]:
#5 Choose which metrics and which columns to bring back from the qpat table
df_moved_discharged = (
    df_raw
    # Keep only the listed metric codes, for months after '2023-03-01'
    .where(
        F.col("EROC_DerMetric").isin([
            "PIFUSRTMV01",
            "PIFUSRTDC01",
            "PIFUTOTAL01",
            "PIFUCOMP001",
            "PIFUBOOK001",
            "PIFUDNA0001",
        ])
        & (F.col("EROC_DerMonth") > '2023-03-01')
    )
    # The EROC_Latest_Flag == 1 filter is intentionally not applied: both flag values
    # are needed so that previous and current can be compared
    .select([
        "EROC_DerMonth",
        "EROC_DerProviderCode",
        "EROC_DerTFC",
        "RegionName",
        "EROC_DerMetric",
        "EROC_Value",
        "EROC_Latest_Flag",
        "Submission_type",
    ])
    .orderBy("EROC_DerMonth", "EROC_DerProviderCode")
)

#display(df_moved_discharged)

In [0]:
#6 Reduce the risk of a timeout: saving the file can take a while, so the I/O timeout setting is raised to "600s"
# NOTE(review): confirm that "spark.databricks.io.timeout" is a setting the cluster runtime honours
spark.conf.set("spark.databricks.io.timeout", "600s")

In [0]:
#7 Save the result in Delta format to storage; Delta gives the best performance when chart building
(
    df_moved_discharged.write
    .format('delta')
    .mode('overwrite')  # replace whatever is already stored at the target path
    .option('mergeSchema', 'true')
    .option('overwriteSchema', 'true')
    .save('abfss://analytics-projects@udalstdataanalysisprod.dfs.core.windows.net/ElectiveRecovery/Projects/PIFU_Validation')
)

In [0]:
#8 Creates a .csv file if required; currently not required
#df_moved_discharged.coalesce(1).write.mode('overwrite').option('header', 'true').csv('abfss://analytics-projects@udalstdataanalysisprod.dfs.core.windows.net/ElectiveRecovery/Projects/PIFU_QA_File')

In [0]:
#df_moved_discharged.display()
