In [ ]:
%%configure -f
{
"conf": {
    "spark.dynamicAllocation.disableIfMinMaxNotSpecified.enabled": true,
    "spark.dynamicAllocation.enabled": true,
    "spark.dynamicAllocation.minExecutors": 2,
    "spark.dynamicAllocation.maxExecutors": 20
   }
}

In [ ]:
# Notebook parameters; both default to empty strings in this cell.
# NOTE(review): presumably overridden at run time by whatever invokes the notebook — confirm.
batch_id = ''  # used in the batch_status lookup and written into the new status row
output_container_path = ''  # prefix of the parquet output path ("<path>/batch_status")

In [ ]:
import pyodbc
from pyspark.sql.functions import col, year, month, dayofmonth, isnan, when, count, current_timestamp, to_date, lit
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, TimestampType

In [ ]:
# Initiate logging
# Builds a module logger that exports through AzureLogHandler and an OpenCensus
# tracer that exports through AzureExporter, then emits one "INITIALISED" record.
import logging
from opencensus.ext.azure.log_exporter import AzureLogHandler
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace import config_integration
from opencensus.trace.samplers import AlwaysOnSampler
from opencensus.trace.tracer import Tracer

# Connection string is read as a secret rather than hard-coded.
# NOTE(review): "keyvault" is presumably the linked-service name — confirm.
instrumentation_connection_string = mssparkutils.credentials.getSecretWithLS("keyvault", "AppInsightsConnectionString")
# Enable the OpenCensus integration for the stdlib logging module.
config_integration.trace_integrations(['logging'])

logger = logging.getLogger(__name__)
# NOTE(review): a handler is added every time this cell runs; re-running the cell
# in the same session will presumably attach a second handler — confirm.
logger.addHandler(AzureLogHandler(connection_string=instrumentation_connection_string))
logger.setLevel(logging.INFO)

# Tracer used by later cells (tracer.span(...)); AlwaysOnSampler is the sampler passed in.
tracer = Tracer(
    exporter=AzureExporter(
        connection_string=instrumentation_connection_string
    ),
    sampler=AlwaysOnSampler()
)

# Spool parameters
# Passed as `extra` on log records so the run's parameters travel with each message.
# NOTE(review): the 'custom_dimensions' key is presumably what AzureLogHandler exports
# as custom properties — confirm against the opencensus-ext-azure documentation.
run_time_parameters = {'custom_dimensions': {
    'batch_id': batch_id,
    'output_container_path': output_container_path,
    'notebook_name': mssparkutils.runtime.context['notebookname']
} }
  
logger.info(f"{mssparkutils.runtime.context['notebookname']}: INITIALISED", extra=run_time_parameters)

In [ ]:
# serverless SQL config
# These values are passed to get_recent_status(), which assembles them into a
# pyodbc connection string.
database = 'eiad'
driver= '{ODBC Driver 17 for SQL Server}'

# User name, password and endpoint are read as secrets rather than hard-coded.
sql_user_name = mssparkutils.credentials.getSecretWithLS("keyvault", "SynapseSQLUserName")
sql_user_pwd = mssparkutils.credentials.getSecretWithLS("keyvault", "SynapseSQLPassword")
# NOTE(review): the secret name below is spelled "Syanpse..." (not "Synapse...");
# presumably it matches the entry as stored — confirm before renaming either side.
serverless_sql_endpoint = mssparkutils.credentials.getSecretWithLS("keyvault", "SyanpseServerlessSQLEndpoint")

In [ ]:
# Update Status Table
def get_recent_status(batch_id, driver, serverless_sql_endpoint, database, sql_user_name, sql_user_pwd):
    """Return ``date_submitted`` from the latest ``batch_status`` row of a batch.

    Selects the rows of ``[dbo].[batch_status]`` whose ``batch_id`` matches,
    ordered by ``update_time_stamp`` descending, and returns the
    ``date_submitted`` value of the first one.

    Args:
        batch_id: Batch identifier to look up. Bound as a query parameter, so
            quotes or SQL fragments in the value cannot alter the statement.
        driver: ODBC driver name, e.g. ``'{ODBC Driver 17 for SQL Server}'``.
        serverless_sql_endpoint: Host name of the SQL endpoint to connect to.
        database: Name of the database to connect to.
        sql_user_name: SQL login name.
        sql_user_pwd: SQL login password.

    Returns:
        The ``date_submitted`` value exactly as returned by the driver.

    Raises:
        LookupError: If no ``batch_status`` row exists for ``batch_id``.
        pyodbc.Error: If connecting or executing the query fails.
    """
    # `?` is the pyodbc parameter marker; the value is sent separately from the SQL text.
    query = """
        SELECT TOP (1)
        [batch_id], [date_submitted]
        FROM [dbo].[batch_status]
        WHERE [batch_id] = ?
        ORDER BY [update_time_stamp] DESC;
    """
    connection_string = (
        f'DRIVER={driver};SERVER=tcp:{serverless_sql_endpoint};PORT=1433;'
        f'DATABASE={database};UID={sql_user_name};PWD={sql_user_pwd}'
    )

    # pyodbc's connection context manager only commits on exit; it does not
    # close the connection, so close it explicitly once the row has been read.
    conn = pyodbc.connect(connection_string)
    try:
        with conn.cursor() as cursor:
            cursor.execute(query, batch_id)
            row = cursor.fetchone()
    finally:
        conn.close()

    # fetchone() returns None when nothing matched; fail with a clear message
    # instead of an opaque tuple-unpacking TypeError.
    if row is None:
        raise LookupError(f"No batch_status row found for batch_id {batch_id!r}")

    _ret_batch_id, date_submitted = row
    return date_submitted

# Append an "Anomaly Detection Complete" row for this batch to the batch_status output.
with tracer.span('Updating batch status to "Anomaly Detection Complete"'):
    # Carry forward the submission date recorded on the batch's latest status row.
    date_submitted = get_recent_status(
        batch_id=batch_id,
        driver=driver,
        serverless_sql_endpoint=serverless_sql_endpoint,
        database=database,
        sql_user_name=sql_user_name,
        sql_user_pwd=sql_user_pwd,
    )

    # Single-row frame holding the batch id and its new status.
    row = [[f'{batch_id}', 'Anomaly Detection Complete']]
    columns = ['batch_id', 'status']
    new_batch_status_df = spark.createDataFrame(row, columns)
    new_batch_status_df.show()  # displayed before the date/timestamp columns are added

    # Add the carried-over submission date and the time of this update.
    new_batch_status_df = (
        new_batch_status_df
        .withColumn("date_submitted", lit(date_submitted))
        .withColumn("update_time_stamp", current_timestamp())
    )

    # Append so earlier status rows for the batch are kept.
    new_batch_status_df.write.mode("append").parquet(f'{output_container_path}/batch_status')