# Load Query history from Query Store
The `QueryStoreQueryText` table is a Synapse SQL table that is loaded incrementally from the Query Store tables using the following query:


```sql
SELECT txt.query_sql_text,txt.statement_sql_handle,qry.query_id,qry.object_id,qry.is_internal_query,qry.last_execution_time,SUM(count_executions) AS count_executions
FROM sys.query_store_query_text txt
INNER JOIN sys.query_store_query qry
    ON Qry.query_text_id = txt.query_text_id
JOIN sys.query_store_plan pln
    ON qry.query_id=pln.query_id
JOIN sys.query_store_runtime_stats runstate
ON pln.plan_id=runstate.plan_id
GROUP BY txt.query_sql_text,txt.statement_sql_handle,qry.query_id,qry.object_id,qry.is_internal_query,qry.last_execution_time
```

#### SynapseML configurations

In [29]:
%%configure -f
{
    "name": "synapseml",
    "conf": {
        "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.9.4",
        "spark.jars.repositories": "https://mmlspark.azureedge.net/maven",
        "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12",
        "spark.yarn.user.classpath.first": "true"
    }
}

### API Key Parameter

In [30]:
# Azure Function key; call_rest_api sends it in the 'x-functions-key' header.
# Replace the placeholder at run time and avoid committing a real key.
key = "put the Azure Function key here"

## Load QueryStoreQueryText into dataframe

In [31]:
%%spark
// Read the Synapse SQL table through the synapsesql reader and register it as
// a temp view so the PySpark cells can query it by name.
val sourceTable = "retaildw.dbo.QueryStoreQueryText"
val df_scala = spark.read.synapsesql(sourceTable)
df_scala.createOrReplaceTempView("vquerystorequerytext")

## Back to PySpark and data type adjustments

In [32]:
# Load every column of the temp view registered by the Scala cell into a PySpark DataFrame
df = spark.table("vquerystorequerytext")

In [33]:
# Show the column names and types of the DataFrame as read from the temp view
df.printSchema()

In [34]:
# Copy the SQL text into a new "Command" column, cast last_execution_time to a
# date, then remove the original query_sql_text column.
df = (
    df
    .withColumn("Command", df.query_sql_text)
    .withColumn("last_execution_time", df.last_execution_time.cast("date"))
    .drop(df.query_sql_text)
)

In [35]:
# Show the schema again after adding "Command" and casting last_execution_time
df.printSchema()

#### SynapseML HTTPTransformer
Use SynapseML to call the REST API once per record to parse the SQL held in the `Command` column.

In [36]:
from synapse.ml.io.http import http_udf,HTTPTransformer
from pyspark.sql.functions import udf, col
import requests


# Build the request that submits one SQL statement to the parser REST API,
# authenticated with the function key
def call_rest_api(sqlCommand):
    """Return an unsent ``requests.Request`` that posts ``sqlCommand`` to the parser API.

    Args:
        sqlCommand: the SQL text used as the request body.

    Returns:
        A ``requests.Request`` carrying a JSON content-type header and the
        module-level ``key`` in the ``x-functions-key`` header. Nothing is
        sent here; the object only describes the call.
    """
    request_headers = {
        'Content-Type': 'application/json',
        'x-functions-key': key,
    }
    return requests.Request(
        "Post",
        "https://synapsequeryparserfunc.azurewebsites.net/api/parse",
        headers=request_headers,
        data=sqlCommand,
    )


# Build one HTTP request per row from the SQL text in "Command".
df=df.withColumn("request",http_udf(call_rest_api)(col("Command")))


def get_response_body(resp):
    """Return the body of one HTTP response decoded to text.

    Args:
        resp: a value of the "response" column; its ``entity.content`` is
            read as the raw body (presumably bytes, since ``decode()`` is
            called on it).

    Returns:
        The body decoded with the default codec of ``decode()`` (UTF-8).
    """
    return resp.entity.content.decode()


# Transformer that reads the "request" column and writes the "response"
# column, configured with a concurrency of 24.
client = (HTTPTransformer()
          .setConcurrency(24)
          .setInputCol("request")
          .setOutputCol("response"))

# Apply the transformer and keep the decoded body of each response as "sqlAnalytics".
df= client.transform(df).select("*", udf(get_response_body)(col("response")).alias("sqlAnalytics"))

In [37]:
# Remove the intermediate request and response columns in a single call
df = df.drop("request", "response")

In [38]:
from pyspark.sql.functions import from_json,schema_of_json,col
from pyspark.sql.types import *

# Every Spark action re-evaluates the lineage of df, and that lineage contains
# the REST call made for each row. Cache the result so that the schema
# inference below and the later table writes reuse the stored responses
# instead of calling the API again.
df = df.cache()

# read the schema of the json
json_schema = spark.read.json(df.rdd.map(lambda row: row.sqlAnalytics)).schema

#create columns out of the schema
df=df.withColumn('sqlAnalytics', from_json(col('sqlAnalytics'), json_schema)).select("*","sqlAnalytics.*")

In [39]:
# Show the schema after the parsed JSON fields have been expanded into columns
df.printSchema()

In [40]:
# Replace empty arrays with null
from pyspark.sql.functions import regexp_replace,when,size
# Columns whose value becomes null when its size is 0; all other values are kept.
array_columns = [
    'joinedTables',
    'joinedColumns',
    'insertStatementTargets',
    'deleteStatementTargets',
    'errors',
    'copyStatementFrom',
    'copyStatementInto',
]
for array_column in array_columns:
    df = df.withColumn(
        array_column,
        when(size(df[array_column]) == 0, None).otherwise(df[array_column]),
    )

## Saving

## Write the dataframes

In [41]:
# Split the rows on whether "errors" is null and overwrite one table per case:
# rows with no errors first, then rows with errors.
for predicate, table_name in (
    ('errors is NULL', "SynapseSqlAnalytics"),
    ('errors is NOT NULL', "SynapseSqlAnalytics_Errors"),
):
    df.filter(predicate).write.mode("overwrite").saveAsTable(table_name)