-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
What's supported: AAD authentication support for the connector. Changes: support for AAD auth; tests for AAD; a Databricks sample for AAD with SQL; cleanup of formatting and dependencies. Tests done: unit test run; AAD tests pass on Databricks with Azure SQL AAD; tested on BDC for user/password-based authentication — no regression issues.
- Loading branch information
Showing
10 changed files
with
566 additions
and
190 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
sbt.version=1.2.3 | ||
sbt.version=1.3.13 |
42 changes: 42 additions & 0 deletions
42
samples/Databricks-AzureSQL/DatabricksNotebooks/SQL Spark Connector - Python AAD Auth.html
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
samples/Databricks-AzureSQL/DatabricksNotebooks/SQL Spark Connector - Python AAD Auth.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"cells":[{"cell_type":"code","source":["# Microsoft ADAL Library \n# The Python ADAL library will need to be installed.\n# Example: 'pip install adal'\n\n# Documentation\n# https://github.com/AzureAD/azure-activedirectory-library-for-python\n\n# Example\n# https://github.com/AzureAD/azure-activedirectory-library-for-python/blob/dev/sample/client_credentials_sample.py\n\n# Note, the Python ADAL library is no longer being maintained and it's documentation suggests to use MSAL. \n# Unfortunately, it doesn't have feature parity with adal at this point and does not support service principal authentication.\nimport adal"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["# Located in App Registrations from Azure Portal\ntenant_id = \"72f988bf-86f1-41af-91ab-2d7cd011db47\"\n\n# Authority\nauthority = \"https://login.windows.net/\" + tenant_id\n\n# Located in App Registrations from Azure Portal\nresource_app_id_url = \"https://database.windows.net/\"\n\n# In the below example, we're using a Databricks utility that facilitates acquiring secrets from \n# a configured Key Vault. 
\n\n# Service Principal Client ID - Created in App Registrations from Azure Portal\nservice_principal_id = dbutils.secrets.get(\"principalClientId\")\n\n# Service Principal Secret - Created in App Registrations from Azure Portal\nservice_principal_secret = dbutils.secrets.get(\"principalSecret\")\n\n# SQL Server URL\nurl = \"jdbc:sqlserver://azuresqlserver.database.windows.net\"\n\n# Database Name\ndatabase_name = \"TestDatabase\"\n\n# Database Table Name\ndb_table = \"dbo.TestTable\" \n\n# Encrypt\nencrypt = \"true\"\n\n# Host Name in Certificate\nhost_name_in_certificate = \"*.database.windows.net\""],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["# SERVICE PRINCIPAL AUTHENTICATION\n# You will need to obtain an access token.\n# The \"accessToken\" option is used in the spark dataframe to indicate this \n# authentication modality.\ncontext = adal.AuthenticationContext(authority)\ntoken = context.acquire_token_with_client_credentials(resource_app_id_url, service_principal_id, service_principal_secret)\n\n# Set Access Token\naccess_token = token[\"accessToken\"]\n\n# ACTIVE DIRECTORY PASSWORD AUTHENTICATION\n# The \"authentication\" option with the value of \"ActiveDirectoryPassword\"\n# is used in the spark dataframe to indicate this \n# authentication modality. \n#\n# The \"user\" and \"password\" options apply to both SQL Authentication\n# and Active Directory Authentication. 
SQL Authentication is used\n# by default and can be switched to Active Directory with the\n# authentication option above.\nuser = dbutils.secrets.get(\"adUser\")\npassword = dbutils.secrets.get(\"adPassword\")\n"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["jdbc_df = spark.read.format(\"com.microsoft.sqlserver.jdbc.spark\") \\\n .option(\"url\", url) \\\n .option(\"dbtable\", db_table) \\\n .option(\"accessToken\", access_token) \\\n .option(\"encrypt\", encrypt) \\\n .option(\"databaseName\", database_name) \\\n .option(\"hostNameInCertificate\", host_name_in_certificate) \\\n .load() \n\ndisplay(jdbc_df.select(\"SourceViewName\").limit(1))"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["jdbc_df = spark.read.format(\"com.microsoft.sqlserver.jdbc.spark\") \\\n .option(\"url\", url) \\\n .option(\"dbtable\", db_table) \\\n .option(\"authentication\", \"ActiveDirectoryPassword\") \\\n .option(\"user\", user) \\\n .option(\"password\", password) \\\n .option(\"encrypt\", encrypt) \\\n .option(\"databaseName\", database_name) \\\n .option(\"hostNameInCertificate\", host_name_in_certificate) \\\n .load() \n\ndisplay(jdbc_df.select(\"SourceViewName\").limit(1))"],"metadata":{},"outputs":[],"execution_count":5}],"metadata":{"name":"SQL Spark Connector - Python AAD Auth","notebookId":1021193713446438},"nbformat":4,"nbformat_minor":0} |
107 changes: 107 additions & 0 deletions
107
samples/Databricks-AzureSQL/DatabricksNotebooks/SQL Spark Connector - Python AAD Auth.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# Databricks notebook source
# DBTITLE 1,Import Python ADAL Library
# The Python ADAL library must be installed on the cluster, e.g. 'pip install adal'.
#
# Documentation:
#   https://github.com/AzureAD/azure-activedirectory-library-for-python
# Example:
#   https://github.com/AzureAD/azure-activedirectory-library-for-python/blob/dev/sample/client_credentials_sample.py
#
# NOTE: the Python ADAL library is no longer maintained and its documentation
# suggests MSAL instead; MSAL does not yet have feature parity with adal and
# does not support service principal authentication, so adal is used here.
import adal

# COMMAND ----------

# DBTITLE 1,Configure Connection
# Tenant ID - located in App Registrations from the Azure Portal.
tenant_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"

# Token authority endpoint for this tenant.
authority = "https://login.windows.net/" + tenant_id

# Resource app ID URL - located in App Registrations from the Azure Portal.
resource_app_id_url = "https://database.windows.net/"

# The examples below use a Databricks utility that facilitates acquiring
# secrets from a configured Key Vault.

# Service Principal Client ID - created in App Registrations from the Azure Portal.
service_principal_id = dbutils.secrets.get("principalClientId")

# Service Principal Secret - created in App Registrations from the Azure Portal.
service_principal_secret = dbutils.secrets.get("principalSecret")

# SQL Server URL, database, and table to query.
url = "jdbc:sqlserver://azuresqlserver.database.windows.net"
database_name = "TestDatabase"
db_table = "dbo.TestTable"

# Connection encryption settings.
encrypt = "true"
host_name_in_certificate = "*.database.windows.net"

# COMMAND ----------

# DBTITLE 1,Authentication Options
# SERVICE PRINCIPAL AUTHENTICATION
# Acquire an access token and pass it to the spark dataframe via the
# "accessToken" option to select this authentication modality.
context = adal.AuthenticationContext(authority)
token = context.acquire_token_with_client_credentials(resource_app_id_url, service_principal_id, service_principal_secret)
access_token = token["accessToken"]

# ACTIVE DIRECTORY PASSWORD AUTHENTICATION
# Setting the "authentication" option to "ActiveDirectoryPassword" on the
# spark dataframe selects this authentication modality.
#
# The "user" and "password" options apply to both SQL Authentication and
# Active Directory Authentication. SQL Authentication is the default and is
# switched to Active Directory via the authentication option above.
user = dbutils.secrets.get("adUser")
password = dbutils.secrets.get("adPassword")

# COMMAND ----------

# DBTITLE 1,Query SQL using Spark with Service Principal Token
token_auth_options = {
    "url": url,
    "dbtable": db_table,
    "accessToken": access_token,
    "encrypt": encrypt,
    "databaseName": database_name,
    "hostNameInCertificate": host_name_in_certificate,
}

jdbc_df = spark.read.format("com.microsoft.sqlserver.jdbc.spark") \
    .options(**token_auth_options) \
    .load()

display(jdbc_df.select("SourceViewName").limit(1))

# COMMAND ----------

# DBTITLE 1,Query SQL using Spark with Active Directory Password
password_auth_options = {
    "url": url,
    "dbtable": db_table,
    "authentication": "ActiveDirectoryPassword",
    "user": user,
    "password": password,
    "encrypt": encrypt,
    "databaseName": database_name,
    "hostNameInCertificate": host_name_in_certificate,
}

jdbc_df = spark.read.format("com.microsoft.sqlserver.jdbc.spark") \
    .options(**password_auth_options) \
    .load()

display(jdbc_df.select("SourceViewName").limit(1))
42 changes: 42 additions & 0 deletions
42
samples/Databricks-AzureSQL/DatabricksNotebooks/SQL Spark Connector - Scala AAD Auth.html
Large diffs are not rendered by default.
Oops, something went wrong.
90 changes: 90 additions & 0 deletions
90
samples/Databricks-AzureSQL/DatabricksNotebooks/SQL Spark Connector - Scala AAD Auth.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// Databricks notebook source
// DBTITLE 1,Import Microsoft ADAL Library
// REQUIREMENT - The com.microsoft.aad.adal4j artifact must be included as a dependency.
import com.microsoft.aad.adal4j.{AuthenticationContext, ClientCredential}

// COMMAND ----------

// DBTITLE 1,Import Dependencies
import org.apache.spark.sql.SparkSession
import java.util.concurrent.Executors

// COMMAND ----------

// DBTITLE 1,Setup Connection Properties
// Target server, database, and table to query.
val url = "jdbc:sqlserver://azuresqlserver.database.windows.net:1433;databaseName=TestDatabase"
val dbTable = "dbo.TestTable"

// The examples below use a Databricks utility that facilitates acquiring
// secrets from a configured Key Vault.

// Service Principal Client ID - created in App Registrations from the Azure Portal.
val principalClientId = dbutils.secrets.get("principalClientId")

// Service Principal Secret - created in App Registrations from the Azure Portal.
val principalSecret = dbutils.secrets.get("principalSecret")

// Tenant ID - located in App Registrations from the Azure Portal.
val tenantId = "72f988bf-0000-0000-0000-00000000"

// Token authority endpoint and resource app ID URL for AAD token acquisition.
val authority = "https://login.windows.net/" + tenantId
val resourceAppIdURI = "https://database.windows.net/"

// COMMAND ----------

// DBTITLE 1,Authentication Options
// SERVICE PRINCIPAL AUTHENTICATION
// Acquire an access token and pass it to the spark dataframe via the
// "accessToken" option to select this authentication modality.
val tokenExecutor = Executors.newFixedThreadPool(1)
val authContext = new AuthenticationContext(authority, true, tokenExecutor)
val clientCredential = new ClientCredential(principalClientId, principalSecret)
val tokenFuture = authContext.acquireToken(resourceAppIdURI, clientCredential, null)

val accessToken = tokenFuture.get().getAccessToken

// ACTIVE DIRECTORY PASSWORD AUTHENTICATION
// Setting the "authentication" option to "ActiveDirectoryPassword" on the
// spark dataframe selects this authentication modality.
//
// The "user" and "password" options apply to both SQL Authentication and
// Active Directory Authentication. SQL Authentication is the default and is
// switched to Active Directory via the authentication option above.
val user = dbutils.secrets.get("adUser")
val password = dbutils.secrets.get("adPassword")

// COMMAND ----------

// DBTITLE 1,Query SQL using Spark with Service Principal

val tokenAuthDF = spark.read
  .format("com.microsoft.sqlserver.jdbc.spark")
  .option("url", url)
  .option("dbtable", dbTable)
  .option("accessToken", accessToken)
  .option("encrypt", "true")
  .option("hostNameInCertificate", "*.database.windows.net")
  .load()

display(tokenAuthDF.select("SourceViewName").limit(1))

// COMMAND ----------

// DBTITLE 1,Query SQL using Spark with Active Directory Password

val passwordAuthDF = spark.read
  .format("com.microsoft.sqlserver.jdbc.spark")
  .option("url", url)
  .option("dbtable", dbTable)
  .option("authentication", "ActiveDirectoryPassword")
  .option("user", user)
  .option("password", password)
  .option("encrypt", "true")
  .option("hostNameInCertificate", "*.database.windows.net")
  .load()

display(passwordAuthDF.select("SourceViewName").limit(1))
100 changes: 55 additions & 45 deletions
100
src/main/scala/com/microsoft/sqlserver/jdbc/spark/SQLServerBulkJdbcOptions.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,69 @@ | ||
package com.microsoft.sqlserver.jdbc.spark

import java.sql.Connection

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.datasources.jdbc.{
  JDBCOptions,
  JdbcOptionsInWrite
}

/**
 * Write-side options for the SQL Server bulk-copy Spark connector.
 *
 * Extends Spark's [[JdbcOptionsInWrite]] with SQL Server specific options
 * (bulk-copy flags, data pool routing, isolation/reliability levels).
 *
 * @param params case-insensitive map of connector options supplied by the user
 */
class SQLServerBulkJdbcOptions(val params: CaseInsensitiveMap[String])
    extends JdbcOptionsInWrite(params) {

  def this(params: Map[String, String]) = this(CaseInsensitiveMap(params))

  // Save original parameters for when a JdbcBulkOptions instance is passed
  // from the Spark driver to an executor, which loses the reference to the
  // params input in memory
  override val parameters = params

  val dbtable = params.getOrElse("dbtable", null)

  val user = params.getOrElse("user", null)
  val password = params.getOrElse("password", null)

  // If no value is provided, then we write to a single SQL Server instance.
  // A non-empty value indicates the name of a data source whose location is
  // the data pool that the user wants to write to. This data source will
  // contain the user's external table.
  val dataPoolDataSource = params.getOrElse("dataPoolDataSource", null)

  // In the standard Spark JDBC implementation, the default isolation level is
  // "READ_UNCOMMITTED," but for SQL Server, the default is "READ_COMMITTED".
  // An unrecognized value previously surfaced as an opaque scala.MatchError;
  // fail instead with a message that names the offending option.
  override val isolationLevel =
    params.getOrElse("mssqlIsolationLevel", "READ_COMMITTED") match {
      case "READ_UNCOMMITTED" => Connection.TRANSACTION_READ_UNCOMMITTED
      case "READ_COMMITTED" => Connection.TRANSACTION_READ_COMMITTED
      case "REPEATABLE_READ" => Connection.TRANSACTION_REPEATABLE_READ
      case "SERIALIZABLE" => Connection.TRANSACTION_SERIALIZABLE
      // SQL Server snapshot isolation; the JDBC driver defines
      // TRANSACTION_SNAPSHOT as TRANSACTION_READ_COMMITTED + 4094.
      case "SNAPSHOT" => Connection.TRANSACTION_READ_COMMITTED + 4094
      case other =>
        throw new IllegalArgumentException(
          s"Invalid value '$other' for option 'mssqlIsolationLevel'; expected " +
            "one of READ_UNCOMMITTED, READ_COMMITTED, REPEATABLE_READ, " +
            "SERIALIZABLE, SNAPSHOT")
    }

  // BEST_EFFORT (default) performs a plain bulk insert; NO_DUPLICATES adds
  // duplicate protection for executor restarts.
  val reliabilityLevel =
    params.getOrElse("reliabilityLevel", "BEST_EFFORT") match {
      case "BEST_EFFORT" => SQLServerBulkJdbcOptions.BEST_EFFORT
      case "NO_DUPLICATES" => SQLServerBulkJdbcOptions.NO_DUPLICATES
      case other =>
        throw new IllegalArgumentException(
          s"Invalid value '$other' for option 'reliabilityLevel'; expected " +
            "one of BEST_EFFORT, NO_DUPLICATES")
    }

  // batchSize is already defined in JDBCOptions superclass.
  // Bulk-copy behavior flags; all default to false.
  val checkConstraints = params.getOrElse("checkConstraints", "false").toBoolean
  val fireTriggers = params.getOrElse("fireTriggers", "false").toBoolean
  val keepIdentity = params.getOrElse("keepIdentity", "false").toBoolean
  val keepNulls = params.getOrElse("keepNulls", "false").toBoolean
  val tableLock = params.getOrElse("tableLock", "false").toBoolean
  val allowEncryptedValueModifications =
    params.getOrElse("allowEncryptedValueModifications", "false").toBoolean

  // Not a feature
  // Only used for internally testing data idempotency
  val testDataIdempotency =
    params.getOrElse("testDataIdempotency", "false").toBoolean

  // Distribution policy applied when creating the data pool external table.
  val dataPoolDistPolicy = params.getOrElse("dataPoolDistPolicy", "ROUND_ROBIN")
}

/** Named constants for the supported reliability levels. */
object SQLServerBulkJdbcOptions {
  val BEST_EFFORT = 0
  val NO_DUPLICATES = 1
}
Oops, something went wrong.