In [1]:
# Required notebook parameters, supplied through Databricks text widgets.
# Both widgets default to "", so an empty (or whitespace-only) value means the
# caller did not provide the parameter; fail fast with a message naming it.
dbutils.widgets.text("tenant", "", "Tenant")
tenant = dbutils.widgets.get("tenant").strip()
if tenant == "":
  raise ValueError("Notebook parameter 'tenant' is required but was left empty")

dbutils.widgets.text("env", "", "Environment")
env = dbutils.widgets.get("env").strip()
if env == "":
  raise ValueError("Notebook parameter 'env' is required but was left empty")

In [2]:
from azure.storage.filedatalake import DataLakeServiceClient
# Build the Data Lake client; the storage account name and the access key are
# both read from the "dgsecretscope" secret scope.
storage_account_name = dbutils.secrets.get(scope="dgsecretscope", key="storageadlsgen2name")
service_client = DataLakeServiceClient(
    account_url="https://{}.dfs.core.windows.net".format(storage_account_name),
    credential=dbutils.secrets.get(scope="dgsecretscope", key="storageaccesskey"))

In [3]:
# Create a file system named after the tenant, then lay out its directories.
fs_client = service_client.create_file_system(tenant)

# Creation order is kept as listed: top-level folders first, then the
# sub-folders under "data".
tenant_directories = [
    "code",
    "data",
    "logs",
    "data/databases",
    "data/databases/dg_{}".format(tenant),
    "data/raw",
    "data/pb_datasets",
    "data/adhoc",
]
for directory_path in tenant_directories:
  fs_client.create_directory(directory_path)

In [4]:
# OAuth client-credentials settings, used below as extra_configs for the mount.
# The application's client id, client secret and directory (tenant) id are all
# read from the "dgsecretscope" secret scope.
configs = {}
configs["fs.azure.account.auth.type"] = "OAuth"
configs["fs.azure.account.oauth.provider.type"] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
configs["fs.azure.account.oauth2.client.id"] = dbutils.secrets.get(
    scope="dgsecretscope", key="storagegen2mountappclientid")
configs["fs.azure.account.oauth2.client.secret"] = dbutils.secrets.get(
    scope="dgsecretscope", key="storagegen2mountappsecret")
configs["fs.azure.account.oauth2.client.endpoint"] = (
    "https://login.microsoftonline.com/{}/oauth2/token".format(
        dbutils.secrets.get(scope="dgsecretscope", key="storagegen2mountapptenantid")))


In [5]:
# Mount the tenant's file system under /mnt/datagamz/<tenant>, skipping the
# mount when that mount point is already present.
tenant_mount_point = "/mnt/datagamz/{}".format(tenant)
existing_mount_points = [entry.mountPoint for entry in dbutils.fs.mounts()]
if tenant_mount_point not in existing_mount_points:
  dbutils.fs.mount(
      source="abfss://{}@dg{}lakestorage.dfs.core.windows.net/".format(tenant, env),
      mount_point=tenant_mount_point,
      extra_configs=configs)

In [6]:
# Create the tenant database, stored under the tenant's mounted data folder.
# db_name already carries the "dg_" prefix, so it is used as-is in the SQL;
# prefixing it again would create "dg_dg_<tenant>" while the LOCATION (and the
# directory created earlier) is ".../databases/dg_<tenant>".
db_name = "dg_{}".format(tenant)
spark.sql(
    "create database if not exists {db_name} "
    "LOCATION '/mnt/datagamz/{tenant}/data/databases/{db_name}'".format(
        tenant=tenant, db_name=db_name))

In [7]:
# Sanity check: list the mounted data folder of the tenant being provisioned
# (the path previously hard-coded the tenant "testing").
dbutils.fs.ls("/mnt/datagamz/{}/data".format(tenant))

In [8]:

# Prepare the Databricks REST request that creates a cluster named after the tenant.
print("Creating Databricks cluster for tenant")
base_url = dbutils.secrets.get(scope="dgsecretscope", key="databricksinstanceurl")
# Normalise the trailing slash so the URL is valid whether or not the stored
# instance URL ends with "/".
url = base_url.rstrip("/") + "/api/2.0/clusters/create"
headers = {'Authorization': 'Bearer {}'.format(dbutils.secrets.get(scope="dgsecretscope", key="dabaricksaccesstoken"))}
body = {
  "autoscale": {
      "min_workers": 1,
      "max_workers": 2
  },
  "cluster_name": "{}".format(tenant),
  "spark_version": "7.0.x-scala2.12",
  "spark_conf": {
      "spark.rpc.message.maxSize": "1024",
      "spark.databricks.session.share": "false",
      # Was the bare name `true`, which is undefined in Python (NameError);
      # written as a string like the other spark_conf values.
      "spark.speculation": "true"
  },
  "node_type_id": "Standard_DS3_v2",
  "driver_node_type_id": "Standard_DS3_v2",
  "ssh_public_keys": [],
  "custom_tags": {},
  "cluster_log_conf": {
      "dbfs": {
          # NOTE(review): this path uses "/mnt/dganalyics/", while this notebook
          # mounts the tenant storage at "/mnt/datagamz/<tenant>" and creates a
          # "logs" folder there - confirm the intended log destination.
          "destination": "dbfs:/mnt/dganalyics/{}/logs".format(tenant)
      }
  },
  "spark_env_vars": {},
  "autotermination_minutes": 30,
  "enable_elastic_disk": True
}


In [9]:
import requests as rq
# Send the cluster-creation request. requests has no default timeout, so set
# one explicitly to keep the notebook from hanging if the API never responds.
cluster_create = rq.post(url, headers=headers, json=body, timeout=60)

In [10]:
# Fail the notebook when the cluster API did not return 200, surfacing the
# status code and response body so the cause is visible in the error.
if cluster_create.status_code != 200:
  raise Exception("Databricks cluster creation failed: HTTP {} - {}".format(
      cluster_create.status_code, cluster_create.text))