In [3]:
from azureml.core import Workspace
from obs.management import provision,set_adx_to_workspace
import pandas as pd
from datetime import timedelta
from obs.collector import Online_Collector

# Load the Azure ML workspace via Workspace.from_config(); `ws` is passed to the
# provisioning, cluster-attach and collector calls in the cells below.
ws = Workspace.from_config()

### Provision Resources

##### Option 1: Let the provisioning process setup everything

- Provision with default SKU of Standard_D11_v2, Standard Tier of ADX cluster.
- You need permission to create a service principal in the subscription. Run the command below to test whether you can create one. Replace sp_name with any name, and fill in subscription_id and resource_group_name. <br>
 az ad sp create-for-rbac --name {sp_name} --role contributor --scopes /subscriptions/{subscription_id}/resourceGroups/{resource_group_name} --sdk-auth
- Make sure resource provider Microsoft.Kusto is registered in your subscription.






In [4]:

# Option 1: run the provisioning process against the workspace loaded above.
provision(ws)

if the login screen does not pop-up, please copy and run the following command to login
az login --tenant 72f988bf-86f1-41af-91ab-2d7cd011db47
Creating Service Principal
CompletedProcess(args='az ad sp create-for-rbac --name ws01ent_ws01entmonitor51_monitoringsp --role contributor --scopes /subscriptions/0e9bace8-7a81-4922-83b5-d995ff706507/resourceGroups/azureml --sdk-auth', returncode=0, stdout=b'{\r\n  "clientId": "a5891756-bde3-4235-b145-d6379919876c",\r\n  "clientSecret": "<redacted>",\r\n  "subscriptionId": "0e9bace8-7a81-4922-83b5-d995ff706507",\r\n  "tenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47",\r\n  "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",\r\n  "resourceManagerEndpointUrl": "https://management.azure.com/",\r\n  "activeDirectoryGraphResourceId": "https://graph.windows.net/",\r\n  "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/",\r\n  "galleryEndpointUrl": "https://gallery.azure.com/",\r\n  "management

##### Option 2: Bring your own service principal and ADX cluster

1. Setup ADX cluster:
- Create your own ADX cluster. The cluster has to be enabled with Streaming Ingestion (https://docs.microsoft.com/en-us/azure/data-explorer/ingest-data-streaming?tabs=azure-portal%2Ccsharp) and Python language extension https://docs.microsoft.com/en-us/azure/data-explorer/language-extensions
- Create or reuse an existing service principal
- Create a database
- Assign the service principal to the database as admin https://docs.microsoft.com/en-us/azure/data-explorer/manage-database-permissions
- Assign yourself to the database so that you can query 


2. Attach the cluster to Azure ML workspace
Prepare cluster_uri (e.g. https://adx02.westus2.kusto.windows.net), db_name, client_id, client_secret, subscription_id and tenant_id, then run the following code to attach the cluster to the Azure ML workspace.

In [16]:
# Identity settings: tenant, subscription and service principal (client) id.
tenant_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
subscription_id = "0e9bace8-7a81-4922-83b5-d995ff706507"
client_id = "af883abf-89dd-4889-bdb3-1ee84f68465e"

# Target ADX cluster and database.
cluster_uri = "https://adx02.westus2.kusto.windows.net"  # URL of the ADX cluster
db_name = "db01"

# The client secret is not hard-coded: it is read from the workspace's default
# key vault, using the client id as the secret name.
kv = ws.get_default_keyvault()
client_secret = kv.get_secret(client_id)

# Attach the ADX cluster to the Azure ML workspace.
set_adx_to_workspace(
    ws, cluster_uri, db_name, client_id, client_secret, subscription_id, tenant_id
)


### Data Collection

Once the resources are created, data can now be ingested to Azure Data Explorer. To use the dashboards, the data must have a timestamp column. 

In [18]:
# Download the raw Iris sample CSV into a pandas DataFrame.
dataset = pd.read_csv(
    filepath_or_buffer="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv",
)

In [19]:
# Add the timestamp column that the dashboards require.
# `pd` and `timedelta` are imported in the first cell and `dataset` is loaded by the
# preceding cell, so the imports and the CSV download are not repeated here.

# Evaluate "now" once so every row is offset from the same instant: row 0 gets the
# reference time and each following row is exactly one more day in the past.
reference_time = pd.to_datetime('now')
dataset["timestamp"] = [reference_time - timedelta(days=row) for row in range(len(dataset))]


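Kusto command that creates a table with the matching schema (presumably only needed if the table has to be created manually in ADX):

.create table IRIS_DATA_NEW (['sepal_length']: real, ['sepal_width']: real, ['petal_length']: real, ['petal_width']: real, ['species']: string, ['timestamp']: datetime)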


#### Batch Ingestion

The entire dataframe is loaded into ADX at once as a table named ```IRIS_DATA_NEW```. There is also a stream ingestion method that ingests data asynchronously with an internal buffering mechanism. It can be used to lower the impact on the main scoring thread.

In [None]:

# Name of the ADX table that receives the batch.
table_name = "IRIS_DATA_NEW"

# Build a collector for that table in this workspace, then send the whole
# dataframe in a single call.
online_collector = Online_Collector(table_name, ws=ws)
online_collector.batch_collect(dataset)

### Spark Ingestion (run this in Databricks or Synapse)

In Databricks or Synapse Spark, install the library:

```pip install --upgrade git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector```



#### Log on with a service principal so that you can run this as a job. You can also log on in interactive mode

In [None]:

from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication

# NOTE: service_principal_id, service_principal_password, workspace_name and
# resource_group are not assigned anywhere in the visible notebook. Define them
# (together with tenant_id and subscription_id) before running this cell.
sp_auth = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)

# Get the Azure Machine Learning workspace, authenticating as the service principal.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    auth=sp_auth,
)

#### Load a Spark Dataframe

In [None]:
# Read the CSV files matching "Store10*.csv" from the OJ sales container into a
# Spark dataframe, with the "header" option enabled.
data = (
    spark.read
    .format("csv")
    .option("header", True)
    .load("wasbs://ojsales-simulatedcontainer@azureopendatastorage.blob.core.windows.net/oj_sales_data/Store10*.csv")
)

#### Ingest!

In [None]:
from obs.collector import spark_collect

# Send the Spark dataframe to the "adb_oj_sales" table via the workspace.
table_name = "adb_oj_sales"
spark_collect(data, table_name, ws)
# It will take a few minutes for the result to show up in ADX.
#will take a few minutes for result to show up in ADX

## Real time ingestion

Check out the example in the monitoring notebook to see how real-time ingestion works.