In [None]:
# ======================== #
# Mounting data #
# ======================== #

# We create a Storage Account and inside that we have 4 containers:
# unprocessed: raw input data
# validated: data that we can work with
# bin: data that are rejected
# refschema: meta data for our table
# We can use Azure Data Factory (Hybrid Data Integration Service) to automate the process of data cleaning and loading
# We can create a pipeline and whenever there is a new file in unprocessed
# we can trigger the whole process, first check the meta data of the file
# if the structure matches, we will copy the file to validated, otherwise bin (Linked Service)
# we can use ForEach to achieve Parallelism 
# similar end result can be achieved with Azure Functions & Storage Accounts or AWS Lambda & S3, there are many other ways!
# When we have a new file in validated, we will run the notebook and update our ratings, models and get new movie recommendation
# Logic App can be used to send these new ratings via email or push notifications to the user
# Location of the data this notebook works on.
storage_account_name = "movierecom1"  # Azure Storage Account name
container_name = "validated"  # container holding the files that passed validation
root_dir = "/mnt/Files/validated"  # path under /mnt where the container is exposed

# Databricks and Azure are separate services, so they have to be connected explicitly:
# we register an application (App Registration) and create a client secret for it.
# The credentials are stored in Key Vault and exposed to Databricks through a secret
# scope, which is recommended over putting private keys in the notebook.
_SECRET_SCOPE = "movieScope"

# Fetch the three credentials in one pass (client id, client secret, tenant id).
application_id, authentation_key, tenant_id = (
    dbutils.secrets.get(scope=_SECRET_SCOPE, key=secret_name)
    for secret_name in ("clientid", "clientsecret", "tenantid")
)

# Boilerplate to connect Azure Databricks to Azure Storage over abfss.
# OAuth token endpoint for our tenant.
endpoint = "https://login.microsoftonline.com/{}/oauth2/token".format(tenant_id)
# URI of the container inside the storage account.
source = "abfss://{}@{}.dfs.core.windows.net".format(container_name, storage_account_name)

# OAuth client-credentials settings passed to the mount call as extra configs.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": application_id,
    "fs.azure.account.oauth2.client.secret": authentation_key,
    "fs.azure.account.oauth2.client.endpoint": endpoint,
}

# Mount the container at root_dir unless something is already mounted there,
# so re-running this cell does not attempt a second mount.
# (The original referenced an undefined name `mountPoint`; the mount path is root_dir.)
already_mounted = any(mount.mountPoint == root_dir for mount in dbutils.fs.mounts())
if not already_mounted:
    dbutils.fs.mount(
        source=source,
        mount_point=root_dir,
        extra_configs=configs,
    )
    
# We have access to our file, now we will use Spark ML library to generate movie recommendation