In [1]:
import os
from shutil import copyfile, rmtree
from azureml.train.estimator import Estimator
from azureml.core import Workspace, Datastore, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from nlp_architect.models.absa.inference.inference import SentimentInference

## Initialize workspace

To access an Azure ML Workspace, you will need to import the AML library and the following information:
* A name for your workspace (in our example - `hal`)
* Your subscription id (can be obtained by running `az account list`)
* The resource group name (in our case `robots`)

Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace/?WT.mc_id=absa-notebook-abornst) object from the existing workspace you created in the Prerequisites step or create a new one. 

In [3]:
# First-time setup: uncomment the lines below, fill in your own values and run
# once to write a local config file that Workspace.from_config() can read.
# The example values match this tutorial: workspace `hal` in resource group `robots`.
#subscription_id = ''
#resource_group  = 'robots'
#workspace_name  = 'hal'
#ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
#ws.write_config()

try:
    ws = Workspace.from_config()
except Exception as err:
    # Every later cell needs `ws`, so report the underlying cause and stop here
    # instead of continuing and failing later with an unrelated NameError.
    print('Workspace not found')
    print(err)
    raise
else:
    print(ws.name, ws.location, ws.resource_group, ws.location, sep='\t')
    print('Library configuration succeeded')

hal	westus2	robots	westus2
Library configuration succeeded


## Compute

There are two compute options: run-once (preview) and persistent compute. For this demo we will use persistent compute; to learn more about run-once compute, check out the [docs](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute?WT.mc_id=absa-notebook-abornst).

In [4]:
# Name of the compute cluster to reuse or, if it is missing, to create
cluster_name = "gandalf"

try:
    # Looking the cluster up by name raises ComputeTargetException when it is absent
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No such cluster yet: provision one and block until provisioning finishes
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2',
        vm_priority='lowpriority',
        min_nodes=1,
        max_nodes=4,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    cluster.wait_for_completion(show_output=True)
else:
    print('Found existing cluster, use it.')

Found existing cluster, use it.


## Upload Data

The dataset we are using comes from the [Women's E-Commerce Clothing Reviews dataset](https://www.kaggle.com/nicapotato/womens-ecommerce-clothing-reviews/) and is in the open domain. It can be replaced with any CSV file with rows of text, as the ABSA model is unsupervised.

The documentation for uploading data can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.azure_storage_datastore.azureblobdatastore/?WT.mc_id=absa-notebook-abornst); for now we will use the `ds.upload` command.

In [None]:
# "__file__" is a plain string here (notebooks do not define __file__), so this
# resolves to the absolute path of the current working directory.
lib_root = os.path.dirname(os.path.abspath("__file__"))

# Push the local ./dataset folder to the workspace's default datastore,
# replacing anything already stored under 'clothing_data'.
ds = ws.get_default_datastore()
ds.upload(
    src_dir='./dataset',
    target_path='clothing_data',
    overwrite=True,
    show_progress=True,
)

### Get Datastore Reference

In [5]:
# Rebind `ds` to the datastore registered in the workspace under the name 'absa';
# this is the object later passed to the training script as --data_folder.
ds = Datastore.get(workspace=ws, datastore_name='absa')

## Create An Experiment

Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment/?WT.mc_id=absa-notebook-abornst) to track all the runs in your workspace for this aspect-based sentiment analysis (ABSA) training tutorial.

In [14]:
# All runs submitted from this notebook are grouped under this experiment name
experiment_name = 'absa'
exp = Experiment(ws, experiment_name)

In [15]:
# Command-line arguments passed to the entry script
script_params = {'--data_folder': ds, '--large': 'yes'}

# Extra pip requirements for the training environment
training_pip_packages = [
    'git+https://github.com/NervanaSystems/nlp-architect.git@absa',
    'spacy==2.1.8',
]

# Estimator describing the training job: runs train.py from the current
# directory on `cluster`.
# NOTE(review): NLP_ARCHITECT_BE presumably selects nlp-architect's CPU backend - confirm.
nlp_est = Estimator(
    source_directory='.',
    entry_script='train.py',
    script_params=script_params,
    compute_target=cluster,
    environment_variables={'NLP_ARCHITECT_BE': 'CPU'},
    pip_packages=training_pip_packages,
)

To create a run, we just submit our experiment as follows.

In [16]:
# Submit the estimator to the experiment; `run` is the handle to the submitted job
run = exp.submit(config=nlp_est)

Note: If you accidentally run the previous cell more than once, you can cancel a run with the `run.cancel()` command.

In [None]:
# Uncomment the next line to cancel the run currently bound to `run`.
# run.cancel()

You can load any previous run using its run id

In [17]:
# Display the id of the current run so it can be used to reload the run later.
run.id

'absa_1579824592_b5671411'

In [18]:
# Id of the previous run to reload; replace it with an id from your own experiment.
run_id = 'absa_1579824592_b5671411'

# Stop at the first matching run instead of listing every run in the experiment,
# and leave `run` untouched when the id is not found.
matched_run = next((r for r in exp.get_runs() if r.id == run_id), None)
if matched_run is None:
    raise LookupError(
        "Run '{}' was not found in experiment '{}'".format(run_id, exp.name)
    )
run = matched_run

Let's visualize our run:

In [19]:
from azureml.widgets import RunDetails

# Build the widget for `run` and render it in the cell output
run_details = RunDetails(run)
run_details.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

### Download Model

In [None]:
# Fetch the files recorded by the run; the lexicon CSVs are then read from ./outputs
run.download_files()

# Start from an empty local 'model' directory so stale files never get uploaded
model_dir = 'model'
if os.path.exists(model_dir):
    rmtree(model_dir)
os.makedirs(model_dir)

# Keep only the two lexicon files that make up the trained model
aspect_lex = copyfile(
    'outputs/generated_aspect_lex.csv',
    'model/generated_aspect_lex.csv',
)
opinion_lex = copyfile(
    'outputs/generated_opinion_lex_reranked.csv',
    'model/generated_opinion_lex_reranked.csv',
)

# Attach the local 'model' directory to the run under the name 'model_files'
run.upload_folder(name='model_files', path=model_dir)

### Register Model

In [None]:
# model_path must match the name the lexicon folder was uploaded to the run under
# ('model_files'); the previous value 'modelfiles' pointed at a path that was never uploaded.
model = run.register_model(model_name='absa', model_path='model_files')

### Run Model On Sample Data 

In [None]:
# Local copies of the aspect and opinion lexicons produced by the training run
c_aspect_lex, c_opinion_lex = (
    'model/generated_aspect_lex.csv',
    'model/generated_opinion_lex_reranked.csv',
)

# Inference object built from the two lexicon files
inference = SentimentInference(c_aspect_lex, c_opinion_lex)

In [None]:
# Sample review texts to run through the model
docs = [
    "Loved the sweater but hated the pants",
    "Really great outfit, but the shirt is the wrong size",
    "I absolutely love this jacket! i wear it almost everyday. works as a cardigan or a jacket. my favorite retailer purchase so far",
]

# One inference result per input text, in the same order as `docs`
sentiment_docs = [inference.run(doc=doc_raw) for doc_raw in docs]

### Visualize Model Results

In [None]:
import spacy
from spacy import displacy
from nlp_architect.models.absa.inference.data_types import TermType

# Highlight colours keyed by entity label.
# NOTE(review): assumes the polarity values render as 'POS' / 'NEG' - confirm.
polarity_colors = {'POS': '#7CFC00', 'NEG': '#FF0000'}

ents = []  # every aspect entity rendered, accumulated across all documents
for doc in sentiment_docs:
    if not doc:
        # Skip falsy results (e.g. None); there is nothing to render for them
        continue

    doc_viz = {'text': doc._doc_text, 'ents': []}
    # De-duplicate on start offset within this document only. Offsets are relative
    # to each document's own text, so sharing the set across documents would
    # wrongly drop an aspect that starts at an offset already seen in an earlier one.
    seen_starts = set()
    for s in doc._sentences:
        for ev in s._events:
            for e in ev:
                if e._type != TermType.ASPECT or e._start in seen_starts:
                    continue
                seen_starts.add(e._start)
                ent = {'start': e._start, 'end': e._start + e._len,
                       'label': str(e._polarity.value),
                       'text': str(e._text)}
                ents.append(ent)
                doc_viz['ents'].append(ent)

    # Order the spans by start offset before handing them to displaCy
    doc_viz['ents'].sort(key=lambda m: m["start"])
    displacy.render(doc_viz, style="ent", options={'colors': polarity_colors},
                    manual=True, jupyter=True)

## Next Steps

We have now gone through all the steps for production training of a custom open source model using the AzureML Service. Check out AIML50 to learn how to deploy models and manage re-training pipelines.