In [None]:
import logging
import os
import json

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
import azureml.train
import azureml.train.automl
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

In [None]:
# Load the Azure ML workspace through Workspace.from_config().
ws = Workspace.from_config()

# Name of the experiment, and the folder later passed to AutoMLConfig as `path`.
experiment_name = 'imdb-sentiment-demo'
project_folder = './sentiment-output/'

# Experiment handle used to submit the AutoML run.
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Look up the named datastore and the registered 'imdb_sentiment' dataset.
datastore_name = "workspaceblobstore"
ds = Datastore.get(ws, datastore_name)
dataset = Dataset.get_by_name(ws, name='imdb_sentiment')

# Split features and label by column. X and y remain dataset objects (they are
# only converted to pandas below, for the preview) and are later handed to
# AutoMLConfig.
X = dataset.keep_columns(columns=['review'])
y = dataset.keep_columns(columns=['sentiment'])

# Preview just the first five rows of each. A bare, non-final
# `dataset.to_pandas_dataframe()` call used to sit here: it materialised the
# whole dataset in memory and threw the result away, so it has been removed.
print(X.take(5).to_pandas_dataframe())
print(y.take(5).to_pandas_dataframe())

In [None]:
# Create (or reuse) the compute cluster used for remote AutoML training.
amlcompute_cluster_name = "cpu-f16sv2"

# Reuse the cluster when a target with this name already exists in the
# workspace. AmlCompute targets report their type as 'AmlCompute'; the check
# previously compared against 'AmlInfDemoCompute', which never matches, so an
# existing cluster was never reused and creation was always attempted.
cts = ws.compute_targets
if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':
    print('Found existing compute target, will use it!')
    compute_target = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_F16s_v2",
        min_nodes=1,
        max_nodes=1,
        idle_seconds_before_scaledown=300,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

# Wait for provisioning to finish, giving up after 20 minutes.
print('Waiting for cluster creation completion...')
compute_target.wait_for_completion(show_output=True, timeout_in_minutes=20)

print('Cluster is ready!')

In [None]:
# Build the run configuration that AutoML will use on the remote cluster.
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
import pkg_resources

# Conda and pip packages for the remote training environment.
cd = CondaDependencies.create(
    pip_packages=['azureml-train-automl'],
    conda_packages=['numpy', 'scikit-learn', 'py-xgboost<=0.80'],
)

# Python run configuration targeting the cluster, with Docker enabled and the
# dependency set above attached to its environment.
conda_run_config = RunConfiguration(framework="python")
conda_run_config.target = compute_target
conda_run_config.environment.docker.enabled = True
conda_run_config.environment.python.conda_dependencies = cd

In [None]:
# AutoML search settings; expanded into AutoMLConfig as keyword arguments.
# NOTE(review): max_concurrent_iterations is 3 while the cluster cell creates
# the compute with max_nodes=1 - confirm the intended concurrency.
automl_settings = dict(
    iteration_timeout_minutes=10,
    iterations=20,
    n_cross_validations=5,
    primary_metric='AUC_weighted',
    preprocess=True,
    max_concurrent_iterations=3,
    enable_early_stopping=True,
    verbosity=logging.INFO,
)

# Classification task over the review text (X) and sentiment label (y),
# executed with the remote run configuration.
automl_config = AutoMLConfig(
    task='classification',
    debug_log='automl_errors.log',
    path=project_folder,
    run_configuration=conda_run_config,
    X=X,
    y=y,
    **automl_settings
)

# Submit the run; show_output=False means progress is not streamed here.
remote_run = experiment.submit(automl_config, show_output=False)

In [None]:
# Display the run-details widget for the submitted AutoML run.
from azureml.widgets import RunDetails

run_details_widget = RunDetails(remote_run)
run_details_widget.show()

In [None]:
# Re-attach to an AutoML run that has already completed, identified by its
# hard-coded run id. For a run submitted in the current session, the
# equivalent would be remote_run.wait_for_completion(show_output=False)
# followed by remote_run.get_output().
from azureml.train.automl.run import AutoMLRun
ws = Workspace.from_config()
experiment = ws.experiments['imdb-sentiment-demo']
automl_run = AutoMLRun(experiment, run_id = 'AutoML_101a437a-c7c0-4884-8baf-174f357decf1')

# Pick the best performing model. get_output() is called exactly once and its
# result reused for printing, rather than fetching the output a second time.
best_run, fitted_model = automl_run.get_output()
print("results: ", (best_run, fitted_model))
print("Run:", best_run)
print("Model:", fitted_model)

# Smoke-test the fitted model. The column must be named 'review' to match the
# feature column the model was trained on (this frame previously used 'Text').
test = pd.DataFrame(['the food was horrible', 'wow, this movie was truely great, I totally enjoyed it!'], columns = ['review'])
fitted_model.predict(test)

In [None]:
# Reload the completed AutoML run by id and fetch its best run / fitted model.
from azureml.train.automl.run import AutoMLRun

ws = Workspace.from_config()
experiment = ws.experiments['imdb-sentiment-demo']
automl_run = AutoMLRun(
    experiment=experiment,
    run_id='AutoML_101a437a-c7c0-4884-8baf-174f357decf1',
)

best_run, fitted_model = automl_run.get_output()

# One line per object: best run first, then the fitted model.
print(best_run, fitted_model, sep="\n")

In [None]:
# Try the fitted model on two hand-written reviews.
sample_reviews = {
    'review': [
        'the food was horrible',
        'wow, this movie was truely great, I totally enjoyed it!',
    ],
}
test = pd.DataFrame(sample_reviews)
fitted_model.predict(test)