## AutoML Model for Anomaly Detection in IoT Data
### By Lara Suzuki / Vint Cerf - IPNSIG

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://github.com/lasuzuki"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

In [1]:
!pip install --quiet google-cloud-automl
from google.cloud import automl_v1beta1 as automl

In [56]:
# Notebook configuration. Every value below is a placeholder: replace it with
# the settings for your own project before running the remaining cells.
project_id = 'your-project-id'
compute_region = 'your-region'
dataset_display_name = 'your-dataset'
# Training data source: a BigQuery URI (bq://...) or a comma-separated list of
# Cloud Storage URIs.
path = 'bq://your-project.your_database.your_table'
model_display_name = 'model_display_name'
# Training budget, expressed in milli node hours (2000 = 2 node hours).
train_budget_milli_node_hours = 2000
# Feature columns used for training, one exact column name per list entry.
# NOTE(review): a single comma-separated string can only be matched by
# substring, so a list is used to keep the selection exact.
include_column_spec_names = ['ts', 'temperature', 'pressure', 'humidity', 'x', 'y', 'z']
# Input table for batch prediction.
bq_input_uri = 'bq://your-project.your_database.your_table'
# Output location for batch prediction (the scheme must appear exactly once).
# NOTE(review): the AutoML documentation describes the BigQuery output
# destination as a project-level URI (bq://project-id) -- confirm whether a
# table-qualified path is accepted here.
bq_output_uri = 'bq://your-project.your_database.your_table_output'
# Extra parameters forwarded to batch_predict; empty means none are set here.
params = {}

### Create Dataset in AutoML Tables

In [57]:
# Build the AutoML Tables client for the configured project/region and create
# a dataset carrying the configured display name.
client = automl.TablesClient(region=compute_region, project=project_id)
dataset = client.create_dataset(dataset_display_name)

# The dataset id is taken as the last "/"-separated segment of the name.
dataset_id = dataset.name.rpartition("/")[2]

# Print a short summary of the newly created dataset, one field per line.
for line in (
    "Dataset name: {}".format(dataset.name),
    "Dataset id: {}".format(dataset_id),
    "Dataset display name: {}".format(dataset.display_name),
    "Dataset metadata:",
    "\t{}".format(dataset.tables_dataset_metadata),
    "Dataset example count: {}".format(dataset.example_count),
    "Dataset create time: {}".format(dataset.create_time),
):
    print(line)

### Import Data from BigQuery to AutoML Tables

In [58]:
# Import the training data into the dataset. A `path` starting with "bq" is
# passed as a BigQuery input URI; anything else is treated as a
# comma-separated list of Cloud Storage URIs.
if path.startswith("bq"):
    response = client.import_data(
        dataset_display_name=dataset_display_name, bigquery_input_uri=path
    )
else:
    # Trim whitespace around each entry and skip empty ones, so a value such
    # as "gs://bucket/a.csv, gs://bucket/b.csv" yields clean URIs.
    input_uris = [uri.strip() for uri in path.split(",") if uri.strip()]
    response = client.import_data(
        dataset_display_name=dataset_display_name,
        gcs_input_uris=input_uris,
    )
print("Processing import...")
# response.result() waits for the import operation and returns its result.
print("Data imported. {}".format(response.result()))

### Create and Train a Model in AutoML Tables

In [59]:
# Mark the 'label' column as the target the model is trained to predict.
client.set_target_column(
    dataset_display_name=dataset_display_name,
    column_spec_display_name='label',
)

# Accept the feature columns either as a list of names or as one
# comma-separated string, and always hand the client a list of exact names.
# NOTE(review): a raw string would presumably be matched by substring, which
# can select unintended columns -- confirm against the client implementation.
feature_column_names = include_column_spec_names
if isinstance(feature_column_names, str):
    feature_column_names = [
        name.strip() for name in feature_column_names.split(",") if name.strip()
    ]

# Start training with the configured budget, restricted to the chosen columns.
response = client.create_model(
    model_display_name,
    train_budget_milli_node_hours=train_budget_milli_node_hours,
    dataset_display_name=dataset_display_name,
    include_column_spec_names=feature_column_names,
)

print("Training model...")
print("Training operation name: {}".format(response.operation.name))
# response.result() waits for the training operation and returns its result.
print("Training completed: {}".format(response.result()))

Training model...
Training operation name: projects/746648625607/locations/us-central1/operations/TBL8549322205882744832
Training completed: name: "projects/746648625607/locations/us-central1/models/TBL4443520113005559808"



In [60]:
### Get Evaluation Metrics

In [None]:
# List the evaluations of the trained model. No filter argument is passed:
# the notebook never defines a `filter` variable, so `filter=filter` would
# hand Python's built-in `filter` function to the client.
response = client.list_model_evaluations(model_display_name=model_display_name)

# Pick the first evaluation that has no annotation_spec_id.
# NOTE(review): presumably this is the overall (model-level) evaluation rather
# than a per-label one -- confirm against the AutoML documentation.
model_evaluation_name = next(
    (
        evaluation.name
        for evaluation in response
        if not evaluation.annotation_spec_id
    ),
    None,
)
if model_evaluation_name is None:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError(
        "No model-level evaluation found for model '{}'.".format(model_display_name)
    )

model_evaluation = client.get_model_evaluation(
    model_evaluation_name=model_evaluation_name
)

# Only print the regression metrics when their string form is non-empty.
# NOTE(review): presumably the string form is empty for models that are not
# regression models -- verify.
regression_metrics = model_evaluation.regression_evaluation_metrics
if str(regression_metrics):
    print("Model regression metrics:")
    print(
        "Model RMSE: {}".format(regression_metrics.root_mean_squared_error)
    )
    print("Model MAE: {}".format(regression_metrics.mean_absolute_error))
    print(
        "Model MAPE: {}".format(
            regression_metrics.mean_absolute_percentage_error
        )
    )
    print("Model R^2: {}".format(regression_metrics.r_squared))

In [None]:
### Get Batch Predictions from Real Time Data Ingested into BigQuery

In [None]:
# Start a batch prediction that reads rows from the BigQuery input URI and
# writes the predictions to the BigQuery output URI.
response = client.batch_predict(
    model_display_name=model_display_name,
    bigquery_input_uri=bq_input_uri,
    bigquery_output_uri=bq_output_uri,
    params=params,
)
print("Making batch prediction... ")

# Wait for the operation, then read the output dataset from its metadata.
response.result()
operation_metadata = response.metadata
output_info = operation_metadata.batch_predict_details.output_info
dataset_name = output_info.bigquery_output_dataset

completion_message = "Batch prediction complete.\nResults are in '{}' dataset.\n{}".format(
    dataset_name, operation_metadata)
print(completion_message)