In [1]:
import google.cloud.aiplatform as aip

In [2]:
PROJECT_ID = "kubeflow-1-0-2"  # <---CHANGE THIS
# Vertex AI resources live in a region (e.g. "us-central1"); a zone name
# such as "us-central1-a" is not a valid Vertex AI location.
REGION = "us-central1"

In [3]:
from datetime import datetime

# Second-resolution run identifier, appended to resource display names
# so that repeated runs do not collide.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"
TIMESTAMP

'20211105140403'

In [4]:
BUCKET_NAME = "gs://fire_detection_anurag"  # <---CHANGE THIS
USER = "anuragbhatia"  # <---CHANGE THIS

# Per-user folder inside the bucket for AutoML pipeline artifacts.
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline_root/automl/{USER}"

PIPELINE_ROOT

'gs://fire_detection_anurag/pipeline_root/automl/anuragbhatia'

In [5]:
# Set the SDK-wide defaults (project and staging bucket) used by the
# Vertex AI calls in the rest of this notebook.
aip.init(
    project=PROJECT_ID,
    staging_bucket=BUCKET_NAME,
)

## Create Vertex AI (Managed) Dataset

In [6]:
# CSV import file listing the training images and their labels; derived from
# BUCKET_NAME so that changing the bucket in one place is enough.
IMPORT_FILE = f"{BUCKET_NAME}/fire_dataset_AutoML.csv"  # <---CHANGE THIS

In [7]:
# Create a managed image dataset and import the labelled images listed in
# IMPORT_FILE using the single-label classification schema.
dataset = aip.ImageDataset.create(
    display_name=f"Fire_{TIMESTAMP}",
    gcs_source=[IMPORT_FILE],
    import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
)

print(dataset.resource_name)

INFO:google.cloud.aiplatform.datasets.dataset:Creating ImageDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create ImageDataset backing LRO: projects/9118975290/locations/us-central1/datasets/2913327581606445056/operations/2721328524701990912
INFO:google.cloud.aiplatform.datasets.dataset:ImageDataset created. Resource name: projects/9118975290/locations/us-central1/datasets/2913327581606445056
INFO:google.cloud.aiplatform.datasets.dataset:To use this ImageDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.ImageDataset('projects/9118975290/locations/us-central1/datasets/2913327581606445056')
INFO:google.cloud.aiplatform.datasets.dataset:Importing ImageDataset data: projects/9118975290/locations/us-central1/datasets/2913327581606445056
INFO:google.cloud.aiplatform.datasets.dataset:Import ImageDataset data backing LRO: projects/9118975290/locations/us-central1/datasets/2913327581606445056/operations/2666722379220123648
INFO:google.cloud.aiplatf

## AutoML Model Training

In [8]:
# Define (but do not yet run) the AutoML image classification training job.
dag = aip.AutoMLImageTrainingJob(
    display_name=f"fire_{TIMESTAMP}",
    prediction_type="classification",
    multi_label=False,  # exactly one label per image
    model_type="CLOUD",
    base_model=None,
)

In [9]:
# Run the training job on the managed dataset; the call returns the
# resulting model object used for batch prediction later on.
model = dag.run(
    dataset=dataset,
    model_display_name=f"fire_{TIMESTAMP}",
    # 80:10:10 train:validation:test split
    training_fraction_split=0.8,
    validation_fraction_split=0.1,
    test_fraction_split=0.1,
    budget_milli_node_hours=8000,  # budget cap: 8 node hours
    disable_early_stopping=False,  # keep early stopping on to avoid over-fitting
)

INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/8851180753782833152?project=9118975290
INFO:google.cloud.aiplatform.training_jobs:AutoMLImageTrainingJob projects/9118975290/locations/us-central1/trainingPipelines/8851180753782833152 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLImageTrainingJob projects/9118975290/locations/us-central1/trainingPipelines/8851180753782833152 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLImageTrainingJob projects/9118975290/locations/us-central1/trainingPipelines/8851180753782833152 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLImageTrainingJob projects/9118975290/locations/us-central1/trainingPipelines/8851180753782833152 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.traini

## Model performance metrics

Precision-Recall curve (at the default confidence threshold of 0.5)

<a href="https://storage.cloud.google.com/fire_detection_anurag/model_performance/AutoML_using_python_sdk/PR_Curve.png" target="_blank"><img src="https://storage.cloud.google.com/fire_detection_anurag/model_performance/AutoML_using_python_sdk/PR_Curve.png" width="90%"/></a>

Confusion Matrix

<a href="https://storage.cloud.google.com/fire_detection_anurag/model_performance/AutoML_using_python_sdk/Confusion_matrix.png" target="_blank"><img src="https://storage.cloud.google.com/fire_detection_anurag/model_performance/AutoML_using_python_sdk/Confusion_matrix.png" width="90%"/></a>

Not bad at all. :)

Fingers crossed. Let's double-check by running a batch prediction on a few test images.

## Batch prediction

In [10]:
import tensorflow as tf
import json

In [11]:
batch_prediction_bucket = "gs://fire_detection_anurag/test_images/"

# Keep only JPEG files. The JSONL manifest and the batch prediction outputs
# are written under this same prefix further down, so on a re-run an
# unfiltered listing would also return those non-image entries.
test_images = [
    name
    for name in tf.io.gfile.listdir(batch_prediction_bucket)
    if name.lower().endswith((".jpg", ".jpeg"))
]
len(test_images)

10

In [12]:
# Peek at the first entry: the listing holds bare file names, so the bucket
# prefix has to be prepended before use.
test_images[0]

'fire1.jpg'

In [13]:
# Manifest for batch prediction: one JSON object per line, each pointing at
# one test image in Cloud Storage.
gcs_input_uri = batch_prediction_bucket + 'test.jsonl'

with tf.io.gfile.GFile(gcs_input_uri, 'w') as manifest:
    for image_name in test_images:
        instance = {
            "content": batch_prediction_bucket + image_name,
            "mime_type": "image/jpeg",
        }
        manifest.write(json.dumps(instance) + '\n')

print(gcs_input_uri)

gs://fire_detection_anurag/test_images/test.jsonl


In [14]:
# Print the manifest that was just written, to verify its contents.
!gsutil cat $gcs_input_uri

{"content": "gs://fire_detection_anurag/test_images/fire1.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/fire2.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/fire3.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/fire4.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/fire5.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/no_fire1.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/no_fire2.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/no_fire3.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/no_fire4.jpg", "mime_type": "image/jpeg"}
{"content": "gs://fire_detection_anurag/test_images/no_fire5.jpg", "mime_type": "image/jpeg"}


In [15]:
# Launch the batch prediction without blocking (sync=False); results are
# written under the same Cloud Storage prefix that holds the test images.
batch_predict_job = model.batch_predict(
    job_display_name=f"fire_batch_prediction_{TIMESTAMP}",
    gcs_source=gcs_input_uri,
    gcs_destination_prefix=batch_prediction_bucket,
    sync=False,
)

print(batch_predict_job)

INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob
<google.cloud.aiplatform.jobs.BatchPredictionJob object at 0x7f12d4870410> is waiting for upstream dependencies to complete.
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968
INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:
INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968')
INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/6525071546245971968?project=9118975290
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_PENDING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob

In [16]:
# The job was launched with sync=False, so block here until it has finished.
batch_predict_job.wait()

INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/6525071546245971968 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/9118975290/locations/us-central1/batchPredictionJobs/

In [17]:
# Iterator over the Cloud Storage objects written by the batch prediction job.
bp_iter_outputs = batch_predict_job.iter_outputs()

type(bp_iter_outputs)

google.api_core.page_iterator.HTTPIterator

In [18]:
# Collect the object names of the prediction result files (file name starts
# with "prediction"). A fresh iterator is requested here because the page
# iterator returned by iter_outputs() can only be consumed once; reusing the
# one from the previous cell would make this cell fail when re-run.
prediction_results = [
    blob.name
    for blob in batch_predict_job.iter_outputs()
    if blob.name.split("/")[-1].startswith("prediction")
]

prediction_results

['test_images/prediction-fire20211105140403-2021-11-05T15:04:29.256648Z/predictions_00001.jsonl']

Parse the results

In [36]:
# Each line of a prediction file is a JSON object holding the input instance
# and the model's prediction; print a compact summary for every image.
for prediction_result in prediction_results:
    gfile_name = f"gs://{bp_iter_outputs.bucket.name}/{prediction_result}"

    with tf.io.gfile.GFile(name=gfile_name, mode="r") as results_file:
        for raw_line in results_file.readlines():
            parsed = json.loads(raw_line)
            summary = dict(
                test_image=parsed["instance"]["content"],
                classes=parsed["prediction"]["displayNames"],
                confidence_scores=parsed["prediction"]["confidences"],
            )
            print(summary)

{'test_image': 'gs://fire_detection_anurag/test_images/fire2.jpg', 'classes': ['Fire', 'No_Fire'], 'confidence_scores': [0.9998115, 0.00018851359]}
{'test_image': 'gs://fire_detection_anurag/test_images/fire1.jpg', 'classes': ['Fire', 'No_Fire'], 'confidence_scores': [0.9976693, 0.0023306988]}
{'test_image': 'gs://fire_detection_anurag/test_images/fire3.jpg', 'classes': ['Fire', 'No_Fire'], 'confidence_scores': [0.99990535, 9.463103e-05]}
{'test_image': 'gs://fire_detection_anurag/test_images/no_fire1.jpg', 'classes': ['No_Fire', 'Fire'], 'confidence_scores': [0.97757256, 0.022427412]}
{'test_image': 'gs://fire_detection_anurag/test_images/fire5.jpg', 'classes': ['Fire', 'No_Fire'], 'confidence_scores': [0.99995315, 4.6900655e-05]}
{'test_image': 'gs://fire_detection_anurag/test_images/fire4.jpg', 'classes': ['Fire', 'No_Fire'], 'confidence_scores': [0.9940141, 0.005985917]}
{'test_image': 'gs://fire_detection_anurag/test_images/no_fire4.jpg', 'classes': ['No_Fire', 'Fire'], 'confidenc

The model predicts the correct class for all 10 test images.

Original template: https://github.com/GoogleCloudPlatform/ai-platform-samples/blob/master/ai-platform-unified/notebooks/unofficial/sdk/sdk_automl_image_classification_batch.ipynb