# Fraud detection playground

## This notebook contains only the test code to support the prod version of notebook

### Extract metadata and annotations for an already created image dataset

In [18]:
# Loading library

from typing import List, Union
import json
import requests
import base64

#from oauth2client.client import GoogleCredentials
from google.cloud import bigquery
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import trainingjob, predict

In [19]:
LOCATION = "us-central1" 
PROJECT_ID = "nishitp-daml"
DATASET_ID = "4715062101670887424"
EXPORT_DIRECTORY = "gs://ds-ml-demos/packages-image-metadata/"
ANNOTATION_SET_ID = "package-classification_iod"
FILTER = f"labels.aiplatform.googleapis.com/annotation_set_name={ANNOTATION_SET_ID}"

In [4]:
# Helper function

def query_ts(method, endpoint, data, auth_token):
    data = str(data)
    headers = {'Content-type': 'application/json', "Authorization": f"Bearer {auth_token}"}
    
    if method == "GET":
        resp = requests.get(endpoint, headers=headers)
    if method == "POST":
        resp = requests.post(endpoint, data=data, headers=headers)
    if method == "DELETE":
        resp = requests.delete(endpoint, headers=headers)
    
    return(resp.json())

In [None]:
# Loading credentials into auth_token - this is temp token that needs to be refreshed
token=GoogleCredentials.get_application_default().get_access_token().access_token
token

In [6]:
%%writefile files/annotate_payload.json

{
  "exportConfig": {
    "gcsDestination": {
      "outputUriPrefix": "gs://ds-ml-demos/packages-image-metadata/"
    },
    "annotationsFilter": "labels.aiplatform.googleapis.com/annotation_set_name=package-classification_iod"
  }
}


Overwriting files/annotate_payload.json


In [7]:
with open('files/annotate_payload.json', 'r') as myfile:
    data=myfile.read()
    
print(data)


{
  "exportConfig": {
    "gcsDestination": {
      "outputUriPrefix": "gs://ds-ml-demos/packages-image-metadata/"
    },
    "annotationsFilter": "labels.aiplatform.googleapis.com/annotation_set_name=package-classification_iod"
  }
}



In [8]:
# api endpoint for annotation extration
api_endpoint = f"https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}:export"

In [10]:
# DO NOT RUN - DATASET ALREADY CREATED
# Call api to extract the annotations and save in gcs file

# Create dataset 
res = query_ts(method="POST", endpoint=api_endpoint, data=data, auth_token=token)
res

{'name': 'projects/474014863033/locations/us-central1/datasets/4715062101670887424/operations/6243958377580331008',
 'metadata': {'@type': 'type.googleapis.com/google.cloud.aiplatform.v1.ExportDataOperationMetadata',
  'genericMetadata': {'createTime': '2022-01-23T19:08:47.443266Z',
   'updateTime': '2022-01-23T19:08:47.443266Z'},
  'gcsOutputDirectory': 'gs://ds-ml-demos/packages-image-metadata/export-data-package-classification-2022-01-23T19:08:47.403208Z'}}

In [9]:
file_path = "gs://ds-ml-demos/packages-image-metadata/export-data-package-classification-2022-01-25T01:28:18.411854Z/image_bounding_box/package-classification_iod-1108241750100541440/data-00001-of-00001.jsonl"

In [10]:
# Create Vertex AI dataset using api

def create_package_size_image_dataset(
    project: str,
    location: str,
    display_name: str,
    src_uris: str,
    sync: bool = True,
):

    aiplatform.init(project=project, location=location)

    ds = aiplatform.ImageDataset.create(
        display_name=display_name,
        gcs_source=src_uris,
        import_schema_uri="gs://google-cloud-aiplatform/schema/dataset/ioformat/image_bounding_box_io_format_1.0.0.yaml", # uri for image object detection schema standard
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds


In [None]:
#read json file to check access

# Reading gcs files with gcsfs
import gcsfs
import json

gcs_file_system = gcsfs.GCSFileSystem(project=PROJECT_ID)
gcs_json_path = "gs://ds-ml-demos/packages-image-metadata/export-data-package-classification-2022-01-22T22:38:45.715670Z/data-00001-of-00001.jsonl"

with gcs_file_system.open(gcs_json_path) as f:
    json_dict = json.load(f)

In [11]:
# Call API to create dataset in Vertex AI
create_package_size_image_dataset(PROJECT_ID, LOCATION, "package-detection-ds", file_path)

INFO:google.cloud.aiplatform.datasets.dataset:Creating ImageDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create ImageDataset backing LRO: projects/474014863033/locations/us-central1/datasets/2303525238702342144/operations/8587942895405760512
INFO:google.cloud.aiplatform.datasets.dataset:ImageDataset created. Resource name: projects/474014863033/locations/us-central1/datasets/2303525238702342144
INFO:google.cloud.aiplatform.datasets.dataset:To use this ImageDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.ImageDataset('projects/474014863033/locations/us-central1/datasets/2303525238702342144')
INFO:google.cloud.aiplatform.datasets.dataset:Importing ImageDataset data: projects/474014863033/locations/us-central1/datasets/2303525238702342144
INFO:google.cloud.aiplatform.datasets.dataset:Import ImageDataset data backing LRO: projects/474014863033/locations/us-central1/datasets/2303525238702342144/operations/8727554483854245888
INFO:google.clo

<google.cloud.aiplatform.datasets.image_dataset.ImageDataset object at 0x7f6f908c2090> 
resource name: projects/474014863033/locations/us-central1/datasets/2303525238702342144

## TRAINING FROM HERE

In [20]:
# Training automl function

def create_training_pipeline_image_object_detection_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PipelineServiceClient(client_options=client_options)
    training_task_inputs = trainingjob.definition.AutoMlImageObjectDetectionInputs(
        model_type="CLOUD_HIGH_ACCURACY_1",
        budget_milli_node_hours=20000,
        disable_early_stopping=False,
    ).to_value()

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_object_detection_1.0.0.yaml",
        "training_task_inputs": training_task_inputs,
        "input_data_config": {"dataset_id": dataset_id},
        "model_to_upload": {"display_name": model_display_name},
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_training_pipeline(
        parent=parent, training_pipeline=training_pipeline
    )
    print("response:", response)


In [23]:
# Start the training pipeline 

model  = create_training_pipeline_image_object_detection_sample(PROJECT_ID,  
                                                       "package_size_detection_model2", 
                                                       DATASET_ID,
                                                       "package_size_detection_model_test2"
                                                       )
model
model.name

response: name: "projects/474014863033/locations/us-central1/trainingPipelines/4377484544152961024"
display_name: "package_size_detection_model2"
input_data_config {
  dataset_id: "4715062101670887424"
}
training_task_definition: "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_object_detection_1.0.0.yaml"
training_task_inputs {
  struct_value {
    fields {
      key: "budgetMilliNodeHours"
      value {
        string_value: "20000"
      }
    }
    fields {
      key: "modelType"
      value {
        string_value: "CLOUD_HIGH_ACCURACY_1"
      }
    }
  }
}
model_to_upload {
  display_name: "package_size_detection_model_test2"
}
state: PIPELINE_STATE_PENDING
create_time {
  seconds: 1644339706
  nanos: 31463000
}
update_time {
  seconds: 1644339706
  nanos: 31463000
}



AttributeError: 'NoneType' object has no attribute 'name'

In [None]:
https://{service-endpoint}/v1/{name}

## Model evaluation code here

In [None]:
# Funtion to get aggregated evaluation metrices


def get_model_evaluation_image_object_detection_sample(
    project: str,
    model_id: str,
    evaluation_id: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """
    To obtain evaluation_id run the following commands where LOCATION
    is the region where the model is stored, PROJECT is the project ID,
    and MODEL_ID is the ID of your model.

    model_client = aiplatform.gapic.ModelServiceClient(
        client_options={
            'api_endpoint':'LOCATION-aiplatform.googleapis.com'
            }
        )
    evaluations = model_client.list_model_evaluations(parent='projects/PROJECT/locations/LOCATION/models/MODEL_ID')
    print("evaluations:", evaluations)
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.ModelServiceClient(client_options=client_options)
    name = client.model_evaluation_path(
        project=project, location=location, model=model_id, evaluation=evaluation_id
    )
    response = client.get_model_evaluation(name=name)
    print("response:", response)


In [22]:
# Get evaluation id
import re

model_client = aiplatform.gapic.ModelServiceClient(
        client_options={
            'api_endpoint':'us-central1-aiplatform.googleapis.com'
            }
        )

evaluations = model_client.list_model_evaluations(parent=f'projects/{PROJECT_ID}/locations/{LOCATION}/models/5096003298276671488')  
#print(evaluations)

for val in evaluations:
    print(val.name)
    res = re.search('evaluations/(.*)', val.name)
    print(res.group(1))
    
res.group(1)


projects/474014863033/locations/us-central1/models/5096003298276671488/evaluations/8279094133612609536
8279094133612609536


'8279094133612609536'

In [None]:
get_model_evaluation_image_object_detection_sample(PROJECT_ID, "5096003298276671488", "8279094133612609536")

In [None]:
# list all evaluation slices 

def list_model_evaluation_slices_sample(
    project: str,
    model_id: str,
    evaluation_id: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """
    To obtain evaluation_id run the following commands where LOCATION
    is the region where the model is stored, PROJECT is the project ID,
    and MODEL_ID is the ID of your model.

    model_client = aiplatform.gapic.ModelServiceClient(
        client_options={
            'api_endpoint':'LOCATION-aiplatform.googleapis.com'
            }
        )
    evaluations = model_client.list_model_evaluations(parent='projects/PROJECT/locations/LOCATION/models/MODEL_ID')
    print("evaluations:", evaluations)
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.ModelServiceClient(client_options=client_options)
    parent = client.model_evaluation_path(
        project=project, location=location, model=model_id, evaluation=evaluation_id
    )
    response = client.list_model_evaluation_slices(parent=parent)
    for model_evaluation_slice in response:
        print("model_evaluation_slice:", model_evaluation_slice)


In [None]:
list_model_evaluation_slices_sample(PROJECT_ID, "5096003298276671488", "8279094133612609536")

In [None]:
# get metrics for single slice

from google.cloud import aiplatform


def get_model_evaluation_slice_sample(
    project: str,
    model_id: str,
    evaluation_id: str,
    slice_id: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """
    To obtain evaluation_id run the following commands where LOCATION
    is the region where the model is stored, PROJECT is the project ID,
    and MODEL_ID is the ID of your model.

    model_client = aiplatform.gapic.ModelServiceClient(
        client_options={
            'api_endpoint':'LOCATION-aiplatform.googleapis.com'
            }
        )
    evaluations = model_client.list_model_evaluations(parent='projects/PROJECT/locations/LOCATION/models/MODEL_ID')
    print("evaluations:", evaluations)
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.ModelServiceClient(client_options=client_options)
    name = client.model_evaluation_slice_path(
        project=project,
        location=location,
        model=model_id,
        evaluation=evaluation_id,
        slice=slice_id,
    )
    response = client.get_model_evaluation_slice(name=name)
    print("response:", response)


In [15]:
! gcloud ai endpoints list \
  --region="us-central1" \
  --filter=display_name="package-object-detection-endpoint"

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
ENDPOINT_ID         DISPLAY_NAME
609331751927414784  package-object-detection-endpoint


In [25]:
# Get evaluation id
import re
model_client = aiplatform.gapic.ModelServiceClient(
        client_options={
            'api_endpoint':'us-central1-aiplatform.googleapis.com'
            }
        )

evaluations = model_client.list_model_evaluations(parent=f'projects/{PROJECT_ID}/locations/{LOCATION}/models/2221017886154031104')  
for val in evaluations:
    eval_group = re.search('evaluations/(.*)', val.name)
    model_group = re.search('models/(.*)/evaluations', val.name)
    
eval_id = eval_group.group(1)
model_id = model_group.group(1)


print(eval_id)
print(model_id)

5589020189002825728
2221017886154031104


In [None]:
model.resource_name

In [None]:
get_model_evaluation_slice_sample(PROJECT_ID, "5096003298276671488", "8279094133612609536", )

## Create an endpoint

In [7]:
def create_endpoint_sample(
    project: str, display_name: str, location: str,
):
    aiplatform.init(project=project, location=location)

    endpoint = aiplatform.Endpoint.create(
        display_name=display_name, project=project, location=location,
    )

    print(endpoint.display_name)
    print(endpoint.resource_name)
    return endpoint


In [18]:
endpt = create_endpoint_sample(PROJECT_ID, "test", "us-central1")
endpt

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/474014863033/locations/us-central1/endpoints/5356970184106049536/operations/3280010929898520576
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/474014863033/locations/us-central1/endpoints/5356970184106049536
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/474014863033/locations/us-central1/endpoints/5356970184106049536')
test
projects/474014863033/locations/us-central1/endpoints/5356970184106049536


<google.cloud.aiplatform.models.Endpoint object at 0x7fca1e87c790> 
resource name: projects/474014863033/locations/us-central1/endpoints/5356970184106049536

In [19]:
print("endpoint:", endpt)
print("display_name:", endpt.display_name)
print("resource_name:", endpt.resource_name)

endpoint: <google.cloud.aiplatform.models.Endpoint object at 0x7fca1e87c790> 
resource name: projects/474014863033/locations/us-central1/endpoints/5356970184106049536
display_name: test
resource_name: projects/474014863033/locations/us-central1/endpoints/5356970184106049536


## Deploy the model to endpoint created

In [9]:
# deploying the model to endpoint

def deploy_package_detection_model(
    project: str,
    location: str,
    model_name: str,  # fully qualified name: projects/project_number/locations/location_name/models/model_number
    endpoint: str,
    deployed_model_display_name:  str,
    traffic_percentage:  int,
    traffic_split: dict,
    min_replica_count: int,
    max_replica_count: int,
    metadata: dict,
    sync: bool,
):

    aiplatform.init(project=project, location=location)
    model = aiplatform.Model(model_name=model_name)

    model.deploy(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        metadata=metadata,
        sync=sync,
    )

    model.wait()

    #print(model.display_name)
    #print(model.resource_name)
    return model


In [36]:
model_path = "projects/nishitp-daml/locations/us-central1/models/2221017886154031104"
deploy_package_detection_model(PROJECT_ID, LOCATION, model_path, endpt, "package_detection_model_v1", 0, None, 1, 1, (), True)

## Get predictions from the model

### Online prediction

In [3]:

def predict_image_object_detection_sample(
    project: str,
    endpoint_id: str,
    filename: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    with open(filename, "rb") as f:
        file_content = f.read()

    # The format of each instance should conform to the deployed model's prediction input schema.
    encoded_content = base64.b64encode(file_content).decode("utf-8")
    instance = predict.instance.ImageObjectDetectionPredictionInstance(
        content=encoded_content,
    ).to_value()
    instances = [instance]
    
    # See gs://google-cloud-aiplatform/schema/predict/params/image_object_detection_1.0.0.yaml for the format of the parameters.
    parameters = predict.params.ImageObjectDetectionPredictionParams(
        confidence_threshold=0.5, max_predictions=5,
    ).to_value()
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))

In [24]:
#image_path = "gs://ds-ml-demos/package-images/IMG_6817_jpg.rf.2b27ee700087707e9721d9ef99180953.jpg"
image_path = "package-images_IMG_6817_jpg.rf.2b27ee700087707e9721d9ef99180953.jpg"
endpt = "4724214436460494848" #USE THIS FROM PREVIOUS CELL VARS
predict_image_object_detection_sample(PROJECT_ID, endpt, image_path)

response
 deployed_model_id: 4554010036480770048
 prediction: {'displayNames': ['small_package'], 'confidences': [0.977389872], 'bboxes': [[0.532388628, 0.637344182, 0.714642763, 0.819512486]], 'ids': ['8059155249295785984']}


In [None]:
from io import StringIO
from google.cloud import storage

# storage_client = storage.Client.from_service_account_json('service_account.json')
storage_client = storage.Client()

bucket = storage_client.get_bucket("package-images")

blob = bucket.blob("IMG_6817_jpg.rf.2b27ee700087707e9721d9ef99180953.jpg")
blob = blob.download_as_string()

predict_image_object_detection_sample(PROJECT_ID, endpt, blob)

### Batch prediction

In [38]:
%%writefile files/batch-prediction-inputs.jsonl

{"content": "gs://ds-ml-demos/package-images/package_1_jpg.rf.1dd5f86efdbf9d125fee179b79990815.jpg", "mimeType": "image/jpeg"}
{"content": "gs://ds-ml-demos/package-images/package_2_jpg.rf.8c973fc25e3e5d21b9c530a10fee8b9c.jpg", "mimeType": "image/jpeg"}
{"content": "gs://ds-ml-demos/package-images/package_3_jpg.rf.0d9a9aff39c3a0ebdc4688341b2897d1.jpg", "mimeType": "image/jpeg"}
{"content": "gs://ds-ml-demos/package-images/PXL_20201107_214721193_jpg.rf.d99f9ee32d61e29eada5329cf05098cc.jpg", "mimeType": "image/jpeg"}
{"content": "gs://ds-ml-demos/package-images/IMG_6818_jpg.rf.88e460792f0c2f00c939494186207e02.jpg", "mimeType": "image/jpeg"}

Writing files/batch-prediction-inputs.jsonl


In [34]:
#function for batch prediction

def get_package_detection_batch_predictions(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source,
    gcs_destination: str,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    model = aiplatform.Model(model_resource_name)

    batch_prediction_job = model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        sync=sync,
    )

    batch_prediction_job.wait()

    print(batch_prediction_job.display_name)
    print(batch_prediction_job.resource_name)
    print(batch_prediction_job.state)
    return batch_prediction_job
 

In [40]:
GCS_IN_PATH = "gs://ds-ml-demos/package-image-batch-predictions/input/batch-prediction-inputs.jsonl"
GCS_OUT_PATH = "gs://ds-ml-demos/package-image-batch-predictions/output/"
get_package_detection_batch_predictions(PROJECT_ID, LOCATION, model_path, "batch_pred_job_test", GCS_IN_PATH, GCS_OUT_PATH)

INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/474014863033/locations/us-central1/batchPredictionJobs/874836322244624384
INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:
INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/474014863033/locations/us-central1/batchPredictionJobs/874836322244624384')
INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/874836322244624384?project=474014863033
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/474014863033/locations/us-central1/batchPredictionJobs/874836322244624384 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/474014863033/locations/us-central1/batchPredictionJobs/874836322244624384 current state:
JobState.JOB_STATE_RUN

<google.cloud.aiplatform.jobs.BatchPredictionJob object at 0x7f06cb754310> 
resource name: projects/474014863033/locations/us-central1/batchPredictionJobs/874836322244624384

# Test Code

#export notebook
import os

os.system('jupyter nbconvert --to html dma-01-automl-object-detection-using-api.ipynb')

In [27]:
end_pt_str = "projects/474014863033/locations/us-central1/endpoints/7482669208224923648"
end_point_id = end_pt_str[end_pt_str.rindex('/')+1:]
end_point_id

'7482669208224923648'

#### Create batch prediction input file



In [30]:
pwd()

'/home/jupyter/gh-google-cloud/google-cloud/demos'