In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<div class="markdown-google-sans">

# Product & Tag Recognizer - Building your catalog<img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
</div>



<div class="markdown-google-sans">

## **Setup**
</div>

Installing required python libraries

In [1]:
!pip install google-cloud-storage pandas



In [2]:
import pandas as pd
from google.cloud import storage

import json
import math

import time

from tqdm import tqdm


<div class="markdown-google-sans">

### **Environment variables**
</div>


In [3]:
PROJECT_ID='neon-camera-403606'
CATALOG_ID='neon-camera-catalog-labelled'
INDEX_ID='labelled-001'
ENDPOINT_ID='labelled-001'
LOCATION = 'us-central1'

# you can ingest data from google cloud storage or directly from a bigquery
# in this example we will use cloud storage
INPUT_BUCKET='gs://neon-camera-403606-catalog'
INPUT_FILE='ww_products.csv'

#optional output in case you want to check the knowledge graph coverage of your items
COVERAGE_CSV_OUTPUT_DIRECTORY = 'gs://neon-camera-403606-output'


<div class="markdown-google-sans">

### **GCS Functions**
</div>

Section with the common functions that we will require.


In [4]:
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"
    # The path to your file to upload
    # source_file_name = "local/path/to/file"
    # The ID of your GCS object
    # destination_blob_name = "storage-object-name"

    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)


    blob.upload_from_filename(source_file_name, )


In [5]:
#create a functions that gives you the files in a given bucket
def list_blobs(bucket_name):
    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    """Lists all the blobs in the bucket."""

    storage_client = storage.Client()

    blobs = storage_client.list_blobs(bucket_name)

    return blobs


In [6]:

def list_blobs_prefix(bucket_name, path_prefix, skip_folders=True):
    """Lists all the blobs in the bucket and returns the one that satisfy
    a prefix"""
    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    storage_client = storage.Client()

    blobs = storage_client.list_blobs(bucket_name)

    res =[]
    for blob in blobs:

      if(blob.name.startswith(path_prefix)):
        if(skip_folders):
          if(blob.name.endswith('/')):
            continue
        res.append(blob.name)
    return res

In [7]:
def upload_blob_from_memory(bucket_name, contents, destination_blob_name):
    """Uploads a file to the bucket."""

    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The contents to upload to the file
    # contents = "these are my contents"

    # The ID of your GCS object
    # destination_blob_name = "storage-object-name"

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_string(contents)

In [8]:
def create_run_file(run_id, bucket_name, contents, destination_blob_name):
    """Uploads a file to the bucket."""
    #contains gs:// remove it
    bucket_name = RUNS_BUCKET.replace('gs://','')

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(run_id+'/'+destination_blob_name)

    blob.upload_from_string(contents)

    print(
        f"{destination_blob_name} with contents {contents} uploaded to {bucket_name}."
    )

In [9]:
def upload_file(bucket_name, contents, destination_blob_name):
    """Uploads a file to the bucket."""
    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_string(contents)

In [10]:
def load_json_blob(bucket_name, blob_name):
    """Loads a blob from the bucket."""
    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    # Instantiate a Google Cloud Storage client and specify required bucket and file
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)

    raw_data = blob.download_as_text(client=None)
    temp = raw_data.split('\n')

    data = [json.loads(line) for line in temp[:-1]]

    # Download the contents of the blob as a string and then parse it using json.loads() method
    return data

In [11]:
def load_blob(bucket_name, blob_name):
    print(f'looking for {blob_name} in {bucket_name}')
    """Loads a blob from the bucket."""
    #contains gs:// remove it
    bucket_name = bucket_name.replace('gs://','')

    # Instantiate a Google Cloud Storage client and specify required bucket and file
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)

    return blob

<div class="markdown-google-sans">

### **Support Functions**
</div>



In [12]:
# checks if a given long running operation has been completed
# it will block the operation until the operation has completed.
def check_operation_status(operation_id, check_timeout=60):
  start = time.time()
  done = False

  while not done:
    operation_result = !curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/operations/$OPERATION_ID
    operation_result = str(operation_result)

    current_time = time.time()
    elapsed_time = str(round(current_time-start))+' seconds ' if current_time - start < 60 else str( round((current_time-start)/60) ) + ' minutes'
    print("\r"+'Awaiting function {}  {} elapsed.'.format(OPERATION_ID, elapsed_time),end="")
    time.sleep(check_timeout)
    done = '"done": true' in operation_result


  print(f' Operation: {OPERATION_ID} done')
  return operation_result



<div class="markdown-google-sans">

# **Catalog**

There are multiple concepts that are relevant when you are building your product catalog. In this example we will build a sample **Catalog**, that will include a group of **Products**. since we will be leveraging known products with clear **GTINs** we won't be uploading **Product Image**.

We will then create a **Product Recognition Index** and deploy that to an **Endpoint**.

Here is a diagram with all related entities:

<img src='https://cloud.google.com/static/vision-ai/docs/images/product-tag-recognizer-api-concept-diagram.png' />
</div>


For more information go to https://cloud.google.com/vision-ai/docs/product-recognizer

<div class="markdown-google-sans">

## **Listing Catalogs**
There might be previously created catalogs, so check if you have product catalogs created and which products are in it.
</div>




In [None]:
# create a catalog
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs

List products in a given catalog

In [None]:
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs/$CATALOG_ID/retailProducts

List Catalog Indexes

In [None]:
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs/$CATALOG_ID/retailProductRecognitionIndexes

List endpoints

In [None]:
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailEndpoints

<div class="markdown-google-sans">

## **New Catalog**
Now that you have a notion of previously deployed catalogs, let's create one.
1.   Create a Catalog.
2.   Loads data from the configured data source.
3.   Import Products and (optional) ProductImages into the Catalog.
4.   Create an Index, either from the entire Catalog or from a ProductSet.
5.   Create an Endpoint with desired product recognition config and deploys an Index to it.
</div>




<div class="markdown-google-sans">

### **Create a Catalog**
</div>

In [13]:
#make the request
output = !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json"  https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs?retail_catalog_id=$CATALOG_ID

ouput = str(output)
OPERATION_ID = output[1].split(':')[1].split('/')[-1][0:-2]


In [14]:
print(OPERATION_ID)

operation-1707104733230-6109a4b37f660-99776163-4188ef8a


In [15]:
check_operation_status(OPERATION_ID)

Awaiting function operation-1707104733230-6109a4b37f660-99776163-4188ef8a  1 seconds  elapsed. Operation: operation-1707104733230-6109a4b37f660-99776163-4188ef8a done


'[\'{\', \'  "name": "projects/neon-camera-403606/locations/us-central1/operations/operation-1707104733230-6109a4b37f660-99776163-4188ef8a",\', \'  "done": true,\', \'  "response": {\', \'    "@type": "type.googleapis.com/google.cloud.visionai.v1alpha1.RetailCatalog",\', \'    "name": "projects/neon-camera-403606/locations/us-central1/retailCatalogs/neon-camera-catalog-labelled",\', \'    "createTime": "2024-02-05T03:45:33.245600012Z",\', \'    "updateTime": "2024-02-05T03:45:33.245600012Z",\', \'    "resourceState": "RETAIL_RESOURCE_STATE_CREATED"\', \'  }\', \'}\']'

<div class="markdown-google-sans">

### **Load Products**
</div>

In [16]:
# first we will read the csv file from the data source
df = pd.read_csv(INPUT_BUCKET+'/'+INPUT_FILE)
df.head()

Unnamed: 0,Article,Article Name,EAN,Category,x
0,97501,Twinings ASHA Vanilla Pk 18,70177540852,BEVERAGES,TEA
1,97501,Twinings ASHA Vanilla Pk 18,70177226305,BEVERAGES,TEA
2,97471,Bundaberg Ginger Beer 4x375ml,9311493000165,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS
3,97471,Bundaberg Ginger Beer 4x375ml,79373000467,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS
4,97471,Bundaberg Ginger Beer 4x375ml,19311493437517,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS


<div class="markdown-google-sans">
Now we need to convert this into a format that is ready to import into our product catalog. Let's add this in a column.



```
{"retailProduct":{"name":"00040094314034","gtins":["00040094314034"]},"retailProductImages":[{"name":"1","gcsUri":"gs://shelfchecking-integration-test-data/products-data/00040094314034/1.jpg"}]}
{"retailProduct": {"name":"00123", "thirdPartyIds": {"id": "test-id", "owner": "test-owner"}}}
```

Since we are not providing any product images, we'll product the following:
```
{"retailProduct":{"name":"product id","gtins":["00040094314034"]}}
```
<div/>

In [17]:
locale_id = "en-AU"

In [18]:
df['json'] = df.apply(lambda row: json.dumps({"retailProduct":{"name":str(row['Article']), "title": row['Article Name'],"gtins":[str(row['EAN'])], "locale":locale_id}}), axis=1)
df.head(5)

Unnamed: 0,Article,Article Name,EAN,Category,x,json
0,97501,Twinings ASHA Vanilla Pk 18,70177540852,BEVERAGES,TEA,"{""retailProduct"": {""name"": ""97501"", ""title"": ""..."
1,97501,Twinings ASHA Vanilla Pk 18,70177226305,BEVERAGES,TEA,"{""retailProduct"": {""name"": ""97501"", ""title"": ""..."
2,97471,Bundaberg Ginger Beer 4x375ml,9311493000165,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS,"{""retailProduct"": {""name"": ""97471"", ""title"": ""..."
3,97471,Bundaberg Ginger Beer 4x375ml,79373000467,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS,"{""retailProduct"": {""name"": ""97471"", ""title"": ""..."
4,97471,Bundaberg Ginger Beer 4x375ml,19311493437517,CARBONATED SOFT DRINKS,SOFT DRINKS - BOTTLES & CANS,"{""retailProduct"": {""name"": ""97471"", ""title"": ""..."


<div class="markdown-google-sans">
<p>We have the data in the right format, we will create the jsonl files that will serve as input to the catalog. There is currently a limitation of importing 10,000 products at each run.<p>
<div/>

In [19]:
total_items = len(df)
total_files= math.ceil(total_items / 10000)

print(f'Creating {total_files} file to handle {total_items}')

products_filename = 'products.jsonl'

# output_df = df[['json']]


# Open the JSONL file
with open(products_filename, 'w') as f:
    for row in df.itertuples():
      f.write(row.json+'\n')


# Upload file to a gcs bucket
upload_blob(INPUT_BUCKET, products_filename, products_filename)


Creating 1 file to handle 5494


<div class="markdown-google-sans">

### **Import Products**
</div>

In [20]:
request_data=f"""
{{
    "gcs_source": {{
      "uris": "{INPUT_BUCKET + '/' + products_filename}"
    }},
    "format": "FORMAT_JSONL"
}}
"""

# Open the file for writing
with open('data.json', 'w') as f:
    # Define the data to be written
    # Use a for loop to write each line of data to the file
    f.write(request_data)
    f.close()

In [21]:
output = !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/{PROJECT_ID}/locations/$LOCATION/retailCatalogs/{CATALOG_ID}:importRetailProducts -d @data.json

In [22]:
ouput = str(output)
OPERATION_ID = output[1].split(':')[1].split('/')[-1][0:-2]
print(OPERATION_ID)

operation-1707104848158-6109a52119f19-ecf0d38f-7ada5ca5


In [23]:

check_operation_status(OPERATION_ID)

Awaiting function operation-1707104848158-6109a52119f19-ecf0d38f-7ada5ca5  28 minutes elapsed. Operation: operation-1707104848158-6109a52119f19-ecf0d38f-7ada5ca5 done


'[\'{\', \'  "name": "projects/neon-camera-403606/locations/us-central1/operations/operation-1707104848158-6109a52119f19-ecf0d38f-7ada5ca5",\', \'  "metadata": {\', \'    "@type": "type.googleapis.com/google.cloud.visionai.v1alpha1.RetailOperationMetadata",\', \'    "partialFailures": [\', \'      {\', \'        "code": 3,\', \'        "message": "All given gtins were invalid and no third party ids were provided.\\\\ncom.google.apps.framework.request.StatusException: \\\\u003ceye3 title=\\\'INVALID_ARGUMENT\\\'/\\\\u003e generic::INVALID_ARGUMENT: All given gtins were invalid and no third party ids were provided. while creating product: \\\'804969\\\'"\', \'      },\', \'      {\', \'        "code": 3,\', \'        "message": "All given gtins were invalid and no third party ids were provided.\\\\ncom.google.apps.framework.request.StatusException: \\\\u003ceye3 title=\\\'INVALID_ARGUMENT\\\'/\\\\u003e generic::INVALID_ARGUMENT: All given gtins were invalid and no third party ids were pr

<div class="markdown-google-sans">

### **Create a Product Recognition Index**

You have the following options when creating a Product Recognition Index:

1. Create without check the KG coverage
2. Create and get the KG coverage of your products (informing which ones you'll need to add more info.
3. Only from a Product set. *not shown here*

More info see [here](https://cloud.google.com/vision-ai/docs/product-recognizer#productrecognitionindex).

</div>



In [None]:
# Create from Catalog
# !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs/$CATALOG_ID/retailProductRecognitionIndexes?retail_product_recognition_index_id=$INDEX_ID

Alternatively you can add the option to output the option of your knowledge graph upon creation

In [24]:
# Create from Catalog and output index coverage CSV file to a Cloud Storage directory.
# This API method is especially useful when you want to understand the
# actual number of images used to construct the index per imported GTIN, and
# the image can be sourced from the Google Product database, your own importing,
# and other means (such as with the Google Data Labeling service.
output = !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" "https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/us-central1/retailCatalogs/$CATALOG_ID/retailProductRecognitionIndexes?retail_product_recognition_index_id=$INDEX_ID&coverage_output.output_uri_prefix=$COVERAGE_CSV_OUTPUT_DIRECTORY"
ouput = str(output)
OPERATION_ID = output[1].split(':')[1].split('/')[-1][0:-2]

In [25]:
print(OPERATION_ID)

operation-1707107350773-6109ae73c7e88-fc17fe6a-2a354b80


In [26]:
#check if it's done, this operation will take a couple of minutes
check_operation_status(OPERATION_ID)

Awaiting function operation-1707107350773-6109ae73c7e88-fc17fe6a-2a354b80  8 minutes elapsed. Operation: operation-1707107350773-6109ae73c7e88-fc17fe6a-2a354b80 done


'[\'{\', \'  "name": "projects/neon-camera-403606/locations/us-central1/operations/operation-1707107350773-6109ae73c7e88-fc17fe6a-2a354b80",\', \'  "done": true,\', \'  "response": {\', \'    "@type": "type.googleapis.com/google.cloud.visionai.v1alpha1.RetailProductRecognitionIndex",\', \'    "name": "projects/neon-camera-403606/locations/us-central1/retailCatalogs/neon-camera-catalog-labelled/retailProductRecognitionIndexes/labelled-001",\', \'    "createTime": "2024-02-05T04:29:10.964882176Z",\', \'    "updateTime": "2024-02-05T04:29:10.964882176Z",\', \'    "resourceState": "RETAIL_RESOURCE_STATE_CREATED"\', \'  }\', \'}\']'

In [27]:
#confirm it has been properly created by listing
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/$LOCATION/retailCatalogs/$CATALOG_ID/retailProductRecognitionIndexes

{
  "retailProductRecognitionIndexes": [
    {
      "name": "projects/neon-camera-403606/locations/us-central1/retailCatalogs/neon-camera-catalog-labelled/retailProductRecognitionIndexes/labelled-001",
      "createTime": "2024-02-05T04:29:10.964882176Z",
      "updateTime": "2024-02-05T04:36:52.776445009Z",
      "resourceState": "RETAIL_RESOURCE_STATE_CREATED"
    }
  ]
}


<div class="markdown-google-sans">

### **Create and Deploy to Endpoint**
You can set a recognition confidence threshold to limit the results that are returned. This is a value between 0 and 1.
</div>


** PYthon variable not working on cURL commands **



In [28]:
recognition_threshold = 0.2

In [29]:
#create the endpoint first
request_data=f"""
{{
 "productRecognitionConfig": {{
    "recognitionConfidenceThreshold": {recognition_threshold}
  }}
}}
"""

# Open the file for writing
with open('data.json', 'w') as f:
    # Define the data to be written
    # Use a for loop to write each line of data to the file
    f.write(request_data)
    f.close()

In [30]:
output = !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/us-central1/retailEndpoints?retail_endpoint_id=$ENDPOINT_ID -d @data.json
ouput = str(output)
OPERATION_ID = output[1].split(':')[1].split('/')[-1][0:-2]
print(output)

['{', '  "name": "projects/neon-camera-403606/locations/us-central1/retailEndpoints/labelled-001",', '  "createTime": "2024-02-05T05:10:09.756010398Z",', '  "updateTime": "2024-02-05T05:10:09.756010398Z",', '  "resourceState": "RETAIL_RESOURCE_STATE_CREATED",', '  "productRecognitionConfig": {', '    "recognitionConfidenceThreshold": 0.2', '  }', '}']


In [31]:
check_operation_status(OPERATION_ID)

Awaiting function labelled-001  263 minutes elapsed.

KeyboardInterrupt: 

Deploy the product recognition index to the endpoint

In [32]:
#create the endpoint first
request_data=f"""
{{
    "retail_product_recognition_index": "projects/{PROJECT_ID}/locations/us-central1/retailCatalogs/{CATALOG_ID}/retailProductRecognitionIndexes/{INDEX_ID}"
}}
"""

# Open the file for writing
with open('data.json', 'w') as f:
    # Define the data to be written
    # Use a for loop to write each line of data to the file
    f.write(request_data)
    f.close()

In [33]:
output = !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/us-central1/retailEndpoints/$ENDPOINT_ID:deployRetailProductRecognitionIndex -d @data.json

In [None]:
ouput = str(output)
OPERATION_ID = output[1].split(':')[1].split('/')[-1][0:-2]
check_operation_status(OPERATION_ID,5)


Awaiting function operation-1707125609744-6109f278e46a8-5c7b79df-50217ec9  3 minutes elapsed.

{
  "name": "projects/neon-camera-403606/locations/us-central1/operations/operation-1707125609744-6109f278e46a8-5c7b79df-50217ec9",
  "done": false
}


In [None]:
#list endpoints
!curl -sS -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/$PROJECT_ID/locations/us-central1/retailEndpoints

<div class="markdown-google-sans">

# **Update Catalog**
</div>

<div class="markdown-google-sans">Update catalog by creating Product Images. Images are creating per product identifier and need to have a unique name within that product.

Sample:

```
curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/PROJECT_ID/locations/us-central1/retailCatalogs/CATALOG_ID/retailProducts/PRODUCT_ID/retailProductImages?retail_product_image_id=PRODUCT_IMAGE_ID \
  -d '{
    "source_type": "SOURCE_TYPE_HAND_HELD_CAMERA",
    "gcs_uri": "IMAGE_GCS_URI"
  }'
```

You should upload images for the products that do not have any product coverage. This should have been obtained in the Create Product Recognition index step that output a table that has the following columns:


```
GTIN or Third Party ID,Normalized GTIN,Covered by Google Data,Covered by User Created Product Images,Covered by System Generated Product Images
```
</div>

<div class="markdown-google-sans">

## Load Products requiring images
1. from a given file load the gtins that need images;
2. run a curl command per row that will update the given product with an image
3. List images to see if you have succesffuly added.
</div>

In [None]:
# Load images
IMAGE_FILE_URI = 'require_product_images.csv'


df_images = pd.read_csv(INPUT_BUCKET+'/'+IMAGE_FILE_URI)
df_images

In [None]:
#join with product data on EAN
df_result = pd.merge(df_images, df, how="left", on="EAN")
df_result

In [None]:
# add the curl command
df_result['gcs_uri'] = df_result.apply( lambda row: f"""{INPUT_BUCKET}/assets/{row['Article']}""",axis =1)
df_result['command'] = df_result.apply( lambda row: f"""curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/{PROJECT_ID}/locations/us-central1/retailCatalogs/{CATALOG_ID}/retailProducts/{row['Article']}/retailProductImages?retail_product_image_id=1 -d '{{"source_type": "SOURCE_TYPE_FIXED_CAMERA", "gcs_uri": "{row['gcs_uri']}.jpg"}}'""", axis=1)

df_result.head()

<div class="markdown-google-sans">

## Add Products images
</div>

In [None]:
# run commands to add images
for i in tqdm(df_result.itertuples()):
  !$row.command | grep -i 'error' #suppressing successful entries

print('finished adding images')


Adding bread images:

In [None]:
IMAGE_FILE_URI = 'bread_images.csv'
df_images_bread = pd.read_csv(INPUT_BUCKET+'/'+IMAGE_FILE_URI)
df_images_bread.head()

In [None]:
df_images_bread['command'] = df_images_bread.apply( lambda row: f"""curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/{PROJECT_ID}/locations/us-central1/retailCatalogs/{CATALOG_ID}/retailProducts/{row['Article']}/retailProductImages?retail_product_image_id={row['image_id']} -d '{{"source_type": "SOURCE_TYPE_FIXED_CAMERA", "gcs_uri": "{row['gcs_uri']}"}}'""", axis=1)
df_images_bread.head()

In [None]:
# run commands to add images
for row in df_images_bread.itertuples():
  !$row.command

print('finished adding images')

In [None]:
aux_df = df_images_bread

for row in aux_df.itertuples():
  article_id = row.Article
  image_id = row.ID
  !curl -sS -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://visionai.googleapis.com/v1alpha1/projects/{PROJECT_ID}/locations/us-central1/retailCatalogs/{CATALOG_ID}/retailProducts/{article_id}/retailProductImages/1
