<a href="https://colab.research.google.com/github/keshav1370/ClassificationAlgo/blob/main/Notebooks/vision_product_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Product Search Setup

## Imports

In [None]:
!apt -qq update
!pip install google-cloud-vision>=1.28.0
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq install gcsfuse


from google.cloud import vision, storage


import firebase_admin
from firebase_admin import credentials, db

from google.colab import auth, drive

import os
import pandas as pd
import json
import urllib.parse


58 packages can be upgraded. Run 'apt list --upgradable' to see them.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2537  100  2537    0     0   112k      0 --:--:-- --:--:-- --:--:--  112k
OK
gcsfuse is already the newest version (0.39.2).
0 upgraded, 0 newly installed, 0 to remove and 58 not upgraded.


In [None]:
# Credentials file that is exported as GOOGLE_APPLICATION_CREDENTIALS, and the
# mounted Drive folder it is read from.
cred_file_name = "mystryvision-db1b5dc9321f.json"
cred_file_path = "/content/gdrive/My Drive/Colab Notebooks/"

# Google Cloud project, region and storage bucket.
PROJ_ID = "mystryvision"
LOCATION = "us-east1"
BUCKET_ID = "mystry-product-set1"

# Product set that is (re)created and indexed by this notebook.
p_set_id = "mystry-sneaker-set"
P_SET_DISPLAY = "Mystry Sneaker Set"

# gs:// location of the bulk-import CSV inside BUCKET_ID.
bulk_import_csv_uri = f"gs://{BUCKET_ID}/mystry-sneakers-bulk-import.csv"

In [None]:
# Authenticate this Colab session and mount Drive, which holds the key file.
auth.authenticate_user()
drive.mount('/content/gdrive')
# os.path.join keeps the path correct whether or not cred_file_path ends with
# a slash (plain string concatenation silently depends on it).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(cred_file_path, cred_file_name)

In [None]:
# Firebase Admin private key, read from the mounted Drive folder.
firebase_key_path = "/content/gdrive/My Drive/Colab Notebooks/mystryworld-admin-private-key.json"
cred = credentials.Certificate(firebase_key_path)

In [None]:
# Realtime Database endpoint passed to firebase_admin.initialize_app.
firebase_databaseURL = "https://mystryworld-4159d-default-rtdb.firebaseio.com/"

# Vision Imports

First, upload `mystryvision-db1b5dc9321f.json` (see the MystryLens repo) to Colab, then **update `cred_file_name` / `cred_file_path` in the configuration cell to match where you put it**.

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Load Firebase Data

In [None]:
# Keep the App in its own variable: rebinding `firebase_admin` would shadow the
# module itself. Reuse the default app when it already exists so this cell can
# be re-run without initialize_app raising ValueError.
try:
  firebase_app = firebase_admin.get_app()
except ValueError:
  firebase_app = firebase_admin.initialize_app(cred, {'databaseURL': firebase_databaseURL})

In [None]:
# Fetch the `/shoes` node of the Realtime Database.
shoes_ref = db.reference('/shoes')
rtdbjson = shoes_ref.get()

#### Sanity Check: This should print a bunch of shoeIDs

In [None]:
# Show the count and a small sample of shoe IDs rather than dumping every key
# into the notebook output.
print(f"{len(rtdbjson)} shoes loaded")
list(rtdbjson)[:10]

dict_keys(['-MkI_UIKBFkNFyiua4IM', '-MkIamP4Rvb2ZkzTapgz', '-MkIeAHpXGbMGzZDgPQk', '-MkIfUJ7oxnCkmbySBTK', '-MklzYUH36TiSU3e0r4g', '-Mkm-52Tb29MRuM98dHM', '-Mkm-Y5AomWp_dlcttul', '-MlDVtshCNN3bcwUIRFi', '-MlDWmvbQodi_5IxJE83', '-MlD_AmerXslOsWte3n4', '-MlDa1pZLOSOFpeSZoQM', '-MlDaVbae_bxcGShtVry', '-MlDbE8FWUox8Lx8dwNF', '-MlDbjLf9LoxVxDjgEAU', '-MlDc0vviy7CZVRLD5OL', '-MlDcZlWhAeyHRC_VwLl', '-MlDd1CBdxZow3018-2U', '-MlDdRPk5l6Gnfr-h2yU', '-MlDdwX10JmTJCbc6Dr0', '-MlDeFjKRswCl_xP8ZTc', '-MlDgTmosOvZe4u8EQBo', '-MlDh1288FSyCfvUmBch', '-MlDhH2-lnQwdaQegqj_', '-MlDhfRiNWcJkTENeLTN', '-MlDhw4VINiRQGA8iJdQ', '-MlDiYo6arYjRRAziBqO', '-Mm1Iecxpv2RZVQCeNJX', '-Mm1Iws-A81eyhKVzNZq', '-Mm1K43ee4vsgaySwse4', '-Mm1KQ2KcAsuncMiK5iI', '-Mm1L132HzhVeI8N--4S', '-Mm1LN0NIW4l-7Mhsd32', '-Mm1Lozgd4SiZSzkLBsU', '-Mm1MFkJ90pHEKmT4ayM', '-Mm1MeBOq64vMg78Qmpv', '-Mm1O0sLrMdZglISaznb', '-Mm1OIlbSw0ImKK_DD4b', '-Mm1OoZR8fGfa9jy3-u5', '-Mm1P8t08tAgsoCMN0GB', '-Mm1PkZAZnudJViTiPXx', '-Mm1R0-56tRxE1Mt821W', '-Mm1

In [None]:
def purge_products_in_product_set(
        project_id, location, product_set_id, force):
    """Purge every product that belongs to a product set.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
        force: Perform the purge only when force is set to True.
    """
    search_client = vision.ProductSearchClient()

    purge_request = {
        "parent": f"projects/{project_id}/locations/{location}",
        "product_set_purge_config": vision.ProductSetPurgeConfig(
            product_set_id=product_set_id),
        # Purging is irreversible and removes multiple products, so the
        # caller has to opt in explicitly with force=True; otherwise the
        # service raises an exception.
        "force": force,
    }

    # The purge runs asynchronously: block until it finishes (at most 500 s).
    purge_operation = search_client.purge_products(request=purge_request)
    purge_operation.result(timeout=500)

    print('Deleted products in product set.')

def delete_product_set(project_id, location, product_set_id):
    """Remove a product set.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
    """
    search_client = vision.ProductSearchClient()

    # The delete call is addressed by the set's full resource path.
    search_client.delete_product_set(
        name=search_client.product_set_path(
            project=project_id,
            location=location,
            product_set=product_set_id))
    print('Product set deleted.')


# Purge the products of the existing set, then delete the set itself.
purge_products_in_product_set(
    project_id=PROJ_ID, location=LOCATION, product_set_id=p_set_id, force=True)
delete_product_set(
    project_id=PROJ_ID, location=LOCATION, product_set_id=p_set_id)

Deleted products in product set.
Product set deleted.


# Create A [Product Set](https://cloud.google.com/vision/product-search/docs/create-product-set)

In [None]:
# create product set
def create_product_set(
        project_id, location, product_set_id, product_set_display_name):
    """Create a product set in the given project and region, then print its name.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
        product_set_display_name: Display name of the product set.
    """
    search_client = vision.ProductSearchClient()

    # The response is the product set with `name` populated.
    created_set = search_client.create_product_set(
        parent=f"projects/{project_id}/locations/{location}",
        product_set=vision.ProductSet(
            display_name=product_set_display_name),
        product_set_id=product_set_id)

    print('Product set name: {}'.format(created_set.name))


# Create the product set described in the configuration cell.
create_product_set(
    project_id=PROJ_ID,
    location=LOCATION,
    product_set_id=p_set_id,
    product_set_display_name=P_SET_DISPLAY)

Product set name: projects/mystryvision/locations/us-east1/productSets/mystry-sneaker-set


# Build CSV for bulk import
To create the CSV, follow [this format](https://cloud.google.com/vision/product-search/docs/csv-format).

In [None]:
# Read from `rtdbjson` directly: `data` is only bound further down, so using it
# here fails on a fresh kernel.
rtdbjson["-MkI_UIKBFkNFyiua4IM"]["shoeImg"]['lowRes']

{'back': 'https://firebasestorage.googleapis.com/v0/b/mystryworld-4159d.appspot.com/o/shoes%2FlowRes%2F-MkI_UIKBFkNFyiua4IM%2Fshoeid-4.PNG?alt=media&token=1417b5e1-3de1-4077-9345-b222a4e805ee',
 'front': 'https://firebasestorage.googleapis.com/v0/b/mystryworld-4159d.appspot.com/o/shoes%2FlowRes%2F-MkI_UIKBFkNFyiua4IM%2Fshoeid-2.PNG?alt=media&token=0f5a7273-dd14-4301-ab02-6ac620a0fa70',
 'left': 'https://firebasestorage.googleapis.com/v0/b/mystryworld-4159d.appspot.com/o/shoes%2FlowRes%2F-MkI_UIKBFkNFyiua4IM%2Fshoeid-3.PNG?alt=media&token=0c979847-99d0-40e6-907f-2297e7f8a5f6',
 'right': 'https://firebasestorage.googleapis.com/v0/b/mystryworld-4159d.appspot.com/o/shoes%2FlowRes%2F-MkI_UIKBFkNFyiua4IM%2Fshoeid-1.PNG?alt=media&token=1c8392c4-c17f-4cb8-8017-a26b50a98f9e',
 'sole': 'https://firebasestorage.googleapis.com/v0/b/mystryworld-4159d.appspot.com/o/shoes%2FlowRes%2F-MkI_UIKBFkNFyiua4IM%2Fshoeid-5.PNG?alt=media&token=92c9532a-ad9c-4921-90e4-f3ebfd5963fd'}

Convert each `https://…` download URL to a `gs://…` URI, because the bulk-import CSV needs the Google Cloud Storage `URI` of the image and not its `URL`.

In [None]:
def url2gs_uri(url):
  """Convert a Firebase Storage download URL into a `gs://` URI.

  The URL is expected to have the shape
  `https://<host>/v0/b/<bucket>/o/<percent-encoded object path>?<query>`.

  Args:
    url: Download URL of the stored object.
  Returns:
    `gs://<bucket>/<object path>`, with the object path percent-decoded and
    the query string removed.
  """
  # Separate the query string *before* percent-decoding, so an encoded "?"
  # (%3F) inside the object name is not mistaken for the query delimiter.
  path_parts = urllib.parse.urlsplit(url).path.split('/')
  # path_parts == ['', 'v0', 'b', <bucket>, 'o', <object path>, ...]
  bucket = urllib.parse.unquote(path_parts[3])
  object_path = urllib.parse.unquote('/'.join(path_parts[5:]))
  return "/".join(['gs:/', bucket, object_path])
# Demonstrate the conversion on one image URL of the first shoe.
# (`dict.keys()` is not subscriptable, and `lowRes` is a dict of view -> URL,
# so pick a single URL out of it; `rtdbjson` is used because `data` is only
# bound further down.)
first_shoe = next(iter(rtdbjson.values()))
http_url = next(iter(first_shoe["shoeImg"]['lowRes'].values()))
gs_uri = url2gs_uri(http_url)
print(f"URL: {http_url}")
print(f"URI: {gs_uri}")

Build a list of records so that a pandas DataFrame can be created from it directly and then written out as a `.csv` file.

In [None]:
# Build one CSV record per reference image: every low-res view of every shoe
# except the sole.
records_arr = []
arr_id=[]
data = rtdbjson
for s_id, shoe in data.items():
  lowRes_im = shoe["shoeImg"]['lowRes']
  s_name = shoe['shoeName']
  # Labels are written as `key=value,key=value`. No whitespace is put around
  # the comma, so the second key is exactly "silhouette" rather than
  # " silhouette".
  s_labels = ",".join([
      "colorway=" + shoe['shoeColorway'],
      "silhouette=" + shoe['silhouette'],
  ])
  for im, im_url in lowRes_im.items():
    if im == 'sole':
      continue
    records_arr.append({
        "image-uri": url2gs_uri(im_url),
        "image-id": s_id + "_" + im,
        "product-set-id": p_set_id,
        "product-id": s_id,
        "product-category": "apparel-v2",
        "product-display-name": s_name,
        "labels": s_labels,
        "bounding-poly": None
        })
bulk_import_df = pd.DataFrame.from_records(records_arr)
# NOTE(review): to_csv also writes a header row; confirm against the Product
# Search CSV format reference whether the import accepts one.
bulk_import_df.to_csv("mystry-sneakers-bulk-import.csv", index=False)
print("CSV created")


<class 'dict'>


Sanity Check

In [None]:
# Raw values of the first record, followed by the last two rows of the frame.
print(bulk_import_df.to_numpy()[0])
bulk_import_df.iloc[-2:]

Unnamed: 0,image-uri,image-id,product-set-id,product-id,product-category,product-display-name,labels,bounding-poly
0,gs://mystryworld-4159d.appspot.com/shoes/lowRe...,-MkI_UIKBFkNFyiua4IM_back,mystry-sneaker-set,-MkI_UIKBFkNFyiua4IM,apparel-v2,Jordan 1 Retro High OG Hyper Royal,"colorway=Hyper Royal/Light Smoke Grey/White, s...",
1,gs://mystryworld-4159d.appspot.com/shoes/lowRe...,-MkI_UIKBFkNFyiua4IM_front,mystry-sneaker-set,-MkI_UIKBFkNFyiua4IM,apparel-v2,Jordan 1 Retro High OG Hyper Royal,"colorway=Hyper Royal/Light Smoke Grey/White, s...",


### Create `POST` request body needed for starting the indexing operation


In [None]:
# Request body pointing the import at the bulk-import CSV. json.dump is used
# instead of a hand-written template so the file is always valid,
# correctly-escaped JSON.
import_request = {
    "inputConfig": {
        "gcsSource": {
            "csvFileUri": bulk_import_csv_uri
        }
    }
}
with open("import_request.json", "w") as fil:
  json.dump(import_request, fil, indent=2)


## Now automatically upload the csv file to the bucket
Note down the uri (starts with `gs://`) of the csv in the bucket <br>
Then: Create import request

In [None]:
# Upload the CSV and the request body to the bucket. The local paths are the
# same relative paths the earlier cells wrote to, so the upload does not depend
# on the working directory being /content.
client = storage.Client(project=PROJ_ID)
bucket = client.get_bucket(BUCKET_ID)
blob = bucket.blob('mystry-sneakers-bulk-import.csv')
blob.upload_from_filename('mystry-sneakers-bulk-import.csv')
blob2 = bucket.blob('import_request.json')
blob2.upload_from_filename('import_request.json')

# Start Indexing operation
On success, output will be
```
{"name": "projects/mystryvision/locations/us-east1/operations/<some_code>"}
```

In [None]:
index_operation = !curl -X POST -H "Authorization: Bearer "$(gcloud auth application-default print-access-token) -H "Content-Type: application/json; charset=utf-8" -d @import_request.json https://vision.googleapis.com/v1/projects/mystryvision/locations/us-east1/productSets:import


In [None]:
# Re-assemble the captured curl output into one JSON document; the operation id
# is the last path segment of its `name` field.
operation_json = json.loads("".join(index_operation))
operation_id = operation_json['name'].rsplit('/', 1)[-1]
print(operation_json)
print(f"Operation ID: {operation_id}")

{'name': 'projects/mystryvision/locations/us-east1/operations/82959e2d1fcceb29'}
Operation ID: 82959e2d1fcceb29


# Check status of operation
On success, the response will have a key `"state": "SUCCESSFUL"`. At the end of the response there will be a number of empty `{}`'s

In [None]:
# Fetch the operation resource for `operation_id` (interpolated into the URL by
# IPython) and print the captured response lines.
status = !curl -X GET \
-H "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
-H "Content-Type: application/json" \
https://vision.googleapis.com/v1/locations/us-east1/operations/{operation_id}
print("\n".join(status))

{
  "name": "locations/us-east1/operations/82959e2d1fcceb29",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.vision.v1.BatchOperationMetadata",
    "state": "PROCESSING",
    "submitTime": "2022-01-20T19:30:57.176899212Z"
  }
}


# Wait for 30 - 60 minutes

# Query the API

In [None]:
from google.cloud import vision

def get_similar_products_file(
        project_id, location, product_set_id, product_category,
        file_path, filter):
    """Query a product set with a local image and print the matches.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
        product_category: Category of the product.
        file_path: Local file path of the image to be searched.
        filter: Condition to be applied on the labels.
        Example for filter: (color = red OR color = blue) AND style = kids
        It will search on all products with the following labels:
        color:red AND style:kids
        color:blue AND style:kids

    Returns:
        The response object returned by the product search call.
    """
    # This client is only used to build the product set resource path.
    search_client = vision.ProductSearchClient()
    annotator_client = vision.ImageAnnotatorClient()

    # The query image is sent as raw bytes.
    with open(file_path, 'rb') as image_file:
        query_image = vision.Image(content=image_file.read())

    # Product-search specific parameters, wrapped in an image context.
    search_params = vision.ProductSearchParams(
        product_set=search_client.product_set_path(
            project=project_id, location=location,
            product_set=product_set_id),
        product_categories=[product_category],
        filter=filter)
    search_context = vision.ImageContext(
        product_search_params=search_params)

    # Search products similar to the image.
    response = annotator_client.product_search(
        query_image, image_context=search_context)

    search_results = response.product_search_results
    print('Product set index time: ')
    print(search_results.index_time)

    print('Search results:')
    for match in search_results.results:
        matched_product = match.product

        print('Score(Confidence): {}'.format(match.score))
        print('Image name: {}'.format(match.image))

        print('Product name: {}'.format(matched_product.name))
        print('Product display name: {}'.format(
            matched_product.display_name))
        print('Product description: {}\n'.format(
            matched_product.description))
        print('Product labels: {}\n'.format(
            matched_product.product_labels))
    return response

In [None]:
!wget https://cdn.shopify.com/s/files/1/0256/6658/4658/files/Travis_Scott_x_Air_Jordan_1_Retro_High_OG_Mocha.jpg 
!wget https://images.stockx.com/images/Nike-Dunk-Low-UNC-2021-Product.jpg?fit=fill&bg=FFFFFF&w=700&h=500&auto=format,compress&q=90&dpr=2&trim=color&updated_at=1624468252
!wget https://cdn.shopify.com/s/files/1/0255/9429/8467/files/nike-dunk-low-university-blue-on-foot_600x600.jpg?v=1625489247

--2022-01-20 17:54:38--  https://cdn.shopify.com/s/files/1/0256/6658/4658/files/Travis_Scott_x_Air_Jordan_1_Retro_High_OG_Mocha.jpg
Resolving cdn.shopify.com (cdn.shopify.com)... 151.101.1.12, 151.101.65.12, 151.101.193.12, ...
Connecting to cdn.shopify.com (cdn.shopify.com)|151.101.1.12|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 211775 (207K) [image/jpeg]
Saving to: ‘Travis_Scott_x_Air_Jordan_1_Retro_High_OG_Mocha.jpg’


2022-01-20 17:54:38 (21.5 MB/s) - ‘Travis_Scott_x_Air_Jordan_1_Retro_High_OG_Mocha.jpg’ saved [211775/211775]

--2022-01-20 17:54:38--  https://images.stockx.com/images/Nike-Dunk-Low-UNC-2021-Product.jpg?fit=fill
Resolving images.stockx.com (images.stockx.com)... 104.19.222.38, 104.19.221.38, 2606:4700::6813:dd26, ...
Connecting to images.stockx.com (images.stockx.com)|104.19.222.38|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 174526 (170K) [image/jpeg]
Saving to: ‘Nike-Dunk-Low-UNC-2021-Product.jpg?fit=fill’

In [None]:
# Search the product set with one of the downloaded images, without a label
# filter. Alternative query images:
#   path2 = "./Travis_Scott_x_Air_Jordan_1_Retro_High_OG_Mocha.jpg"
#   path2 = "/content/Nike-Dunk-Low-UNC-2021-Product.jpg?fit=fill"
#   gspath = "gs://cv-bucket-1/Nike-Dunk-Low-UNC-2021-Product.jpeg"
product_category = 'apparel-v2'
path2 = "/content/nike-dunk-low-university-blue-on-foot_600x600.jpg?v=1625489247"
resp = get_similar_products_file(
    project_id=PROJ_ID,
    location=LOCATION,
    product_set_id=p_set_id,
    product_category=product_category,
    file_path=path2,
    filter="")

Product set index time: 
2022-01-20 17:24:29.438096+00:00
Search results:
Score(Confidence): 0.6936774253845215
Image name: projects/mystryvision/locations/us-east1/products/-MmZW_uOiPHJKMpRCUyf/referenceImages/-MmZW_uOiPHJKMpRCUyf_shoeid-3.PNG
Product name: projects/mystryvision/locations/us-east1/products/-MmZW_uOiPHJKMpRCUyf
Product display name: "Dunk Low 'University Blue' "
Product description: 

Product labels: [key: "colorway"
value: "White/University Blue/White silhouette=Dunk Low"
]

Score(Confidence): 0.6669532060623169
Image name: projects/mystryvision/locations/us-east1/products/-MmZe5bj_6LH_QBGbAoO/referenceImages/-MmZe5bj_6LH_QBGbAoO_shoeid-1.PNG
Product name: projects/mystryvision/locations/us-east1/products/-MmZe5bj_6LH_QBGbAoO
Product display name: Wmns Dunk Low 'Coast'
Product description: 

Product labels: [key: "colorway"
value: "Sail/Coast/University Gold silhouette=Dunk Low"
]

Score(Confidence): 0.5352774858474731
Image name: projects/mystryvision/locations/us-ea