In [None]:
import json
import os

import redis
from dotenv import load_dotenv
from redis.commands.search.query import Query
from redisvl.index import SearchIndex
from redisvl.utils.vectorize import OpenAITextVectorizer

# Populate os.environ from the local .env file before any cell reads configuration.
load_dotenv()

In [None]:
# Global Variables

# Embedding client; the API key is read from the `open_ai_api_key` environment variable.
oai = OpenAITextVectorizer(
    model="text-embedding-ada-002",
    api_config=dict(api_key=os.environ.get('open_ai_api_key')),
)

# Redis connection configured from the `host` / `port` environment variables.
# decode_responses=True makes the client return str instead of bytes.
r = redis.Redis(
    host=os.environ.get('host'),
    port=os.environ.get('port'),
    decode_responses=True,
)
p = r.pipeline()

if r.ping():
    print('Connected to Redis')

# WARNING: this deletes every key in the currently selected Redis database so the
# demo starts from a clean slate. Do not point this notebook at a shared instance.
r.flushdb()

In [None]:
# Index definition for the product catalogue.
# Documents are stored as JSON under keys starting with "product".
schema = {
    "index": {
        "name": "idx:products",
        "prefix": "product",
        "storage_type": "json"
    },
    "fields": [
        # Full-text searchable fields
        {"name": "title", "type": "text"},
        {"name": "description", "type": "text"},
        # Numeric range filtering
        {"name": "price", "type": "numeric"},
        # Exact-match tag fields
        {"name": "product_category", "type": "tag"},
        # The attribute key was previously misspelled as "seperator", so the
        # separator setting was never passed through as a tag option.
        {"name": "product_subcategories", "type": "tag", "attrs": {"separator": ","}},
        # Geo field; the loading cell stores it as a comma-joined coordinate string.
        {"name": "geo", "type": "geo"},
        {"name": "SKU", "type": "tag"},
        {"name": "vendor_code", "type": "tag"},
        # Nested JSON attributes addressed with dotted names
        {"name": "address.city", "type": "tag"},
        {"name": "address.state", "type": "tag"},
        # Vector field holding the description embedding; `dims` must equal the
        # length of the vectors produced by the embedding model.
        {
            "name": "embedding",
            "type": "vector",
            "attrs": {
                "algorithm": "flat",
                "datatype": "float32",
                "dims": 1536,
                "distance_metric": "cosine"
            }
        }
    ]
}

index = SearchIndex.from_dict(schema)
index.set_client(r)
# overwrite=True replaces an existing index of the same name, so the cell can be re-run.
index.create(overwrite=True)

In [None]:
# Read the raw catalogue. The encoding is explicit so the result does not depend
# on the platform's default locale.
with open('products.json', "r", encoding="utf-8") as file:
    data = json.load(file)

# Each top-level entry wraps the actual record under the "product" key.
transformed_data = [item["product"] for item in data]

# Embed all descriptions through the vectorizer's batch API instead of issuing
# one request per product.
description_embeddings = oai.embed_many(
    [product['description'] for product in transformed_data]
)

# Build the documents to index as new dicts so the records in `transformed_data`
# are left untouched:
#   - `embedding`: vector for the product description
#   - `geo`: coordinate list flattened to a comma-joined string
to_load = [
    {
        **product,
        'embedding': embedding,
        'geo': ",".join(map(str, product['geo'])),
    }
    for product, embedding in zip(transformed_data, description_embeddings)
]

In [None]:
# Write the prepared documents into the index; keep whatever `load` returns in `keys`.
keys = index.load(to_load)

# Category Search

In [None]:
# Equivalent CLI command:
# FT.SEARCH idx:products "@product_category:{Electronics}" RETURN 2 title product_subcategories

idx_products = r.ft("idx:products")

# Tag filter on product_category. return_fields mirrors the RETURN clause above,
# so the printed result is not flooded with the full JSON document (which
# includes the embedding vector).
category_query = Query("@product_category:{Electronics}").return_fields(
    "title", "product_subcategories"
)
category_search_results = idx_products.search(query=category_query)
print(category_search_results)

# Full Text Search Match

In [None]:
# Equivalent CLI command:
# FT.SEARCH idx:products "VacayTravel" RETURN 2 title description

idx_products = r.ft("idx:products")

# Full-text term match. return_fields mirrors the RETURN clause above, so only
# the title and description are returned instead of the whole JSON document.
full_text_query = Query("VacayTravel").return_fields("title", "description")
full_text_search_results = idx_products.search(query=full_text_query)
print(full_text_search_results)

# Fuzzy Match

In [None]:
# Equivalent CLI command:
# FT.SEARCH idx:products "%KoolPro%" RETURN 2 title description

idx_products = r.ft("idx:products")

# Wrapping the term in % requests a fuzzy match. return_fields mirrors the RETURN
# clause above, so only the title and description are returned.
fuzzy_query = Query("%KoolPro%").return_fields("title", "description")
fuzzy_search_results = idx_products.search(query=fuzzy_query)
print(fuzzy_search_results)

# Geo based queries

In [None]:
# Equivalent CLI command:
# FT.SEARCH idx:products "@geo:[12.964175 77.601944 15 km]" RETURN 2 title address.city
#
# NOTE(review): RediSearch geo filters are written as [lon lat radius unit].
# Confirm that the coordinate order used here matches the order of the `geo`
# values in products.json.

idx_products = r.ft("idx:products")

# Radius filter on the indexed `geo` field. return_fields mirrors the RETURN
# clause above, so only the title and city are returned.
geo_query = Query("@geo:[12.964175 77.601944 15 km]").return_fields(
    "title", "address.city"
)
geo_search_results = idx_products.search(query=geo_query)
print(geo_search_results)

In [None]:
# Reference only (redis-cli syntax): this notebook does not execute the aggregation below.
# FT.AGGREGATE idx:products "@geo:[12.964175 77.601944 15 km]"
#     GROUPBY 1 @address.city
#     REDUCE COUNT 0 as Count

# Vector Similarity Match

In [None]:
from redisvl.query import RangeQuery

# Embed the search phrase with the same OpenAI vectorizer (`oai`) that produced
# the product description embeddings.
query_embedding = oai.embed("Gym equipment for training")

# Range query against the `embedding` field: at most 3 results, each within a
# distance of 0.2 from the query vector.
query = RangeQuery(
    vector_field_name="embedding",
    vector=query_embedding,
    distance_threshold=0.2,
    num_results=3,
    return_fields=["title", "description"],
)

results = index.query(query)

# Print each match's title, description and distance, followed by a blank line.
for match in results:
    for field in ("title", "description", "vector_distance"):
        print(match[field])
    print("")