In [1]:
pip install weaviate-client

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Walkthrough this notebook follows:
# https://medium.com/@iamleonie/recreating-amazons-new-generative-ai-feature-product-review-summaries-50640e40872a

import pandas as pd

# App whose reviews are summarized in the rest of the notebook.
product_id = "avada-seo-suite"

reviews_raw = pd.read_json("avada-reviews.json")

# Keep only rows that have review text and belong to the selected app.
has_text = reviews_raw["reviewContent"].notna()
is_selected_app = reviews_raw["app"] == product_id

# Newest reviews first.
df = reviews_raw.loc[has_text & is_selected_app].sort_values(
    by="reviewDate",
    ascending=False,
)

print(df.shape)

(6293, 6)


In [3]:
import os

import weaviate

# Secrets must never live in the notebook source. They are read from the
# environment instead:
#   WEAVIATE_API_KEY - API key of the Weaviate instance
#   OPENAI_API_KEY   - OpenAI key forwarded to Weaviate's OpenAI modules
#   WEAVIATE_URL     - (optional) cluster URL; falls back to the original cluster
# NOTE(review): the keys that were previously hardcoded in this cell have been
# exposed and should be revoked/rotated.
_missing = [name for name in ("WEAVIATE_API_KEY", "OPENAI_API_KEY") if not os.environ.get(name)]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
    )

auth_config = weaviate.AuthApiKey(api_key=os.environ["WEAVIATE_API_KEY"])

# Instantiate the client
client = weaviate.Client(
    url=os.environ.get(
        "WEAVIATE_URL",
        "https://thomas-weaviate-learn-16v2jei3.weaviate.network",
    ),
    auth_client_secret=auth_config,
    additional_headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
    },
)

print(client.is_ready())

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


True


In [4]:
# Start from a clean slate: drop both classes used by this notebook if present.
for existing_class in ("Reviews", "Products"):
    if client.schema.exists(existing_class):
        client.schema.delete_class(existing_class)

# All three properties are plain text. No per-property moduleConfig is set here;
# a "text2vec-openai" block with "skip": True and "vectorizePropertyName": False
# could be added to product_id / reviewer_id to exclude them from vectorization.
text_properties = [
    {"name": property_name, "dataType": ["text"]}
    for property_name in ("review_text", "product_id", "reviewer_id")
]

# Module settings: embedding model for vectorization, chat model for generation.
module_settings = {
    "text2vec-openai": {
        "vectorizeClassName": False,
        "model": "ada",
        "modelVersion": "002",
        "type": "text",
    },
    "generative-openai": {
        "model": "gpt-3.5-turbo",
    },
}

class_obj = {
    "class": "Reviews",
    "properties": text_properties,
    "vectorizer": "text2vec-openai",
    "moduleConfig": module_settings,
}

client.schema.create_class(class_obj)

In [5]:

from weaviate.util import generate_uuid5

# Configure batching up front. Calling `client.batch(...)` directly is deprecated
# (the client emits a warning asking for `client.batch.configure()` instead).
client.batch.configure(
    batch_size=100,  # objects sent per request
    num_workers=2,   # parallel import workers
)

# Entering the context manager flushes any remaining objects on exit.
with client.batch as batch:
    for _, row in df.iterrows():
        review_item = {
            "review_text": row.reviewContent,
            "product_id": row.app,
            "reviewer_id": row.id,
        }
        batch.add_data_object(
            review_item,
            class_name="Reviews",
            # UUID derived from the object's content, so re-importing the same
            # review maps to the same id.
            uuid=generate_uuid5(review_item),
        )

            Please instead use the `client.batch.configure()` method to configure your batch and `client.batch` to enter the context manager.
            See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [6]:
# import json
# 
# generate_prompt = """
# Summarize these customer reviews into a one-paragraph long overall review: 
# {review_text}
# """
# 
# res = client.query.get("Reviews", 
#                       ["review_text", "product_id"])\
#                   .with_additional(["id", "vector"])\
#                     .with_where({
#                     "path": ["product_id"],
#                     "operator": "Equal",
#                     "valueText": product_id
#                 })\
#                 .with_generate(grouped_task=generate_prompt)\
#                   .do()
# 
# print(json.dumps(res, indent=4))

In [7]:
import json

# Prompt handed to the generative module as a single "grouped" task over all
# retrieved reviews.
generate_prompt = """
Summarize these customer reviews into a one-paragraph long overall review with around 70 words:

With format like: Merchants appreciate this app for ...
{review_text}
"""

response = (
    client.query
    .get("Reviews", ["review_text", "product_id"])
    .with_where({
        "path": ["product_id"],
        "operator": "Equal",
        "valueText": product_id,
    })
    .with_limit(50)  # cap on how many reviews are fed into the summary
    .with_generate(grouped_task=generate_prompt)
    .do()
)

# Fail loudly instead of hitting an opaque KeyError/IndexError below.
if "errors" in response:
    raise RuntimeError(f"Weaviate query failed: {response['errors']}")

summary = response["data"]["Get"]["Reviews"]
if not summary:
    raise ValueError(f"No reviews found for product_id={product_id!r}")

# The grouped result is attached to the first returned object.
generated = summary[0]["_additional"]["generate"]
if generated.get("error"):
    # Without this check a failed generation would be stored as the summary later.
    raise RuntimeError(f"Generative module returned an error: {generated['error']}")

print(json.dumps(generated, indent=2))

{
  "error": null,
  "groupedResult": "Merchants appreciate this app for its exceptional customer support, with reviewers highlighting the quick and efficient assistance provided by team members like Emily, Liz, Hana, Esther, and Tony. Customers have found the app to be a time saver in optimizing SEO and website speed, with positive experiences in resolving issues and improving overall performance. Overall, the app and its support team receive high praise for their effectiveness and professionalism."
}


In [8]:
# Pull the generated paragraph out of the query result and pair it with its product.
grouped_result = summary[0]["_additional"]["generate"]["groupedResult"]
new_review_summary = {"product_id": product_id, "summary": grouped_result}

# The UUID is derived from the object's content.
summary_uuid = generate_uuid5(new_review_summary)

# Store the summary as an object of the "Products" class; the call's return
# value is shown as the cell output.
client.data_object.create(
    data_object=new_review_summary,
    class_name="Products",
    uuid=summary_uuid,
)

'a1115063-c75f-552e-893d-16e36b0b49ca'

In [9]:
import json

# Read back the stored summary for the selected product.
product_filter = {
    "path": ["product_id"],
    "operator": "Equal",
    "valueText": product_id,
}

products_query = client.query.get("Products", ["product_id", "summary"])
res = products_query.with_where(product_filter).do()

print(json.dumps(res, indent=2))

{
  "data": {
    "Get": {
      "Products": [
        {
          "product_id": "avada-seo-suite",
          "summary": "Merchants appreciate this app for its exceptional customer support, with reviewers highlighting the quick and efficient assistance provided by team members like Emily, Liz, Hana, Esther, and Tony. Customers have found the app to be a time saver in optimizing SEO and website speed, with positive experiences in resolving issues and improving overall performance. Overall, the app and its support team receive high praise for their effectiveness and professionalism."
        }
      ]
    }
  }
}
