### Inspect the original customer reviews


In [2]:
%%sql
-- List every translated review together with its product, grouped visually by product id
SELECT
    product_id,
    product_title,
    text_translated
FROM web_reviews
ORDER BY product_id

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 3, Finished, Available)

<Spark SQL result set with 143 rows and 3 fields>

### Fetch Azure OpenAI Service Key from Key Vault

In [3]:
# Retrieve the Azure OpenAI key from Azure Key Vault
from trident_token_library_wrapper import PyTridentTokenLibrary as tl

key_vault_name = 'designmind-fabric-ai'
key_name = "OPEN-AI-SERVICES-KEY"

# Request a Key Vault access token through mssparkutils
access_token = mssparkutils.credentials.getToken("keyvault")

# Exchange the token for the secret stored under key_name in that vault
key_vault_url = f"https://{key_vault_name}.vault.azure.net/"
openai_services_key = tl.get_secret_with_token(key_vault_url, key_name, access_token)

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 5, Finished, Available)

In [4]:
# Confirm that a secret was fetched WITHOUT echoing it: cell outputs are saved
# and shared together with the notebook, so the key itself must never be printed.
print("Azure OpenAI key retrieved:", bool(openai_services_key))


StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 6, Finished, Available)

[REDACTED]


### Import OpenAI Dependencies

In [5]:
# Import Dependencies & set OpenAI Service Parameters
import os
import openai
# Configure the openai client for Azure: authenticate with the key held in
# openai_services_key, then set the API flavour, resource endpoint and
# REST API version.
openai.api_key = openai_services_key
openai.api_type = "azure"
openai.api_base = "https://rhk-demo-openai.openai.azure.com/"
openai.api_version = "2023-05-15"

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 7, Finished, Available)

### Iterate through reviews, generating a summary by product

In [6]:
# Define the structure of the Fabric Table
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType

schema = StructType([
    StructField("product_id", StringType(), True),
    StructField("product_title", StringType(), True),
    StructField("text", StringType(), True),
])

# Rows collected for the output table: (product_id, product_title, summary)
output_rows = []

# One row per product; MAX() collapses multiple titles into a single one per product_id
df_products = spark.sql(
    "SELECT product_id, MAX(product_title) as product_title "
    "FROM web_reviews GROUP BY product_id"
).toPandas()

# Load all reviews with ONE query instead of one query per product. This avoids
# interpolating product_id into SQL text (which breaks on ids containing a quote)
# and avoids launching a Spark job per product. NULL review texts are filtered
# out because they cannot be concatenated into the prompt.
df_reviews = spark.sql(
    "SELECT product_id, text_translated FROM web_reviews "
    "WHERE text_translated IS NOT NULL"
).toPandas()

# Bucket the review texts by product_id
reviews_by_product = {}
for review in df_reviews.itertuples(index=False):
    reviews_by_product.setdefault(review.product_id, []).append(review.text_translated)

# For each product, build a prompt asking the model to summarize all of its reviews
for product in df_products.itertuples(index=False):
    product_id = product.product_id
    product_title = product.product_title
    review_texts = reviews_by_product.get(product_id, [])

    if not review_texts:
        # Nothing to summarize: keep the product in the output with a NULL summary
        # rather than asking the model to summarize an empty list of reviews.
        output_rows.append((product_id, product_title, None))
        continue

    # Header (a NULL title is rendered as an empty string) followed by one
    # "Review:" section per review text.
    prompt_parts = [
        "This product has been reviewed by customers:\n"
        f"{product_title or ''}"
        ".\n Please very briefly summarize these reviews in a paragraph of 30 words or less.\n"
    ]
    prompt_parts.extend(f"Review:\n{text}\n" for text in review_texts)
    prompt = "".join(prompt_parts)

    response = openai.ChatCompletion.create(
        engine="gpt-35-turbo-16k",
        messages=[
            {"role": "system", "content": "Assistant is a large language model trained by OpenAI."},
            {"role": "user", "content": prompt},
        ],
    )

    # Keep only the text of the first returned choice as the product summary
    output_rows.append((product_id, product_title, response.choices[0].message.content))


StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 8, Finished, Available)

### Save to Delta table in Data Lake

In [7]:
# Turn the collected output_rows into a Spark DataFrame shaped by `schema`
# and preview it; nothing is written to the Lakehouse in this cell.
df = spark.createDataFrame(output_rows, schema=schema)
display(df)

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 9, Finished, Available)

SynapseWidget(Synapse.DataFrame, e7e3a4af-9c9b-4704-aebe-0060131e82cf)

In [8]:
# Save the DataFrame as the Delta table product_review_summaries.
# Mode "overwrite" replaces any existing table contents (it does not append).
df.write.format("delta").mode("overwrite").saveAsTable("product_review_summaries")

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 10, Finished, Available)

### Review the summary table

In [9]:
%%sql
-- Show every row of the generated summary table
SELECT *
FROM product_review_summaries

StatementMeta(, fcfe3b94-84c0-491b-906e-14517c923462, 11, Finished, Available)

<Spark SQL result set with 9 rows and 3 fields>

In [3]:
%%sql
-- List the translated reviews; ORDER BY 1 refers to the first selected column (product_title)
SELECT
    product_title,
    text_translated
FROM web_reviews
ORDER BY 1

StatementMeta(, 6e70b624-8eaf-48db-9705-f2e5f516050a, 4, Finished, Available)

<Spark SQL result set with 143 rows and 2 fields>