In [0]:
pip install -U openai

Note: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.
Collecting openai
  Downloading openai-1.54.3-py3-none-any.whl (389 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 389.6/389.6 kB 5.6 MB/s eta 0:00:00
Collecting httpx<1,>=0.23.0
  Downloading httpx-0.27.2-py3-none-any.whl (76 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76.4/76.4 kB 6.4 MB/s eta 0:00:00
Collecting jiter<1,>=0.4.0
  Downloading jiter-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (327 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 327.5/327.5 kB 8.8 MB/s eta 0:00:00
Collecting typing-extensions<5,>=4.11
  Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Collecting httpcore==1.*
  Downloading httpcore-1.0.6-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.0/78.0 kB 8.3 MB/s eta 0:00:00
Collecting h11<0.15,>=0.13
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [0]:
# Restart the Python process so the packages installed above are picked up
# (the pip output itself recommends this step).
dbutils.library.restartPython()

In [0]:
from openai import OpenAI

In [0]:
#### CLASSIFY NEGATIVE REVIEWS BY ISSUE TYPE
# API token taken from the current notebook context.
# Outside a Databricks notebook a personal access token can be used instead
# (e.g. read from the DATABRICKS_TOKEN environment variable); see
# https://docs.databricks.com/en/dev-tools/auth/pat.html
DATABRICKS_TOKEN = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()

# Issue categories offered to the model in the classification prompt. The same
# names are reused verbatim as the indicator column names created later on.
negative_review_classes = [
    "Poor_Quality",
    "Poor_Customer_Experience",
    "Camera_Malfunction",
    "Product_Not_As_Described",
    "Shipping_Issues",
    "Design_Or_Build_Issues",
    "Tripod_Issues",
    "Battery_Life_Issues",
    "Charging_Issues",
    "App_Or_Software_Issues",
    "Accessories_Issues",
    "Incompatibility_Issues",
    "Missing_Items",
    "Refund_Or_Return_Issues",
    "Packaging_Issues",
    "Other_Issues",
]
def classify_negative_review(review):
  """Classify one negative review into issue categories via a served chat model.

  The review is sent to the chat-completions endpoint configured below together
  with a system prompt listing ``negative_review_classes``; the prompt asks the
  model to answer in the form ``class1|class2|class3``.

  Args:
    review: Review text to classify. ``None`` or blank text is not sent to the
      model.

  Returns:
    The model's reply as a ``|``-delimited string with whitespace trimmed
    around each label, or ``None`` when the input is missing/blank or the
    reply carries no content.
  """
  # Nothing to classify: skip the remote call instead of sending "None" or an
  # empty message to the model.
  if review is None or not str(review).strip():
    return None

  # DATABRICKS_TOKEN is assigned at notebook level.
  # How to get your Databricks token: https://docs.databricks.com/en/dev-tools/auth/pat.html
  client = OpenAI(
    api_key=DATABRICKS_TOKEN,
    base_url="<link-to-the-databricks-model-serving-endpoint>"
  )

  # NOTE(review): the prompt says "one of the following categories" while the
  # requested format allows several classes -- confirm which is intended.
  chat_completion = client.chat.completions.create(
    messages=[
    {
      "role": "system",
      "content": f'Classify the given negative review into one of the following categories: {negative_review_classes}. Your response must strictly be of the format: class1|class2|class3. Do not change or modify the class names. Here is the review: '
    },
    {
      "role": "user",
      "content": f"{review}"
    }
    ],
    model="databricks-meta-llama-3-1-70b-instruct",
    max_tokens=600
  )

  reply = chat_completion.choices[0].message.content
  if reply is None:
    return None
  # Trim whitespace around each label so exact matching against the class
  # names is not defeated by stray spaces or a trailing newline.
  return "|".join(label.strip() for label in reply.split("|"))

In [0]:
from pyspark.sql.functions import udf, split, col
from pyspark.sql.types import StringType, IntegerType

# Register the classify_negative_review function as a UDF that returns a string
classify_negative_review_udf = udf(classify_negative_review, StringType())

In [0]:
# Load the camera reviews table that already carries sentiment-analysis results.
df_sentiment = spark.read.table("harshit_rai_genai_demo.products.camera_reviews_with_sentiment_analysis")

In [0]:
# Keep only reviews rated 3 or lower; these are treated as the negative reviews.
df_negative = df_sentiment.filter(df_sentiment.rating <= 3)

In [0]:
# Narrow the negative reviews down to the columns carried into classification.
classification_columns = ["review_id", "translated_review", "rating", "sentiment", "summary"]
df_classification = df_negative.select(*classification_columns)

In [0]:
from pyspark.sql.functions import col

# Apply the classification UDF to the "summary" column and store the model's
# reply as text in a new "issue_type" column.
df_classification = df_classification.withColumn("issue_type", classify_negative_review_udf(col("summary")))

In [0]:
from pyspark.sql.functions import split, when, col, array_contains
# split() takes a regex, so the pipe must be escaped; a raw string keeps the
# backslash literal instead of relying on an invalid Python escape sequence.
split_col = split(col("issue_type"), r"\|")
# One 0/1 indicator column per class: 1 when the class name appears among the
# pipe-separated labels in "issue_type", otherwise 0.
for cls in negative_review_classes:
    df_classification = df_classification.withColumn(cls, when(array_contains(split_col, cls), 1).otherwise(0))

In [0]:
# Persist the classified reviews as a table; mode 'overwrite' replaces any existing contents.
df_classification.write.mode('overwrite').saveAsTable("harshit_rai_genai_demo.products.issue_type_classification")