In [0]:
pip install -U openai

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting openai
  Downloading openai-1.54.3-py3-none-any.whl (389 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 389.6/389.6 kB 5.1 MB/s eta 0:00:00
Collecting httpx<1,>=0.23.0
  Downloading httpx-0.27.2-py3-none-any.whl (76 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76.4/76.4 kB 7.8 MB/s eta 0:00:00
Collecting jiter<1,>=0.4.0
  Downloading jiter-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (327 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 327.5/327.5 kB 8.2 MB/s eta 0:00:00
Collecting typing-extensions<5,>=4.11
  Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Collecting httpcore==1.*
  Downloading httpcore-1.0.6-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.0/78.0 kB 8.0 MB/s eta 0:00:00
Collecting h11<0.15,>=0.13
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [0]:
# Restart the Python process so the openai package upgraded by the install
# cell is the version picked up by the import that follows.
dbutils.library.restartPython()

In [0]:
from openai import OpenAI

In [0]:
#### PERFORM TEXT TRANSLATION -- settings
# API token of the running notebook session, pulled from the notebook context.
DATABRICKS_TOKEN = (
  dbutils.notebook.entry_point
  .getDbutils()
  .notebook()
  .getContext()
  .apiToken()
  .get()
)

# The endpoint base URL and the model name are supplied through notebook widgets.
serving_endpoint_url = dbutils.widgets.get("serving_endpoint_url")
t2t_model_name = dbutils.widgets.get("t2t_model_name")

def translate_review(review):
  """Translate one customer review into English through the serving endpoint.

  The review is sent as the user message of a chat completion, preceded by a
  system prompt that tells the model to answer with the English text only.
  The request goes to ``serving_endpoint_url`` using the model named by
  ``t2t_model_name`` and is authenticated with ``DATABRICKS_TOKEN`` (all three
  are module-level values).

  Args:
    review: The review text. Any non-string value is rendered with the
      default string formatting before being sent.

  Returns:
    The message content of the first choice in the model's reply. When
    ``review`` is ``None`` or contains only whitespace it is returned
    unchanged and no request is made.
  """
  # Nothing to translate: without this guard a null row would be sent to the
  # model as the literal text "None", and a blank row would still cost a call.
  if review is None or not str(review).strip():
    return review

  # DATABRICKS_TOKEN comes from the notebook context. Outside a notebook a
  # personal access token can be used instead:
  # https://docs.databricks.com/en/dev-tools/auth/pat.html
  # The client is created on every call; only the plain-string token and URL
  # are read from module scope.
  client = OpenAI(
    api_key=DATABRICKS_TOKEN,
    base_url=serving_endpoint_url
  )

  chat_completion = client.chat.completions.create(
    messages=[
    {
      "role": "system",
      "content": "I sell cameras all around the world. My customers have left some feedback. These feedbacks are in different languages. Translate the feedbacks into English. Your response should be of the following format: {review}. If the review is in English, just return the review as it is. Do not say anything else other than what you're asked for. Absolutely do not say anything else."
    },
    {
      "role": "user",
      "content": f"{review}"
    }
    ],
    model=t2t_model_name,
    max_tokens=600  # upper bound on the length of the generated translation
  )
  # Only the first returned choice is used.
  return chat_completion.choices[0].message.content

In [0]:
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

# Wrap translate_review as a Spark UDF that yields a string column
translate_udf = udf(f=translate_review, returnType=StringType())

In [0]:
# Load the two feedback tables (text reviews and, judging by the table name,
# speech-to-text reviews -- confirm against the catalog)
df_text_feedback = spark.table("genai_demo.products.camera_reviews_multilingual")
df_s2t_feedback = spark.table("genai_demo.products.camera_reviews_s2t")

In [0]:
# Build a single "review" column: title and body joined by one space
from pyspark.sql.functions import concat_ws, col
review_parts = [col("review_title"), col("review_body")]
df_text_feedback = df_text_feedback.withColumn("review", concat_ws(" ", *review_parts))

In [0]:
# Reduce both sources to the same two columns, in the same order, then stack
# them into one set of reviews
shared_columns = ["review_id", "review"]
df_text_feedback = df_text_feedback.select(*shared_columns)
df_s2t_feedback = df_s2t_feedback.select(*shared_columns)
df_feedback = df_text_feedback.union(df_s2t_feedback)

In [0]:
# Add the translated text next to each original review, then persist the
# result, overwriting the target table
review_column = df_feedback["review"]
df_feedback = df_feedback.withColumn("translated_review", translate_udf(review_column))
(
  df_feedback.write
  .mode("overwrite")
  .saveAsTable("genai_demo.products.camera_all_reviews_translated")
)