### Read Input Files from Amazon S3 Bucket

In [8]:
# Load every JSON file under the Lakehouse folder "Files/S3_Reviews/".
# The "multiline" option allows a single JSON record to span several lines.
reviews_reader = spark.read.option("multiline", "true")
df = reviews_reader.json("Files/S3_Reviews/*")
display(df)

StatementMeta(, 29082295-6aed-4445-8189-6d6db62ec65b, 10, Finished, Available)

SynapseWidget(Synapse.DataFrame, cf509d4e-b484-4ac5-8f8b-30488218a22c)

### Translate review text, append to data frame

In [9]:
# Translate Review Title and Review Text, append to dataframe
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten, udf, lower, trim

# Remember which columns exist before translation so the final projection does
# not depend on a hard-coded column count. "text_translated" is excluded so that
# re-running this cell on its own output does not select that column twice.
source_columns = [c for c in df.columns if c != "text_translated"]

# Translator stage: reads the "text" column, targets language code "en", and
# writes the service response into a new "translation" column.
# NOTE(review): setConcurrency(5) presumably caps parallel service requests —
# confirm against the SynapseML documentation.
translate_review = (Translate()
    .setTextCol("text")
    .setToLanguage("en")
    .setOutputCol("translation")
    .setConcurrency(5))

# Flatten the nested "translation.translations" arrays into one array, then take
# the "text" of its first element as the translated string. The result is cached
# so later actions on it can reuse the computed translations.
df = translate_review.transform(df)\
        .withColumn("translation_result", flatten(col("translation.translations")))\
        .withColumn("text_translated", col("translation_result.text")[0])\
        .cache()

# Keep the original input columns plus the translated text; the intermediate
# "translation" / "translation_result" columns are dropped.
df = df.select(source_columns + ["text_translated"])


StatementMeta(, 29082295-6aed-4445-8189-6d6db62ec65b, 11, Finished, Available)

### Create a Spark DataFrame with only input/output text

In [10]:
# Keep only the source text and its translation for a side-by-side comparison.
comparison_columns = ['text', 'text_translated']
df_translated = df.select(*comparison_columns)
display(df_translated)

StatementMeta(, 29082295-6aed-4445-8189-6d6db62ec65b, 12, Finished, Available)

SynapseWidget(Synapse.DataFrame, 11d49860-2818-44f1-91b7-7a4d787775b3)

### Save to Delta table in Data Lake

In [11]:
# Persist the comparison frame as the Delta table "translated_reviews",
# replacing any existing contents of that table.
(
    df_translated.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("translated_reviews")
)

StatementMeta(, 29082295-6aed-4445-8189-6d6db62ec65b, 13, Finished, Available)

### Review Table

In [13]:
%%sql
-- Show every row and column of the saved table to verify the write.
SELECT
    *
FROM translated_reviews;

StatementMeta(, 29082295-6aed-4445-8189-6d6db62ec65b, 15, Finished, Available)

<Spark SQL result set with 5 rows and 2 fields>