# **Pimp My Data**

# 1. Get raw data

In [1]:
# Load every column of the raw hotel-review table into a Spark DataFrame and
# render it, so the starting point of the pipeline is visible in the notebook.
raw_reviews_query = "SELECT * FROM Raw.customer_feedback.hotel_reviews"
df = spark.sql(raw_reviews_query)
display(df)

StatementMeta(, 623ce5af-719b-4662-8e93-25949d04c8a0, 3, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, adb0d9cf-2d33-4301-8902-6f84d1ff4e45)

# 2. Translate reviews to English

In [2]:
import synapse.ml.core
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten, udf, lower, trim
from pyspark.sql.types import StringType

StatementMeta(, 623ce5af-719b-4662-8e93-25949d04c8a0, 4, Finished, Available, Finished)

In [3]:
# Translator stage: reads the original review text from `reviews_text`, targets
# English, writes the service response to `translation`, concurrency set to 5.
translate = (
    Translate()
    .setTextCol("reviews_text")
    .setToLanguage("en")
    .setOutputCol("translation")
    .setConcurrency(5)
)

# Run the translator, flatten the nested `translation.translations` output into
# `translation_result`, and keep its first translated string per row as
# `reviews_text_en`. The intermediate frame is cached before it is trimmed below.
first_translated_text = col("translation_result.text")[0]
df_en = (
    translate.transform(df)
    .withColumn("translation_result", flatten(col("translation.translations")))
    .withColumn("reviews_text_en", first_translated_text)
    .cache()
)

# Keep the first six columns plus the English text, dropping the helper columns.
# NOTE(review): this relies on the six source columns coming first in the
# transformed frame -- confirm against the raw table's schema.
kept_columns = df_en.columns[:6] + ["reviews_text_en"]
df_en = df_en.select(kept_columns)

# Show only the rows whose English text differs from the original text.
text_was_changed = col("reviews_text_en") != col("reviews_text")
display(df_en.filter(text_was_changed))

StatementMeta(, 623ce5af-719b-4662-8e93-25949d04c8a0, 5, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 516144db-0980-49d5-8d8b-4011459179a7)

# 3. Detect sentiment and write augmented data

In [4]:
%%sql
-- Remove any existing copy of the augmented table so the saveAsTable call
-- later in this notebook can recreate it from scratch.
DROP TABLE IF EXISTS Augmented.customer_feedback.hotel_reviews;

StatementMeta(, 623ce5af-719b-4662-8e93-25949d04c8a0, 6, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [5]:
from pyspark.sql.functions import lit
from synapse.ml.services import AnalyzeText

# Sentiment-analysis stage. It scores the English translation
# (`reviews_text_en`) rather than the original-language text, so the
# translation step actually feeds the sentiment model and every review is
# analysed in one consistent language.
model = (
    AnalyzeText()
    .setTextCol("reviews_text_en")
    .setKind("SentimentAnalysis")
    .setOutputCol("response")
)

# Run the model and lift the fields we care about out of the nested service
# response into flat columns: the sentiment label plus the positive and
# negative confidence scores. Cached because the result is both displayed and
# written below.
result = (
    model.transform(df_en)
    .withColumn("documents", col("response.documents"))
    .withColumn("sentiment", col("documents.sentiment"))
    .withColumn("confidence_positive", col("documents.confidenceScores.positive"))
    .withColumn("confidence_negative", col("documents.confidenceScores.negative"))
    .cache()
)

# Preview the rating, both text variants, the sentiment label and the raw
# service response side by side for a quick sanity check.
display(result.select("reviews_rating", "reviews_text", "reviews_text_en", "sentiment", "response"))

# Final column set persisted to the augmented table.
df_final = result.select(
    "city",
    "latitude",
    "longitude",
    "name",
    "reviews_rating",
    "reviews_text",
    "reviews_text_en",
    "sentiment",
    "confidence_positive",
    "confidence_negative",
)

# Use the writer's save mode to replace existing data. The previous
# `.option("overwrite", "true")` is not a save mode, so the write kept the
# default error-if-exists behaviour and failed whenever the table was present.
df_final.write.mode("overwrite").saveAsTable("Augmented.customer_feedback.hotel_reviews")

StatementMeta(, 623ce5af-719b-4662-8e93-25949d04c8a0, 7, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, db427945-439c-4a63-b96d-7354b816f8b5)