In [1]:
# Read every column of the latest-news table into a Spark DataFrame and
# render it so the input can be inspected before any processing.
latest_news_query = "SELECT * FROM bing_lake_db.tbl_latest_news"
df = spark.sql(latest_news_query)
display(df)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 3, Finished, Available)

SynapseWidget(Synapse.DataFrame, 9d147e28-5aa3-44be-926c-dd11854b04ca)

#### **Using SynapseML**

In [2]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText


StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 4, Finished, Available)

#### **Create the AnalyzeText model and configure the input, output and error columns**

In [3]:
# Build the AnalyzeText transformer one setting at a time.
# Each setter's return value is re-bound to `model`, mirroring the fluent chain.
model = AnalyzeText()
model = model.setTextCol("description")      # column whose text is analysed
model = model.setKind("SentimentAnalysis")   # analysis kind requested
model = model.setOutputCol("response")       # column that receives the result
model = model.setErrorCol("error")           # column that receives any error


StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 5, Finished, Available)

#### **Apply the model to our dataframe**

In [4]:
# Run the configured AnalyzeText model over the news DataFrame. The model was
# set up to read "description" and to write to the "response" and "error" columns.
result = model.transform(df)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 6, Finished, Available)

In [5]:
# Render the transformed DataFrame to inspect the model output.
display(result)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 7, Finished, Available)

SynapseWidget(Synapse.DataFrame, bf33559c-369c-4c12-a186-76fdc12b7d22)

In [6]:
from pyspark.sql.functions import col

# Promote the nested sentiment field of the model response to its own
# top-level "sentiment" column; all existing columns are kept.
sentiment_field_path = "response.documents.sentiment"
sentiment_column = col(sentiment_field_path)
sentiment_df = result.withColumn("sentiment", sentiment_column)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 8, Finished, Available)

In [7]:
# Render the DataFrame to check the newly added "sentiment" column.
display(sentiment_df)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 9, Finished, Available)

SynapseWidget(Synapse.DataFrame, 551ffe89-a89b-46be-aa32-109176aa83eb)

In [8]:
# The raw model output columns are no longer needed once "sentiment" has been
# extracted, so remove them before persisting.
model_output_columns = ("error", "response")
sentiment_df_final = sentiment_df.drop(*model_output_columns)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 10, Finished, Available)

In [9]:
# Render the final DataFrame (model output columns removed) before writing it.
display(sentiment_df_final)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 11, Finished, Available)

SynapseWidget(Synapse.DataFrame, 3d64e58c-9eff-4eec-99c7-bed6eb3538fc)

#### **Create the table from the DataFrame, or incrementally load (MERGE) into it if it already exists**

In [10]:
from pyspark.sql.utils import AnalysisException

# Incremental load of the sentiment results into a Delta table.
#   * First run: the table does not exist, so saveAsTable creates it.
#   * Later runs: saveAsTable fails because the table already exists, and the
#     data is upserted with MERGE, keyed on `url`.
table_name = 'bing_lake_db.tbl_sentiment_analysis'

try:
    sentiment_df_final.write.format("delta").saveAsTable(table_name)

except AnalysisException:
    # NOTE(review): AnalysisException is not specific to "table exists"; other
    # analysis errors (e.g. a schema problem) also land here and will then
    # surface from the MERGE below instead.
    print("Table Already Exists")

    sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")

    # Update a matched row only when at least one non-key column differs.
    # The previous predicate OR-ed equality checks (including url = url, which
    # is always true for a matched row), so every matched row was rewritten on
    # every run. `<=>` is null-safe equality, so a change to or from NULL is
    # also detected. `url` is the join key and cannot differ on a match, so it
    # is not part of the change check.
    spark.sql(f""" MERGE INTO {table_name} target_table
                   USING vw_sentiment_df_final source_view

                   ON source_view.url = target_table.url

                   WHEN MATCHED AND (
                       NOT (source_view.name <=> target_table.name) OR
                       NOT (source_view.category <=> target_table.category) OR
                       NOT (source_view.description <=> target_table.description) OR
                       NOT (source_view.image <=> target_table.image) OR
                       NOT (source_view.provider <=> target_table.provider) OR
                       NOT (source_view.datePublished <=> target_table.datePublished) OR
                       NOT (source_view.sentiment <=> target_table.sentiment)
                   )

                   THEN UPDATE SET *

                   WHEN NOT MATCHED THEN INSERT *

                   """)

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 12, Finished, Available)

In [11]:
%%sql

-- Preview the persisted sentiment table (at most 1000 rows).
SELECT * FROM bing_lake_db.tbl_sentiment_analysis LIMIT 1000

StatementMeta(, b452eddf-da38-464c-93e2-aaf9547a985b, 13, Finished, Available)

<Spark SQL result set with 15 rows and 8 fields>