**Read the latest news table records into a DataFrame**

In [1]:
# Query that selects every column of the latest-news table.
LATEST_NEWS_QUERY = "SELECT * FROM bing_news_db_latest.tbl_latest_news"

# Run the query through the notebook's Spark session and keep the result as `df`.
df = spark.sql(LATEST_NEWS_QUERY)


StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 3, Finished, Available)

In [2]:
import synapse.ml.core 
from synapse.ml.services import AnalyzeText

StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 4, Finished, Available)

In [3]:
# Configure the AnalyzeText transformer for sentiment analysis:
#   - input text is read from the `description` column
#   - the output column is named `response`
#   - the error column is named `error`
model = (
    AnalyzeText()
    .setKind("SentimentAnalysis")
    .setTextCol("description")
    .setOutputCol("response")
    .setErrorCol("error")
)


StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 5, Finished, Available)

In [4]:
# Apply the configured AnalyzeText model to the news DataFrame. The returned
# DataFrame is expected to carry the `response` and `error` columns named on
# the model, which the following cell reads and then drops.
result = model.transform(df)

StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 6, Finished, Available)

In [5]:
from pyspark.sql.functions import col

# Promote the nested `response.documents.sentiment` field to a top-level
# `sentiment` column.
# NOTE(review): the shape of `response.documents` (struct vs. array of structs)
# is not visible here, so `sentiment` may be an array — confirm against the schema.
sentiment_df = result.withColumn(
    "sentiment",
    col("response.documents.sentiment"),
)

# The service output and error columns are not carried into the final frame.
sentiment_df_final = sentiment_df.drop(
    "response",
    "error",
)

StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 7, Finished, Available)

In [6]:
from pyspark.sql.functions import col, to_date

# Replace `datePublished` with a date parsed using the `dd-MMM-yyyy` pattern.
sentiment_df_final = sentiment_df_final.withColumn(
    "datePublished",
    to_date("datePublished", "dd-MMM-yyyy"),
)

StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 8, Finished, Available)

**Type 1 merge (update changed rows in place, insert new rows)**

In [7]:
from pyspark.sql.utils import AnalysisException

# Type 1 upsert of the scored news rows into the sentiment Delta table.
#
# First run: the table does not exist yet, so `saveAsTable` creates it.
# Later runs: `saveAsTable` (default save mode) is rejected with an
# AnalysisException because the table already exists, and the rows are merged
# into it instead, keyed on `url`.
#
# Bound outside the `try` so the except-branch never depends on a name that
# was assigned inside the guarded block.
table_name = 'bing_news_db_latest.tbl_sentiment_analysis'

try:
    sentiment_df_final.write.format('delta').saveAsTable(table_name)

except AnalysisException:
    # NOTE(review): any AnalysisException lands here, not only "table already
    # exists"; other causes will surface again from the MERGE below.
    print("Table Already Exists")
    sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")

    # Change detection uses the null-safe equality operator (`<=>`).
    # A plain `<>` evaluates to NULL when either side is NULL, so a column
    # going from NULL to a value (or back) would never trigger the update.
    # The predicate is parenthesised so it reads as a single condition.
    spark.sql(f""" MERGE INTO {table_name} target_table
                   USING vw_sentiment_df_final source_vw
                   on source_vw.url = target_table.url
                   WHEN MATCHED AND (
                       NOT (source_vw.title <=> target_table.title) OR
                       NOT (source_vw.description <=> target_table.description) OR
                       NOT (source_vw.category <=> target_table.category) OR
                       NOT (source_vw.image <=> target_table.image) OR
                       NOT (source_vw.provider <=> target_table.provider) OR
                       NOT (source_vw.datePublished <=> target_table.datePublished)
                   )
                   THEN UPDATE SET *
                   WHEN NOT MATCHED THEN INSERT *
            """)


StatementMeta(, 51736fbd-1a29-46f8-b472-b2e81a5e6912, 9, Finished, Available)