### Step 1: Read the data from the table 'tbl_latest_news' in the database.

In [None]:
# Load every row of the existing `bing_lake_db.tbl_latest_news` table into the
# Spark DataFrame `df` (presumably the news articles prepared by an earlier
# processing step), then render it for a quick visual check.
df = spark.sql(
    "SELECT * FROM bing_lake_db.tbl_latest_news"
)
display(df)

StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 9, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, b38cd165-baeb-4f8f-96ec-4f9081432cf1)

### Step 2: Import necessary libraries for sentiment analysis.

In [None]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText
# Import `AnalyzeText` from the Synapse ML library to perform sentiment analysis. This model analyzes text for sentiment.


StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 10, Finished, Available, Finished)

### Step 3: Configure the sentiment analysis model.

In [None]:
# Configure the AnalyzeText transformer. Nothing is sent for analysis here;
# the work happens when `model.transform(...)` is called on a DataFrame.
model = (
    AnalyzeText()
    # Task to run: sentiment analysis.
    .setKind("SentimentAnalysis")
    # Input column holding the text to analyze (the article description).
    .setTextCol("description")
    # Column that receives the analysis result for each row.
    .setOutputCol("response")
    # Column that receives any per-row processing error.
    .setErrorCol("error")
)

### Step 4: Apply the sentiment analysis model to the DataFrame.

In [None]:
# Run the configured sentiment-analysis transformer over `df`. `result` keeps
# the columns of `df` and adds the configured output (`response`) and error
# (`error`) columns for each article description.
result = model.transform(df)
display(result)


StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 11, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, cc37a9b7-3bae-4964-9f20-76cc8e517c81)

### Step 5: Extract sentiment from the response column.

In [None]:
from pyspark.sql.functions import col

# Surface the nested `response.documents.sentiment` value as a top-level
# `sentiment` column. The label is positive, neutral, or negative (the service
# may also report `mixed` at document level - verify against actual output).
sentiment_df = result.withColumn(
    "sentiment",
    col("response.documents.sentiment"),
)
display(sentiment_df)


StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 12, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 48854132-0858-4df6-9033-7aae46da2549)

### Step 6: Clean the DataFrame by dropping unnecessary columns.

In [None]:
# Keep only the original article columns plus `sentiment`: the raw `response`
# payload and the `error` column are no longer needed once the label has been
# extracted.
sentiment_df_final = sentiment_df.drop(
    "error",
    "response",
)
display(sentiment_df_final)


StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 13, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 9cc7a796-581e-42b0-9247-b15c001acdb5)

### Step 7: Save the sentiment analysis results to a Delta table.

In [None]:
from pyspark.sql.utils import AnalysisException

# Target Delta table. Defined outside the `try` so the name is always bound
# for the MERGE step that follows, regardless of how the write below ends.
table_name = 'bing_lake_db.tbl_sentiment_analysis'

try:
    # First run: create the Delta table from the sentiment results.
    # The default save mode raises if the table already exists.
    sentiment_df_final.write.format("delta").saveAsTable(table_name)

except AnalysisException as exc:
    # Only an "already exists" failure is expected here; the MERGE step then
    # updates the existing table. Any other analysis error (missing database,
    # invalid schema, ...) must not be reported as "Table Already Exists",
    # so re-raise it instead of silently continuing.
    if "already exists" not in str(exc).lower():
        raise
    print("Table Already Exists")


StatementMeta(, f5a2a249-8878-43f1-8779-b659f37344c5, 15, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

### Step 8: Merge the new sentiment data into the existing Delta table (update changed rows, insert new ones).

In [None]:
# Expose the final sentiment DataFrame as a temporary view so it can be used
# as the MERGE source in SQL.
sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")

# Upsert into the Delta table, matching rows on `url`:
#   * matched row with at least one differing column -> overwrite with the source row;
#   * no matching row in the target                  -> insert the source row.
# The comparison uses the null-safe operator `<=>`: a plain `<>` evaluates to
# NULL when either side is NULL, so a column changing between NULL and a value
# would never be detected as a difference.
# NOTE(review): assumes `url` is unique in the source view; MERGE fails when
# several source rows match the same target row - confirm upstream.
spark.sql(f"""
    MERGE INTO {table_name} target_table
    USING vw_sentiment_df_final source_view
    ON source_view.url = target_table.url
    WHEN MATCHED AND (
        NOT (source_view.title <=> target_table.title) OR
        NOT (source_view.description <=> target_table.description) OR
        NOT (source_view.category <=> target_table.category) OR
        NOT (source_view.image <=> target_table.image) OR
        NOT (source_view.provider <=> target_table.provider) OR
        NOT (source_view.datePublished <=> target_table.datePublished)
    )
    THEN UPDATE SET *
    WHEN NOT MATCHED THEN INSERT *
""")
