In [None]:
# Importing Libraries
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import FloatType
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# Obtain the Spark session for this notebook (an existing one is reused,
# otherwise a new one is created) under the application name "DataAnalysis".
spark = (
    SparkSession.builder
    .appName("DataAnalysis")
    .getOrCreate()
)

In [None]:
# Read the processed CSV into a Spark DataFrame: the first row provides the
# column names (header=True) and column types are inferred (inferSchema=True).
df = spark.read.csv(
    'data/processed_data.csv',
    header=True,
    inferSchema=True,
)

In [None]:
# Initialize VADER sentiment analyzer
# One shared, module-level instance: sentiment_score() looks this name up
# as a global each time it scores a text value.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Define a UDF for sentiment analysis
def sentiment_score(text):
    """Return the VADER compound polarity score for ``text``.

    Falsy input (e.g. ``None`` or an empty string) is not analyzed and
    scores 0.0. Otherwise the ``'compound'`` entry of
    ``analyzer.polarity_scores(text)`` is returned as a Python float.
    """
    # Guard clause: nothing to score for missing/empty values.
    if not text:
        return 0.0
    scores = analyzer.polarity_scores(text)
    return float(scores['compound'])

# Wrap the Python scorer as a Spark UDF whose result column is FloatType.
sentiment_udf = udf(sentiment_score, returnType=FloatType())

# Add a "sentiment" column holding the score computed from the 'text' column.
df = df.withColumn(
    "sentiment",
    sentiment_udf(df['text']),
)

In [None]:
# Save processed data
# `df` is a lazy Spark DataFrame: toPandas() runs the Spark job (including the
# sentiment UDF) and collects the rows to the driver, so it needs a live
# session. It therefore has to happen BEFORE spark.stop().
df.toPandas().to_csv('data/sentiment_data.csv', index=False)

# Stop Spark session
# Only after the results are written out is it safe to release Spark.
spark.stop()