In [None]:
import matplotlib.pyplot as plt

# Histogram (20 bins) of the 'sentiment' column of guardian_df,
# drawn through an explicit Axes object
fig, ax = plt.subplots()
ax.hist(guardian_df['sentiment'], bins=20, edgecolor='black')
ax.set_xlabel("Sentiment Score (Compound)")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Sentiment Scores in Headlines")
plt.show()

In [None]:
# Line chart of merged_df['sentiment'] against merged_df['date']
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(
    merged_df['date'],
    merged_df['sentiment'],
    label="Sentiment Score",
    color="blue",
    alpha=0.7,
)
ax.set_xlabel("Date")
ax.set_ylabel("Sentiment Score")
ax.set_title("Sentiment Score Trend Over Time")
ax.legend()
plt.show()

In [None]:
# Line chart of merged_df['gold_price'] against merged_df['date']
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(
    merged_df['date'],
    merged_df['gold_price'],
    label="Gold Price",
    color="gold",
    alpha=0.7,
)
ax.set_xlabel("Date")
ax.set_ylabel("Gold Price (USD)")
ax.set_title("Gold Price Trend Over Time")
ax.legend()
plt.show()

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
# Read the combined Guardian money headlines. The cells below rely on this
# file providing 'publication_date' and 'headline' columns.
news_csv_path = "../datasets/guardian_headlines_money_combined.csv"
df_news = pd.read_csv(news_csv_path)


In [3]:
# Standardize the date column: name it 'date', then store each value as a
# 'YYYY-MM-DD' string (any time-of-day component is dropped by the format)
df_news.rename({'publication_date': 'date'}, axis='columns', inplace=True)
parsed_dates = pd.to_datetime(df_news['date'])
df_news['date'] = parsed_dates.dt.strftime('%Y-%m-%d')


In [4]:
# Shared analyzer instance, created on first use by _get_analyzer().
_VADER_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, building it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        # Constructing the analyzer is the expensive step, so do it once
        # rather than once per scored text.
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def analyze_sentiment(text):
    """Applies VADER sentiment analysis and returns the compound score.

    Args:
        text: The text to score (the notebook passes headline strings).

    Returns:
        The value stored under the 'compound' key of the analyzer's
        ``polarity_scores(text)`` result.

    The analyzer is created lazily and reused across calls, so applying this
    function over a whole column does not rebuild it for every row.
    """
    return _get_analyzer().polarity_scores(text)['compound']


In [5]:
# Score every headline: cast the column to str, then run each value
# through analyze_sentiment and keep the result as a new column
headline_text = df_news['headline'].astype(str)
df_news['sentiment'] = headline_text.apply(analyze_sentiment)


In [6]:
# Collapse the headlines to one row per date. Each entry maps an output
# column to (source column, aggregation function).
daily_aggregations = {
    'headline_count': ('headline', 'count'),
    'avg_sentiment': ('sentiment', 'mean'),
    'std_sentiment': ('sentiment', 'std'),
}
news_features = df_news.groupby('date').agg(**daily_aggregations).reset_index()


In [7]:
# A date with a single headline has no sample standard deviation, so the
# aggregation leaves NaN there; record it as 0 spread.
# Assign the filled column back rather than calling fillna(inplace=True) on
# the selected column: that form is chained assignment, which warns on recent
# pandas and leaves news_features unchanged under Copy-on-Write.
news_features['std_sentiment'] = news_features['std_sentiment'].fillna(0)


In [8]:
# Write the per-date features to disk (index column omitted), then confirm
output_csv_path = "../datasets/news_sentiment.csv"
news_features.to_csv(output_csv_path, index=False)

print("✅ Processed news sentiment features saved to 'news_sentiment.csv'")

✅ Processed news sentiment features saved to 'news_sentiment.csv'
