In [1]:
import pandas as pd

# Read the per-quarter, per-keyword sentiment aggregates produced by an earlier step.
quarterly_sentiment = pd.read_csv(
    filepath_or_buffer="data/aggregated_sentiment_by_quarter.csv"
)


In [None]:
# Relative weight of each keyword in the composite sentiment index.
# The values are tunable; as written they add up to 1.0. Keywords that are
# not listed here contribute nothing to the index.
keyword_weights = {
    "GDP": 0.3,  # GDP sentiment carries the largest weight
    "inflation": 0.2,
    "employment": 0.2,
    "recession": 0.1,
    "economic growth": 0.1,
    "fiscal policy": 0.1,
}


def compute_sentiment_index(row):
    """Return a single row's weighted contribution to the sentiment index.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing the
            "Keywords" and "Aggregate_Sentiment" entries.

    Returns:
        The row's "Aggregate_Sentiment" multiplied by the weight registered
        for its keyword in ``keyword_weights``. The lookup is an exact,
        case-sensitive match; a keyword without a weight is multiplied by 0.
    """
    weight = keyword_weights.get(row["Keywords"], 0)
    return row["Aggregate_Sentiment"] * weight


# Score every (quarter, keyword) row and keep the result as a new column.
quarterly_sentiment["Sentiment_Index"] = quarterly_sentiment.apply(
    func=compute_sentiment_index, axis="columns"
)

In [3]:
# Collapse the weighted keyword scores into one composite index value per
# quarter by summing them within each "Quarter" group.
sentiment_index_by_quarter = (
    quarterly_sentiment.groupby(by="Quarter")["Sentiment_Index"]
    .agg("sum")
    .reset_index()
)


In [4]:
# Min-max normalize the sentiment index onto the [0, 1] range: the quarter
# with the lowest index maps to 0 and the quarter with the highest maps to 1.
def _min_max_scale(values):
    """Rescale a numeric Series to [0, 1] using (x - min) / (max - min).

    Args:
        values: pandas Series of numbers to rescale.

    Returns:
        A Series aligned with ``values``. When the range is zero (all values
        identical, or only one value) the plain formula would divide 0 by 0
        and yield NaN everywhere, so every value maps to 0.0 instead.
    """
    lowest = values.min()
    spread = values.max() - lowest
    # `not spread > 0` also covers a NaN spread (empty or all-NaN input).
    if not spread > 0:
        # Multiplying by 0.0 keeps the index (and any NaN entries) intact.
        return values * 0.0
    return (values - lowest) / spread


sentiment_index_by_quarter["Normalized_Sentiment_Index"] = _min_max_scale(
    sentiment_index_by_quarter["Sentiment_Index"]
)


In [5]:
# Persist the per-quarter index (without the row index) and preview the
# first five rows.
sentiment_index_by_quarter.to_csv(
    path_or_buf="sentiment_index_by_quarter.csv", index=False
)

print(sentiment_index_by_quarter.head(n=5))

  Quarter  Sentiment_Index  Normalized_Sentiment_Index
0  2002Q1        -0.568267                    0.021702
1  2002Q2        -0.549666                    0.086930
2  2002Q3        -0.526516                    0.168107
3  2002Q4        -0.574455                    0.000000
4  2003Q1        -0.530727                    0.153342
