#### Sentiment Index Generation

This notebook focuses on creating a comprehensive sentiment index from raw sentiment data derived from the Copom minutes. The index captures the overall tone of the documents and is designed for use in econometric analysis.
 * It calculates a "Net Optimism Sentiment" index: the difference between positive and negative word counts, normalized by the total word count.
 * It also calculates an "Uncertainty" index: the uncertainty word count divided by the total word count.

In [1]:
import pandas as pd

In [4]:
# Load the sentiment results and work on a copy so the frame as read stays untouched.
sentiments_results = pd.read_excel("./data/processed/sentiments_results.xlsx")
index_results = sentiments_results.copy()

# Load the minutes metadata; publication dates are parsed and made timezone-naive.
minutes_info = pd.read_excel("./data/raw/minutes_info.xlsx")
minutes_info['DataPublicacao'] = (
    pd.to_datetime(minutes_info['DataPublicacao']).dt.tz_localize(None)
)

# Key the metadata by title so publication dates can be looked up by document name.
minutes_info = minutes_info.set_index("Titulo")

# Strip the ".txt" extension as a literal string. regex=False is explicit
# because pandas < 2.0 treated the pattern as a regex by default, in which "."
# matches any character and could remove unintended text from a title.
index_results["filename"] = index_results["filename"].str.replace(".txt", "", regex=False)

# Attach the publication date by title lookup; filenames with no matching
# title in minutes_info get a missing date (NaT).
index_results["date"] = index_results["filename"].map(minutes_info['DataPublicacao'])

index_results = index_results.sort_values("date")

#### Optimism Index

In [5]:
# Independent (deep) copy, so adding optimism columns leaves index_results unchanged.
optimism_df = index_results.copy(deep=True)

In [6]:
# Net optimism per row: (positive_count - negative_count) / total_words.
net_positive_count = optimism_df['positive_count'].sub(optimism_df['negative_count'])
optimism_df['optimism_index'] = net_positive_count.div(optimism_df['total_words'])

In [7]:
# Mean optimism index for every (date, filename, dominant_topic) combination.
optimism_by_topic = (
    optimism_df
    .groupby(["date", "filename", "dominant_topic"])
    .agg(optimism_index=("optimism_index", "mean"))
    .reset_index()
)

# Reshape to wide form: one row per (date, filename), one column per topic.
optimism_wide = optimism_by_topic.pivot_table(
    values="optimism_index",
    index=["date", "filename"],
    columns="dominant_topic",
).reset_index()

# Drop the leftover "dominant_topic" label on the column axis.
optimism_wide.columns.name = None

# Topics that never appear for a document are written as 0.
optimism_df = optimism_wide.fillna(0)

optimism_df.to_excel("./data/processed/index_optimism.xlsx", index=False)

#### Uncertainty Index

In [8]:
# Independent (deep) copy, so adding uncertainty columns leaves index_results unchanged.
uncertainty_df = index_results.copy(deep=True)

In [9]:
# Uncertainty per row: uncertainty_count / total_words.
uncertainty_hits = uncertainty_df['uncertainty_count']
uncertainty_df['uncertainty_index'] = uncertainty_hits.div(uncertainty_df['total_words'])

In [10]:
# Mean uncertainty index for every (date, filename, dominant_topic) combination.
uncertainty_by_topic = (
    uncertainty_df
    .groupby(["date", "filename", "dominant_topic"])
    .agg(uncertainty_index=("uncertainty_index", "mean"))
    .reset_index()
)

# Reshape to wide form: one row per (date, filename), one column per topic.
uncertainty_wide = uncertainty_by_topic.pivot_table(
    values="uncertainty_index",
    index=["date", "filename"],
    columns="dominant_topic",
).reset_index()

# Drop the leftover "dominant_topic" label on the column axis.
uncertainty_wide.columns.name = None

# Topics that never appear for a document are written as 0.
uncertainty_df = uncertainty_wide.fillna(0)

uncertainty_df.to_excel("./data/processed/index_uncertainty.xlsx", index=False)