In [7]:
import pathlib
import textwrap
import google.generativeai as genai
from IPython.display import display
from IPython.display import Markdown
import pandas as pd


def to_markdown(text):
    text = text.replace('•', '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

# Set your Gemini API key
genai.configure(api_key='AIzaSyCLC6OCYyVdQAKippp7L-FbHEGhy49jlQ8')

for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [102]:
def gemini_classification(article):
    # Get the response from the Gemini API
    prompt = "Discard all the previous instructions. Behave like you are an expert sentence classifier. Classify the following article into 'HAWKISH', 'DOVISH', or 'NEUTRAL' class and give it a score between 1 (dovish) and -1 (hawkish). Label 'HAWKISH' if it is corresponding to tightening of the monetary policy, 'DOVISH' if it is corresponding to easing of the monetary policy, or 'NEUTRAL' if the stance is neutral and give me a justification: " + article  
    model = genai.GenerativeModel('gemini-1.5-pro-latest')
    response = model.generate_content(prompt)
    print(response.text)

    # Split the text content into lines
    lines = response.text.split('\n')

    # Initialize variables to store the extracted information
    classification = ""
    score = ""
    rationale = ""

    # Loop through the lines and extract the relevant information
    num_line = 0
    for line in lines:
        num_line += 1
        if line.startswith("## Classification:"):
            classification = line.replace("## Classification: ", "").strip()
        elif line.startswith("## Score:"):
            score = line.replace("## Score: ", "").strip()
        elif num_line>=7:
            rationale = rationale + line + '\n'

    rationale = rationale.replace('* **', '- ')

    return classification, score, rationale

In [91]:
statements_data = pd.read_csv('ecb_monetary_policy_decisions.csv')
statements_data.head()

Unnamed: 0,Date,Title,Link,Article
0,11 April 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
1,7 March 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
2,25 January 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
3,14 December 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...
4,26 October 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...


In [None]:
import pandas as pd
import time

def apply_classification_with_delay(df, text_column, delay=5):
    """Applies the gemini_classification function to a DataFrame with a delay between requests."""
    
    classifications = []
    scores = []
    rationales = []

    for article in df[text_column]:
        classification, score, rationale = gemini_classification(article)
        classifications.append(classification)
        scores.append(score)
        rationales.append(rationale)
        time.sleep(delay)  # Introduce delay after each request

    return pd.DataFrame({
        "classification": classifications,
        "score": scores,
        "rationale": rationales
    })

# Assuming your DataFrame is named 'statements_data' and the column with articles is 'Article'
results_df = apply_classification_with_delay(statements_data, "Article", delay=5)
df = pd.concat([statements_data, results_df], axis=1)


In [106]:
import pandas as pd
import nltk
import spacy
from nltk.corpus import stopwords
from spacy.lang.en.stop_words import STOP_WORDS as spacy_stopwords
import regex as re

# Download NLTK stop words
nltk.download('stopwords')
nltk.download('punkt')

# Load Spacy English model
nlp = spacy.load("en_core_web_sm")

# Combine NLTK and Spacy stopwords
stop_words = set(stopwords.words('english')).union(spacy_stopwords)

def preprocess_text(text):
    # Clean and normalize text
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text)  # Remove extra whitespace
    text = re.sub(r'\d+', '', text)  # Remove digits
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    
    # Tokenize and remove stop words
    tokens = [word for word in nltk.word_tokenize(text) if word not in stop_words]
    
    # Lemmatize tokens using Spacy
    doc = nlp(" ".join(tokens))
    lemmatized_tokens = [token.lemma_ for token in doc]
    
    return " ".join(lemmatized_tokens)

# Apply the preprocessing function to the 'Article' column
statements_data['cleaned_article'] = statements_data['Article'].apply(preprocess_text)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\jaoss\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\jaoss\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [123]:
# Panel A1
panel_a1 = [
    "inflation expectation", "interest rate", "bank rate", "fund rate", "price",
    "economic activity", "inflation", "employment", "price stability",
    "asset prices", "consumer prices", "producer prices" 
]

# Panel A2
panel_a2 = [
    "anchor", "cut", "subdue", "decline", "decrease", "reduce", "low", 
    "drop", "fall", "fell", "decelerate", "slow", "pause", "pausing", "stable",
    "non-accelerating", "downward", "tighten", "stimulate", "support", 
    "accommodative", "dovish tilt", "gradualism", "patient", "cautious", 
    "easing bias"
]

# Panel B1
panel_b1 = [
    "unemployment", "growth", "exchange rate", "productivity", "deficit",
    "demand", "job market", "monetary policy", "labor market", "wage growth",
    "output gap", "trade balance"
]

# Panel B2
panel_b2 = [
    "ease", "easing", "rise", "rising", "increase", "expand", "improve", "strong",
    "upward", "raise", "high", "rapid", "tightening", "restrictive", "constrain",
    "hawkish tilt", "aggressive", "tightening bias", "overheated economy"
]

# Panel C (handling contractions)
panel_c = [
    "weren't", "were not", "wasn't", "was not", "did not", "didn't", 
    "do not", "don't", "will not", "won't"
]

In [126]:
def analyze_sentiment(text):
    """Analyzes sentiment sentence by sentence and returns average score and refined classification."""

    sentences = nltk.sent_tokenize(text)
    scores = []

    for sentence in sentences:
        print(sentence)
        # Check for words in each panel
        a1_words = any(word in sentence for word in panel_a1)
        a2_words = any(word in sentence for word in panel_a2)
        b1_words = any(word in sentence for word in panel_b1)
        b2_words = any(word in sentence for word in panel_b2)
        c_words = any(word in sentence for word in panel_c)

        # Determine initial classification
        if (a1_words and a2_words) or (b1_words and b2_words):
            initial_classification = "Dovish"
        elif (a1_words and b2_words) or (a2_words and b1_words):
            initial_classification = "Hawkish"
        else:
            initial_classification = "Neutral"

        # Reverse classification if negation words are present
        if c_words:
            final_classification = "Hawkish" if initial_classification == "Dovish" else "Dovish"
        else:
            final_classification = initial_classification

        # Calculate score based on classification
        if final_classification == "Hawkish":
            score = -1
        elif final_classification == "Dovish":
            score = 1
        else:
            score = 0

        scores.append(score)

    # Calculate average score
    average_score = sum(scores) / len(scores) if scores else 0  # Handle empty articles

    # Refined classification based on average score
    if average_score > 0:
        final_classification = "Dovish"
    elif average_score < 0:
        final_classification = "Hawkish"
    else:
        final_classification = "Neutral"

    return final_classification, average_score

# Example usage
article = "The central bank announced a surprise rate cut to stimulate the economy, but they didn't expect a quick recovery."
classification, score = analyze_sentiment(article)
print(f"Classification: {classification}, Average Score: {score:.2f}")


# Example usage
article = "Despite recent signs of slowing inflation, the Federal Reserve remains committed to its aggressive tightening path, with several more rate hikes expected this year to combat persistent price pressures and ensure a sustained return to the 2% inflation target."
classification, score = analyze_sentiment(article)
print(f"Classification: {classification}, Score: {score}")


Classification: Dovish, Average Score: 1.20
Classification: Dovish, Score: 1.2


In [114]:
statements_data.head()

Unnamed: 0,Date,Title,Link,Article,cleaned_article,classification
0,11 April 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish
1,7 March 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish
2,25 January 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish
3,14 December 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish
4,26 October 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish


In [115]:
statements_data["classification"], statements_data["grade"] = zip(*statements_data["cleaned_article"].apply(analyze_sentiment))
statements_data.head()

Unnamed: 0,Date,Title,Link,Article,cleaned_article,classification,grade
0,11 April 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish,1
1,7 March 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish,1
2,25 January 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish,1
3,14 December 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish,1
4,26 October 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...,govern council today decide key ecb interest r...,Dovish,1


In [129]:
import pandas as pd
import os

def rule_model_article(article_text):
    """
    Analyzes an article sentence by sentence, classifying each for tone.
    Calculates an overall article grade between -1 (hawkish) and 1 (dovish).

    Args:
        article_text (str): The full text of the article.

    Returns:
        float: The article's overall grade.
    """

    A1 = ["inflation expectation", "interest rate", "bank rate", "fund rate", "price",
        "economic activity", "inflation", "employment", "price stability",
        "asset prices", "consumer prices", "producer prices"]
    A2 = ["anchor", "cut", "subdue", "decline", "decrease", "reduce", "low", 
        "drop", "fall", "fell", "decelerate", "slow", "pause", "pausing", "stable",
        "non-accelerating", "downward", "tighten", "stimulate", "support", 
        "accommodative", "dovish tilt", "gradualism", "patient", "cautious", 
        "easing bias"]
    B1 = ["unemployment", "growth", "exchange rate", "productivity", "deficit",
        "demand", "job market", "monetary policy", "labor market", "wage growth",
        "output gap", "trade balance"]
    B2 = ["ease", "easing", "rise", "rising", "increase", "expand", "improve", "strong",
        "upward", "raise", "high", "rapid", "tightening", "restrictive", "constrain",
        "hawkish tilt", "aggressive", "tightening bias", "overheated economy"]
    C = ["weren't", "were not", "wasn't", "was not", 'did not', "didn't", 
        "do not", "don't", 'will not', "won't"]
    
    sentences = article_text.split('. ')  # Better sentence splitting for articles

    sentence_grades = []
    for s in sentences:
        label = 0  # Neutral by default
        s_lower = s.lower()  
        if (any(word in s_lower for word in A1) and any(word in s_lower for word in A2)) or \
            (any(word in s_lower for word in B1) and any(word in s_lower for word in B2)):
            label = 0
        elif (any(word in s_lower for word in A1) and any(word in s_lower for word in B2)) or \
            (any(word in s_lower for word in B1) and any(word in s_lower for word in A2)):
            label = 1
        if label != 2 and (any(word in s_lower for word in C)):
            label = 1 - label  # Invert if negation found

        sentence_grades.append(label)

    # Calculate overall article grade
    if sentence_grades:
        article_grade = sum(sentence_grades) / len(sentence_grades) - 0.5
    else:
        article_grade = 0  # Neutral if no sentences

    return article_grade

article = "Despite recent signs of slowing inflation, the Federal Reserve remains committed to its aggressive tightening path, with several more rate hikes expected this year to combat persistent price pressures and ensure a sustained return to the 2% inflation target."
grade = rule_model_article(article)
print(f"Article Grade: {grade:.2f}")

article = "The central bank announced a surprise rate cut to stimulate the economy, but they didn't expect a quick recovery."
grade = rule_model_article(article)
print(f"Article Grade: {grade:.2f}")

Article Grade: -0.50
Article Grade: 0.50


In [132]:
statements_data = pd.read_csv('ecb_monetary_policy_decisions.csv')
statements_data.head()

Unnamed: 0,Date,Title,Link,Article
0,11 April 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
1,7 March 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
2,25 January 2024,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2024/...,The Governing Council today decided to keep th...
3,14 December 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...
4,26 October 2023,MONETARY POLICY DECISION,https://www.ecb.europa.eu//press/pr/date/2023/...,The Governing Council today decided to keep th...


In [4]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import torch.nn.functional as F

# Load the FOMC-RoBERTa model and tokenizer
model_name = "gtfintechlab/FOMC-RoBERTa"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Example articles
articles = [
    "The ECB has decided to increase interest rates to combat rising inflation.",
    "The ECB plans to maintain low interest rates to support economic growth."
]

# Labels for classification
labels = ["dovish", "hawkish"]

# Function to compute the score
def compute_score(logits):
    probs = F.softmax(logits, dim=1)
    dovish_score = probs[:, 0].item()
    hawkish_score = probs[:, 1].item()
    score = dovish_score - hawkish_score  # Positive for dovish, negative for hawkish
    return score, dovish_score, hawkish_score

In [8]:
data = pd.read_excel('combined.xlsx')
data.head()

Unnamed: 0,Date,Source,Headline,Translated.headline,Manual.summary,Original.article.url,Media.type,Speaker,Reach,Language,Country.Code,Sitename,Tags,Site_Readership,Article_Readership
0,2022-09-30 22:08:34,Il Messaggero - Borsa Italiana,"Visco: «Troppi rialzi dei tassi, si va in rece...","Banca d'Italia's Visco: ""Too many rate hikes w...","At a conference in Florence, Banca d'Italia Go...",https://www.ilmessaggero.it/economia/news/tass...,WEB,Newspapers,1174293,it,IT,Il Messaggero,"European Central Bank, European Monetary Union...",2296944.0,0.0
1,2022-09-30 21:44:15,Puls Biznesu - Z ostatniej chwili,Knot: kolejne podwyżki stóp EBC są konieczne,Knot: further ECB rate hikes are necessary,,https://www.pb.pl/knot-kolejne-podwyzki-stop-e...,WEB,Newspapers,72409,pl,PL,Puls Biznesu,"European Central Bank, Inflation, Rank 2, Inte...",73998.0,0.0
2,2022-09-30 21:30:13,Handelsblatt - Finanzen,EZB-Ratsmitglied Visco: Rezessionsrisiko bei g...,ECB 's Visco: Recession risk in the event of m...,Banca d'Italia Governor Ignazio Visco on Fri...,https://www.handelsblatt.com/finanzen/geldpoli...,WEB,Newspapers,525924,de,DE,Handelsblatt,"European Central Bank, European Monetary Union...",314148.0,811493.0
3,2022-09-30 21:07:32,Trend - Správy,ECB by mala byť pri zvyšovaní sadzieb opatrná...,"ECB should be cautious about rate hikes, says...",,https://www.trend.sk/spravy/ecb-mala-byt-pri-z...,WEB,Net Magazine,51466,sk,SK,Trend,"European Central Bank, Inflation, Rank 2, Inte...",53064.0,0.0
4,2022-09-30 20:56:08,Bloomberg - GNews,ECB 's Schnabel Says Weaker Demand May Not Eas...,ECB 's Schnabel says weaker demand may not eas...,ECB Executive Board Member Isabel Schnabel ...,https://www.bloomberg.com/news/articles/2022-0...,WEB,Net Magazine,2520979,en,US,Bloomberg,"European Central Bank, Inflation, Rank 1, Core...",1309524.0,278016.0


In [9]:
data.dropna(subset='Manual.summary', inplace=True)
data.reset_index(drop=True, inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31384 entries, 0 to 31383
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date                  31384 non-null  datetime64[ns]
 1   Source                31384 non-null  object        
 2   Headline              31266 non-null  object        
 3   Translated.headline   31384 non-null  object        
 4   Manual.summary        31384 non-null  object        
 5   Original.article.url  31384 non-null  object        
 6   Media.type            31384 non-null  object        
 7   Speaker               31094 non-null  object        
 8   Reach                 31384 non-null  int64         
 9   Language              31384 non-null  object        
 10  Country.Code          31380 non-null  object        
 11  Sitename              31384 non-null  object        
 12  Tags                  31384 non-null  object        
 13  Site_Readership 

In [10]:
test = data.head(100)

In [12]:
# Process each article
for article in test['Manual.summary'].tolist():
    inputs = tokenizer(article, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    score, dovish_score, hawkish_score = compute_score(logits)
    classification = "Dovish" if score > 0 else "Hawkish"
    
    print(f"Article: {article}")
    print(f"Score: {score:.2f} (Dovish: {dovish_score:.2f}, Hawkish: {hawkish_score:.2f})")
    print(f"Classification: {classification}\n")

Article: At a conference in Florence, Banca d'Italia Governor  Ignazio Visco  warned of the risks of triggering recession if the  ECB  blindly follows the US Federal Reserve and proceeds with excessively rapid and pronounced rate rises. Mr Visco, however, defended the  ECB  from the attacks of those who criticised it as slow in recognising the arrival of high inflation in recent months and stressed that he does not see any apparent reason to assume that the upcoming increases will be extraordinarily high. The governor claimed that the  ECB  had started its journey last December and emphasised that the institution's neutral policy rate would be between 0.7% and 1.8%. Therefore, the  ECB  could have already reached this rate with its last interest rate increase or is close to reaching it soon.
Score: -1.00 (Dovish: 0.00, Hawkish: 1.00)
Classification: Hawkish

Article: Banca d'Italia Governor  Ignazio Visco  on Friday warned against raising the  ECB ’s key interest rates in steps that ar