<a href="https://colab.research.google.com/github/sheldonkemper/bank_of_england/blob/main/notebooks/modelling/ob_flan_t5_sentiment_jpm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
===================================================
Author: Oscar Bowden
Role: Research Lead, Bank of England Employer Project (Quant Collective)
LinkedIn: https://uk.linkedin.com/in/oscar-bowden-4b14711b7
Date: 2025-03-05
Version: 2.4

Description:
    This notebook contains an inference pipeline for a Flan-T5 (base)
    model that has been fine-tuned for sentiment polarity classification
    (negative / neutral / positive) of financial sentences (using
    Financial Phrasebank:
    https://huggingface.co/datasets/takala/financial_phrasebank).
===================================================
"""

# Imports

In [None]:
!pip install transformers torch > /dev/null 2>&1

In [None]:
#Imports

# Mount Google Drive
from google.colab import drive
import os

# Data handling
import re
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Data loading and pre-processing

In [None]:
# Mount Google Drive at /content/drive so that the topic modelled data
# (questions and answers for JPM and UBS) can be read from it in the next cell.
# force_remount=True requests a fresh mount even if the drive is already mounted.

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Topic-modelled snippet files: path1/path2 hold the answers, path3/path4 the questions
path1 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/Topic_Modelling_am/tqc_JPMorgan_answer_topic.csv"
path2 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/Topic_Modelling_am/tqc_UBS_answer_topic.csv"
path3 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/Topic_Modelling_am/tqc_JPMorgan_question_topic.csv"
path4 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/Topic_Modelling_am/tqc_UBS_question_topic.csv"

# Read the four CSVs in the same order as the paths above
df_a_jpm, df_a_ubs, df_q_jpm, df_q_ubs = (
    pd.read_csv(csv_path) for csv_path in (path1, path2, path3, path4)
)

In [None]:
# Cleaning - strip every double-quote character from the snippet text
for snippet_df in (df_a_jpm, df_a_ubs, df_q_jpm, df_q_ubs):
    # regex=False: the quote is matched as a literal character
    snippet_df['Snippet'] = snippet_df['Snippet'].str.replace('"', '', regex=False)

In [None]:
#Prepare topics for input into fine-tuned flan-t5

def prepare_text_for_inference(text):
    """
    Build the prompt for one snippet.

    The value is converted to a string, surrounding whitespace is removed,
    and the "Classify sentiment: " instruction is put in front of it.
    """
    cleaned = str(text).strip()
    return "Classify sentiment: " + cleaned

# Add the prompt-ready "snippet_infer" column to each of the four data sets
for qa_df in (df_a_jpm, df_a_ubs, df_q_jpm, df_q_ubs):
    qa_df["snippet_infer"] = qa_df["Snippet"].apply(prepare_text_for_inference)

# Inference using fine-tuned Flan-T5

In [None]:
# Restore the fine-tuned tokeniser and model from the best checkpoint on Drive

best_checkpoint = "/content/drive/MyDrive/bank_of_england/data/model_outputs/flan_t5_sent"

# Both objects are loaded from the same checkpoint directory
tokenizer = T5Tokenizer.from_pretrained(best_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(best_checkpoint)

# Define the prediction function using your fine-tuned model
def predict_sentiment(prepared_text):
    """
    Predicts sentiment using the fine-tuned Flan-T5 model.
    Assumes the input text is already preprocessed (i.e., prompt prepended).

    Parameters
    ----------
    prepared_text : str
        A single prompt, e.g. the output of prepare_text_for_inference.

    Returns
    -------
    str
        The decoded model output with special tokens removed and
        surrounding whitespace stripped.
    """
    # truncation=True lets the tokenizer cut inputs that exceed its length limit
    inputs = tokenizer(prepared_text, return_tensors="pt", truncation=True, padding=True)
    # Keep the input tensors on the same device as the model, so generation
    # also works once the model has been moved to a GPU (no-op on CPU).
    inputs = inputs.to(model.device)
    # Inference only: no gradients are needed
    with torch.no_grad():
        # At most two new tokens are generated; presumably enough for the
        # short class label the model was fine-tuned to emit - confirm.
        output = model.generate(**inputs, max_new_tokens=2)
    return tokenizer.decode(output[0], skip_special_tokens=True).strip()

# Run the model over every prepared snippet, one data set at a time
for infer_df in (df_a_jpm, df_a_ubs, df_q_jpm, df_q_ubs):
    infer_df["snippet_sent"] = infer_df["snippet_infer"].apply(predict_sentiment)

In [None]:
# Translate the raw model outputs ("0"/"1"/"2") into readable sentiment labels.
SENTIMENT_LABELS = {"0": "Negative", "1": "Neutral", "2": "Positive"}

# The label names also map to themselves, so re-running this cell leaves
# already-translated values intact instead of turning them all into NaN.
label_lookup = {**SENTIMENT_LABELS, **{name: name for name in SENTIMENT_LABELS.values()}}

for dataset_name, sent_df in (
    ("JPM answers", df_a_jpm),
    ("UBS answers", df_a_ubs),
    ("JPM questions", df_q_jpm),
    ("UBS questions", df_q_ubs),
):
    sent_df["snippet_sent"] = sent_df["snippet_sent"].map(label_lookup)

    # Series.map yields NaN for any value missing from the lookup; report
    # these rows rather than letting them disappear silently.
    n_unmapped = int(sent_df["snippet_sent"].isna().sum())
    if n_unmapped:
        print(f"Warning: {n_unmapped} snippet(s) in {dataset_name} have no recognised sentiment label (set to NaN)")

    # The prompt column was only needed for inference; errors="ignore"
    # keeps the cell re-runnable after the column has already been removed.
    sent_df.drop(columns=['snippet_infer'], inplace=True, errors="ignore")

In [None]:
# Print the label distribution of each data set; the leading "\n" on all but
# the first heading separates consecutive tables with a blank line
sentiment_summaries = [
    ("JPM Answer Snippet Sentiment", df_a_jpm),
    ("\nUBS Answer Snippet Sentiment", df_a_ubs),
    ("\nJPM Question Snippet Sentiment", df_q_jpm),
    ("\nUBS Question Snippet Sentiment", df_q_ubs),
]

for heading, summary_df in sentiment_summaries:
    label_counts = summary_df['snippet_sent'].value_counts()
    print(f"{heading}:\n {label_counts}")

JPM Answer Snippet Sentiment:
 snippet_sent
Neutral     335
Positive     81
Negative     18
Name: count, dtype: int64

UBS Answer Snippet Sentiment:
 snippet_sent
Neutral     268
Positive     69
Negative     20
Name: count, dtype: int64

JPM Question Snippet Sentiment:
 snippet_sent
Neutral     311
Positive     64
Negative     12
Name: count, dtype: int64

UBS Question Snippet Sentiment:
 snippet_sent
Neutral     370
Positive     93
Negative     13
Name: count, dtype: int64


In [None]:
#Save CSVs

# Output locations for the four data sets with their sentiment labels
file_path_1 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/sent_output/JPM_answers_sent_output_050325_v1.csv"
file_path_2 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/sent_output/UBS_answers_sent_output_050325_v1.csv"
file_path_3 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/sent_output/JPM_questions_sent_output_050325_v1.csv"
file_path_4 = "/content/drive/MyDrive/bank_of_england/data/model_outputs/sent_output/UBS_questions_sent_output_050325_v1.csv"

for out_path, out_df in (
    (file_path_1, df_a_jpm),
    (file_path_2, df_a_ubs),
    (file_path_3, df_q_jpm),
    (file_path_4, df_q_ubs),
):
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the output directory exists before writing; otherwise the results of
    # the inference run would be lost to an OSError at the very last step.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # index=False: the row index is not written to the file
    out_df.to_csv(out_path, index=False)