<a href="https://colab.research.google.com/github/sheldonkemper/bank_of_england/blob/main/notebooks/modelling/ob_full_pipeline_jpm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
===================================================
Author: Oscar Bowden
Role: Research Lead, Bank of England Employer Project (Quant Collective)
LinkedIn: https://uk.linkedin.com/in/oscar-bowden-4b14711b7
Date: 2025-02-17
Version: 1.0

Description:
    This notebook is a rough version of a modelling pipeline for pre-processed financial meeting transcript
    data (JPMorganChase). It employs BERTopic, finBERT and Flan-T5 to extract insights into the speakers
    from the Q&A sections of the transcripts.
===================================================
"""

In [1]:
#Imports

import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification

In [8]:
#Load pre-processed data

url = "https://raw.githubusercontent.com/sheldonkemper/bank_of_england/main/data/preprocessed_data/JPMorgan_QNA_processed_data.xlsx"

df = pd.read_excel(url, engine='openpyxl')

df.head()

Unnamed: 0,Index,Quarter-Year,Asked By,Role of the person Asked the question,Question,Answered By,Role of the person answered the question,Answer
0,1,4Q24,John McDonald,"Analyst, Truist Securities, Inc.","Hi. Good morning. Jeremy, I wanted to ask abou...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Yeah. Good question, John, and welcome back, b..."
1,2,4Q24,Mike Mayo,"Analyst, Wells Fargo Securities LLC","Hi. Simple and then more difficult, I guess. J...",Jamie Dimon,"Chairman & Chief Executive Officer, JPMorganChase",I do love what I do. And answering the second ...
2,3,4Q24,Jim Mitchell,"Analyst, Seaport Global Securities LLC","Hey. Good morning. Maybe just on regulation, w...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Hey, Jim. I mean, it's obviously something we'..."
3,4,4Q24,Erika Najarian,"Analyst, UBS Securities LLC","Yes. Hi, good morning. Wanted to follow up on ...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Right, Erika. Okay. You are tempting me with m..."
4,5,4Q24,Erika,Unknown,"Does that conclude your question, Erika?",Jeremy Barnum,"Chief Financial Officer, JPMorganChase",Very good. We can go to the next question. Tha...


# Running FinBERT on question chunks - aiming to gather insights at the analyst/question level

* --------Currently using non-final version of data---------

In [9]:
# Load ProsusAI FinBERT model & tokenizer
MODEL_NAME = "ProsusAI/finbert"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
model.eval()  # Set model to evaluation mode

# Load your preprocessed data (assuming it's already in `df`)
TEXT_COLUMN = "Question"  # Adjust this if your column name is different

# Define class labels for FinBERT
LABELS = ["Negative", "Neutral", "Positive"]

# Function to get sentiment scores
def get_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    with torch.no_grad():  # No gradient calculation needed - only inferring, not training
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy().flatten()
    sentiment_label = LABELS[probs.argmax()]  # Get label with highest probability

    return {"Negative_q": probs[0], "Neutral_q": probs[1], "Positive_q": probs[2], "Sentiment_q": sentiment_label}

# Apply sentiment analysis to each chunk
df["sentiment_question"] = df[TEXT_COLUMN].apply(get_sentiment)

# Convert dictionary to separate columns
sentiment_df = df["sentiment_question"].apply(pd.Series)
df = pd.concat([df, sentiment_df], axis=1).drop(columns=["sentiment_question"])

# Show results
df.head()

Unnamed: 0,Index,Quarter-Year,Asked By,Role of the person Asked the question,Question,Answered By,Role of the person answered the question,Answer,Negative_q,Neutral_q,Positive_q,Sentiment_q
0,1,4Q24,John McDonald,"Analyst, Truist Securities, Inc.","Hi. Good morning. Jeremy, I wanted to ask abou...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Yeah. Good question, John, and welcome back, b...",0.122641,0.013932,0.863427,Positive
1,2,4Q24,Mike Mayo,"Analyst, Wells Fargo Securities LLC","Hi. Simple and then more difficult, I guess. J...",Jamie Dimon,"Chairman & Chief Executive Officer, JPMorganChase",I do love what I do. And answering the second ...,0.064734,0.031453,0.903813,Positive
2,3,4Q24,Jim Mitchell,"Analyst, Seaport Global Securities LLC","Hey. Good morning. Maybe just on regulation, w...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Hey, Jim. I mean, it's obviously something we'...",0.286975,0.036256,0.676769,Positive
3,4,4Q24,Erika Najarian,"Analyst, UBS Securities LLC","Yes. Hi, good morning. Wanted to follow up on ...",Jeremy Barnum,"Chief Financial Officer, JPMorganChase","Right, Erika. Okay. You are tempting me with m...",0.100585,0.028308,0.871107,Positive
4,5,4Q24,Erika,Unknown,"Does that conclude your question, Erika?",Jeremy Barnum,"Chief Financial Officer, JPMorganChase",Very good. We can go to the next question. Tha...,0.04435,0.112143,0.843507,Positive
