Sentiment Analysis of News Articles Using TextBlob, VADER, and FinBERT

In [None]:
!pip install textblob transformers tqdm pandas nltk



Imports

In [None]:
import os

import nltk
import numpy as np
import pandas as pd
import torch
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# Mount Google Drive at /content/drive so the news JSON files referenced
# below (under /content/drive/MyDrive/...) are readable. Colab-only.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Input files: one cleaned news JSON per company / sector.
# run_sentiment_pipeline() derives its output CSV path from these paths,
# so the scored CSVs end up in the same Drive folder.
infosys_file = "/content/drive/MyDrive/processed/news/clean_infosys.json"
reliance_file = "/content/drive/MyDrive/processed/news/clean_reliance.json"
sbi_file = "/content/drive/MyDrive/processed/news/clean_sbi.json"
sbi_industry_file = "/content/drive/MyDrive/processed/news/clean_sbi_industry.json"

In [None]:
def load_news_json(file_path):
    """Read a news JSON file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str
        Location of the JSON file, passed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_json(file_path)

In [None]:
def textblob_sentiment(text):
    """Return the TextBlob polarity of ``text``.

    Anything that is not a string, and any string that is empty or only
    whitespace, is scored as ``0.0`` without invoking TextBlob.
    """
    is_scorable = isinstance(text, str) and text.strip() != ""
    if is_scorable:
        blob = TextBlob(text)
        return blob.sentiment.polarity
    return 0.0

In [None]:
# Single shared VADER analyzer; vader_sentiment() reuses it for every article
# instead of constructing a new analyzer per call.
vader = SentimentIntensityAnalyzer()

In [None]:
def vader_sentiment(text):
    """Return VADER's ``compound`` score for ``text``.

    Non-string values and empty / whitespace-only strings are scored as
    ``0.0`` without consulting the analyzer.
    """
    is_scorable = isinstance(text, str) and text.strip() != ""
    if is_scorable:
        polarity = vader.polarity_scores(text)
        return polarity["compound"]
    return 0.0

In [None]:
# Load the FinBERT tokenizer and classifier from the Hugging Face Hub and wrap
# them in a sentiment-analysis pipeline used by finbert_sentiment().
tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert')
model = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert')

# Pipeline device convention: 0 = first CUDA GPU, -1 = CPU.
# Fall back to CPU so the notebook still runs on a runtime without a GPU
# instead of failing when the model is moved to a non-existent cuda:0.
finbert = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Device set to use cuda:0


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [None]:
def finbert_sentiment(texts, batch_size=8):
    """Score each text with FinBERT and return one signed score per input.

    Parameters
    ----------
    texts : sequence
        Texts to score. Entries that are not strings, or that are empty /
        whitespace-only, are scored ``0.0`` and never sent to the model
        (same guard as ``textblob_sentiment`` and ``vader_sentiment``).
    batch_size : int, default 8
        Number of texts handed to the pipeline per call; also forwarded to
        the pipeline so the model really runs on batches of this size.

    Returns
    -------
    list of float
        Same length and order as ``texts``. For each scored text the
        pipeline's top label decides the sign: ``positive`` -> ``+score``,
        ``neutral`` -> ``0.0``, any other label -> ``-score``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Pre-fill with the neutral default so skipped (non-text) entries keep
    # their position in the output.
    scores = [0.0] * len(texts)
    scorable = [
        (position, text)
        for position, text in enumerate(texts)
        if isinstance(text, str) and text.strip() != ""
    ]

    for start in tqdm(range(0, len(scorable), batch_size), desc="FinBERT Sentiment"):
        chunk = scorable[start:start + batch_size]
        # Without an explicit batch_size the pipeline processes a list one
        # item at a time, which defeats the purpose of chunking here.
        results = finbert(
            [text for _, text in chunk],
            truncation=True,
            batch_size=batch_size,
        )
        for (position, _), res in zip(chunk, results):
            label = res["label"].lower()
            score = res["score"]
            if label == "positive":
                scores[position] = score
            elif label == "neutral":
                scores[position] = 0.0
            else:
                scores[position] = -score
    return scores

In [None]:
def run_sentiment_pipeline(file_path, batch_size=8):
    """Score one news file with TextBlob, VADER and FinBERT and save a CSV.

    Loads the JSON at ``file_path``, adds ``textblob_score``, ``vader_score``
    and ``finbert_score`` columns computed from the ``combined_text`` column,
    and writes the result next to the input as
    ``<input name without extension>_with_sentiments.csv``.

    Parameters
    ----------
    file_path : str
        Path to the news JSON file; it must contain a ``combined_text`` column.
    batch_size : int, default 8
        Batch size forwarded to ``finbert_sentiment``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the three score columns added.
    """
    df = load_news_json(file_path)

    print("Finding TextBlob Scores...")
    df["textblob_score"] = df["combined_text"].apply(textblob_sentiment)

    print("Finding Vader Score...")
    df["vader_score"] = df["combined_text"].apply(vader_sentiment)

    print("Finding FinBERT Score...")
    df["finbert_score"] = finbert_sentiment(df["combined_text"].tolist(), batch_size=batch_size)

    # --- Save to CSV ---
    # Strip only the final extension. A plain str.replace(".json", ...) would
    # also rewrite ".json" inside directory names and, for a path without
    # ".json", would return the input path itself and overwrite the source.
    path_without_ext, _ = os.path.splitext(file_path)
    csv_path = f"{path_without_ext}_with_sentiments.csv"
    df.to_csv(csv_path, index=False)
    print(f"Saved results to: {csv_path}")

    return df

In [None]:
# Score the Infosys articles with all three models; the scored CSV is saved
# alongside the input file.
infosys_df = run_sentiment_pipeline(infosys_file, batch_size=8)

Finding TextBlob Scores...
Finding Vader Score...
Finsing FinBERT Score 


FinBERT Sentiment:  26%|██▋       | 10/38 [00:02<00:04,  6.22it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
FinBERT Sentiment: 100%|██████████| 38/38 [00:07<00:00,  5.33it/s]

Saved results to: /content/drive/MyDrive/processed/news/clean_infosys_with_sentiments.csv





In [None]:
# Preview the first rows to compare the three score columns side by side.
infosys_df.head()

Unnamed: 0,author,title,description,source,category,language,country,published_at,combined_text,company,textblob_score,vader_score,finbert_score
0,Investing.com,infosys announces 44th annual general meeting ...,infosys announces 44th annual general meeting ...,Investing.com | Stock Market Quotes &amp; Fina...,business,en,us,2025-06-02,infosys announces 44th annual general meeting ...,Infosys,0.05,0.0,0.0
1,Haripriya Sureban,Infosys CEO Salil Parekh Gets 22% Pay Hike In ...,Infosys CEO Salil Parekh Gets 22% Pay Hike In ...,Bloomberg | Latest And Live Business,business,en,us,2025-06-02,Infosys CEO Salil Parekh Gets 22% Pay Hike In ...,Infosys,0.0,-0.2023,0.812474
2,Roger Kehrt,Infosys BPM Unveils AI Agents for Accounts Pay...,"Infosys BPM unveiled AI agents on May 30, 2025...",webpronews,general,en,us,2025-06-02,Infosys BPM Unveils AI Agents for Accounts Pay...,Infosys,0.25,0.7845,0.815225
3,ABMN Staff,UBS AM a distinct business unit of UBS ASSET M...,UBS AM a distinct business unit of UBS ASSET M...,americanbankingnews,general,en,us,2025-06-02,UBS AM a distinct business unit of UBS ASSET M...,Infosys,0.114286,0.567,-0.720634
4,Ananya Chaudhuri,Stock Market Live: GIFT Nifty Implies Lower Op...,Stock Market Live: GIFT Nifty Implies Lower Op...,Bloomberg | Latest And Live Business,business,en,us,2025-06-02,Stock Market Live: GIFT Nifty Implies Lower Op...,Infosys,0.068182,0.913,-0.869344


In [None]:
# Score the Reliance articles with all three models; the scored CSV is saved
# alongside the input file.
reliance_df = run_sentiment_pipeline(reliance_file, batch_size=8)

Finding TextBlob Scores...
Finding Vader Score...
Finsing FinBERT Score 


FinBERT Sentiment: 100%|██████████| 26/26 [00:03<00:00,  7.49it/s]

Saved results to: /content/drive/MyDrive/processed/news/clean_reliance_with_sentiments.csv





In [None]:
# Preview the first rows to compare the three score columns side by side.
reliance_df.head()

Unnamed: 0,author,title,description,source,category,language,country,published_at,combined_text,company,textblob_score,vader_score,finbert_score
0,Aishwarya Patil,"Jio Delivers Speed, Airtel Wins On Voice—TRAI ...","Jio Delivers Speed, Airtel Wins On Voice—TRAI ...",Bloomberg | Latest And Live Business,business,en,us,2025-07-01,"Jio Delivers Speed, Airtel Wins On Voice—TRAI ...",Reliance,0.3,0.8126,0.0
1,Prajwal Jayaraj,'Not A Business Where Deep Pockets Mean A Larg...,'Not A Business Where Deep Pockets Mean A Larg...,Bloomberg | Latest And Live Business,business,en,us,2025-07-02,'Not A Business Where Deep Pockets Mean A Larg...,Reliance,-0.032738,0.0,0.0
2,Vivek Singh,Jio-Krafton Partnership: A “Milestone” for Ind...,Recently Jio-Krafton Partnership introduces ne...,Insidesport,sports,en,us,2025-07-03,Jio-Krafton Partnership: A “Milestone” for Ind...,Reliance,0.128788,0.0,0.0
3,,"Reliance Retail invests in UK’s FaceGym, to br...","Reliance Retail invests in UK’s FaceGym, to br...",Business Line,business,en,us,2025-07-03,"Reliance Retail invests in UK’s FaceGym, to br...",Reliance,0.0,0.8225,0.0
4,Bloomberg News,Reliance To Spin Off Consumer Goods Brands Ahe...,Reliance To Spin Off Consumer Goods Brands Ahe...,Bloomberg | Latest And Live Business,business,en,us,2025-07-03,Reliance To Spin Off Consumer Goods Brands Ahe...,Reliance,0.0,0.0,0.0


In [None]:
# Score the SBI articles with all three models; the scored CSV is saved
# alongside the input file.
sbi_df = run_sentiment_pipeline(sbi_file, batch_size=8)

Finding TextBlob Scores...
Finding Vader Score...
Finsing FinBERT Score 


FinBERT Sentiment: 100%|██████████| 66/66 [00:14<00:00,  4.65it/s]

Saved results to: /content/drive/MyDrive/processed/news/clean_sbi_with_sentiments.csv





In [None]:
# Preview the first rows to compare the three score columns side by side.
sbi_df.head()

Unnamed: 0,author,title,description,source,category,language,country,published_at,combined_text,company,textblob_score,vader_score,finbert_score
0,Subhana Shaikh,RBI Monetary Policy: SBI Research Bets On Shar...,RBI Monetary Policy: SBI Research Bets On Shar...,Bloomberg | Latest And Live Business,business,en,us,2025-06-03,RBI Monetary Policy: SBI Research Bets On Shar...,SBI,-0.125,-0.4939,0.0
1,,"Day Trading Guide for June 4, 2025: Intraday s...",Day Trading Guide gives you the key intraday s...,Business Line,business,en,us,2025-06-04,"Day Trading Guide for June 4, 2025: Intraday s...",SBI,-0.05625,0.7717,0.0
2,Personal Finance Desk,Latest PSU Banks FD Rates: Check SBI vs PNB vs...,Latest PSU Banks FD Rates: Check SBI vs PNB vs...,Bloomberg | Latest And Live Business,business,en,us,2025-06-04,Latest PSU Banks FD Rates: Check SBI vs PNB vs...,SBI,0.5,0.7184,0.0
3,,"Day Trading Guide for June 5, 2025: Intraday s...",Day Trading Guide gives you the key intraday s...,Business Line,business,en,us,2025-06-05,"Day Trading Guide for June 5, 2025: Intraday s...",SBI,-0.05625,0.7717,0.0
4,,"Bajaj Finserv block deal: SBI MF, Goldman Sach...",Entities buy 2.86 crore shares in Bajaj Finser...,Business Line,business,en,us,2025-06-06,"Bajaj Finserv block deal: SBI MF, Goldman Sach...",SBI,-0.15,-0.1779,0.0


In [None]:
# Score the banking-industry articles with all three models; the scored CSV
# is saved alongside the input file.
sbi_ind_df = run_sentiment_pipeline(sbi_industry_file, batch_size=8)

Finding TextBlob Scores...
Finding Vader Score...
Finsing FinBERT Score 


FinBERT Sentiment: 100%|██████████| 6/6 [00:00<00:00, 14.77it/s]

Saved results to: /content/drive/MyDrive/processed/news/clean_sbi_industry_with_sentiments.csv





In [None]:
# Preview the first rows to compare the three score columns side by side.
sbi_ind_df.head()

Unnamed: 0,author,title,description,source,category,language,country,published_at,combined_text,company,textblob_score,vader_score,finbert_score
0,nurcmedianext.com,RBI study proposes daily financial conditions ...,In a bid to enhance real-time monitoring of th...,nurcmedianext.com,business,en,us,2025-06-05,RBI study proposes daily financial conditions ...,Banking Industry,0.0,0.0,0.0
1,nurcmedianext.com,RBI ups agency commission for banks conducting...,This is possibly aimed at encouraging the agen...,nurcmedianext.com,business,en,us,2025-06-11,RBI ups agency commission for banks conducting...,Banking Industry,0.25,0.7346,0.530984
2,nurcmedianext.com,Good show by banks as most register a rise in ...,As the first quarter of the current financial ...,nurcmedianext.com,business,en,us,2025-06-16,Good show by banks as most register a rise in ...,Banking Industry,0.145,0.5187,0.0
3,nurcmedianext.com,"RBI mulls stricter norms to curb mis-selling, ...","RBI mulls stricter norms to curb mis-selling, ...",nurcmedianext.com,business,en,us,2025-06-19,"RBI mulls stricter norms to curb mis-selling, ...",Banking Industry,0.16,0.7184,0.0
4,nurcmedianext.com,RBI imposes ₹54.78 crore in penalties on 353 b...,RBI imposes Rs 54.78 crore in penalties on ban...,nurcmedianext.com,business,en,us,2025-06-21,RBI imposes ₹54.78 crore in penalties on 353 b...,Banking Industry,-0.125,-0.6369,-0.952848
