In [25]:
import glob
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
import torch
from huggingface_hub import login

# ahmedrachid/FinancialBERT-Sentiment-Analysis
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

from transformers import RobertaTokenizer, AutoModelForSequenceClassification

In [None]:
import os

# Never store an access token in the notebook itself: read it from the
# HF_TOKEN environment variable. When the variable is unset, `token=None`
# lets `login` fall back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# !huggingface-cli login --token xx

In [4]:
# !huggingface-cli whoami

In [5]:
# !huggingface-cli logout

In [None]:
# Single source of truth for the batch being processed. Both folders are built
# from these values so the input and output can never point at different dates.
EXECUTION_DATE = '2025-05-15'
NEWS_SOURCE = '04_kaohoon'

# Folders scanned for input CSV files (more than one may be listed).
read_paths = [
    f'../data/4_clean_news/{NEWS_SOURCE}/{EXECUTION_DATE}'
]
# Presumably the folder the sentiment results are written to; it is not used
# in the cells shown here.
write_path = f'../data/5_sentiment/{NEWS_SOURCE}/{EXECUTION_DATE}'

In [30]:
# Load CSV
# `glob` returns matches in arbitrary order, so sort them to keep the row
# order of the combined frame stable between runs.
csv_files = []
for read_path in read_paths:
    csv_files.extend(sorted(glob.glob(read_path + "/*.csv")))

# Fail early and name the searched folders, instead of letting pandas raise
# its generic "No objects to concatenate" error.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in: {read_paths}")

df_list = [pd.read_csv(file) for file in csv_files]

df = pd.concat(df_list, ignore_index=True)

# Text fed to the models: article body followed by the headline. Both columns
# are NaN-filled: a missing title would otherwise turn the whole `news_text`
# into NaN (a float, not a string) and break the sentiment calls further down.
df['news_text'] = df['news_content'].fillna('') + ' ' + df['news_title'].fillna('')
df

Unnamed: 0,news_datetime,news_title,news_content,execution_date,news_text
0,2025-05-15,ADVANC Eyes 5% Growth in 2025 from Rising Dema...,"Somruetai Tantakitti, Head of Investor Relatio...",2025-05-15,"Somruetai Tantakitti, Head of Investor Relatio..."
1,2025-05-14,B.Grimm Power Announces 51.6% Increase in 1Q25...,"Dr. Harald Link, Group President of B.Grimm Po...",2025-05-15,"Dr. Harald Link, Group President of B.Grimm Po..."
2,2025-05-08,Thai Union Secures $150 Million Blue Loan from...,"Thai Union Group PCL (SET: TU), the world’s se...",2025-05-15,"Thai Union Group PCL (SET: TU), the world’s se..."
3,2025-05-08,UBS Upgrades ADVANC’s Target Price to THB 339 ...,Following the release of Q1 2025 results and t...,2025-05-15,Following the release of Q1 2025 results and t...
4,2025-05-07,ADVANC Rises 4% after Robust 1Q25 Earnings Gro...,"Following their analyst briefing, CGS Internat...",2025-05-15,"Following their analyst briefing, CGS Internat..."
5,2025-05-06,ADVANC Reports 25% Profit Growth in 1Q25 amid ...,Advanced Info Service Public Company Limited (...,2025-05-15,Advanced Info Service Public Company Limited (...


# Model #1

In [8]:
# Model #1 voter: one checkpoint id shared by the classifier and its tokenizer.
model_name = "tabularisai/multilingual-sentiment-analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [9]:
def predict_sentiment(texts):
    """Classify text with the module-level `tokenizer` / `model` pair.

    Class indices 0-1 map to "negative", 2 to "neutral" and 3-4 to "positive".

    Args:
        texts: Input forwarded unchanged to `tokenizer`. The notebook calls
            this with one string at a time.

    Returns:
        The label of the first prediction only. If `texts` holds several
        items they are all scored, but the remaining labels are discarded.
    """
    # Fold the 5-point scale (Very Negative, Negative, Neutral, Positive,
    # Very Positive) into three labels.
    sentiment_map = dict(enumerate(("negative", "negative", "neutral", "positive", "positive")))

    encoded = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():  # inference only, no gradients required
        logits = model(**encoded).logits

    class_probs = torch.nn.functional.softmax(logits, dim=-1)
    class_ids = class_probs.argmax(dim=-1).tolist()
    labels = [sentiment_map[class_id] for class_id in class_ids]
    return labels[0]

In [10]:
# Apply sentiment analysis
df['sentiment_1'] = df['news_text'].apply(predict_sentiment)
df

Unnamed: 0,news_datetime,news_title,news_content,news_text,sentiment_1
0,2025-05-15T17:22:27+07:00,ADVANC Eyes 5% Growth in 2025 from Rising Dema...,"\nSomruetai Tantakitti, Head of Investor Relat...","\nSomruetai Tantakitti, Head of Investor Relat...",neutral
1,2025-05-14T21:33:03+07:00,B.Grimm Power Announces 51.6% Increase in 1Q25...,"\nDr. Harald Link, Group President of B.Grimm ...","\nDr. Harald Link, Group President of B.Grimm ...",positive
2,2025-05-08T12:01:59+07:00,Thai Union Secures $150 Million Blue Loan from...,"\nThai Union Group PCL (SET: TU), the world’s ...","\nThai Union Group PCL (SET: TU), the world’s ...",positive
3,2025-05-08T11:10:37+07:00,UBS Upgrades ADVANC’s Target Price to THB 339 ...,\nFollowing the release of Q1 2025 results and...,\nFollowing the release of Q1 2025 results and...,positive
4,2025-05-07T11:06:13+07:00,ADVANC Rises 4% after Robust 1Q25 Earnings Gro...,"\nFollowing their analyst briefing, CGS Intern...","\nFollowing their analyst briefing, CGS Intern...",positive
5,2025-05-06T18:12:10+07:00,ADVANC Reports 25% Profit Growth in 1Q25 amid ...,\nAdvanced Info Service Public Company Limited...,\nAdvanced Info Service Public Company Limited...,positive


# Model #2

In [11]:
# Model #2 voter. NOTE: this rebinds the global `model` / `tokenizer`, so any
# earlier function that reads those globals must not be re-run after this cell.
financialbert_checkpoint = "ahmedrachid/FinancialBERT-Sentiment-Analysis"
tokenizer = BertTokenizer.from_pretrained(financialbert_checkpoint)
model = BertForSequenceClassification.from_pretrained(financialbert_checkpoint, num_labels=3)

# Bundle both into a pipeline so a raw string can be classified in one call.
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

Device set to use cpu


In [12]:
def predict_sentiment(text):
    """Return the sentiment label the `nlp` pipeline assigns to `text`.

    Args:
        text: Article text to classify.

    Returns:
        The `label` field of the pipeline's first result.
    """
    # Truncate at the tokenizer level (512 tokens) rather than slicing the
    # string. A 512-*character* slice throws away most of an article and
    # still does not guarantee that the token limit is respected.
    result = nlp(text, truncation=True, max_length=512)[0]
    return result['label']

In [13]:
# Score every article with model #2, one call per row.
df['sentiment_2'] = [predict_sentiment(article) for article in df['news_text']]
df

Unnamed: 0,news_datetime,news_title,news_content,news_text,sentiment_1,sentiment_2
0,2025-05-15T17:22:27+07:00,ADVANC Eyes 5% Growth in 2025 from Rising Dema...,"\nSomruetai Tantakitti, Head of Investor Relat...","\nSomruetai Tantakitti, Head of Investor Relat...",neutral,positive
1,2025-05-14T21:33:03+07:00,B.Grimm Power Announces 51.6% Increase in 1Q25...,"\nDr. Harald Link, Group President of B.Grimm ...","\nDr. Harald Link, Group President of B.Grimm ...",positive,positive
2,2025-05-08T12:01:59+07:00,Thai Union Secures $150 Million Blue Loan from...,"\nThai Union Group PCL (SET: TU), the world’s ...","\nThai Union Group PCL (SET: TU), the world’s ...",positive,positive
3,2025-05-08T11:10:37+07:00,UBS Upgrades ADVANC’s Target Price to THB 339 ...,\nFollowing the release of Q1 2025 results and...,\nFollowing the release of Q1 2025 results and...,positive,positive
4,2025-05-07T11:06:13+07:00,ADVANC Rises 4% after Robust 1Q25 Earnings Gro...,"\nFollowing their analyst briefing, CGS Intern...","\nFollowing their analyst briefing, CGS Intern...",positive,negative
5,2025-05-06T18:12:10+07:00,ADVANC Reports 25% Profit Growth in 1Q25 amid ...,\nAdvanced Info Service Public Company Limited...,\nAdvanced Info Service Public Company Limited...,positive,positive


# Model #3

In [14]:
# Model #3 voter. NOTE: this rebinds the global `model` / `tokenizer` / `nlp`,
# so earlier prediction cells must not be re-run after this cell.
distilroberta_checkpoint = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
tokenizer = RobertaTokenizer.from_pretrained(distilroberta_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(distilroberta_checkpoint, num_labels=3)

# Bundle both into a pipeline so a raw string can be classified in one call.
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

Device set to use cpu


In [15]:
def predict_sentiment(text):
    """Return the sentiment label the `nlp` pipeline assigns to `text`.

    Args:
        text: Article text to classify.

    Returns:
        The `label` field of the pipeline's first result.
    """
    # Truncate at the tokenizer level (512 tokens) rather than slicing the
    # string. A 512-*character* slice throws away most of an article and
    # still does not guarantee that the token limit is respected.
    result = nlp(text, truncation=True, max_length=512)[0]
    return result['label']

In [16]:
# Score every article with model #3, one call per row.
df['sentiment_3'] = [predict_sentiment(article) for article in df['news_text']]
df

Unnamed: 0,news_datetime,news_title,news_content,news_text,sentiment_1,sentiment_2,sentiment_3
0,2025-05-15T17:22:27+07:00,ADVANC Eyes 5% Growth in 2025 from Rising Dema...,"\nSomruetai Tantakitti, Head of Investor Relat...","\nSomruetai Tantakitti, Head of Investor Relat...",neutral,positive,positive
1,2025-05-14T21:33:03+07:00,B.Grimm Power Announces 51.6% Increase in 1Q25...,"\nDr. Harald Link, Group President of B.Grimm ...","\nDr. Harald Link, Group President of B.Grimm ...",positive,positive,positive
2,2025-05-08T12:01:59+07:00,Thai Union Secures $150 Million Blue Loan from...,"\nThai Union Group PCL (SET: TU), the world’s ...","\nThai Union Group PCL (SET: TU), the world’s ...",positive,positive,positive
3,2025-05-08T11:10:37+07:00,UBS Upgrades ADVANC’s Target Price to THB 339 ...,\nFollowing the release of Q1 2025 results and...,\nFollowing the release of Q1 2025 results and...,positive,positive,positive
4,2025-05-07T11:06:13+07:00,ADVANC Rises 4% after Robust 1Q25 Earnings Gro...,"\nFollowing their analyst briefing, CGS Intern...","\nFollowing their analyst briefing, CGS Intern...",positive,negative,negative
5,2025-05-06T18:12:10+07:00,ADVANC Reports 25% Profit Growth in 1Q25 amid ...,\nAdvanced Info Service Public Company Limited...,\nAdvanced Info Service Public Company Limited...,positive,positive,positive


In [17]:
# if tie, get final result from Model #3
def majority_bias3(row):
    """Combine the three model votes into a single label by majority.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the labels
            under 'sentiment_1', 'sentiment_2' and 'sentiment_3'. Each label
            must be "negative", "neutral" or "positive".

    Returns:
        The label chosen by at least two models, or the Model #3 label when
        all three models disagree.

    Raises:
        KeyError: If a vote is not one of the three known labels.
    """
    votes = (row['sentiment_1'], row['sentiment_2'], row['sentiment_3'])

    tally = {"negative": 0, "neutral": 0, "positive": 0}
    for vote in votes:
        tally[vote] += 1

    winner = max(tally, key=tally.get)

    # With three votes the only possible tie is a 1-1-1 split, i.e. the top
    # count is 1. Check the count itself rather than relying on how max/min
    # happen to break ties.
    if tally[winner] == 1:
        return row['sentiment_3']
    return winner

df['sentiment_final'] = df.apply(majority_bias3, axis=1)
# `errors='ignore'` keeps the cell re-runnable: on a second run `news_text`
# is already gone and a plain drop would raise KeyError.
df = df.drop(columns='news_text', errors='ignore')
df

Unnamed: 0,news_datetime,news_title,news_content,sentiment_1,sentiment_2,sentiment_3,sentiment_final
0,2025-05-15T17:22:27+07:00,ADVANC Eyes 5% Growth in 2025 from Rising Dema...,"\nSomruetai Tantakitti, Head of Investor Relat...",neutral,positive,positive,positive
1,2025-05-14T21:33:03+07:00,B.Grimm Power Announces 51.6% Increase in 1Q25...,"\nDr. Harald Link, Group President of B.Grimm ...",positive,positive,positive,positive
2,2025-05-08T12:01:59+07:00,Thai Union Secures $150 Million Blue Loan from...,"\nThai Union Group PCL (SET: TU), the world’s ...",positive,positive,positive,positive
3,2025-05-08T11:10:37+07:00,UBS Upgrades ADVANC’s Target Price to THB 339 ...,\nFollowing the release of Q1 2025 results and...,positive,positive,positive,positive
4,2025-05-07T11:06:13+07:00,ADVANC Rises 4% after Robust 1Q25 Earnings Gro...,"\nFollowing their analyst briefing, CGS Intern...",positive,negative,negative,negative
5,2025-05-06T18:12:10+07:00,ADVANC Reports 25% Profit Growth in 1Q25 amid ...,\nAdvanced Info Service Public Company Limited...,positive,positive,positive,positive
