In [31]:
from bdshare import get_agm_news, get_all_news
import pandas as pd
import datetime as dt

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the FinBERT sequence-classification model and its matching tokenizer by checkpoint name
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


code = 'EBL' # IMPORTANT: set this to the DSE code of the stock you want to analyse
filename = f"{code}.csv"  # CSV that the sentiment-scored news is written to and later read back from


def get_all_news_today(symbol=None):
    """Fetch the news feed for a DSE stock and realign its columns.

    Args:
        symbol: DSE trading code to query. When omitted, the module-level
            ``code`` is used, which is what existing zero-argument calls get.

    Returns:
        The DataFrame returned by ``get_all_news`` with the "News" column
        moved up by one row and the "Post Date" column moved up by two rows.
        Rows left incomplete by the shifts are NOT removed here; the caller
        is expected to drop them.
    """
    df = get_all_news(code if symbol is None else symbol)
    # NOTE(review): presumably bdshare emits the title, body and date of one
    # news item on consecutive rows, so these shifts put them on the same
    # row - confirm against the raw get_all_news output.
    df["News"] = df["News"].shift(-1)
    df["Post Date"] = df["Post Date"].shift(-2)
    return df

In [32]:
# Keep only the rows in which every column is populated.
df = get_all_news_today().dropna(how="any", axis="index")


In [33]:
def sentiment_analysis(text):
    """Classify one piece of text with the module-level FinBERT model.

    Args:
        text: The text to score (a single news item).

    Returns:
        A flat tuple ``(label, score_0, score_1, score_2)`` where ``label`` is
        "Positive", "Negative" or "Neutral" and the scores are the softmax
        probabilities of the three classes in model output order.
    """
    # Truncate to the encoder's position limit; without this, an item longer
    # than the limit makes the model call fail instead of being scored.
    encoded_inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**encoded_inputs).logits

    predicted_label = torch.argmax(logits, dim=-1).item()
    confidence_scores = torch.softmax(logits, dim=-1).cpu().numpy().flatten()

    # NOTE(review): assumes class indices 0/1/2 mean positive/negative/neutral
    # for this checkpoint - verify against model.config.id2label.
    sentiment_labels = {0: "Positive", 1: "Negative", 2: "Neutral"}

    # Label first, then the per-class scores unpacked as separate elements.
    return (sentiment_labels[predicted_label], *confidence_scores)

In [34]:
# Score every news item once; each result is a flat tuple of
# (label, positive score, negative score, neutral score).
analysis_results = df["News"].apply(sentiment_analysis)

# Fan the tuple positions out into their own columns, in tuple order.
result_columns = (
    "Sentiment",
    "Confidence_score_Positive",
    "Confidence_score_Negative",
    "Confidence_score_Neutral",
)
for position, column_name in enumerate(result_columns):
    df[column_name] = analysis_results.apply(
        lambda result, position=position: result[position]
    )

# Persist the scored news; the intermediate tuples are never stored in df.
df.to_csv(filename, index=False)

In [35]:

# IMPORTANT: point the first read at the price CSV you downloaded for the
# stock that corresponds to the DSE code set above.
df_one = pd.read_csv('ENBK.csv')
# Scored news written earlier in the notebook; the title column is not needed.
df_two = pd.read_csv(filename).drop(columns=['News Title'], errors='ignore')

# Normalise both date columns to 'YYYY-MM-DD' strings so they compare equal.
df_one = df_one.assign(
    Date=pd.to_datetime(df_one['Date']).dt.strftime('%Y-%m-%d')
)
df_two = df_two.assign(
    **{'Post Date': pd.to_datetime(df_two['Post Date']).dt.strftime('%Y-%m-%d')}
).rename(columns={'Post Date': 'Date'})

# Inner join: only dates present in both frames survive.
merged_df = (
    df_one
    .merge(df_two, on='Date', how='inner')
    .rename(columns={'News': 'Content'})
)

# Write the combined frame next to the inputs and report where it went.
merged_filename = f"Merged_{code}.csv"
merged_df.to_csv(merged_filename, index=False)

print(f"Merged CSV file '{merged_filename}' saved successfully.")


Merged CSV file 'Merged_EBL.csv' saved successfully.
