In [2]:
# Colab setup: uncomment the lines below to install any missing dependencies.
#!pip install datasets
#!pip install boto3
#!pip install -U bitsandbytes
#!pip install evaluate

In [1]:
#########################################################
##                PYTHON PACKAGE IMPORTS               ##
#########################################################
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
from sklearn.model_selection import train_test_split
from peft import PeftModel, PeftConfig
from datasets import load_dataset
import datasets
import torch
import pandas as pd
import boto3
import io

from google.colab import userdata
# Look up both AWS credentials through Colab's userdata store so that no
# secret value is written into the notebook itself.
aws_access_key_id, aws_secret_access_key = (
    userdata.get(secret_name)
    for secret_name in ('aws_access_key_id', 'aws_secret_access_key')
)

#!cp /content/drive/MyDrive/LoRaSequenceClassifier.py /content/
#!cp /content/drive/MyDrive/FinancialSentimentDataLoader.py /content/
#!cp /content/drive/MyDrive/SentimentInference.py /content/
#!cp /content/LoRaCausalLM.py /content/drive/MyDrive/

from SentimentDataLoader import SentimentDataLoader
from LoRaSequenceClassifier import LoRaSequenceClassifier
from SentimentInference import SentimentInference

# Base checkpoint identifier used when loading the tokenizer and when
# constructing the LoRA classifier.
# The AWS credential names are already bound by the userdata lookups earlier in
# this cell; re-assigning each name to itself was a no-op and has been removed.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

#from huggingface_hub import notebook_login

In [3]:
#notebook_login()

In [2]:
############################################################
#####   LOAD TRAIN/TEST DATA AS HUGGINGFACE DATASETS   #####
############################################################
# Tokenizer for the base checkpoint. If it defines no padding token, register
# '[PAD]' as one.
# NOTE(review): adding a new token grows the tokenizer vocabulary; confirm the
# model side accounts for it if this branch is ever taken.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Fetch each source with for_training=True, in the order FiQA, FPB, NWGI, TFNS.
loader = SentimentDataLoader()
fiqa_data, fpb_data, nwgi_data, tfns_data = (
    fetch(for_training=True)
    for fetch in (
        loader.get_fiqa_data,
        loader.get_fpb_data,
        loader.get_nwgi_data,
        loader.get_tfns_data,
    )
)

# Stack the four frames under a fresh index, then hold out 20% of the rows
# with a fixed seed so the split is repeatable.
combined_data = pd.concat(
    [fiqa_data, fpb_data, nwgi_data, tfns_data],
    ignore_index=True,
)
train_data, test_data = train_test_split(
    combined_data,
    test_size=0.2,
    random_state=42,
)

# Run both partitions through the loader's process_dataset step with the
# tokenizer; `test_data` is rebound from the raw split to the processed result.
training_data, test_data = (
    loader.process_dataset(split_frame, tokenizer)
    for split_frame in (train_data, test_data)
)

Map:   0%|          | 0/24458 [00:00<?, ? examples/s]

Map:   0%|          | 0/6115 [00:00<?, ? examples/s]

In [3]:
###########################################################################
##  FINE-TUNE DEEPSEEK R1-DISTILL MODEL: LoRA SEQUENCE CLASSIFICATION    ##
###########################################################################
# Build the classifier wrapper for the base checkpoint; "./deepseek_cls_model"
# is the same path the benchmark section later loads the adapter from.
classifier = LoRaSequenceClassifier(
    model_name=model_name,
    final_output_dir="./deepseek_cls_model",
)

# Train on the processed training split and evaluate on the processed
# held-out split.
classifier.fine_tune(
    train_dataset=training_data,
    eval_dataset=test_data,
    num_train_epochs=10,
    batch_size=4,
)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Trainable parameters:
trainable params: 4,362,752 || all params: 1,548,081,664 || trainable%: 0.2818


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
500,0.9849,0.668414,0.653857,0.413449,0.475058,0.365985
1000,0.6215,0.578735,0.702698,0.461493,0.582357,0.382175
1500,0.5433,0.491723,0.756282,0.580464,0.680978,0.505805
2000,0.4392,0.416641,0.800981,0.692961,0.713296,0.673753
2500,0.4052,0.399492,0.809103,0.706257,0.724987,0.688471
3000,0.3863,0.391095,0.815045,0.715615,0.73401,0.698119
3500,0.3799,0.386288,0.818043,0.71926,0.740433,0.699264
4000,0.3711,0.382605,0.819842,0.724927,0.738136,0.712183
4500,0.3665,0.380366,0.822022,0.727758,0.742429,0.713655
5000,0.3603,0.378425,0.822786,0.730095,0.741484,0.719052


In [None]:
############################################
##           B E N C H M A R K S          ##
############################################

In [7]:
#!pip install datasets

In [9]:
# Disabled: clone of the FinNLP repository (commented out like the other setup commands).
#!git clone https://github.com/AI4Finance-Foundation/FinNLP

In [10]:
#!cp -r /content/drive/MyDrive/finetuned_classification_model /content/

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Local directory of the fine-tuned adapter, and the adapter's configuration
peft_model_path = "./deepseek_cls_model"
peft_config = PeftConfig.from_pretrained(peft_model_path)

# Tokenizer of the base checkpoint named in the adapter configuration
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

# Base sequence-classification model with three output labels
base_model = AutoModelForSequenceClassification.from_pretrained(
    peft_config.base_model_name_or_path,
    num_labels=3,
)

# Wrap the base model with the saved adapter
inference_model = PeftModel.from_pretrained(base_model, peft_model_path)

# Move the adapter-wrapped model onto the selected device
inference_model.to(device)

print("PEFT model loaded successfully!")

Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PEFT model loaded successfully!


In [5]:
# Build the inference helper from the adapter-wrapped model and its tokenizer.
# It receives whichever `inference_model` / `tokenizer` objects are bound at the
# moment this cell runs.
sentiment_inference = SentimentInference(inference_model, tokenizer)

In [None]:
#########################
#          FPB          #
#########################

In [6]:
# FPB benchmark: fetch the for_training=False data, run it through the
# inference helper, then show a preview of the result alongside both metrics.
fpb_data = loader.get_fpb_data(for_training=False)
dataframe, accuracy, f1 = sentiment_inference.process_dataframe(fpb_data)
print(
    "Processed DataFrame:",
    dataframe.head(),
    f"\nAccuracy: {accuracy}",
    f"F1 Score: {f1}",
    sep="\n",
)

Processed DataFrame:
                                               title  sentiment  prediction
0  L&T has also made a commitment to redeem the r...          2           2
1  Copper , lead and nickel also dropped ... HBOS...          1           1
2  Approximately SEK 166 million in repayments ha...          2           1
3  The new factory working model and reorganisati...          0           0
4  Also , Technopolis plans to build a 100 millio...          2           2

Accuracy: 0.8123711340206186
F1 Score: 0.7903549558260342


In [None]:
#########################
#          FIQA         #
#########################

In [7]:
# FiQA benchmark: fetch the for_training=False data, run it through the
# inference helper, then show a preview of the result alongside both metrics.
fiqa_data = loader.get_fiqa_data(for_training=False)
dataframe, accuracy, f1 = sentiment_inference.process_dataframe(fiqa_data)
print(
    "Processed DataFrame:",
    dataframe.head(),
    f"\nAccuracy: {accuracy}",
    f"F1 Score: {f1}",
    sep="\n",
)

Processed DataFrame:
                                               title  sentiment  prediction
0  This $BBBY stock options trade would have more...          0           0
1  $COST short finally making gains. I will take ...          1           0
2  $KO had missed earnings expectations for the l...          0           0
3  #Fintech provider $CAFN Cachet Financial Solut...          0           0
4  Daily Mail owner considering Yahoo bid $yhoo ,...          0           0

Accuracy: 0.64
F1 Score: 0.5772104571097395


In [30]:
#########################
#          NWGI         #
#########################

In [8]:
# NWGI benchmark: fetch the for_training=False data, run it through the
# inference helper, then show a preview of the result alongside both metrics.
nwgi_data = loader.get_nwgi_data(for_training=False)
dataframe, accuracy, f1 = sentiment_inference.process_dataframe(nwgi_data)
print(
    "Processed DataFrame:",
    dataframe.head(),
    f"\nAccuracy: {accuracy}",
    f"F1 Score: {f1}",
    sep="\n",
)

Processed DataFrame:
                                               title  sentiment  prediction
0  In the latest trading session, Adobe Systems (...          2           2
1  Tech stocks are down today after an antitrust ...          1           1
2  Intel Corp is committing $20 billion to build ...          0           0
3  High costs and supply chain disruptions are li...          1           1
4  AMD still seems set to generate significant gr...          0           0

Accuracy: 0.6651840869780083
F1 Score: 0.6675771820332496


In [44]:
#########################
#          TFNS         #
#########################

In [9]:
# TFNS benchmark: fetch the for_training=False data, run it through the
# inference helper, then show a preview of the result alongside both metrics.
tfns_data = loader.get_tfns_data(for_training=False)
dataframe, accuracy, f1 = sentiment_inference.process_dataframe(tfns_data)
print(
    "Processed DataFrame:",
    dataframe.head(),
    f"\nAccuracy: {accuracy}",
    f"F1 Score: {f1}",
    sep="\n",
)

Processed DataFrame:
                                               title  sentiment  prediction
0  $ALLY - Ally Financial pulls outlook https://t...          1           2
1  $DELL $HPE - Dell, HPE targets trimmed on comp...          1           1
2  $PRTY - Moody's turns negative on Party City h...          1           1
3                   $SAN: Deutsche Bank cuts to Hold          1           1
4                  $SITC: Compass Point cuts to Sell          1           1

Accuracy: 0.8408710217755444
F1 Score: 0.7984305340005111


In [22]:
# Adapter identifier handed to PEFT. Unlike the earlier "./deepseek_cls_model"
# path this is not a relative directory; it looks like a Hub repository id.
# NOTE(review): confirm the "huggnface" namespace spelling is intentional.
peft_model_path = "huggnface/FinDeepSeek-R1-Distill-Qwen-1.5B-LoRA-Sentiment"
peft_config = PeftConfig.from_pretrained(peft_model_path)

# Three-label sequence-classification base model, resolved from the base
# checkpoint recorded in the adapter configuration
base_model = AutoModelForSequenceClassification.from_pretrained(
    peft_config.base_model_name_or_path,
    num_labels=3,
)

# Attach the adapter to the base model
inference_model = PeftModel.from_pretrained(base_model, peft_model_path)

# Matching tokenizer for the same base checkpoint
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

# Run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inference_model.to(device)

# NOTE(review): `inference_model` and `tokenizer` are rebound here, but no
# visible cell rebuilds `sentiment_inference` afterwards; confirm a later cell
# re-creates it before benchmarking this adapter.
print("Base + PEFT adapter model loaded successfully!")

Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Base + PEFT adapter model loaded successfully!
