In [26]:
# import sys

# sys.path.append('../../../FinNLP/')  # https://github.com/AI4Finance-Foundation/FinNLP

In [3]:
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizerFast   # 4.30.2
from peft import PeftModel  # 0.4.0
import torch

from FinNLP.finnlp.benchmarks.fpb import test_fpb
from FinNLP.finnlp.benchmarks.fiqa import test_fiqa , add_instructions
from FinNLP.finnlp.benchmarks.tfns import test_tfns
from FinNLP.finnlp.benchmarks.nwgi import test_nwgi
from torch.backends import mps

# Whether the PyTorch MPS backend is usable on this machine; the model-loading
# cells branch on this flag to decide where the model is placed.
mps_enabled = torch.backends.mps.is_available()

## Load Model (pick one of the following blocks, according to the model you want to test)

#### 1. **FinGPT v3.1** based on ChatGLM2, runnable on 1 * RTX 3090

In [4]:
# v3.1 -- ChatGLM2-6B base model with the FinGPT v3.1 sentiment LoRA adapter on top.
base_model = "THUDM/chatglm2-6b"
peft_model = "oliverwang15/FinGPT_v31_ChatGLM2_Sentiment_Instruction_LoRA_FT"

# The ChatGLM2 repo is loaded through the Auto* classes with trust_remote_code enabled.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

if not mps_enabled:
    # No MPS backend: load the base weights in 8-bit and let `device_map="auto"` place them.
    model = AutoModel.from_pretrained(
        base_model,
        trust_remote_code=True,
        load_in_8bit = True,
        device_map = "auto",
    )
    model = PeftModel.from_pretrained(model, peft_model)
else:
    # MPS backend available: load the base weights and move them to the "mps" device.
    model = AutoModel.from_pretrained(base_model, trust_remote_code=True).to("mps")
    model = PeftModel.from_pretrained(model, peft_model).to("mps")

# Inference only: switch the adapted model to evaluation mode.
model = model.eval()

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

#### 2. **FinGPT v3.2** based on Llama2, runnable on 1 * A100; it also runs on 1 * RTX 3090 as long as `load_in_8bit = True` is added to the `LlamaForCausalLM.from_pretrained` call, but inference is slower

In [None]:
# v3.2 -- Llama-2-7B-chat base model with the FinGPT v3.2 sentiment LoRA adapter on top.
# base_model = "meta-llama/Llama-2-7b-chat-hf"  # Access needed
base_model = "daryl149/llama-2-7b-chat-hf"
peft_model = "oliverwang15/FinGPT_v32_Llama2_Sentiment_Instruction_LoRA_FT"

# `trust_remote_code` is only meaningful for the Auto* classes; the concrete
# Llama classes ignore it and emit a warning, so it is not passed here.
tokenizer = LlamaTokenizerFast.from_pretrained(base_model)
# Use the end-of-sequence token as the padding token for batched prompts.
tokenizer.pad_token = tokenizer.eos_token

if mps_enabled:
    model = LlamaForCausalLM.from_pretrained(base_model).to("mps")
else:
    # To fit a smaller GPU (e.g. 1 * RTX 3090), add `load_in_8bit = True` to this
    # call; inference will be slower.
    model = LlamaForCausalLM.from_pretrained(base_model, device_map = "cuda:0")

model = PeftModel.from_pretrained(model, peft_model)
model = torch.compile(model)  # Please comment this line if your platform does not support torch.compile
# Inference only: switch the adapted model to evaluation mode.
model = model.eval()

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Test

In [6]:
# Batch size passed to every benchmark call in this notebook
# (reported as "Batchsize" in each benchmark's output).
batch_size = 8

In [13]:
# FPB benchmark: evaluate the loaded model/tokenizer pair and keep the result in `res`.
res = test_fpb(
    model,
    tokenizer,
    batch_size=batch_size,
)



Prompt example:
Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.
Input: L&T has also made a commitment to redeem the remaining shares by the end of 2011 .
Answer: 


Total len: 1212. Batchsize: 8. Total steps: 152


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 152/152 [04:19<00:00,  1.71s/it]

Acc: 0.8506600660066007. F1 macro: 0.8396686677078228. F1 micro: 0.8506600660066006. F1 weighted (BloombergGPT): 0.8500004920027702. 





In [12]:
# FiQA benchmark: evaluate the loaded model/tokenizer pair, building prompts
# with `add_instructions`, and keep the result in `res`.
# Disabled development aid (autoreload + re-import of the benchmark helpers):
# %load_ext autoreload
# %autoreload 2
# from finnlp.benchmarks.fiqa import test_fiqa , add_instructions
res = test_fiqa(
    model,
    tokenizer,
    prompt_fun=add_instructions,
    batch_size=batch_size,
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Prompt example:
Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.
Input: This $BBBY stock options trade would have more than doubled your money https://t.co/Oa0loiRIJL via @TheStreet
Answer: 


Total len: 275. Batchsize: 8. Total steps: 35


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:51<00:00,  1.47s/it]

Acc: 0.8436363636363636. F1 macro: 0.7524122647015905. F1 micro: 0.8436363636363636. F1 weighted (BloombergGPT): 0.860437084621201. 





In [14]:
# TFNS benchmark: evaluate the loaded model/tokenizer pair and keep the result in `res`.
res = test_tfns(
    model,
    tokenizer,
    batch_size=batch_size,
)

Downloading readme:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/859k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/217k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]



Prompt example:
Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.
Input: $ALLY - Ally Financial pulls outlook https://t.co/G9Zdi1boy5
Answer: 


Total len: 2388. Batchsize: 8. Total steps: 299


 49%|████████████████████████████████████████████████████████████████████████████████████                                                                                       | 147/299 [03:31<03:38,  1.44s/it]


Process ForkProcess-11:
Process ForkProcess-18:
Process ForkProcess-16:
Process ForkProcess-12:
Process ForkProcess-17:
Process ForkProcess-10:
Process ForkProcess-19:
Process ForkProcess-15:
Process ForkProcess-20:
Process ForkProcess-8:
Process ForkProcess-7:
Process ForkProcess-13:
Process ForkProcess-14:
Process ForkProcess-4:
Process ForkProcess-5:
Process ForkProcess-3:
Process ForkProcess-9:
Process ForkProcess-6:
Process ForkProcess-2:
Process ForkProcess-1:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Trac

KeyboardInterrupt: 

In [None]:
# NWGI benchmark: evaluate the loaded model/tokenizer pair and keep the result in `res`.
res = test_nwgi(
    model,
    tokenizer,
    batch_size=batch_size,
)

Found cached dataset parquet (/xfs/home/tensor_zy/.cache/huggingface/datasets/oliverwang15___parquet/oliverwang15--news_with_gpt_instructions-ec641c48430028ee/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/2 [00:00<?, ?it/s]



Prompt example:
Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.
Input: In the latest trading session, Adobe Systems (ADBE) closed at $535.98, marking a +0.31% move from the previous day.
Answer: 


Total len: 4047. Batchsize: 8. Total steps: 506


100%|██████████| 506/506 [07:55<00:00,  1.06it/s]

Acc: 0.6360266864343959. F1 macro: 0.6443667929544722. F1 micro: 0.6360266864343959. F1 weighted (BloombergGPT): 0.6324602746076219. 



