In [None]:
!pip install transformers torch datasets openpyxl

In [None]:
import os

# Folder that receives the per-model evaluation spreadsheets written later on.
# exist_ok=True keeps this cell safe to re-run.
RESULTS_DIR = 'eval_results'
os.makedirs(RESULTS_DIR, exist_ok=True)

## Load dataset

In [2]:
import datasets

# Fetch the intent-understanding instruction dataset from the Hugging Face hub.
dataset_dict = datasets.load_dataset('klaylouis1932/OpenFinData-Intent-Understanding-Intruct')

# Work on the test split as a DataFrame and expose the row index as an explicit
# `id` column; later cells use it as the key to join predictions back on.
test_dataset = dataset_dict['test'].to_pandas()
test_dataset = test_dataset.assign(id=test_dataset.index)

  from .autonotebook import tqdm as notebook_tqdm


## ChatGLM3-6B

In [3]:
from transformers import AutoModel, AutoTokenizer

# NOTE(review): trust_remote_code=True executes the tokenizer/model code shipped
# in the hub repository; consider pinning a `revision` once a vetted commit is known.
BASE_MODEL_ID = "THUDM/chatglm3-6b"
base_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)

# Load the checkpoint, cast it to half precision, move it to the GPU and
# switch it to evaluation mode.
base_model = (
    AutoModel.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)

Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00,  7.30it/s]


In [4]:
# Take the first test instruction as a single sample prompt for a quick smoke test.
question = test_dataset['instruction'].iat[0]

In [5]:
%%time
response, history = base_model.chat(base_tokenizer, question, history=[])
print(response)


B
CPU times: user 616 ms, sys: 268 ms, total: 884 ms
Wall time: 902 ms


In [6]:
%%time
from collections import defaultdict
import time

prediction_results = defaultdict(list)

n = 0
for i, row in test_dataset.iterrows():
    # question = row['question'] # original question
    question = row['instruction'] # rewrite question/instruction
    answer = row['output']
    idx = row['id']
    prediction_results['id'].append(idx)
    
    try:
        # prediction = llm_claude35.invoke_model(question)
        prediction, _ = base_model.chat(base_tokenizer, question, history=[])
    except:
        print(f'Error occurred for question: {question}')
        prediction = None
    
    prediction_results['pred_answer'].append(prediction)
    # time.sleep(10)
    
    n += 1
    if n%15 == 0:
        print(f'Progress({n}/{len(test_dataset)})...')
    # if n>=2:
    #     break

Progress(15/75)...
Progress(30/75)...
Progress(45/75)...
Progress(60/75)...
Progress(75/75)...
CPU times: user 10.6 s, sys: 6.09 ms, total: 10.6 s
Wall time: 10.6 s


In [7]:
import numpy as np
import pandas as pd

# Turn the collected predictions into a frame and attach them to the reference
# columns of the test set; the left join on `id` keeps every test row.
pred = pd.DataFrame(prediction_results)
reference_columns = test_dataset[['id', 'instruction', 'output']]
pred_result_df = reference_columns.merge(pred, on='id', how='left')

# Strict scoring: 1 only when the raw model reply equals the reference label
# exactly, so a reply that wraps the letter in extra text scores 0.
is_exact = pred_result_df['pred_answer'] == pred_result_df['output']
pred_result_df['exact_match'] = np.where(is_exact, 1, 0)


In [8]:
# Share of test rows whose prediction matches the reference label exactly.
accuracy = pred_result_df['exact_match'].sum()/len(pred_result_df)
# This section evaluates the base (not fine-tuned) checkpoint, so the metric
# must be labelled as such; the previous label was copied from the fine-tuned section.
print(f'Accuracy with base ChatGLM3 is : {accuracy*100}%')

Accuracy with Finetuned ChatGLM3 is : 45.33333333333333%


In [9]:
# Inspect replies longer than a single character, i.e. cases where the model
# answered with more than just an option letter.
verbose_mask = pred_result_df['pred_answer'].str.len() > 1
pred_result_df[verbose_mask]

Unnamed: 0,id,instruction,output,pred_answer,exact_match
39,39,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: REITS基金有哪些\n\n...,E,选项E，基金问询。,0
40,40,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 证券基金还能继续涨吗？\n\...,E,C. 大盘问询,0
44,44,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 可买哪只ETF\n\n请从以...,E,选项E，基金问询。,0
47,47,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 哪些基金适合短线操作？\n\...,E,选项E：基金问询,0


In [10]:
# Display the per-question comparison table (id, instruction, output,
# pred_answer, exact_match); pandas truncates the middle rows when rendering.
pred_result_df

Unnamed: 0,id,instruction,output,pred_answer,exact_match
0,0,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 商业银行可以吗\n\n请从以...,A,A,1
1,1,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 什么板块可以布局\n\n请从...,A,A,1
2,2,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 看好哪些赛道\n\n请从以下...,A,B,0
3,3,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 基建在近期会创新高吗？\n\...,A,A,1
4,4,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 明年医药板块还能投资吗？\n...,A,A,1
...,...,...,...,...,...
70,70,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 密码锁了多久开\n\n请从以...,D,A,0
71,71,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 新股申购什么时候才能正常申购...,D,A,0
72,72,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 如何更改风险设定值\n\n请...,D,B,0
73,73,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 客户基本信息在哪里看\n\n...,D,A,0


In [11]:

# Persist the base-model evaluation table as a spreadsheet, without the
# DataFrame index column.
model_name = 'chatglm3_6b'
output_path = f'eval_results/intent_understanding_{model_name}.xlsx'
pred_result_df.to_excel(output_path, index=False)

## Finetuned ChatGLM3-6B

In [24]:
from transformers import AutoModel, AutoTokenizer

# NOTE(review): trust_remote_code=True executes the tokenizer/model code shipped
# in the hub repository; consider pinning a `revision` once a vetted commit is known.
FINETUNED_MODEL_ID = "klaylouis1932/chatglm3-6b-lora-ft-fin"
finetuned_tokenizer = AutoTokenizer.from_pretrained(FINETUNED_MODEL_ID, trust_remote_code=True)

# Load the fine-tuned checkpoint, cast it to half precision, move it to the
# GPU and switch it to evaluation mode.
finetuned_model = (
    AutoModel.from_pretrained(FINETUNED_MODEL_ID, trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)

A new version of the following files was downloaded from https://huggingface.co/klaylouis1932/chatglm3-6b-lora-ft-fin:
- tokenization_chatglm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.
A new version of the following files was downloaded from https://huggingface.co/klaylouis1932/chatglm3-6b-lora-ft-fin:
- configuration_chatglm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/klaylouis1932/chatglm3-6b-lora-ft-fin:
- quantization.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new ve

In [None]:
# Take the first test instruction as a single sample prompt for a quick smoke test.
question = test_dataset['instruction'].iat[0]

In [30]:
%%time
response, history = finetuned_model.chat(finetuned_tokenizer, question, history=[])
print(response)


D
CPU times: user 135 ms, sys: 0 ns, total: 135 ms
Wall time: 132 ms


In [33]:
%%time
from collections import defaultdict
import time

prediction_results = defaultdict(list)

n = 0
for i, row in test_dataset.iterrows():
    # question = row['question'] # original question
    question = row['instruction'] # rewrite question/instruction
    answer = row['output']
    idx = row['id']
    prediction_results['id'].append(idx)
    
    try:
        # prediction = llm_claude35.invoke_model(question)
        prediction, _ = finetuned_model.chat(finetuned_tokenizer, question, history=[])
    except:
        print(f'Error occurred for question: {question}')
        prediction = None
    
    prediction_results['pred_answer'].append(prediction)
    # time.sleep(10)
    
    n += 1
    if n%15 == 0:
        print(f'Progress({n}/{len(test_dataset)})...')
    # if n>=2:
    #     break

Progress(15/75)...
Progress(30/75)...
Progress(45/75)...
Progress(60/75)...
Progress(75/75)...
CPU times: user 9.78 s, sys: 0 ns, total: 9.78 s
Wall time: 9.77 s


In [40]:
import numpy as np
import pandas as pd

# Turn the collected predictions into a frame and attach them to the reference
# columns of the test set; the left join on `id` keeps every test row.
pred = pd.DataFrame(prediction_results)
reference_columns = test_dataset[['id', 'instruction', 'output']]
pred_result_df = reference_columns.merge(pred, on='id', how='left')

# Strict scoring: 1 only when the raw model reply equals the reference label exactly.
is_exact = pred_result_df['pred_answer'] == pred_result_df['output']
pred_result_df['exact_match'] = np.where(is_exact, 1, 0)


In [43]:
# Share of test rows whose prediction matches the reference label exactly.
n_correct = pred_result_df['exact_match'].sum()
accuracy = n_correct / len(pred_result_df)
print(f'Accuracy with Finetuned ChatGLM3 is : {accuracy*100}%')

Accuracy with Finetuned ChatGLM3 is : 85.33333333333334%


In [44]:
# Inspect replies longer than a single character, i.e. cases where the model
# answered with more than just an option letter.
verbose_mask = pred_result_df['pred_answer'].str.len() > 1
pred_result_df[verbose_mask]

Unnamed: 0,id,instruction,output,pred_answer,exact_match


In [45]:
# Display the per-question comparison table (id, instruction, output,
# pred_answer, exact_match); pandas truncates the middle rows when rendering.
pred_result_df

Unnamed: 0,id,instruction,output,pred_answer,exact_match
0,0,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 商业银行可以吗\n\n请从以...,A,D,0
1,1,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 什么板块可以布局\n\n请从...,A,A,1
2,2,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 看好哪些赛道\n\n请从以下...,A,C,0
3,3,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 基建在近期会创新高吗？\n\...,A,C,0
4,4,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 明年医药板块还能投资吗？\n...,A,A,1
...,...,...,...,...,...
70,70,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 密码锁了多久开\n\n请从以...,D,D,1
71,71,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 新股申购什么时候才能正常申购...,D,D,1
72,72,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 如何更改风险设定值\n\n请...,D,D,1
73,73,作为一个意图情绪助手，请分析以下问句的意图类型。\n\n问句: 客户基本信息在哪里看\n\n...,D,D,1


In [51]:

# Persist the fine-tuned-model evaluation table as a spreadsheet, without the
# DataFrame index column.
model_name = 'chatglm3_6b_finetuned'
output_path = f'eval_results/intent_understanding_{model_name}.xlsx'
pred_result_df.to_excel(output_path, index=False)