In [1]:
import Information_Entropy
import torch
import os
print("Cuda is available:", torch.cuda.is_available())
from accelerate import Accelerator
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import shap
import matplotlib.pyplot as plt
from IPython.core.display import HTML
torch.cuda.empty_cache()
accelerator = Accelerator()
accelerator.free_memory()

from transformers.utils import logging
logging.set_verbosity_error()
import logging
logging.getLogger('shap').setLevel(logging.ERROR)

Cuda is available: True


In [2]:
#SDK模型下载
from modelscope import snapshot_download

# 定义目标路径
target_cache_dir = '/autodl-tmp/modelscope/hub'

# 检查路径是否存在且可写
if not os.path.exists(target_cache_dir):
    raise FileNotFoundError(f"目标下载路径不存在: {target_cache_dir}")

if not os.access(target_cache_dir, os.W_OK):
    raise PermissionError(f"目标下载路径不可写: {target_cache_dir}")

# 如果路径存在且可写，则继续下载
model_dir = snapshot_download('ydyajyA/Llama-2-13b-chat-hf', cache_dir=target_cache_dir)
model_name = 'llama2-13b-chat'
max_new_tokens = 20
MODELS = {
    'gpt2': 'gpt2',
    'llama2-13b': 'meta-llama/Llama-2-13b-hf',
    'llama2-13b-chat': 'meta-llama/Llama-2-13b-chat-hf',
    'mistral-7b': 'mistralai/Mistral-7B-v0.1',
    'mistral-7b-chat': 'mistralai/Mistral-7B-Instruct-v0.1',
    
    'falcon-7b': 'tiiuae/falcon-7b',
    'falcon-7b-chat': 'tiiuae/falcon-7b-instruct',
}

dtype = torch.float32 if 'llama2-7b' in model_name else torch.float16
with torch.no_grad():
    model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=dtype, device_map="auto", token=True)
tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, padding_side='left')

model.generation_config.is_decoder = True
model.generation_config.max_new_tokens = max_new_tokens
model.generation_config.min_new_tokens = 1
# model.generation_config.do_sample = False
model.config.is_decoder = True # for older models, such as gpt2
model.config.max_new_tokens = max_new_tokens
model.config.min_new_tokens = 1
# model.config.do_sample = False

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


In [3]:
def lm_generate(input, model, tokenizer, max_new_tokens=max_new_tokens, padding=False, repeat_input=True):
    """ Generate text from a huggingface language model (LM).
    Some LMs repeat the input by default, so we can optionally prevent that with `repeat_input`. """
    input_ids = tokenizer([input], return_tensors="pt", padding=padding).input_ids.cuda()
    generated_ids = model.generate(input_ids, max_new_tokens=max_new_tokens) #, do_sample=False, min_new_tokens=1, max_new_tokens=max_new_tokens)
    # prevent the model from repeating the input
    if not repeat_input:
        generated_ids = generated_ids[:, input_ids.shape[1]:]

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# print(lm_generate('I enjoy walking with my cute dog.', model, tokenizer, max_new_tokens=max_new_tokens))

# Lets's ask an LLM something (LLaMA 2, an open source ChatGPT)

In [4]:
output = lm_generate('[INST] Should I study computational linguistics when I am interested in languages, programming and maths? [\INST]', model, tokenizer, max_new_tokens=max_new_tokens, repeat_input=False)
print(output)
print(type(output))

You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )


 Yes, studying computational linguistics could be a great fit for you if you are interested in languages
<class 'str'>


# Interpreting the output of an LLM called LLaMA 2 (OpenSource ChatGPT)

In [15]:
# 定义计算重复得分的函数
def compute_repeat_score(input_sentence):
    return Information_Entropy.calculate_ngram_entropy(input_sentence, 2)

def model_predict(input_sentences):
    repeat_scores = []
    for sentence in input_sentences:
        # 让模型生成可读的输出句子
        generated_output = lm_generate(sentence, model, tokenizer, max_new_tokens=max_new_tokens, repeat_input=False)
        print(f"input: {sentence}")
        print(generated_output)
        print(f"output: {generated_output}")
        # 直接使用生成的输出句子计算 repeat score
        repeat_score = compute_repeat_score(generated_output)
        print(f"repeat score: {repeat_score}")
        # 将 repeat score 添加到结果中
        repeat_scores.append(repeat_score)
    
    return np.array(repeat_scores)  # 返回模型生成句子的 repeat score

# 使用的是input测算repeat score
# def model_predict(input_sentences):
#     repeat_scores = []
#     for sentence in input_sentences:
#         print(sentence)
#         # print(type(sentence)) # numpy.str_
#         repeat_score = compute_repeat_score(sentence)
#         repeat_scores.append(repeat_score)
#     return np.array(repeat_scores)  # 返回一个数组以符合 SHAP 的要求

# 更新解释器初始化
explainer = shap.Explainer(model_predict, tokenizer, silent=True)

def explain_lm(s, explainer, model_name, max_new_tokens=50, plot=None):
    """ Compute Shapley Values for a certain model and tokenizer initialized in explainer. """
    
    # 设置模型生成的新token数量
    model.generation_config.max_new_tokens = max_new_tokens
    model.config.max_new_tokens = max_new_tokens
    
    
    # 使用 explainer 计算 SHAP 值
    shap_vals = explainer([s])
    
    # 生成可视化输出
    if plot == 'html':
        HTML(shap.plots.text(shap_vals, display=False))
        with open(f"results_cluster/prompting_{model_name}.html", 'w') as file:
            file.write(shap.plots.text(shap_vals, display=False))
    elif plot == 'display':
        shap.plots.text(shap_vals)
    elif plot == 'text':
        print(' '.join(shap_vals.output_names))
    
    return shap_vals

In [10]:
# explainer = shap.Explainer(model, tokenizer, silent=True)

# def explain_lm(s, explainer, model_name, max_new_tokens=max_new_tokens, plot=None):
#     """ Compute Shapley Values for a certain model and tokenizer initialized in explainer. """
    
#     model.generation_config.max_new_tokens = max_new_tokens
#     model.config.max_new_tokens = max_new_tokens
    
#     shap_vals = explainer([s])

#     if plot == 'html':
#         HTML(shap.plots.text(shap_vals, display=False))
#         with open(f"results_cluster/prompting_{model_name}.html", 'w') as file:
#             file.write(shap.plots.text(shap_vals, display=False))
#     elif plot == 'display':
#         shap.plots.text(shap_vals)
#     elif plot == 'text':
#         print(' '.join(shap_vals.output_names));
#     return shap_vals

In [11]:
# shap_vals = explain_lm('[INST] Should I study computational linguistics when I am interested in languages, programming and maths? [\INST]', explainer, model_name, plot='display')

In [16]:
shap_vals = explain_lm(
    '[INST] Should I study computational linguistics when I am interested in languages, programming and math?', 
    explainer, 
    model_name,
    plot='display'
)


input: ...


The `git pull` command will fetch the latest changes from the remote repository and merge them
output: 

The `git pull` command will fetch the latest changes from the remote repository and merge them
input: [ INST ] Should I study computational lingu istics when I am interested in languages , programming and math ?

}

Yes, studying computational linguistics could be a great fit for you if you are
output: 
}

Yes, studying computational linguistics could be a great fit for you if you are
input: [ INST ] Should I study computational lingu istics when I am interested in languages , ...
?

I am interested in languages and I am considering studying computational linguistics. I am not
output: ?

I am interested in languages and I am considering studying computational linguistics. I am not
input: ... programming and math ?


I'm a high school student and I'm really interested in both programming and math
output: 

I'm a high school student and I'm really interested in both progr