In [1]:
import glm3_tokenizer
import torch

from typing import Any, Union, List

class CMMLUTokenizer:
    def __init__(self, real_tokenizer: glm3_tokenizer.GLM3Tokenizer) -> None:
        self._tokenizer = real_tokenizer

    def encode(self, string: str, return_tensors="pt"):
        return self._tokenizer.encode_one_no_attn_no_special(string).tolist()
    
    def convert_tokens_to_ids(self, tokens: Union[str, List[str]]):
        if isinstance(tokens, str):
            tokens = [tokens]
        
        return [int(self._tokenizer.encode_one_no_attn_no_special(token)[0]) for token in tokens]
    
    def __call__(self, string, return_tensors="pt") -> Any:
        return self._tokenizer.encode(string, parse_special_tokens=False)

_glm3_tokenizer = glm3_tokenizer.GLM3Tokenizer()
cmmlu_tokenizer = CMMLUTokenizer(_glm3_tokenizer)
cmmlu_tokenizer.convert_tokens_to_ids(["A", "B", "C", "D"])
cmmlu_tokenizer(["a b c", "1 2 3"])

BatchEncoding(input_ids=tensor([[    0,     0,     0,   264,   284,   277],
        [30914, 30943, 30914, 30947, 30914, 30970]], dtype=torch.int32), attention_mask=tensor([[0, 0, 0, 1, 1, 1],
        [1, 1, 1, 1, 1, 1]], dtype=torch.int32))

In [2]:
import custom_model
import os
import safetensors.torch
import torch

config = custom_model.CustomModelConfig(
            vocab_size=_glm3_tokenizer.vocab_size(),
            padding_token_id=_glm3_tokenizer.token_pad_id,
            max_position_embeddings=4096,
            hidden_size=704,
            num_heads=16,
            MLP_intermediate=5000,
            num_layers=28,
            attention_dropout=0.1,
            dtype=torch.bfloat16,
            training=True,
            linear_imp = torch.nn.Linear
        )

model = custom_model.CustomLLamaModel(config).to(device='cuda')
model_dir = r"/mnt/sata2tb/leo/NLP/small_pretrained_model/saves_medium/checkpoint-751346922-751.35M"
model_path = os.path.join(model_dir, "model.safetensor")
safetensors.torch.load_model(model, model_path)
model.eval()

encoded_inputs = _glm3_tokenizer.encode("奥运冠军").to('cuda')
model_out = model(**encoded_inputs)

In [7]:
import CMMLU.mp_utils
from CMMLU.mp_utils import choices, format_example, gen_prompt, softmax
# from CMMLU.hf_causal_model import eval
from typing import Any
from pandas import DataFrame
import numpy as np

#def eval(model, tokenizer, subject, dev_df, test_df, num_few_shot, max_length, cot):
# def eval(*args, **kwargs):
#     print(repr(args))
#     #print(repr(kwargs))
#     for k, v in kwargs.items():
#         print(f"{k}: {type(v)}")

# model: <class 'NoneType'>
# tokenizer: <class 'NoneType'>
# subject: <class 'str'>
# dev_df: <class 'pandas.core.frame.DataFrame'>
# test_df: <class 'pandas.core.frame.DataFrame'>
# num_few_shot: <class 'int'>
# max_length: <class 'int'>
# cot: <class 'bool'>
def eval(
        model: Any,
        tokenizer: Any,
        subject: str,
        dev_df: DataFrame,
        test_df: DataFrame,
        num_few_shot: int,
        max_length: int,
        cot: bool # not used
        ):
    choice_ids = [tokenizer.convert_tokens_to_ids(choice) for choice in CMMLU.mp_utils.choices]
    cors = []
    all_conf = [] # confidence
    all_preds = [] # predicates
    answers = CMMLU.mp_utils.choices[: test_df.shape[1] - 2]

    batch_size = 10
    bucket = []
    def run_and_cleanup_bucket():
        nonlocal bucket
        prompts = [prompt for prompt, label in bucket]
        labels = [label for prompt, label in bucket]
        bucket = []

        inputs = tokenizer(prompts).to("cuda")
        with torch.no_grad():
            outputs = model(**inputs)
            last_token_logits = outputs[:, -1, :] # shape: [bz, vocab_size]


    for i in range(test_df.shape[0]):
        prompt_end = format_example(test_df, i, subject, include_answer=False)
        prompt = gen_prompt(dev_df=dev_df,
                            subject=subject,
                            prompt_end=prompt_end,
                            num_few_shot=num_few_shot,
                            tokenizer=tokenizer,
                            max_length=max_length)
        label = test_df.iloc[i, test_df.shape[1] - 1]
        bucket.append((prompt, label))

        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        if "token_type_ids" in inputs: # For Falcon
            inputs.pop("token_type_ids")
        

        with torch.no_grad():
            outputs = model(**inputs)
            last_token_logits = outputs[:, -1, :]
            choice_logits = last_token_logits[:, choice_ids].detach().to(device='cpu', dtype=torch.float32).numpy()
            conf = softmax(choice_logits[0])[choices.index(label)]
            pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(choice_logits[0])]

            print(f"choice_logits: {choice_logits.shape}")
            print(f"conf({type(conf)}): {conf}")
            print(f"pred({type(pred)}): {pred}")

        all_preds += pred
        all_conf.append(conf)
        cors.append(pred == label)

        print(f"all_preds: {all_preds}")
        print(f"all_conf: {all_conf}")
        print(f"cors: {cors}")
        raise ValueError("hmm")

    acc = np.mean(cors)
    print("Average accuracy {:.3f} - {}".format(acc, subject))
    return acc, all_preds, all_conf

    # print("answers", answers)

In [4]:
# from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaConfig, LlamaModel, LlamaRotaryEmbedding
# from transformers import AutoTokenizer, AutoModel
# import transformers
# import torch

# # src/transformers/models/llama/modeling_llama.py

# LLAMA_PATH = '/home/leo/NLP/models/llama/Llama-2-7b-hf'
# tokenizer = AutoTokenizer.from_pretrained(LLAMA_PATH, trust_remote_code=True)
# # model = LlamaForCausalLM.from_pretrained(LLAMA_PATH, attn_implementation="eager", torch_dtype=torch.float16).to("cuda")
# # model.eval()

# import CMMLU.mp_utils

# choice_ids = [tokenizer.convert_tokens_to_ids(choice) for choice in CMMLU.mp_utils.choices]
# print(choice_ids)

In [8]:
import CMMLU.hf_causal_model
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name_or_path", type=str, default="")
parser.add_argument("--lora_weights", type=str, default="")
parser.add_argument("--data_dir", type=str, default="CMMLU/data")
parser.add_argument("--save_dir", type=str, default="CMMLU/results/not_specified")
parser.add_argument("--num_few_shot", type=int, default=0)
parser.add_argument("--max_length", type=int, default=2048)
parser.add_argument("--load_in_8bit", action='store_true')
parser.add_argument("--with_conf", action='store_true')
parser.add_argument("--cot", action='store_true')
args = parser.parse_args([])


CMMLU.mp_utils.run_eval(
    model, cmmlu_tokenizer,
    eval,
    args
)

choice_logits: (1, 4, 1)
conf(<class 'numpy.ndarray'>): [0.7707439]
pred(<class 'str'>): B
all_preds: ['B']
all_conf: [array([0.7707439], dtype=float32)]
cors: [True]


ValueError: hmm