In [None]:
import sys

# Locations of the RLHF code checkout on the different machines this notebook
# has been run on. Every entry is pushed to the front of sys.path in turn, so
# the LAST entry listed here ends up with the highest import priority.
# NOTE(review): the macOS entry starts with a double slash — looks unintended,
# kept verbatim.
_RLHF_CODE_DIRS = (
    "/root/autodl-tmp/Code/RLHF",
    "/mnt/sfevol775196/sunzeye273/Code/RLHF",
    "/mnt/pa002-28359-vol543625-private/Code/RLHF",
    "//Users/zeyesun/Documents/Code/RLHF",
    "D:\\Code\\RLHF",
)
for _code_dir in _RLHF_CODE_DIRS:
    sys.path.insert(0, _code_dir)

import os, time, re, random, glob, json, jieba, copy
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoModelForMultipleChoice,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator,
    TextGenerationPipeline
)

from src.models.reward import RewardModel, RewardModelWithLoRA

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Select the machine-specific data directory. Later cells build the model and
# checkpoint paths from `root`, so it must always be defined after this cell.
from sys import platform
if platform.startswith("linux"):
    # Linux ("linux" on Python 3; "linux2" was reported by Python 2)
    root = "/mnt/sfevol775196/sunzeye273/Data"
    # Alternative Linux data roots:
    #     root = "/mnt/pa002-28359-vol543625-private/Data"
    #     root = "/root/autodl-tmp/Data"
elif platform == "darwin":
    # OS X
    root = "/Users/zeyesun/Documents/Data"
elif platform == "win32":
    # Windows
    root = "D:\\Data"
else:
    # Fail here with a clear message instead of a NameError on `root` in a later cell.
    raise RuntimeError(f"Unsupported platform {platform!r}: no data root is configured")

In [None]:
# Name of the model directory under <root>/models.
# Other values used with this notebook: "glm-350M-chinese", "chatglm-6B".
model_name = "pangu-350M"
model_name_or_path = os.path.join(root, "models", model_name)

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_cache=False,
    trust_remote_code=True,
)

# Show how the tokenizer's special tokens are configured.
print(tokenizer.special_tokens_map)
print(tokenizer.all_special_ids)

# One entry per special-token id; the pieces are printed space-separated, exactly
# as if they were passed to print() one by one.
# (eop / sop / cls ids were left disabled here; add them the same way if the
# tokenizer defines them.)
special_token_report = (
    f"unk: {tokenizer.unk_token_id}\n",
    f"pad: {tokenizer.pad_token_id}\n",
    f"bos: {tokenizer.bos_token_id}\n",
    f"eos: {tokenizer.eos_token_id}\n",
    f"sep: {tokenizer.sep_token_id}\n",
    f"mask: {tokenizer.mask_token_id}\n",
)
print(*special_token_report)

In [None]:
# LoRA settings; the model-loading cell copies them onto `model.config`.
# NOTE(review): rank 0 presumably means LoRA is disabled — confirm against
# src.models.reward.
lora_rank, lora_alpha = 0, 1
lora_train_bias = "none"

In [None]:
# Load the pretrained language model and pick the sub-module that serves as the
# reward model's backbone. The model family is inferred from the checkpoint path.
if "pangu" in model_name_or_path:
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, use_cache=False, trust_remote_code=True)
    # Match the embedding table to the tokenizer's vocabulary size.
    model.resize_token_embeddings(tokenizer.vocab_size)
    backbone = model.transformer
elif "glm" in model_name_or_path:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, trust_remote_code=True)
    if "chatglm" in model_name_or_path:
        # ChatGLM is cast to fp16 right after loading.
        model = model.half()
    # NOTE(review): confirm that ChatGLM checkpoints expose the backbone as `.glm` too.
    backbone = model.glm
else:
    raise ValueError(f"Unsupported model name: {model_name_or_path}")

# The LoRA settings are handed to the reward model through the config object.
model.config.lora_rank = lora_rank
model.config.lora_alpha = lora_alpha
model.config.lora_train_bias = lora_train_bias

# Initialize the reward model from the (supervised) fine-tuned SFT model.
# RewardModelWithLoRA(model.config, backbone, tokenizer) is the LoRA alternative.
reward_model = RewardModel(model.config, backbone, tokenizer)

In [None]:
# Glob pattern matching every weight shard of the trained reward-model checkpoint.
reward_checkpoint = os.path.join(root, "chatgpt", "output", "reward", model_name, "checkpoint-200549", "pytorch_model*.bin")
# glob returns matches in arbitrary order; sort so shards are always merged in the same order.
checkpoints = sorted(glob.glob(reward_checkpoint))
if not checkpoints:
    # Without this check an empty state dict would be handed to load_state_dict,
    # hiding the real cause (wrong path / missing checkpoint).
    raise FileNotFoundError(f"No reward checkpoint files match: {reward_checkpoint}")

# Merge all shards into a single state dict, kept on the CPU while loading.
st = dict()
for checkpoint in checkpoints:
    # NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
    st.update(torch.load(checkpoint, map_location="cpu"))
# `res` holds whatever load_state_dict returns (its report on key matching).
res = reward_model.load_state_dict(st)

# Reward Model Eval

In [None]:
# Cast to fp16, switch to evaluation mode and move to the target device.
# Chaining assumes RewardModel is a regular torch.nn.Module, whose half/eval/to
# each return the module itself.
# NOTE(review): fp16 is applied even when `device` is "cpu" — confirm that is intended.
reward_model.half().eval().to(device)
# Presumably here so the cell ends on a print rather than echoing the module repr.
print("")

In [None]:
# Inputs are padded / truncated to exactly this many tokens.
max_length = 512
# Fix: the tokenizer attribute is `padding_side`; the previous `padding_size`
# spelling only created an unused attribute and had no effect on padding.
tokenizer.padding_side = "right"
# tokenizer.padding_side = "left"

# Example pair: a modern-Chinese sentence and a placeholder "classical Chinese"
# prediction made of <unk> tokens.
prompt = "现代文:行三十五里,进入登封县境的耿店。"
prefix = "古文:"
pred = "<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>"

# Encode prompt and prefixed prediction as a text pair.
# NOTE(review): this call also passes padding_side="left"; on transformers
# releases whose tokenizer __call__ accepts that argument it takes precedence
# over the attribute set above for this call — confirm which side the reward
# model expects and make the two agree.
encodings_dict = tokenizer(prompt, prefix+pred, max_length=max_length,
                           truncation="longest_first", padding="max_length", return_tensors="pt",
                           return_token_type_ids=False, padding_side="left")
print(encodings_dict.keys())
print(encodings_dict['input_ids'].shape)
print(encodings_dict['attention_mask'].shape)

In [None]:
# Move the encoded example to the same device as the reward model.
input_ids = encodings_dict['input_ids'].to(device)
attention_mask = encodings_dict['attention_mask'].to(device)
# Pure inference: disable autograd so no computation graph / activations are
# kept alive for a backward pass that never happens.
with torch.no_grad():
    res = reward_model(input_ids, attention_mask)