In [2]:
from datasets import load_dataset

# Fetch the reading-comprehension ("rc") configuration of TriviaQA.
dataset = load_dataset("trivia_qa", "rc")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.88G [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/138384 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/17944 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/17210 [00:00<?, ? examples/s]

In [None]:
from transformers import LlamaTokenizer, LlamaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric

def main():
    """Evaluate a LLaMA sequence-classification model on the TriviaQA validation split.

    Loads the tokenizer and model from the local checkpoint directory,
    tokenizes every validation question together with its answer text, runs
    ``Trainer.evaluate`` and prints the resulting metrics dictionary.
    """
    model_path = "/home/data/models/llama-transformers/7B"

    # Load the tokenizer and the model.
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    # Reuse EOS as the padding token so that padding='max_length' works.
    tokenizer.pad_token = tokenizer.eos_token
    model = LlamaForSequenceClassification.from_pretrained(model_path)
    # The classification head pools the last non-padding token, so the model
    # config must know the pad id as well; without it, batches larger than
    # one are rejected by the model.
    model.config.pad_token_id = tokenizer.pad_token_id

    # Evaluation arguments (output directory, eval cadence, per-device batch size).
    args = TrainingArguments(
        "test-triviaqa",
        evaluation_strategy="steps",
        eval_steps=500,
        per_device_eval_batch_size=4,
    )

    # The 'rc' configuration is used here; other configurations can be chosen.
    dataset = load_dataset("trivia_qa", "rc")

    # There is no 'trivia_qa' metric to load. compute_metrics below compares
    # arg-max class ids against label ids, which is what 'accuracy' expects.
    metric = load_metric("accuracy")

    def encode(examples):
        """Tokenize a batch of (question, answer text) pairs."""
        # Each TriviaQA answer is a struct (value, aliases, ...); the tokenizer
        # needs plain strings, so use the canonical answer text.
        answers = [answer["value"] for answer in examples["answer"]]
        return tokenizer(examples["question"], answers, truncation=True, padding="max_length", max_length=256)

    # Only the validation split is evaluated, so only that split is tokenized.
    encoded_validation = dataset["validation"].map(encode, batched=True)

    def compute_metrics(eval_pred):
        """Turn logits into class ids and score them against the labels."""
        predictions, labels = eval_pred
        predictions = predictions.argmax(axis=-1)
        return metric.compute(predictions=predictions, references=labels)

    # NOTE(review): TriviaQA does not appear to provide a `label` column, so
    # the Trainer may never call compute_metrics here - confirm what the
    # classification target is supposed to be.
    trainer = Trainer(
        model,
        args,
        eval_dataset=encoded_validation,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    eval_results = trainer.evaluate()

    print(f"Results for trivia_qa: {eval_results}")

# Entry point: run the TriviaQA evaluation when this code is executed directly.
if __name__ == "__main__":
    main()


In [None]:
from transformers import LlamaTokenizer, LlamaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric


def main():
    """Evaluate a LLaMA sequence-classification model on every GLUE task.

    For each task the matching dataset and metric are loaded, the validation
    split(s) are tokenized with the task's own text columns, and the metrics
    returned by ``Trainer.evaluate`` are printed.
    """
    model_path = "/home/data/models/llama-transformers/7B"

    # Load the tokenizer; EOS doubles as the padding token.
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Evaluation arguments.
    args = TrainingArguments(
        "test-glue",
        evaluation_strategy="steps",  # evaluate after a fixed number of steps
        eval_steps=500,  # evaluate every 500 steps
        per_device_eval_batch_size=4,  # evaluation batch size per device
    )

    # GLUE tasks mapped to the text column(s) each one actually provides.
    # The second entry is None for single-sentence tasks.
    task_to_keys = {
        'cola': ('sentence', None),
        'sst2': ('sentence', None),
        'mrpc': ('sentence1', 'sentence2'),
        'stsb': ('sentence1', 'sentence2'),
        'qqp': ('question1', 'question2'),
        'mnli': ('premise', 'hypothesis'),
        'qnli': ('question', 'sentence'),
        'rte': ('sentence1', 'sentence2'),
        'wnli': ('sentence1', 'sentence2'),
    }

    model = None
    trainer = None
    loaded_num_labels = None

    for task, (first_key, second_key) in task_to_keys.items():
        # Load the dataset and its metric.
        dataset = load_dataset("glue", task)
        metric = load_metric('glue', task)

        # STS-B is a regression task (one float output); every other task is
        # classification with as many outputs as the label feature has classes.
        is_regression = task == 'stsb'
        if is_regression:
            num_labels = 1
        else:
            num_labels = dataset["train"].features["label"].num_classes

        # Reload the model only when the size of the output head changes.
        if num_labels != loaded_num_labels:
            # Drop references to the previous model before loading the next
            # one so both do not have to be held in memory at once.
            trainer = None
            model = None
            model = LlamaForSequenceClassification.from_pretrained(model_path, num_labels=num_labels)
            # The model needs the pad id to handle batches larger than one.
            model.config.pad_token_id = tokenizer.pad_token_id
            loaded_num_labels = num_labels

        def encode(examples):
            """Tokenize a batch using the current task's text column(s)."""
            if second_key is None:
                texts = (examples[first_key],)
            else:
                texts = (examples[first_key], examples[second_key])
            return tokenizer(*texts, truncation=True, padding='max_length', max_length=256)

        def compute_metrics(eval_pred):
            """Convert raw model outputs to predictions and score them."""
            predictions, labels = eval_pred
            if is_regression:
                # A single regression output per example: drop the last axis.
                predictions = predictions.squeeze(axis=-1)
            else:
                predictions = predictions.argmax(axis=-1)
            return metric.compute(predictions=predictions, references=labels)

        # MNLI has two validation splits and no plain "validation" split.
        if task == 'mnli':
            eval_splits = ['validation_matched', 'validation_mismatched']
        else:
            eval_splits = ['validation']

        for split in eval_splits:
            # Tokenize only the split that is evaluated.
            encoded_eval = dataset[split].map(encode, batched=True)

            # Build the Trainer.
            trainer = Trainer(
                model,
                args,
                eval_dataset=encoded_eval,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics
            )

            # Evaluate and collect the results.
            eval_results = trainer.evaluate()

            # Print the results, naming the split when a task has several.
            label = task if split == 'validation' else f"{task} ({split})"
            print(f"Results for {label}: {eval_results}")


# Entry point: run the GLUE evaluation when this code is executed directly.
if __name__ == "__main__":
    main()
