## 第四章作业

In [1]:
import os

os.environ['TRANSFORMERS_CACHE'] = '/mnt/sda/huggingface/hub/'
os.environ['HF_HOME'] = '/mnt/sda/huggingface/'
# os.environ['http_proxy'] = 'http://127.0.0.1:7890'
# os.environ['https_proxy'] = 'http://127.0.0.1:7890'

In [2]:
import random
import pandas as pd
import numpy as np
import evaluate
import datasets
from IPython.display import display, HTML

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = random.sample(range(len(dataset)), k=num_examples)

    df = pd.DataFrame(dataset[picks])
    for col, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            df[col] = df[col].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

#### 1. 使用完整的 YelpReviewFull 数据集训练，对比看 Acc 最高能到多少。课程代码（ https://github.com/DjangoPeng/LLM-quickstart/blob/main/transformers/fine-tune-quickstart.ipynb ）

In [4]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)

In [5]:
dataset = load_dataset('yelp_review_full')
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
model = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [7]:
train_dataset = tokenized_datasets['train'].shuffle(seed=42).select(range(100000))
eval_dataset = tokenized_datasets['test'].shuffle(seed=42).select(range(1000))
test_dataset = tokenized_datasets['test'].shuffle(seed=64).select(range(100))

In [8]:
model_dir = 'models/bert-base-cased-finetune-yelp'
training_args = TrainingArguments(
    output_dir=model_dir,
    overwrite_output_dir=True,
    evaluation_strategy='epoch',
    per_device_train_batch_size=20,
    num_train_epochs=3,
    logging_steps=30,
    save_steps=500,
    save_total_limit=5
)

In [9]:
metric = evaluate.load('accuracy')

def compute_metric(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metric,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [11]:
trainer.train(resume_from_checkpoint=True)

Epoch,Training Loss,Validation Loss,Accuracy
2,0.5112,0.790075,0.689
3,0.4702,0.812829,0.693


TrainOutput(global_step=15000, training_loss=0.22096032746632893, metrics={'train_runtime': 11092.8314, 'train_samples_per_second': 27.044, 'train_steps_per_second': 1.352, 'total_flos': 7.89354427392e+16, 'train_loss': 0.22096032746632893, 'epoch': 3.0})

In [12]:
trainer.evaluate(test_dataset)

{'eval_loss': 1.0953632593154907,
 'eval_accuracy': 0.63,
 'eval_runtime': 2.9788,
 'eval_samples_per_second': 33.571,
 'eval_steps_per_second': 4.364,
 'epoch': 3.0}

In [13]:
trainer.save_model(model_dir)

In [14]:
trainer.save_state()

#### 2. 加载本地保存的模型，进行评估和再训练更高的 F1 Score。课程代码（ https://github.com/DjangoPeng/LLM-quickstart/blob/main/transformers/fine-tune-QA.ipynb ）