In [1]:
import os

# 获取当前工作目录
current_directory = os.getcwd()
print(f"Current directory before change: {current_directory}")

# 要更改的目标目录
target_directory = 'NLP-Tutorial-How-to-be-Shakesapeare/Leave-Me-Alone'

# 如果当前目录不是目标目录，则更改当前工作目录
if not current_directory.endswith(target_directory):
    os.chdir(target_directory)
    print(f"Directory changed to: {target_directory}")
else:
    print("Already in the target directory.")

# 获取更改后的当前工作目录地址
new_directory = os.getcwd()
print(f"Current directory after change: {new_directory}")


Current directory before change: /teamspace/studios/this_studio
Directory changed to: NLP-Tutorial-How-to-be-Shakesapeare/Leave-Me-Alone
Current directory after change: /teamspace/studios/this_studio/NLP-Tutorial-How-to-be-Shakesapeare/Leave-Me-Alone


In [3]:
from datasets import load_dataset, Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import random

# 加载IMDb数据集
imdb_dataset = load_dataset('imdb')

# 随机选取5000条训练样本
train_dataset = imdb_dataset['train'].shuffle(seed=42).select(range(5000))

# 加载分词器和预训练的模型
model_path = 'pretrain_results/checkpoint-8682'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)

# 分词和编码
def tokenize_function(examples):
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)

train_dataset = train_dataset.map(tokenize_function, batched=True)
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# 处理测试集
test_dataset = imdb_dataset['test'].map(tokenize_function, batched=True)
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pretrain_results/checkpoint-8682 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# 定义训练参数
training_args = TrainingArguments(
    output_dir='./train_results',
    num_train_epochs=5,               # 训练轮次
    per_device_train_batch_size=16,   # 每个设备的训练批量大小
    per_device_eval_batch_size=16,    # 每个设备的评估批量大小
    warmup_steps=500,                 # 预热步数
    weight_decay=0.01,                # 权重衰减
    logging_dir='./train_logs',             # 日志目录
    evaluation_strategy="epoch",      # 在每个 epoch 结束时进行评估
    save_strategy="epoch",            # 在每个 epoch 结束时保存模型
    load_best_model_at_end=True,      # 在训练结束时加载表现最好的模型
    metric_for_best_model="accuracy", # 选择用于模型保存的指标
)

# 定义计算指标的函数
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# 初始化Trainertrain
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    compute_metrics=compute_metrics
)




In [4]:
# 开始训练
trainer.train()

# 训练结束后进行评估
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.157512,0.9432,0.943166,0.94415,0.9432
2,0.342200,0.127132,0.9494,0.949311,0.952374,0.9494
3,0.342200,0.048932,0.9872,0.9872,0.987291,0.9872
4,0.140300,0.011087,0.9982,0.9982,0.9982,0.9982
5,0.027000,0.008431,0.9986,0.9986,0.9986,0.9986


Evaluation results: {'eval_loss': 0.008430964313447475, 'eval_accuracy': 0.9986, 'eval_f1': 0.9985999992719949, 'eval_precision': 0.998600078432615, 'eval_recall': 0.9986, 'eval_runtime': 74.9263, 'eval_samples_per_second': 66.732, 'eval_steps_per_second': 4.177, 'epoch': 5.0}


In [5]:
# 在 test_dataset 上进行评估
eval_results = trainer.evaluate(eval_dataset=test_dataset)

# 打印评估结果
print(f"Evaluation results on the test dataset: {eval_results}")


Evaluation results on the test dataset: {'eval_loss': 0.41011637449264526, 'eval_accuracy': 0.92736, 'eval_f1': 0.9273554432684765, 'eval_precision': 0.9274672539278271, 'eval_recall': 0.92736, 'eval_runtime': 369.4631, 'eval_samples_per_second': 67.666, 'eval_steps_per_second': 4.23, 'epoch': 5.0}


---

In [4]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# 定义训练参数
training_args = TrainingArguments(
    output_dir='./train_results_baseline',
    num_train_epochs=5,               # 训练轮次
    per_device_train_batch_size=16,   # 每个设备的训练批量大小
    per_device_eval_batch_size=16,    # 每个设备的评估批量大小
    warmup_steps=500,                 # 预热步数
    weight_decay=0.01,                # 权重衰减
    logging_dir='./train_logs_baseline',             # 日志目录
    evaluation_strategy="epoch",      # 在每个 epoch 结束时进行评估
    save_strategy="epoch",            # 在每个 epoch 结束时保存模型
    load_best_model_at_end=True,      # 在训练结束时加载表现最好的模型
    metric_for_best_model="accuracy", # 选择用于模型保存的指标
)

# 定义计算指标的函数
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# 初始化Trainertrain
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    compute_metrics=compute_metrics
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




In [7]:
# 开始训练
trainer.train()

# 训练结束后进行评估
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.217484,0.9136,0.913362,0.918428,0.9136
2,0.372700,0.091083,0.975,0.974997,0.975153,0.975
3,0.372700,0.029569,0.9924,0.9924,0.99242,0.9924
4,0.148500,0.018142,0.9958,0.9958,0.995806,0.9958
5,0.037700,0.010055,0.9984,0.9984,0.9984,0.9984


Evaluation results: {'eval_loss': 0.01005455944687128, 'eval_accuracy': 0.9984, 'eval_f1': 0.9984000012800051, 'eval_precision': 0.9984003220488245, 'eval_recall': 0.9984, 'eval_runtime': 75.013, 'eval_samples_per_second': 66.655, 'eval_steps_per_second': 4.173, 'epoch': 5.0}
Test results on the test dataset: {'eval_loss': 0.01005455944687128, 'eval_accuracy': 0.9984, 'eval_f1': 0.9984000012800051, 'eval_precision': 0.9984003220488245, 'eval_recall': 0.9984, 'eval_runtime': 75.013, 'eval_samples_per_second': 66.655, 'eval_steps_per_second': 4.173, 'epoch': 5.0}


In [5]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

# 加载预训练的 BERT 模型
model_path = 'train_results_baseline/checkpoint-1565'
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)

# 初始化Trainer
trainer = Trainer(
    model=model,
    args=training_args,  # 使用之前定义的训练参数
    eval_dataset=test_dataset,  # 使用测试集进行评估
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# 在 test_dataset 上进行评估
eval_results_baseline = trainer.evaluate()

# 打印评估结果
print(f"Evaluation results on the test dataset with the loaded baseline model: {eval_results_baseline}")


Evaluation results on the test dataset with the loaded baseline model: {'eval_loss': 0.425955206155777, 'eval_model_preparation_time': 0.0022, 'eval_accuracy': 0.92232, 'eval_f1': 0.9223193909840254, 'eval_precision': 0.9223332443705434, 'eval_recall': 0.92232, 'eval_runtime': 366.3155, 'eval_samples_per_second': 68.247, 'eval_steps_per_second': 4.267}


---

总的来看，预训练后的模型效果是更优的。

在训练中的第二轮就展现更快的收敛。

在最后的test中，预训练模型也是在loss/acc/f1/recall/prec指标上优于baseline模型（你就说高一点是不是高吧）。