In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier shared by the tokenizer and the model so the two stay in sync.
model_name = "gpt2"

# Load the tokenizer and the LM-head model from that same checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

In [3]:
# Plain-text training corpus; a relative path, so it is resolved against the
# kernel's working directory.
dataset_path = "train.txt"

# Build the training dataset from that file with the tokenizer loaded earlier.
# block_size=128 is a tunable choice — adjust as needed.
# NOTE(review): TextDataset appears to be deprecated in newer transformers
# releases — confirm against the installed version before upgrading.
train_dataset = TextDataset(tokenizer=tokenizer, file_path=dataset_path, block_size=128)



In [4]:
# Collator that batches dataset examples for the Trainer.
# mlm=False switches off the collator's masked-language-modeling mode.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [6]:
# Run configuration for the fine-tuning job.
training_args = TrainingArguments(
    output_dir="./fine-tuned-gpt",   # directory the run writes its artifacts to
    overwrite_output_dir=True,       # allow reusing an already-existing output directory
    report_to="none",                # no experiment-tracker integrations
    num_train_epochs=3,
    per_device_train_batch_size=4,
    # Checkpoint at the end of every epoch. A step-based interval of 10_000 would
    # exceed the whole run (5418 steps in the recorded output), so no intermediate
    # checkpoint would ever be written and an interrupted run would lose everything.
    save_strategy="epoch",
    save_total_limit=2,              # keep only the two most recent checkpoints
)

In [7]:
# Tie together the model, the run configuration, the training data and the
# collator that batches it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

In [8]:
# Run the fine-tuning loop. The returned TrainOutput is bound to a name so later
# cells can read its metrics, and left as the last expression so it is still displayed.
train_output = trainer.train()
train_output

  9%|▉         | 500/5418 [02:47<27:55,  2.94it/s]

{'loss': 3.0188, 'learning_rate': 4.538575119970469e-05, 'epoch': 0.28}


 18%|█▊        | 1000/5418 [05:37<25:18,  2.91it/s]

{'loss': 2.9172, 'learning_rate': 4.077150239940938e-05, 'epoch': 0.55}


 28%|██▊       | 1500/5418 [08:26<22:35,  2.89it/s]

{'loss': 2.8756, 'learning_rate': 3.6157253599114066e-05, 'epoch': 0.83}


 37%|███▋      | 2000/5418 [11:18<20:05,  2.83it/s]

{'loss': 2.7796, 'learning_rate': 3.154300479881875e-05, 'epoch': 1.11}


 46%|████▌     | 2500/5418 [14:08<16:49,  2.89it/s]

{'loss': 2.6441, 'learning_rate': 2.692875599852344e-05, 'epoch': 1.38}


 55%|█████▌    | 3000/5418 [17:00<13:09,  3.06it/s]

{'loss': 2.6477, 'learning_rate': 2.231450719822813e-05, 'epoch': 1.66}


 65%|██████▍   | 3500/5418 [19:46<12:51,  2.48it/s]

{'loss': 2.6422, 'learning_rate': 1.7700258397932818e-05, 'epoch': 1.94}


 74%|███████▍  | 4000/5418 [22:31<07:33,  3.12it/s]

{'loss': 2.5442, 'learning_rate': 1.3086009597637506e-05, 'epoch': 2.21}


 83%|████████▎ | 4500/5418 [25:18<05:14,  2.92it/s]

{'loss': 2.519, 'learning_rate': 8.471760797342193e-06, 'epoch': 2.49}


 92%|█████████▏| 5000/5418 [28:06<02:22,  2.94it/s]

{'loss': 2.5221, 'learning_rate': 3.857511997046881e-06, 'epoch': 2.77}


100%|██████████| 5418/5418 [30:25<00:00,  2.97it/s]

{'train_runtime': 1825.8263, 'train_samples_per_second': 11.868, 'train_steps_per_second': 2.967, 'train_loss': 2.695769936979455, 'epoch': 3.0}





TrainOutput(global_step=5418, training_loss=2.695769936979455, metrics={'train_runtime': 1825.8263, 'train_samples_per_second': 11.868, 'train_steps_per_second': 2.967, 'train_loss': 2.695769936979455, 'epoch': 3.0})

In [9]:
# Persist the fine-tuned weights and the tokenizer files side by side.
# The target directory is read from training_args ("./fine-tuned-gpt") instead of
# being repeated as a literal, so it cannot drift from the Trainer's output_dir.
save_dir = training_args.output_dir
model.save_pretrained(save_dir)
# Last expression: the tuple of written tokenizer file paths is displayed as the cell output.
tokenizer.save_pretrained(save_dir)

('./fine-tuned-gpt\\tokenizer_config.json',
 './fine-tuned-gpt\\special_tokens_map.json',
 './fine-tuned-gpt\\vocab.json',
 './fine-tuned-gpt\\merges.txt',
 './fine-tuned-gpt\\added_tokens.json')