## GPT-2 Small on GSM8K

The purpose of this notebook is to measure the coherence of an LLM (GPT-2 Small) when solving math problems from the GSM8K dataset.

# Model set up

In [1]:
# import the hugging face transformers library
from transformers import pipeline, GPT2LMHeadModel, Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset




In [2]:

# Both the model weights and the tokenizer come from the same pretrained
# checkpoint, so name it once and reuse it.
checkpoint = 'gpt2'
model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [5]:
# Preprocessing pads every example to a fixed length, which needs a pad token.
# When the tokenizer does not define one, fall back to its end-of-sequence token.
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token


In [6]:


# Load the first 1% of the training split of the GSM8K "main" configuration.
dataset = load_dataset("GSM8K", name='main', split='train[:1%]')
def preprocess_data(examples):
    """Tokenize a batch of GSM8K rows for causal-LM fine-tuning.

    Each question and its answer are joined into a single training sequence
    ("<question> Answer: <answer><eos>"). A causal LM predicts each token from
    the tokens before it, so the labels must be the same sequence as the
    inputs; tokenizing question and answer separately would leave the labels
    positionally unrelated to the inputs.

    Args:
        examples: A batch mapping with 'question' and 'answer' keys, each a
            list of strings of equal length (as supplied by a batched `map`).

    Returns:
        The tokenizer output (input_ids, attention_mask), padded/truncated to
        512 tokens, with an added "labels" entry. Labels are a copy of
        input_ids where padded positions are replaced by -100 so they are
        ignored by the loss.
    """
    # Terminate each sequence with EOS so the model can learn where an answer
    # ends; tolerate a tokenizer without an EOS token.
    eos = tokenizer.eos_token or ""
    texts = [
        q + " Answer: " + a + eos
        for q, a in zip(examples['question'], examples['answer'])
    ]
    model_inputs = tokenizer(texts, max_length=512, truncation=True, padding="max_length")

    # Mask by attention_mask rather than by token id: the pad token may be the
    # same id as EOS, and the real EOS at the end of the answer must stay in
    # the loss.
    model_inputs["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Run the preprocessing over the dataset in batches; the tokenized columns
# are added next to the original 'question' and 'answer' columns.
encoded_dataset = dataset.map(function=preprocess_data, batched=True)

Map:   0%|          | 0/75 [00:00<?, ? examples/s]



In [7]:
# Display the tokenized dataset summary (column names and row count).
encoded_dataset

Dataset({
    features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 75
})

In [9]:


# Set up training arguments
# NOTE: using `Trainer` with PyTorch requires `accelerate>=0.21.0`
# (install with `pip install transformers[torch]` or `pip install accelerate -U`).

# Step-based evaluation needs an evaluation set, so hold out roughly 10% of
# the encoded examples. The fixed seed keeps the split reproducible.
dataset_splits = encoded_dataset.train_test_split(test_size=0.1, seed=42)

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    # With 1% of the data, batch size 8 and 3 epochs, the whole run is only a
    # few dozen optimizer steps on a single device, so a fixed 500-step warmup
    # would never complete. Warm up over a fraction of the run instead.
    warmup_ratio=0.1,
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="steps",     # evaluate during training to observe progress
    save_strategy="epoch"            # save the model at the end of each epoch
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_splits["train"],
    eval_dataset=dataset_splits["test"],
)

# Start training
trainer.train()


ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.21.0`: Please run `pip install transformers[torch]` or `pip install accelerate -U`

In [None]:
#The End