1. Load the Trained Model and Tokenizer

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Tokenizer: the stock "gpt2" vocabulary, with the end-of-sequence token
# reused as the padding token so that padded batches can be built.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Model: the trained weights, read from the checkpoint directory.
model = GPT2LMHeadModel.from_pretrained("path_to_trained_model")

: 

2. Preprocess the Evaluation Dataset
Preprocess the evaluation dataset exactly as it was preprocessed during training:

In [None]:
from datasets import load_dataset

# Load the Python portion of the dataset.
# NOTE(review): trust_remote_code=True lets the dataset's own loading script
# run locally -- only keep it for sources you trust.
dataset = load_dataset("Fsoft-AIC/the-vault-function", languages=["Python"], trust_remote_code=True)

# Hold out 10% of the "train" split for evaluation. The seed is fixed so that
# every Restart & Run All evaluates on the same examples, and the two parts
# are picked by name instead of relying on the order of `.values()`.
dataset_splits = dataset["train"].train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = dataset_splits["train"], dataset_splits["test"]

def preprocess_function(examples):
    """Tokenize one batch of examples for evaluation.

    Meant to be passed to ``Dataset.map(..., batched=True)``. In batched mode
    ``examples`` is column-oriented: a mapping from column name to a list with
    one value per row. Iterating over it yields column *names*, so the columns
    must be indexed directly rather than looped over row by row.

    Args:
        examples: Batch providing a ``'code'`` column (model inputs) and a
            ``'text'`` column (targets).
            NOTE(review): confirm the dataset really exposes a ``'text'``
            column; adjust the key if the target lives under another name.

    Returns:
        The tokenizer output for the ``'code'`` column, extended with a
        ``'labels'`` entry holding the token ids of the ``'text'`` column.
        Both are truncated and padded to 512 tokens.
    """
    inputs = examples['code']
    targets = examples['text']
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=512, truncation=True, padding="max_length").input_ids
    model_inputs['labels'] = labels
    return model_inputs

# Tokenize the held-out split, handing whole batches to the preprocessing
# function rather than one row at a time.
tokenized_eval_dataset = eval_dataset.map(
    function=preprocess_function,
    batched=True,
)

3. Use Trainer.predict to Generate Predictions
Initialize a Trainer and call predict on the preprocessed evaluation dataset:

In [None]:
from transformers import Trainer, TrainingArguments

# Settings for the run: the output directory and the per-device batch size
# used during evaluation/prediction.
training_args = TrainingArguments(output_dir="./results", per_device_eval_batch_size=8)

# Wrap the loaded model and tokenizer in a Trainer.
trainer = Trainer(model=model, args=training_args, tokenizer=tokenizer)

# Generate predictions
# NOTE(review): predict() accumulates the model outputs for the whole dataset;
# with full vocabulary logits this can be very memory-hungry. Consider
# evaluating a subset or passing `preprocess_logits_for_metrics` to Trainer.
predictions = trainer.predict(tokenized_eval_dataset)

# A language-model head returns logits (one score per vocabulary entry for
# every position), not token ids, so they cannot be decoded directly.
logits = predictions.predictions
if isinstance(logits, tuple):
    # Some models return extra outputs next to the logits; the logits come first.
    logits = logits[0]

# Pick the highest-scoring token per position. If the array is already
# two-dimensional it holds token ids and is used unchanged.
predicted_ids = logits.argmax(axis=-1) if logits.ndim == 3 else logits

# Decode predictions
decoded_preds = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)

# Show a small sample (the first five decoded predictions), numbered from 1.
for position, decoded_text in enumerate(decoded_preds[:5], start=1):
    print(f"Prediction {position}: {decoded_text}")