In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
import torch

In [None]:
# Check if GPU is available
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

In [None]:

# Load dataset
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1')

dataset


In [None]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)
    inputs['labels'] = inputs['input_ids'].copy()
    return inputs

tokenised_datasets = dataset.map(tokenize_function, batched=True)

tokenised_datasets

In [None]:
# Define training arguments
training_args = TrainingArguments(
    output_dir='model/',
    eval_strategy='epoch',
    num_train_epochs=1,
    per_device_train_batch_size=20,
    per_device_eval_batch_size=20,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='model/logs',
    # no_cuda=True,
    use_mps_device=True,  #VERY IMPORTANT PARAM
)

model = AutoModelForCausalLM.from_pretrained('gpt2')
model.to(device)
# base_model.to("mps")

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenised_datasets['train'],
    eval_dataset=tokenised_datasets['validation']
)

# Train the model
# trainer.train(resume_from_checkpoint='model/checkpoint-8500')
trainer.train()

In [None]:
# save the model and tokenizer explicitly
model_output_dir = 'model/trained_model'

model.save_pretrained(model_output_dir)
tokenizer.save_pretrained(model_output_dir)

In [14]:
import argparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def get_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    return total_params

def main(input_text):
    #load GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    # Load the tokenizer and model from the saved directory

    model_path = 'basic_model'
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

    # Calculate the Number of Parameters in the model being used for inference
    total_params = get_model_parameters(model)
    print(f"Total number of paramerers: {total_params}")

    # Prepare the input text you want to generate predictions for
    inputs = tokenizer(input_text, return_tensors='pt')
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate Text
    outputs = model.generate(**inputs, max_length=200, temperature=0.9, top_p=0.95,do_sample=True,num_return_sequences=1)

    # Decode the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(generated_text)

main(input_text="Once upon a time ")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Total number of paramerers: 81912576
Once upon a time 

