In [1]:
import os
from pyprojroot import here
# Point the Hugging Face cache at a folder inside the project (resolved via
# pyprojroot). NOTE(review): this is set before `transformers` is imported,
# presumably because the library reads the variable at import time - confirm
# before reordering these lines.
os.environ['TRANSFORMERS_CACHE'] = str(here("cache/transformers"))

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint identifier shared by the tokenizer and the model below.
model_name = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Bare expression: the notebook renders the module tree as the cell output.
model



GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [2]:
# Smoke-test the pretrained model on a single prompt before any fine-tuning.
model.eval()
# Register a dedicated '<|pad|>' token and grow the embedding matrix so the
# model has a row for the new vocabulary entry.
tokenizer.add_special_tokens({'pad_token': '<|pad|>'})
model.resize_token_embeddings(len(tokenizer))

# Encode the prompt; the encoding carries both input_ids and attention_mask.
prompt = "The future of artificial intelligence is"
inputs = tokenizer(prompt, return_tensors="pt")

# Sample a continuation. The whole encoding is unpacked so that the
# attention mask produced by the tokenizer is passed along with input_ids
# instead of being silently dropped.
with torch.no_grad():
  outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
    do_sample=True,
    temperature=0.7
  )

# Decode and display
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


The future of artificial intelligence is still uncertain, but we do not yet have a clear roadmap for the future of AI. In any case, there are plenty of opportunities for our society to continue to innovate and grow, and our future will shape our future.

I hope that you will join me in congratulating the new CEO of IBM, Dr. Jeff Bezos, and the amazing staff at IBM for taking the time to speak with us. Thank you for letting us share your thoughts and ideas on the future of AI and the


In [3]:
# dataset from https://recipenlg.cs.put.poznan.pl/
import pandas as pd
import duckdb as sql

# Resolve the CSV location once, then let DuckDB parse it; the two list-like
# columns are declared as VARCHAR[] so they arrive as sequences, not strings.
recipes_csv = here('data/recipeNLG/recipeNLG.csv')

# Run the query and materialise it as a pandas DataFrame in a single step.
data = sql.query(f"""
  select title, directions
  from read_csv(
    '{recipes_csv}', 
    header=True,
    delim=',',
    types = {{
      'title': 'VARCHAR',
      'ingredients': 'VARCHAR[]',
      "directions": 'VARCHAR[]'
    }}
  )
  limit 1000
""").df()
data

Unnamed: 0,title,directions
0,No-Bake Nut Cookies,"[In a heavy 2-quart saucepan, mix brown sugar,..."
1,Jewell Ball'S Chicken,"[Place chipped beef on bottom of baking dish.,..."
2,Creamy Corn,"[In a slow cooker, combine all ingredients. Co..."
3,Chicken Funny,"[Boil and debone chicken., Put bite size piece..."
4,Reeses Cups(Candy),[Combine first four ingredients and press in 1...
...,...,...
995,Heath Bar Pie,[Mix chopped Heath bars with whipped topping a...
996,Victorian Baked French Toast,"[Cook brown sugar, butter and corn syrup in sm..."
997,Quick Swedish Meatballs,"[Combine meat, bread crumbs, cheese, soup mix,..."
998,Irish Stew(Microwave),"[In 4-quart casserole, combine lamb, 1 1/4 cup..."


In [4]:
from tqdm.notebook import tqdm

tqdm.pandas(desc="Formatting directions")


def number_steps(steps):
  """Join a sequence of direction steps into numbered lines.

  Each step becomes "<n> - <step>" with n starting at 1, and the lines are
  joined with newlines. A value that is already a string is returned
  unchanged, so re-running this cell does not enumerate the characters of an
  already formatted entry.
  """
  if isinstance(steps, str):
    return steps
  return '\n'.join(f"{number} - {step}" for number, step in enumerate(steps, start=1))


# in directions, transform the list to one string where line n starts with "n - "
data['directions'] = data['directions'].progress_apply(number_steps)
# format to single string, with format f"Recipe: {title}\n\nInstructions:\n{directions}\n<|endoftext|>"
data['formatted'] = data.progress_apply(
  lambda row: f"Recipe: {row['title']}\n\nInstructions:\n{row['directions']}\n<|endoftext|>",
  axis=1
)
data

Formatting directions:   0%|          | 0/1000 [00:00<?, ?it/s]

Formatting directions:   0%|          | 0/1000 [00:00<?, ?it/s]

Unnamed: 0,title,directions,formatted
0,No-Bake Nut Cookies,"1 - In a heavy 2-quart saucepan, mix brown sug...",Recipe: No-Bake Nut Cookies\n\nInstructions:\n...
1,Jewell Ball'S Chicken,1 - Place chipped beef on bottom of baking dis...,Recipe: Jewell Ball'S Chicken\n\nInstructions:...
2,Creamy Corn,"1 - In a slow cooker, combine all ingredients....",Recipe: Creamy Corn\n\nInstructions:\n1 - In a...
3,Chicken Funny,1 - Boil and debone chicken.\n2 - Put bite siz...,Recipe: Chicken Funny\n\nInstructions:\n1 - Bo...
4,Reeses Cups(Candy),1 - Combine first four ingredients and press i...,Recipe: Reeses Cups(Candy) \n\nInstructions:\...
...,...,...,...
995,Heath Bar Pie,1 - Mix chopped Heath bars with whipped toppin...,Recipe: Heath Bar Pie\n\nInstructions:\n1 - Mi...
996,Victorian Baked French Toast,"1 - Cook brown sugar, butter and corn syrup in...",Recipe: Victorian Baked French Toast\n\nInstru...
997,Quick Swedish Meatballs,"1 - Combine meat, bread crumbs, cheese, soup m...",Recipe: Quick Swedish Meatballs\n\nInstruction...
998,Irish Stew(Microwave),"1 - In 4-quart casserole, combine lamb, 1 1/4 ...",Recipe: Irish Stew(Microwave) \n\nInstruction...


In [5]:
# print (rather than rich display) so the embedded newlines are rendered
print(data.loc[0, "formatted"])

Recipe: No-Bake Nut Cookies

Instructions:
1 - In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.
2 - Stir over medium heat until mixture bubbles all over top.
3 - Boil and stir 5 minutes more. Take off heat.
4 - Stir in vanilla and cereal; mix well.
5 - Using 2 teaspoons, drop and shape into 30 clusters on wax paper.
6 - Let stand until firm, about 30 minutes.
<|endoftext|>


In [6]:
from torch.utils.data import Dataset
# Custom Dataset class
class RecipeDataset(Dataset):
  """Map-style dataset that tokenizes one recipe text per item.

  Tokenization happens on access: each item is truncated and padded to
  `max_length` and returned as a dict of 1-D tensors under the keys
  'input_ids' and 'attention_mask'.
  """

  def __init__(self, texts, tokenizer, max_length=512):
    self.texts, self.tokenizer, self.max_length = texts, tokenizer, max_length

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    encoded = self.tokenizer(
      self.texts[idx],
      truncation=True,
      padding='max_length',
      max_length=self.max_length,
      return_tensors='pt'
    )
    # Flatten each tensor the tokenizer returned down to one dimension.
    return {
      field: encoded[field].flatten()
      for field in ('input_ids', 'attention_mask')
    }

# Hand the formatted recipe strings to the dataset as a plain Python list so
# they are indexed by position.
recipe_texts = data['formatted'].tolist()
dataset = RecipeDataset(recipe_texts, tokenizer, max_length=512)

In [7]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Checkpoints go under the project's models/ folder; anything already in that
# directory may be overwritten.
training_args = TrainingArguments(
  output_dir=str(here('models/gpt2-recipe-finetuned')),
  overwrite_output_dir=True,
  num_train_epochs=3,
)

data_collator = DataCollatorForLanguageModeling(
  tokenizer=tokenizer,
  mlm=False,  # GPT-2 is not a masked language model
)

# `processing_class` replaces the deprecated `tokenizer` argument of Trainer,
# which is what triggered the warning previously shown under this cell.
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=dataset,
  data_collator=data_collator,
  processing_class=tokenizer,
)

print("Trainer initialized")
print(f"Number of trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

  trainer = Trainer(


Trainer initialized
Number of trainable parameters: 124,440,576


In [9]:
# Start fine-tuning
print("Starting fine-tuning...")
trainer.train()

# Save the fine-tuned model and tokenizer side by side. The path is converted
# to a plain string, matching how the same directory is passed to
# TrainingArguments(output_dir=...).
model_save_path = str(here('models/gpt2-recipe-finetuned'))
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"Fine-tuned model saved to: {model_save_path}")

Starting fine-tuning...


Step,Training Loss


KeyboardInterrupt: 

In [None]:
# Test the fine-tuned model
def generate_recipe(prompt, max_length=200, temperature=0.8, do_sample=True):
    """Generate text for `prompt` with the notebook-level `model` and `tokenizer`.

    Args:
        prompt: Text to continue, e.g. "Recipe: Beef Stew".
        max_length: Forwarded to `model.generate` as `max_length`.
        temperature: Sampling temperature; only forwarded when `do_sample`
            is true, since it has no effect otherwise.
        do_sample: Forwarded to `model.generate` as `do_sample`.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    model.eval()
    # Put the inputs on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Skip the temperature flag for non-sampling decoding to avoid passing a
    # setting that generation would ignore.
    sampling_kwargs = {"temperature": temperature} if do_sample else {}

    with torch.no_grad():
        # Unpack the whole encoding so the attention mask is used rather than
        # moved to the device and then discarded.
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=do_sample,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
            **sampling_kwargs,
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Sample one generation per prompt; each prompt uses the same "Recipe: <title>"
# header that the training text starts with.
test_prompts = [
    "Recipe: Chocolate Chip Cookies",
    "Recipe: Beef Stew",
    "Recipe: Vegetable Soup"
]

print("Testing fine-tuned model:", "=" * 50, sep="\n")

for prompt in test_prompts:
    # The header goes out before generation starts, so it is visible while
    # the model is still sampling.
    print(f"\n{prompt}", "-" * 30, sep="\n")
    generated = generate_recipe(prompt)
    print(generated, "=" * 50, sep="\n")

Testing fine-tuned model:

Recipe: Chocolate Chip Cookies
------------------------------
Recipe: Chocolate Chip Cookies

Instructions:
Spread peanut butter in a greased cookie sheet.\nChill until set, about 5 minutes.\nStir in sugar; beat 2 minutes.\nPour chocolate chips on top.\nMix remaining ingredients into a greased 9 x 13-inch loaf pan.\nPour into greased 9 x 13-inch pan. Bake in 350u00b0 oven for 45 minutes.


Recipe: Beef Stew
------------------------------
Recipe: Chocolate Chip Cookies

Instructions:
Spread peanut butter in a greased cookie sheet.\nChill until set, about 5 minutes.\nStir in sugar; beat 2 minutes.\nPour chocolate chips on top.\nMix remaining ingredients into a greased 9 x 13-inch loaf pan.\nPour into greased 9 x 13-inch pan. Bake in 350u00b0 oven for 45 minutes.


Recipe: Beef Stew
------------------------------
Recipe: Beef Stew

Instructions:
Combine all ingredients before making stew.\nMix well.\nAdd chicken in broth.\nAdd onion and carrots; stir well.\nAdd 

In [None]:
# Compare with original model (optional)
print("Comparison with original model:", "=" * 50, sep="\n")

# Fresh copy of the pretrained checkpoint, with its embedding matrix resized
# to the current tokenizer length so both models share one vocabulary size.
original_model = AutoModelForCausalLM.from_pretrained(model_name)
original_model.resize_token_embeddings(len(tokenizer))

def generate_with_original(prompt, max_length=200, temperature=0.8):
    """Generate text for `prompt` with the notebook-level `original_model`.

    Mirrors `generate_recipe` but always samples, so the two models can be
    compared on the same prompt.

    Args:
        prompt: Text to continue.
        max_length: Forwarded to `original_model.generate` as `max_length`.
        temperature: Sampling temperature forwarded to `generate`.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    original_model.eval()
    # Keep the inputs on the same device as the model's parameters.
    device = next(original_model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        # Unpack the whole encoding so the attention mask is passed along
        # with the input ids.
        outputs = original_model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_return_sequences=1
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Compare on one example: same prompt, fine-tuned model first, then the
# original checkpoint.
test_prompt = "Recipe: Chocolate Chip Cookies"
print(f"Prompt: {test_prompt}\n")

for heading, generate in (
    ("FINE-TUNED MODEL:", generate_recipe),
    ("\nORIGINAL MODEL:", generate_with_original),
):
    print(heading)
    print(generate(test_prompt))

Comparison with original model:
Prompt: Recipe: Chocolate Chip Cookies

FINE-TUNED MODEL:
Prompt: Recipe: Chocolate Chip Cookies

FINE-TUNED MODEL:
Recipe: Chocolate Chip Cookies

Instructions:
Cut the butter in half.\nMix the chocolate chips and graham crackers together.\nAdd the soda, water, syrup and flour.\nMix well.\nAdd the chopped marshmallows.


ORIGINAL MODEL:
Recipe: Chocolate Chip Cookies

Instructions:
Cut the butter in half.\nMix the chocolate chips and graham crackers together.\nAdd the soda, water, syrup and flour.\nMix well.\nAdd the chopped marshmallows.


ORIGINAL MODEL:
Recipe: Chocolate Chip Cookies Recipe Print Ingredients 2 1/2 cups flour

2 cups sugar

1/2 cup granulated sugar

1/4 teaspoon baking powder

1/2 tsp baking soda Instructions Preheat the oven to 325 degrees. (I used a 350 degree oven) In a large mixing bowl, blend the flour, sugar, sugar, baking powder and baking soda until crumbly. Add the flour mixture to the bowl and mix with a spoon until smooth. 