In [3]:
import torch
print("torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA devices:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("current device:", torch.cuda.current_device())
    print("device name:", torch.cuda.get_device_name(0))

torch: 2.7.1+cu118
CUDA available: True
CUDA devices: 1
current device: 0
device name: NVIDIA GeForce GTX 1650 with Max-Q Design


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

  from .autonotebook import tqdm as notebook_tqdm


#### Initializing model and tokenizer

In [4]:
model_name = "distilbert/distilgpt2"

device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)

model = model.to(device)

model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

#### Loading a dataset

In [6]:
from datasets import Dataset
import pandas as pd
import json

In [11]:
df = pd.read_csv("E:/Generative AI Projects/Finetuning-distilgpt2-LLM-on-food-recepies/corpus/RecipeNLG_dataset.csv")

#### Creating feature recepie_string

In [12]:
def create_recepie_full(data) :
    recepie_directions = " ".join(json.loads(data["directions"]))
    recepie_title = data["title"]
    recepie_string = f"TITLE: {recepie_title} | DIRECTIONS: {recepie_directions}"
    data["recepie_string"] = recepie_string
    return data

df = df.apply(create_recepie_full, axis = 1)
df = df.filter(["recepie_string"])

In [13]:
df

Unnamed: 0,recepie_string
0,TITLE: No-Bake Nut Cookies | DIRECTIONS: In a ...
1,TITLE: Jewell Ball'S Chicken | DIRECTIONS: Pla...
2,TITLE: Creamy Corn | DIRECTIONS: In a slow coo...
3,TITLE: Chicken Funny | DIRECTIONS: Boil and de...
4,TITLE: Reeses Cups(Candy) | DIRECTIONS: Comb...
...,...
2231137,TITLE: Sunny's Fake Crepes | DIRECTIONS: Sprea...
2231138,TITLE: Devil Eggs | DIRECTIONS: Boil eggs on m...
2231139,TITLE: Extremely Easy and Quick - Namul Daikon...
2231140,TITLE: Pan-Roasted Pork Chops With Apple Fritt...


#### Creating dataset

In [14]:
dataset = Dataset.from_pandas(df)

def map_dataset(batch) :
    return tokenizer(
        batch["recepie_string"],
        truncation = True,
        max_length = 64,
        return_overflowing_tokens = True
    )

dataset = dataset.map(
    map_dataset,
    batched=True,
    batch_size=10,
    remove_columns=list(df.columns)
)

dataset

Map: 100%|██████████| 2231142/2231142 [12:19<00:00, 3018.25 examples/s]


Dataset({
    features: ['input_ids', 'attention_mask', 'overflow_to_sample_mapping'],
    num_rows: 5441981
})

In [15]:
dataset = dataset.remove_columns(["overflow_to_sample_mapping"])

# dataset = dataset.select(range(5000))
dataset

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 5441981
})

#### Train Test Split the dataset

In [16]:
from sklearn.model_selection import train_test_split

dataset = dataset.train_test_split(test_size = 0.2)

#### DataCollator

In [17]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer = tokenizer,
    mlm = False
)

data_collator

DataCollatorForLanguageModeling(tokenizer=GPT2TokenizerFast(name_or_path='distilbert/distilgpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}
), mlm=False, mlm_probability=0.15, mask_replace_prob=0.8, random_replace_prob=0.1, pad_to_multiple_of=None, tf_experimental_compile=False, return_tensors='pt', seed=None)

#### Configuring training_args and trainer for finetuning

In [19]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir = "./output/model",
    eval_strategy = "epoch",
    per_device_train_batch_size = 1,
    learning_rate = 2e-5,
    weight_decay = 0.01,
    num_train_epochs = 10
)

trainer = Trainer(
    model = model,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    args = training_args,
    data_collator = data_collator
)

trainer.train()

Epoch,Training Loss,Validation Loss


SafetensorError: Error while serializing: I/O error: There is not enough space on the disk. (os error 112)

#### Testing the model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained("E:/Generative AI Projects/Finetuning-distilgpt2-LLM-on-food-recepies/output\model/checkpoint-126500")

prompt = "Millionare Pie"

inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(
    inputs.input_ids,
    max_new_tokens = 100,
    do_sample = True,
    top_k = 50,
    top_p = 0.98,
    repetition_penalty = 1.2,
    temperature = 0.7,
    eos_token_id=tokenizer.eos_token_id
)
    
outputs

  model = AutoModelForCausalLM.from_pretrained("E:/Generative AI Projects/Finetuning-distilgpt2-LLM-on-food-recepies/output\model/checkpoint-126500")
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: d1cfed7b-5a92-4e80-be78-ca40f7e8cc29)')' thrown while requesting HEAD https://huggingface.co/distilgpt2/resolve/main/tokenizer_config.json
Retrying in 1s [Retry 1/5].
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 04ee1dbc-51d5-4547-84fe-58231f89707c)')' thrown while requesting HEAD https://huggingface.co/distilgpt2/resolve/main/tokenizer_config.json
Retrying in 2s [Retry 2/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /distilgpt2/resolve/main/tokenizer_config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000001797D3FF610>: Failed to resolv

In [29]:
output_string = tokenizer.batch_decode(outputs)
output_string

['Millionare Pie Crust  | DIRECTIONS: Mix milk, sugar and butter in a medium bowl. Place crust on a lightly greased baking sheet; cut with biscuit cutter or pastry cutter. Bake at 350° for 30 minutes until top springs back when lightly touched. Cool in pan on wire rack before removing from pan. (Can be reheated to serve at once.) Top immediately with whipped cream and then pudding. Refrigerate. Combine melted chocolate for topping. To prepare frosting. Spoon 1/']

In [30]:
text = " ".join(output_string)
sentences = [sentence.strip() for sentence in text.split('.') if sentence.strip()]
for i, sentence in enumerate(sentences, 1):
        print(f"{i}. {sentence}.")

1. Millionare Pie Crust  | DIRECTIONS: Mix milk, sugar and butter in a medium bowl.
2. Place crust on a lightly greased baking sheet; cut with biscuit cutter or pastry cutter.
3. Bake at 350° for 30 minutes until top springs back when lightly touched.
4. Cool in pan on wire rack before removing from pan.
5. (Can be reheated to serve at once.
6. ) Top immediately with whipped cream and then pudding.
7. Refrigerate.
8. Combine melted chocolate for topping.
9. To prepare frosting.
10. Spoon 1/.
