In [1]:
import json
import os
import urllib
import ssl

def download_and_load_file(file_path, url):
    ssl_context = ssl.create_default_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    if not os.path.exists(file_path):
        with urllib.request.urlopen(url, context=ssl_context) as response:
            text_data = response.read().decode("utf-8")
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text_data = file.read()

    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return data


file_path = "instruction-data.json"
url = (
    "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch"
    "/main/ch07/01_main-chapter-code/instruction-data.json"
)

data = download_and_load_file(file_path, url)
print("Number of entries:", len(data))


Number of entries: 1100


In [2]:
print("exmple entry:\n", data[999])

exmple entry:
 {'instruction': "What is an antonym of 'complicated'?", 'input': '', 'output': "An antonym of 'complicated' is 'simple'."}


In [3]:
#convertinf the dataset to alpaca prompt style
def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task"
        f"Write a instruction that appropriatly completes the request"
        f"\n\n### Instruction:\n{entry['instruction']}"
    )
    input_text = f"\n\n### Input:\n{entry['input']}" if entry['input'] else ""
    return instruction_text+input_text

In [4]:
model_input = format_input(data[50])
desired_output = f"\n\n### Response:\n{data[50]['output']}"
print(model_input+desired_output)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Identify the correct spelling of the following word.

### Input:
Ocassion

### Response:
The correct spelling is 'Occasion.'


In [5]:
model_input = format_input(data[999])
desired_output = f"\n\n### Response:\n{data[999]['output']}"
print(model_input+desired_output)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
What is an antonym of 'complicated'?

### Response:
An antonym of 'complicated' is 'simple'.


In [6]:
train_portion = int(len(data) * 0.85)  # 85% for training
test_portion = int(len(data) * 0.1)    # 10% for testing
val_portion = len(data) - train_portion - test_portion  # Remaining 5% for validation

train_data = data[:train_portion]
test_data = data[train_portion:train_portion + test_portion]
val_data = data[train_portion + test_portion:]

In [7]:
#batching the input
import torch
from torch.utils.data import Dataset

class InstructionSet(Dataset):
    def __init__(self,data,tokenizer):
        self.data = data
        self.encoded_text = []
        for entry in data:
            Instruction_input = format_input(entry)
            response_text = f"\n\n### Response:\n{entry['output']}"
            full_text= Instruction_input+response_text
            self.encoded_text.append(tokenizer.encode(full_text))
    def __getitem__(self, index):
        return self.encoded_text[index]
    def __len__(self):
        return len(self.data)
        

In [8]:
import tiktoken
tokenizer = tiktoken.get_encoding('gpt2')

In [9]:
print(tokenizer.encode("<|endoftext|>", allowed_special={"<|endoftext|>"}))

[50256]


In [10]:
def custom_collate_draft_1(batch,pad_token_id= 50256, device= 'cpu'):
    batch_max_length = max(len(entry)+1 for entry in batch)
    input_lst =[]
    for item in batch:
        new_item = item.copy()
        new_item += [pad_token_id]
        padded =(new_item + [pad_token_id]*(batch_max_length-len(new_item)))
        input = torch.tensor(padded[:-1])
        input_lst.append(input)
    input_tensor = torch.stack(input_lst).to(device)
    return input_tensor
    


In [11]:
#testing the padding class
inputs_1 = [0, 1, 2, 3, 4]
inputs_2 = [5, 6]
inputs_3 = [7, 8, 9]

batch = (
    inputs_1,
    inputs_2,
    inputs_3
)

print(custom_collate_draft_1(batch))

tensor([[    0,     1,     2,     3,     4],
        [    5,     6, 50256, 50256, 50256],
        [    7,     8,     9, 50256, 50256]])


In [12]:
def custom_collate_draft_2(
    batch,
    pad_token_id=50256,
    device="cpu"
):
    # Find the longest sequence in the batch
    batch_max_length = max(len(item)+1 for item in batch)

    # Pad and prepare inputs
    inputs_lst, targets_lst = [], []

    for item in batch:
        new_item = item.copy()
        # Add an <|endoftext|> token
        new_item += [pad_token_id]
        # Pad sequences to max_length
        padded = (
            new_item + [pad_token_id] *
            (batch_max_length - len(new_item))
        )
        inputs = torch.tensor(padded[:-1])  # Truncate the last token for inputs
        targets = torch.tensor(padded[1:])  # Shift +1 to the right for targets
        inputs_lst.append(inputs)
        targets_lst.append(targets)

    # Convert list of inputs to tensor and transfer to target device
    inputs_tensor = torch.stack(inputs_lst).to(device)
    targets_tensor = torch.stack(targets_lst).to(device)
    return inputs_tensor, targets_tensor

In [13]:
def custom_collate_fn(
    batch,
    pad_token_id=50256,
    ignore_index=-100,
    allowed_max_length=None,
    device="cpu"
):
    # Find the longest sequence in the batch
    batch_max_length = max(len(item)+1 for item in batch)

    # Pad and prepare inputs and targets
    inputs_lst, targets_lst = [], []

    for item in batch:
        new_item = item.copy()
        # Add an <|endoftext|> token
        new_item += [pad_token_id]
        # Pad sequences to max_length
        padded = (
            new_item + [pad_token_id] *
            (batch_max_length - len(new_item))
        )
        inputs = torch.tensor(padded[:-1])  # Truncate the last token for inputs
        targets = torch.tensor(padded[1:])  # Shift +1 to the right for targets

        # New: Replace all but the first padding tokens in targets by ignore_index
        mask = targets == pad_token_id
        indices = torch.nonzero(mask).squeeze()
        if indices.numel() > 1:
            targets[indices[1:]] = ignore_index

        # New: Optionally truncate to maximum sequence length
        if allowed_max_length is not None:
            inputs = inputs[:allowed_max_length]
            targets = targets[:allowed_max_length]

        inputs_lst.append(inputs)
        targets_lst.append(targets) 

    # Convert list of inputs and targets to tensors and transfer to target device
    inputs_tensor = torch.stack(inputs_lst).to(device)
    targets_tensor = torch.stack(targets_lst).to(device)

    return inputs_tensor, targets_tensor

In [14]:
#testing the padding class
inputs_1 = [0, 1, 2, 3, 4]
inputs_2 = [5, 6]
inputs_3 = [7, 8, 9]

batch = (
    inputs_1,
    inputs_2,
    inputs_3
)

print(custom_collate_fn(batch))

(tensor([[    0,     1,     2,     3,     4],
        [    5,     6, 50256, 50256, 50256],
        [    7,     8,     9, 50256, 50256]]), tensor([[    1,     2,     3,     4, 50256],
        [    6, 50256,  -100,  -100,  -100],
        [    8,     9, 50256,  -100,  -100]]))


In [15]:
#creating dataloaders for train, test and validation
from torch.utils.data import DataLoader
num_workers = 0
batch_size = 8

train_dataset = InstructionSet(train_data,tokenizer) 
train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True,
                          num_workers=num_workers,drop_last=True,
                          collate_fn=custom_collate_fn)
val_dataset = InstructionSet(val_data,tokenizer)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True,num_workers=num_workers,
                        collate_fn=custom_collate_fn, drop_last=True)

test_dataset = InstructionSet(test_data, tokenizer)
test_loader = DataLoader(test_dataset, batch_size= batch_size, shuffle=True, collate_fn=custom_collate_fn,
                        drop_last=True, num_workers=num_workers)

In [16]:
i=0
for input,target in train_loader:
    print(input.shape)
    print(target.shape)
    i=i+1
    if i==5: break

torch.Size([8, 58])
torch.Size([8, 58])
torch.Size([8, 88])
torch.Size([8, 88])
torch.Size([8, 66])
torch.Size([8, 66])
torch.Size([8, 82])
torch.Size([8, 82])
torch.Size([8, 78])
torch.Size([8, 78])


### Loading the pretrained model

In [17]:
model_config = {
    "gpt-2-small(124M)":{"emb_dim":768, "n_layers":12,"n_heads":12},
    "gpt-2-medium(355M)":{"emb_dim":1024, "n_layers":24,"n_heads":16},
    "gpt-2-large(774M)":{"emb_dim":1280, "n_layers":36,"n_heads":20},
    "gpt-2-xl(124M)":{"emb_dim":1600, "n_layers":48,"n_heads":25}
}

In [18]:
# gpt-2 configurations
GPT_CONFIG_124M= {
    'vocab_size' :50257,
    'context_length': 1024,
    'emb_dim':768,
    'n_heads':12,
    'n_layers':12,
    'drop_rate':0.1,
    'qkv_bias':True
}

In [19]:
model_name = "gpt-2-medium(355M)"
NEW_CONFIG = GPT_CONFIG_124M.copy()
NEW_CONFIG.update(model_config[model_name])

In [20]:
NEW_CONFIG

{'vocab_size': 50257,
 'context_length': 1024,
 'emb_dim': 1024,
 'n_heads': 16,
 'n_layers': 24,
 'drop_rate': 0.1,
 'qkv_bias': True}

In [21]:
from model import GPTModel

In [22]:
from weight_down import download_and_load_gpt2

In [23]:
settings, params = download_and_load_gpt2(model_size="355M", models_dir='gpt2')

Error downloading the file: HTTPSConnectionPool(host='openaipublic.blob.core.windows.net', port=443): Max retries exceeded with url: /gpt-2/models%5C355M%5Ccheckpoint (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x0000015FD7F070D0>: Failed to resolve 'openaipublic.blob.core.windows.net' ([Errno 11001] getaddrinfo failed)"))
please check the url : https://openaipublic.blob.core.windows.net/gpt-2/models\355M\checkpoint
Error downloading the file: HTTPSConnectionPool(host='openaipublic.blob.core.windows.net', port=443): Max retries exceeded with url: /gpt-2/models%5C355M%5Cencoder.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x0000015FD807EA50>: Failed to resolve 'openaipublic.blob.core.windows.net' ([Errno 11001] getaddrinfo failed)"))
please check the url : https://openaipublic.blob.core.windows.net/gpt-2/models\355M\encoder.json
Error downloading the file: HTTPSConnectionPool(host='openaipublic.blob.core.windows.net

In [24]:
from loadweight_to_model import load_weights_to_gpt

In [25]:
PA_model = GPTModel(NEW_CONFIG)

In [26]:
load_weights_to_gpt(PA_model, params)

In [27]:
PA_model.eval()

GPTModel(
  (tok_emb): Embedding(50257, 1024)
  (pos_emb): Embedding(1024, 1024)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (attn): MultiHeadAttention(
        (w_query): Linear(in_features=1024, out_features=1024, bias=True)
        (w_key): Linear(in_features=1024, out_features=1024, bias=True)
        (w_value): Linear(in_features=1024, out_features=1024, bias=True)
        (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU()
          (2): Linear(in_features=4096, out_features=1024, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (attn): MultiHeadAttention(
        (w_query): Linear

In [28]:
torch.manual_seed(123)
input_text = format_input(val_data[0])
print(input_text)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'


In [29]:
from utility import text_to_token,token_to_text, generate

In [30]:
token_ids = generate(model=PA_model, idx= text_to_token(input_text,tokenizer), max_new_token=30,
                     context_size=NEW_CONFIG['context_length'],eos_id=50256)
generated_token = token_to_text(token_ids,tokenizer)
print(generated_token)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'

### Instruction:

Convert the active sentence to passive: 'The chef cooks the meal every day.'

### Instruction:



In [31]:
import utility
print(dir(utility))

['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'calc_loss_batch', 'calc_loss_loader', 'device', 'evaluate_model', 'generate', 'generate_and_print_sample', 'text_to_token', 'token_to_text', 'torch', 'train_model_simple']


In [32]:
from utility import train_model_simple, calc_loss_loader

In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [34]:
PA_model.to(device)

torch.manual_seed(123)

with torch.no_grad():
    train_loss = calc_loss_loader(train_loader, PA_model, device, num_batches=5)
    val_loss = calc_loss_loader(val_loader, PA_model, device, num_batches=5)

print("Training loss:", train_loss)
print("Validation loss:", val_loss)

Training loss: 4.476358032226562
Validation loss: 4.4222818374633786


In [35]:
# import time

# start_time = time.time()

# torch.manual_seed(123)

# optimizer = torch.optim.AdamW(PA_model.parameters(), lr=0.00005, weight_decay=0.1)

# num_epochs = 1

# train_losses, val_losses, tokens_seen = train_model_simple(
#     PA_model, train_loader, val_loader, optimizer, device,
#     num_epochs=num_epochs, eval_freq=5, eval_iter=5,
#     start_context=format_input(val_data[0]), tokenizer=tokenizer
# )

# end_time = time.time()
# execution_time_minutes = (end_time - start_time) / 60
# print(f"Training completed in {execution_time_minutes:.2f} minutes.")

In [36]:
token_ids = generate(model=PA_model, idx= text_to_token(input_text,tokenizer), max_new_token=30,
                     context_size=NEW_CONFIG['context_length'],eos_id=50256)
generated_token = token_to_text(token_ids,tokenizer)
print(generated_token)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'

### Instruction:

Convert the active sentence to passive: 'The chef cooks the meal every day.'

### Instruction:



In [37]:
torch.manual_seed(123)
input_text = format_input(val_data[3])
print(input_text)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Generate a question with the keyword “bacteria”


In [38]:
print(token_to_text(torch.tensor(train_dataset[2]),tokenizer))

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Convert 45 kilometers to meters.

### Response:
45 kilometers is 45000 meters.


In [39]:
for i, (input_batch , output_batch) in enumerate(train_loader):
    print(token_to_text(torch.tensor(input_batch[1]),tokenizer))
    print(torch.tensor(output_batch[1]))
    print(input_batch[1].clone().detach())
    i= i+1
    if i==1: break

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
What is the pH of vinegar?

### Response:
The pH of vinegar is typically around 2.5.<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>
tensor([  318,   281, 12064,   326,  8477,   257,  4876, 16594,   257, 12064,
          326,  4148,   265,   306, 32543,   262,  2581,   198,   198, 21017,
        46486,    25,   198,  2061,   318,   262, 22918,   286, 26600,    30,
          198,   198, 21017, 18261,    25,   198,   464, 22918,   286, 26600,
          318,  6032,  1088,   362,    13,    20,    13, 50256,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100])
tensor([21106,   318,   281, 12064,   326,  8477,   257,  4876, 16594,   257,
        12064,   326,  4148,   265,   306, 32543,   262,  258

  print(token_to_text(torch.tensor(input_batch[1]),tokenizer))
  print(torch.tensor(output_batch[1]))


In [40]:
# import time

# start_time = time.time()

# torch.manual_seed(123)

# optimizer = torch.optim.AdamW(PA_model.parameters(), lr=0.00005, weight_decay=0.1)

# num_epochs = 1

# train_losses, val_losses, tokens_seen = train_model_simple(
#     PA_model, train_loader, val_loader, optimizer, device,
#     num_epochs=num_epochs, eval_freq=50, eval_iter=5,
#     start_context=format_input(val_data[0]), tokenizer=tokenizer
# )

# end_time = time.time()
# execution_time_minutes = (end_time - start_time) / 60
# print(f"Training completed in {execution_time_minutes:.2f} minutes.")

In [41]:
PA_model.to(device)

torch.manual_seed(123)

with torch.no_grad():
    train_loss = calc_loss_loader(train_loader, PA_model, device, num_batches=5)
    val_loss = calc_loss_loader(val_loader, PA_model, device, num_batches=5)

print("Training loss:", train_loss)
print("Validation loss:", val_loss)

Training loss: 4.476358032226562
Validation loss: 4.4222818374633786


In [42]:
from utility import train_model_simple, calc_loss_loader

In [43]:
optimizer = torch.optim.AdamW(PA_model.parameters(), lr=0.00005, weight_decay=0.1)

In [44]:
# # Save path
# import os

# # Create the directory if it doesn't exist
# os.makedirs("checkpoints", exist_ok=True)

# save_path = "checkpoints/gpt_checkpoint_epoch_3.pth"

# # Save model, optimizer, and optionally scheduler
# torch.save({
#     'epoch': 2,
#     'model_state_dict': PA_model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(),
#     # optional: 'loss': loss_value
# }, save_path)


In [45]:
model = GPTModel(NEW_CONFIG)

In [46]:
checkpoint = torch.load("checkpoints/gpt_checkpoint_epoch_3.pth", map_location=device)

model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1

model.to(device)
model.train()


  checkpoint = torch.load("checkpoints/gpt_checkpoint_epoch_3.pth", map_location=device)


GPTModel(
  (tok_emb): Embedding(50257, 1024)
  (pos_emb): Embedding(1024, 1024)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (attn): MultiHeadAttention(
        (w_query): Linear(in_features=1024, out_features=1024, bias=True)
        (w_key): Linear(in_features=1024, out_features=1024, bias=True)
        (w_value): Linear(in_features=1024, out_features=1024, bias=True)
        (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU()
          (2): Linear(in_features=4096, out_features=1024, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (attn): MultiHeadAttention(
        (w_query): Linear

In [47]:
model.to(device)

torch.manual_seed(123)

with torch.no_grad():
    train_loss = calc_loss_loader(train_loader, model, device, num_batches=5)
    val_loss = calc_loss_loader(val_loader, model, device, num_batches=5)

print("Training loss:", train_loss)
print("Validation loss:", val_loss)

Training loss: 0.3448606848716736
Validation loss: 0.658559900522232


In [48]:
input_text = format_input(val_data[0])


In [49]:
input_text

"Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request\n\n### Instruction:\nConvert the active sentence to passive: 'The chef cooks the meal every day.'"

In [50]:
# import time

# start_time = time.time()

# torch.manual_seed(123)

# optimizer = torch.optim.AdamW(PA_model.parameters(), lr=0.00005, weight_decay=0.1)

# num_epochs = 1

# train_losses, val_losses, tokens_seen = train_model_simple(
#     model, train_loader, val_loader, optimizer, device,
#     num_epochs=num_epochs, eval_freq=50, eval_iter=5,
#     start_context=format_input(val_data[0]), tokenizer=tokenizer
# )

# end_time = time.time()
# execution_time_minutes = (end_time - start_time) / 60
# print(f"Training completed in {execution_time_minutes:.2f} minutes.")

# # Save path
# import os

# # Create the directory if it doesn't exist
# os.makedirs("checkpoints", exist_ok=True)

# save_path = "checkpoints/gpt_checkpoint_epoch_3.pth"

# # Save model, optimizer, and optionally scheduler
# torch.save({
#     'epoch': 2,
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(),
#     # optional: 'loss': loss_value
# }, save_path)

In [52]:
token_ids = generate(model=model, idx= text_to_token(input_text,tokenizer), max_new_token=30,
                     context_size=NEW_CONFIG['context_length'],eos_id=50256)
generated_token = token_to_text(token_ids,tokenizer)
print(generated_token)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'

### Response:
The meal is cooked by the chef every day.


In [60]:
input_text = format_input(val_data[17])
print(input_text)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Edit the sentence.

### Input:
We enjoys watching movies.


In [61]:
token_ids = generate(model=model, idx= text_to_token(input_text,tokenizer), max_new_token=30,
                     context_size=NEW_CONFIG['context_length'],eos_id=50256)
generated_token = token_to_text(token_ids,tokenizer)
print(generated_token)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Edit the sentence.

### Input:
We enjoys watching movies.

### Response:
We enjoy watching movies.


In [64]:
len(test_data)

110

In [66]:
test_text = format_input(test_data[50])
print(test_text)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Edit the given text to ensure all plural nouns are spelled correctly.

### Input:
The birds sings beautiful songs.


In [67]:
token_ids = generate(model=model, idx= text_to_token(test_text,tokenizer), max_new_token=30,
                     context_size=NEW_CONFIG['context_length'],eos_id=50256)
generated_token = token_to_text(token_ids,tokenizer)
print(generated_token)

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Edit the given text to ensure all plural nouns are spelled correctly.

### Input:
The birds sings beautiful songs.

### Response:
The birds sang beautiful songs.


In [71]:
#testing the model 
torch.manual_seed(123)


for entry in test_data[:3]:

    input_text = format_input(entry)

    token_ids = generate(
        model=model,
        idx=text_to_token(input_text, tokenizer).to(device),
        max_new_token=256,
        context_size=NEW_CONFIG["context_length"],
        eos_id=50256
    )
    generated_text = token_to_text(token_ids, tokenizer)
    response_text = (
        generated_text[len(input_text):]
        .replace("### Response:", "")
        .strip()
)

    print(input_text)
    print(f"\nCorrect response:\n>> {entry['output']}")
    print(f"\nModel response:\n>> {response_text.strip()}")
    print("-------------------------------------")

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Rewrite the sentence using a simile.

### Input:
The car is very fast.

Correct response:
>> The car is as fast as lightning.

Model response:
>> The car is as fast as a cheetah.
-------------------------------------
Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
What type of cloud is typically associated with thunderstorms?

Correct response:
>> The type of cloud typically associated with thunderstorms is cumulonimbus.

Model response:
>> The type of cloud typically associated with thunderstorms is a cumulus.
-------------------------------------
Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Name the author of 'Pride and Prejudice'.

Correct response:
>> Jane Austen.

Model response:
>> The author of 'Pride and Pr

### Evaluating the model

In [72]:
from tqdm import tqdm

for i, entry in tqdm(enumerate(test_data), total=len(test_data)):

    input_text = format_input(entry)

    token_ids = generate(
        model=model,
        idx=text_to_token(input_text, tokenizer).to(device),
        max_new_token=256,
        context_size=NEW_CONFIG["context_length"],
        eos_id=50256
    )
    generated_text = token_to_text(token_ids, tokenizer)
    response_text = generated_text[len(input_text):].replace("### Response:", "").strip()

    test_data[i]["model_response"] = response_text


with open("instruction-data-with-response.json", "w") as file:
    json.dump(test_data, file, indent=4)  # "indent" for pretty-printing

100%|██████████| 110/110 [23:00<00:00, 12.55s/it]


In [75]:
print(test_data[0])

{'instruction': 'Rewrite the sentence using a simile.', 'input': 'The car is very fast.', 'output': 'The car is as fast as lightning.', 'model_response': 'The car is a fast car.'}


In [80]:
print(format_input(train_data[1]))

Below is an instruction that describes a taskWrite a instruction that appropriatly completes the request

### Instruction:
Edit the following sentence for grammar.

### Input:
He go to the park every day.
