In [1]:
from pathlib import Path
import sys
# Put the `src/` directory that sits next to the current working directory on
# sys.path; presumably that is where the `finetune` package imported below
# lives. This depends on the kernel's working directory, not the notebook's
# file location.
sys.path.append((Path.cwd().parent/"src").resolve().as_posix())

from finetune import s
from finetune.utils import ModelSummary
from finetune.models import GPT
from finetune.utils import ModelCheckpointManager
from finetune.dataloader import UltraChat200kDataLoaderLite
from finetune.LoRA import LoRALinear
from finetune.utils import instruct_generate
from finetune.chat_assistant import ChatAssistant
import numpy as np

we need CUDA for DDP so falling back to CPU


## Explore dataset

In [2]:
# Pull a single batch from the validation split to sanity-check its shape.
dataloader = UltraChat200kDataLoaderLite(split="val")
x, y = dataloader.next_batch()  # presumably (inputs, targets) — confirm in the dataloader
x.shape

found 1 shards for split val


torch.Size([8, 1024])

In [3]:
def count_tokens_in_dir(data_dir, split="train"):
    """
    Return the combined token count of every ``{split}_*.npy`` shard in ``data_dir``.

    Args:
        data_dir: Directory (str or Path) that holds the ``.npy`` shards.
        split: Filename prefix selecting the shards to count, e.g. "train" or "val".

    Returns:
        Sum of the first-axis lengths of all matching shards; 0 when none match.
    """
    shard_paths = sorted(Path(data_dir).glob(f"{split}_*.npy"))

    # Memory-map each shard instead of reading it into RAM; the length of the
    # first axis is taken as the number of tokens stored in that shard.
    return sum(np.load(shard, mmap_mode="r").shape[0] for shard in shard_paths)

# Shards are read from the dataset directory configured in the project settings.
data_path = s.ultrachat_200k_data_path

# Count both splits in order: train first, then val.
train_tokens, val_tokens = (
    count_tokens_in_dir(data_path, split_name) for split_name in ("train", "val")
)

print(f"Total train tokens: {train_tokens:,}")
print(f"Total val tokens:   {val_tokens:,}")

Total train tokens: 205,829,073
Total val tokens:   50,000,000


In [7]:
# Size of the val split expressed in units of `total_batch_size`
# (presumably tokens per optimizer step — confirm against the training config).
val_tokens / s.config["dataset"]["total_batch_size"]

762.939453125

In [9]:
# NOTE(review): 1e9 is an unexplained literal — presumably a 1B-token budget
# being converted to a count of `total_batch_size`-sized batches; consider naming it.
1e9 / s.config["dataset"]["total_batch_size"]

15258.7890625

## Finetuned model

In [3]:
# Checkpoint artifact to fetch through wandb and the local directory it is cached in.
wandb_path = 'sampath017/GPT3-124M/model_checkpoint_train_step_17000_val_loss_3.08:v0'
cache_dir = s.models_root_path / "pretrained_models"

# Build a default GPT, then rebind the name to whatever the checkpoint manager returns.
pretrained_model = GPT()
pretrained_model = ModelCheckpointManager.get_model_from_wandb(
    pretrained_model,
    wandb_path,
    cache_dir,
    model_type="pretrained",
)
ModelSummary.summary(pretrained_model)

[34m[1mwandb[0m: Downloading large artifact model_checkpoint_train_step_17000_val_loss_3.08:v0, 1425.29MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:6.8 (210.2MB/s)


Using checkpoint: C:\Users\sampath\Dev\GPT\models\pretrained_models\model_checkpoint_train_step_17000_val_loss_3.08.pt
Model size: 475.03 MB
Trainable parameters: 124.53M
Non-trainable parameters: 0


In [4]:
# Apply LoRA to the pretrained network (the target_modules names suggest attention
# and projection layers — confirm in LoRALinear.apply_lora).
finetuned_model = LoRALinear.apply_lora(
    pretrained_model,
    r=16,
    alpha=32,
    dropout=0.05,
    target_modules=("attn", "proj"),
)

# NOTE(review): the recorded run of this cell ended in a CommError because this
# artifact was not found in the project — verify the checkpoint name before re-running.
wandb_path = 'sampath017/GPT3_124M_instruct/model_checkpoint_train_step_5_val_loss_3.29:v0'
cache_dir = s.models_root_path / "finetuned_models"
finetuned_model = ModelCheckpointManager.get_model_from_wandb(
    finetuned_model,
    wandb_path,
    cache_dir,
    model_type="finetuned",
)

if s.ddp_master_process:
    # Compare the number of parameters with requires_grad set against the full count.
    total = sum(p.numel() for p in finetuned_model.parameters())
    trainable = sum(
        p.numel() for p in finetuned_model.parameters() if p.requires_grad
    )
    print(f"Trainable params: {trainable} / {total} ({100*trainable/total:.2f}%)")

CommError: artifact membership 'model_checkpoint_train_step_5_val_loss_3.29:v0' not found in 'sampath017/GPT3_124M_instruct'

In [13]:
# Keep the generated sample under its own name: `s` is the settings object
# imported from `finetune` in the first cell, and rebinding it here would break
# every later `s.config` / `s.models_root_path` lookup on the same kernel.
generated_text = instruct_generate(finetuned_model)
print(generated_text)

Hey! I can tell you a joke
Subject: A friend of yours
Phone: I can say they are two people
Motto: A BOT. (Hebrew: Oh, the world’s eyes are mine.)
Subject: A BOT. (Hebrew: the cat’s eyes are mine.)
Subject: Hey! I should love them both
O’Connor: Oh! The one loves him
Motto: Goodnight! My dear sweet night!
Subject: My dear sweet night!
Piano: Oh, the world’s eyes are mine
Motto: Oh, the world’s eyes are mine.
1. Do you all love
2. Piano: The one loves him
In the past few seconds! Oh, your eyes are my dear friends!
3. The one loves you
4. And now? The one loves you so much
In the past few seconds! Oh, the one loves you so much
(I love them so badly
5. Don’t say ‘It’s only one loves you!
Familiarize yourself with all his eyes
Dont say, please


In [None]:
# Wrap the fine-tuned model in the chat helper. `finetuned_model` must already
# exist from the LoRA/checkpoint cell; otherwise this raises NameError.
chat = ChatAssistant(finetuned_model)

print(chat.ask("Hello, who are you?"))

NameError: name 'finetuned_model' is not defined

In [None]:
# Second turn on the `chat` object created in the previous cell.
print(chat.ask("Tell me a joke"))

In [None]:
# Follow-up that refers back to the joke; presumably relies on `chat` keeping
# conversation history between calls — confirm in ChatAssistant.
print(chat.ask("Explain why it is funny"))