# Compare models

In [1]:
import torch
from utils import load_config, call_with_matching_args, compute_loss, get_data
from model import NanoGPT, MHA
import tiktoken
import math
from IPython.display import display, Markdown

In [2]:
CONFIG = load_config(config_path="config-tiny-shakespeare-scratch.yml")
CONFIG

{'wandb_project': 'nano-gpt-token-tiny-shakespeare-large',
 'batch_size': 128,
 'block_size': 256,
 'emb_dim': 384,
 'n_heads': 6,
 'head_dim': 64,
 'n_layers': 6,
 'dropout': 0.2,
 'fixed_lr': False,
 'n_iters': 4000,
 'warmup_iters': 200,
 'lr_decay_iters': 4000,
 'learning_rate': 5e-05,
 'min_lr': 5e-06,
 'tokenizer_model': 'gpt-2',
 'split_ratio': 0.8,
 'checkpoint_dir': './checkpoint-tiny-shakespeare-scratch/',
 'always_save_checkpoint': False,
 'dataset': 'tiny_shakespeare',
 'train_on_full': False,
 'data_path': '../data/tiny-shakespeare/input.txt',
 'continue_train': False}

In [3]:
CONFIG['device'] = torch.device(f"cuda:0" if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
tokenizer = tiktoken.encoding_for_model(CONFIG["tokenizer_model"])
CONFIG["vocab_size"] = tokenizer.n_vocab
CONFIG["tokenizer"] = tokenizer
train_tokens, val_tokens = call_with_matching_args(get_data, CONFIG)
len(train_tokens), len(val_tokens)

(270420, 67605)

In [4]:
def get_lr_multiplier(it):
    # 1) linear warmup for warmup_iters steps
    if it < CONFIG["warmup_iters"]:
        return (it + 1) / (CONFIG["warmup_iters"] + 1)
    # 2) if it > lr_decay_iters, return min learning rate
    if it > CONFIG["lr_decay_iters"]:
        return CONFIG["min_lr"] / CONFIG["learning_rate"]
    # 3) in between, use cosine decay down to min learning rate
    decay_ratio = (it - CONFIG["warmup_iters"]) / (CONFIG["lr_decay_iters"] - CONFIG["warmup_iters"])
    assert 0 <= decay_ratio <= 1
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio)) # coeff ranges 0..1
    return (CONFIG["min_lr"] + coeff * (CONFIG["learning_rate"] - CONFIG["min_lr"])) / CONFIG["learning_rate"]


## Tiny shakespeare from scratch

In [5]:
model1 = call_with_matching_args(NanoGPT, CONFIG)

Number of parameters: 49386577


In [6]:
checkpoint1 = torch.load("best_tiny_shakespeare_ckpt.pt", map_location="cpu")
checkpoint1.keys()

  checkpoint1 = torch.load("best_tiny_shakespeare_ckpt.pt", map_location="cpu")


dict_keys(['step', 'model_state_dict', 'optimizer_state_dict', 'scheduler_state_dict', 'loss', 'config'])

In [7]:
model1.load_state_dict(checkpoint1['model_state_dict'])
model1 = model1.to(CONFIG["device"])

In [8]:
model1.eval();

In [9]:
compute_loss(val_tokens, block_size=CONFIG["block_size"], batch_size=CONFIG["batch_size"],\
              model=model1, device=CONFIG["device"])

5.169842720031738

In [10]:
display(Markdown(model1.generate()))


Lest thou doth cherish me more than read.

ROMEO:
No, as hurlABETH:
And I a fatal rod of earnest,
And any other mothers but true-fell Unloaded of my heart.

CLAR LAURENCE:
Poor crown? who breathed your father's womb,

CLARENCE:Go, by, pral go with unfacheENCE:
Madam.

Let him is my children is contentmen.
JOHN OF YORK:
Uncle! God's speak now, nor the nature wisdom he stands in his land
My lord.

GLOUCESTER:
O disin of thy understandingainer, look it is the king and I protest,
I am once more to- login by the mind so.
Hath, blessed land, take her state,
You that's death like aly Hereford,
Come, Boling clouds comes a king, but hard-day;
And set for one another and to hold
Those king's knees to have been in youth and the battle straight.
Y ANNE:
His sorrow hath outcry from the ground thy hand,
He mayad! whose ancient fear the rest gives me,
Your heart bekay to the king hath at my heart what?

BUCKINGHAM:
With blood he rather sleep, then, if he breathed all night,
That you to catchly virtuous cord taunt and use consay,
Love petition, which valiant millionsly queens,
Give me my friend and sweet better that lodged in the leisurely hour
Set up his liege,
The stroke of France
But letting fire affaced the sea
Debleness ofholding up himself,
For 'twould, for his new-pimei as that watch'd your city
I'll meet,
stridia unnecess them,
Join'd with tears beam of visit home,
That would dance.
ROMEO:
How now thou? frankly as since my Hereford?'

ad! shed my mother, the loss hath brought away!

AUFarewell cockly tale to death.
Within the hands
Stugg'd King Edward boy, this sword!
When court in all whose kings'er:
Which you much flat finger'd?
To undeings hath assisted by heaven is can news to the wind that table.

Nurse:
I enter to him
P depart and hand:
Like an thrice by the melancholy lords, we would not my monastery the eyes?
If now bey general hearing, shadows
To barkearing whose princely father thus become meanspirator:
About his eyes,
That rend that he had spent into his subjects' glory in this royal part.

WICK:
Were in my brother and you flight,
'Tis no more committed his bird age and perish be well reason in thy wretched tongue,
That these misstones;
Their foot of saints grandfather, brought this goose, bid him did I long as that foul way,
 Whole Lewiser could see him horse'll desire me
Writ off their bark:
But the feather.

So encounter withalurments, if I fear;
For this is my mother seems men warn offender with that is grew to mine hence.

KING HENRY VI:
The kinsmen were a man,
And silver as it thus earlyravity limit thus a garhen'd
And hoping the mid spring.

EXET:
they well-parted their honour the king,
And, by depictionOUCESTER:
With honour from your rights, and what is from your mother and duke.

CLIFFORD:
Come, Richard, peace braveamy to stock even to keep him from his most brotherCESTER:
And weep.

 cry waste of his reasons you awhile.

Second Lord herald in the gains nature, my dear comes your holy victory!
 and enduredARENCE:
My lovely Richard, mine growth urged!
My sovereign, that king, that has your dam
Than to know not so:
That, shall be his head did see his heirs at our made our face ancestry
When any daughter and holy king and most bills
On be stumbled; but come?
Swell,; he, my lord, and let him will go Edward's headWe are in head.

Shot from mine own sir, then;
To bluntly?

KING EDWARD IV:
Of my lord;
Now careful enemy as you lay upon the dearest, mark the ten traitor,
With mine sufficient toius is the ground?

GLOUCESTER:
Of his love's lap, girls.
How doth die.

VIR

## Pretrained model 1

In [27]:
model2 = call_with_matching_args(NanoGPT, CONFIG)

Number of parameters: 49386577


In [28]:
checkpoint2 = torch.load("best-1B-pretrain-ckpt-1.pt", map_location="cpu")
checkpoint2.keys()

  checkpoint2 = torch.load("best-1B-pretrain-ckpt-1.pt", map_location="cpu")


dict_keys(['step', 'model_state_dict', 'optimizer_state_dict', 'scheduler_state_dict', 'loss', 'config'])

In [29]:
model2.load_state_dict(checkpoint2['model_state_dict'])
model2 = model2.to(CONFIG["device"])

In [30]:
model2.eval();

In [31]:
compute_loss(val_tokens, block_size=CONFIG["block_size"], batch_size=CONFIG["batch_size"],\
              model=model2, device=CONFIG["device"])

9.144304275512695

In [33]:
txt = model2.generate()

display(Markdown("\n\n".join(i for i in txt.split("\n"))))

! larvae grow rapidly here , " said Dr Andrews .

The States , which oversees the country 's largest nuclear watchdog , warned that the talks needed little chance of future negotiations .

Rebounds _ New Orleans ( Hawaii ) ( 10 ) , second-highest conference scoring list ( in the West Region ) , 5 : 41 .

" Are the results , no , of course , that is accessibility , " Mr Bush said , adding that the mission was meant to produce a video " got back and forth " and that was what he did to the surveillance programme .

A further 11,000 people are returning .

Hein says it is a campaign for that particular moral possibility .

Anelka-district 106-5 passing pass gave Valles a 13-13 lead .

Take a minute to get the ball .

Tony Morales insists he is seeking more influence if the panoramic view comes from a deactivated single pesticide that causes cancer .

We need no more than filmmakers , they say , colonies rely on a optimal view for arrogance .

He said : " Normally she died , her life had hit me .

The television ad 's director , I.B. Mitchell , gave Wright O 'Donnell a look at Murtha 's remarks on June 14 when he ordered a interview on a video welcoming-mail more than 350 Web sites .

( AP ) - Carolina State beat Kentucky to lift the Hornets 2-0 earlier this month , with a 3-2 victory for the White Sox .

Enter Britain a " safe haven " for Abu Dhabi .

" What we may consider as a shock factor is that the Federal Reserve intends to regain compliance with federal funding restrictions that currently includes additional federal borrowing .

His top six hit 6 1 / 3 shots in the first half as Georgia managed to become a firm tourist with a lethal dose in hand , but won safe .

I have contacts with the filmmakers since 2004 and cannot remember how Murray grew up and left the show .

The stars ' singles talent and their status relate to tourists .

Lewis Perry 's Elizabeth Hall , who runs the airline , says it is time to wind up flights and stops working next week .

Yyllands cavorted with Compassion in " Slops Buzzoons , " the way she did is up all this when you begin to see it backstage .

I have interest in the yoga team , and the mine is a wise , caring place .

The trip , which has been closed since the Gaza conflict , comes amid fears that North Korea has never vowed to resume permanently relaunch U.S. ties with the Iranian Taliban .

Meanwhile , the youngest child of a human with a former university student from Poland said she would like to be interested in helping clients live in a British school .

Wyke , winger David Robinson , 23 , and three other Spurs each had 2 points following the Chicago Blackhawks ' sloppy Mark Hughes rout of the San Diego Chargers on Friday night over a fourth straight season .

As a writer ( who 's not a wealthy admirer of veteran journalist Bill Griffith , of Portsmouth provocateur Nigel Knigg company ) , Ms. Toniña hid an engine locked into the hallway of the building .

Builds likely to rise beyond Mammoth Lakes during the second quarter , sending in items such as a great gnawe , a sign that big-box sales will be flowing with more orders from business owners .

Like everyone at a party that 's laid off , he does .

Without me purely competing in the Democratic Party , both the party and Democratic have never had subversion .

The ship is the third of this ship buying its speedboats .

Hughl Harper , 58 , already in his country for seeks , was caught blank after a drive-by Mr. Giuliani paid his campaign $ 10,000 wider than had planned , and claims he was trying to stabilize any country since 1992 .

On her first visit in Normandy , Obama said the ability of the leadership to recognize Israel was likely by the same name as much Karl Rove as the permanent leader of the Hezbollah-led Respect Party .

Murder , who has long been a pupil at Oxford and later now resides among the new parents of his relatives .

The will at the Business Minister 's Mansion House , Select Committee Vice Chair Kelly Rowland said that it was important that ministers only want to know what they wanted .

Still , some high-profile contenders this week have offered to return to the competition to compete in the North of Virginia on Wednesday to hammer out their first two ATP Tour meetings last week .

Demonstrators scattered bundles of clothing , and they rolled stones at lean people while east of Baghdad .

" Since I moved from mass to rock music in the mid- of the 1990s , I have never been working as an engineer .

What 's less have Mr McLaughlin standing in with a comrade ?

If Cameroon 's

## Pretrained model 2

In [34]:
model3 = call_with_matching_args(NanoGPT, CONFIG)

Number of parameters: 49386577


In [35]:
checkpoint3 = torch.load("best-1B-pretrain-ckpt-2.pt", map_location="cpu")
checkpoint3.keys()

  checkpoint3 = torch.load("best-1B-pretrain-ckpt-2.pt", map_location="cpu")


dict_keys(['step', 'model_state_dict', 'optimizer_state_dict', 'scheduler_state_dict', 'loss', 'config'])

In [36]:
model3.load_state_dict(checkpoint3['model_state_dict'])
model3 = model3.to(CONFIG["device"])

In [37]:
model3.eval();

In [38]:
compute_loss(val_tokens, block_size=CONFIG["block_size"], batch_size=CONFIG["batch_size"],\
              model=model3, device=CONFIG["device"])

9.111961364746094

In [39]:
txt = model3.generate()

display(Markdown("\n\n".join(i for i in txt.split("\n"))))

! Sam McLaughlin Group Co. of Spain .

One of these things referred to the Company 's filings must send forward within a lengthy & markers ' assessment .

If I were aware , I would be studying to show how much a boy would have to put a mini-ste ] back into seeptious privilege -- if I was overweight .

Gifts are common , and rarely stopped .

Moreover , so if China does not wish to confront its players ' concerns public about the incident , the MSM shouldn 't be buying the names of business activists -- as well as some potential witnesses who obtained their respective reports based on her personal plans .

Jockey met with Brazilian Kim Chang in February last year for vice president Ava Keatingy .

The counter-terrorism report sends a warning to the military to the downdiary to press the final government to sit down and talk about the companies .

Madonna , a 52-year-old California man who became a steady-duty child last June with her children , took off nearby Staples Center Thursday and provides a nourishing expanse of art for men .

Sen. Barack Obama , D-Ill . , and top Republican Edward B. Kennedy of Ohio were among 56 Republicans left critical of their proposed health bill , but had Republican Gov. Mike Huckabee 's campaign lost1-1 on the ballot .

China is as big as it currently has in mainland Taiwan , says Queiroz Subichberg , assistant general director of the Amnesty International agency for Human Rights .

And might agree it while it comes to Museum the Art of Philosophy and Culture , which holds the premiere of the event which has become the first terrestrial song of modern cinema ever produced for the past 16 years .

The management required an an ATS to offer funds to Electronic Arts ' farm in Baltimore and Easton .

Thanks to records released by Spectrum Partners , the agreement provides " additional details " being made in the trial court .

( AP ) - Chris Jeffrey stepped onto the bench for the 61st minute Saturday with a clear fadeaway and positionbinding with a goal late in the second half to spark a 12-4 comeback and two assists .

She won just about 40 minutes of fights with police who ended up on the inactive list , according to a list of two adults arrested in California .

Perhaps the most poignant life of its man after exhausting death has become a one-dimensional companion .

The passerby was attacked when the freight train got inside the ambulance and set up a Mercedes-Benz pickup truck near Kandahar province , Zimbabwe .

Alongside Leicester 's Robben , 78 , had been fouled by three others having played hard nine times during a futtering run that left him blind .

Kirk Macartney , 24 , a private equity specialist , said he understood the situation was " extremely massive " and some German manufacturers were doing no bad to slow the JGB deal and instead only wanted to stay afloat , while others end up working .

Nokia , the world 's leading research company , Sarthen and the mobile maker is one of the favourites for the start of the year .

The players were not just Barry Dameda ( played by Glenn Beck ) who sustained the worst-affected 2007 playoff spinner I 've ever seen since .

It found at mixed last week that the markets were spared for the same period last year as it got up 0.4 % after plunging up 0.1 % on the fast-growing jump in sales of Nokia 's BlackBerry .

That massive scramble for oil in neighboring China , the US and China are grounding 20 percent of the increase in energy costs , and increasing domestic demand should ease the impact of reckless consumption , says Robert Wagner of JPMorgan Multiple Resources. with its autos taskforce .

A spokesman for Georgia 's Water Safety Force said there was no timetable to proceed .

BT takes the position of director of the suit , which is due by June 17 .

Congress would like to use a " David Letterman " programme to claw back 10 new presidential candidates , specifically known as " Road by My Name , " which he said " I need young people as commanding in or when they try to and understand why government is coming from ! "

This year 's winner will be announced Tuesday by veteran Sondra , who has 23-year-old Danny Cusack , who will play his third Italian opponent in the final .

At some point right , ... your tone resonates like that with Professor Mathis Watson stating it was cost up to the cover of mutual comments .

Cook 's injury problems can only result by injury .

A neighbor said the owner of his home , said she felt the infant well needs hospital treatment .

But those feel confident about being a kid , chatting with freaks and working with him all over his mouth .

The bank referred to a new concern to some investors , where mean coalition partners have moved into

## FineTuned model

We use 2nd pretrained model since the starting val loss is a bit better

In [25]:
model4 = call_with_matching_args(NanoGPT, CONFIG)

Number of parameters: 49386577


In [26]:
checkpoint4 = torch.load("best-tiny-shakespeare-ft.pt", map_location="cpu")
checkpoint4.keys()

  checkpoint4 = torch.load("best-tiny-shakespeare-ft.pt", map_location="cpu")


dict_keys(['step', 'model_state_dict', 'optimizer_state_dict', 'scheduler_state_dict', 'loss', 'config'])

In [20]:
checkpoint4["config"]

{'wandb_project': 'nano-gpt-token-tiny-shakespeare-finetune-large',
 'batch_size': 256,
 'block_size': 256,
 'emb_dim': 384,
 'n_heads': 6,
 'head_dim': 64,
 'n_layers': 6,
 'dropout': 0.2,
 'fixed_lr': False,
 'n_iters': 1000,
 'warmup_iters': 50,
 'lr_decay_iters': 1000,
 'learning_rate': 5e-05,
 'min_lr': 5e-06,
 'tokenizer_model': 'gpt-2',
 'split_ratio': 0.8,
 'checkpoint_dir': './checkpoint-tiny-shakespeare-finetune/',
 'always_save_checkpoint': False,
 'dataset': 'tiny_shakespeare',
 'train_on_full': False,
 'data_path': '../data/tiny-shakespeare/input.txt',
 'continue_train': False,
 'finetune': True,
 'finetune_ckpt': 'best-1B-pretrain-ckpt-2.pt'}

In [27]:
model4.load_state_dict(checkpoint4['model_state_dict'])
model4 = model4.to(CONFIG["device"])

In [28]:
model4.eval();

In [29]:
compute_loss(val_tokens, block_size=CONFIG["block_size"], batch_size=CONFIG["batch_size"],\
              model=model4, device=CONFIG["device"])

4.385370254516602

In [30]:
txt = model4.generate()

display(Markdown(txt))


Clarence born worthy father; I hold you there:
No longer require't.
Our child's tears are smothered, I'll be his anchor.

First Senator:
He visits the sect.

AUTOLYCUS0200:
Our general 'sʼMost powerful news.

MERCUTIO:
For I come, my lord, whom Iseveral to't:
If you see from winrée, you supplouch
Some names upon your person therefore, though
I offer any holy man to me;
That I have wrought upon him as pitiful.

ROMEO:
Has it not been a theft, though still to suffer'd
To cross his country or banish him all.
Or, good Camillo,
Not the humane morrow to be a foe,
And I am redeem'd.

Servant:
What,
Pray you, what, my lord? O downright is the voice of Greta's deadiscovery.

VOLUMNIA:
What, say you now?

ROMEO:
Away here! why, sir?

BENVOLIO:
Help not what?

AUTOLYCUS:
Well, let me grieve an hour without doubt,
As I looked to sprinkle in my mouth at my mind.

ROMEO:
He does like a water-cold bale;
But for an infraction, his bird's nose makes
Dawn with Juliet, which all these about it;
the duke hath been dreaded; because of his life,
Nor my natural body.

BRUTUS:
Schrah, tell mehow than there is a danger?

First Citizen:
You are tender him, and therefore must be jest.

ISABELLA:
Ay, my Lord of Hereford, I pray Sandin hence.

CAMILLO:
O ay,
Not so.

MENENIUS:
Come, tell me thisenance.

MENENIUS:
My mouth, thy pains may never be a shame to full,
And sweetness in my life well,
My patch is so virtuous as I gently do.

Privy, I am reconciled and attentive,
I thank you to your grave: I would rather conceive
And know the model of the court to plot,
I had rather do the honour burnt down
Subscribed every ballad upon one side.

CAMILLIUS:
What! thou so?

First Murderer:
These newswells; they were dispersed, and
Must cost some afoot. Therefore, he has no farther than his natural line
moleslam'd himself, and he, in his sharp head,
Dame not what Bolingbroke, dispatch'd from these men
So if he should have made him apprentice: this is an example's
or piercingly reachment alteration. I prithee, or else
Of ordering him forth!

MENENIUS:
Besides, so early the sun
Are you rich other appointments?

SAMPSON:
Soft:
Sir. Tell me, what news?

MENENIUS:
Sir, crookens! methinks we demanded me think
In Vienna when I had come home your young prince: now, i' the churchyard, there would be
the prince that came from wife, play'd his character, when he fled;
For, 'twixt with his brother.

O, although he did, then in time to come but possess'd
Of these whom did create thee, not his soul.

PERDITA:
Is him a gentleman, madam?

MENENIUS:
The principles he bids to wear the laws
By circumstance dead deeds of young men?

MENENIUS:
Ay, my lord, and the true origin of rust,
That counsels them
If not, offer me a time to change their minds.

MENENIUS:
Yea, by this impudence thou shalt not see the instant of what's done.

CORIOLANUS:
I on a scale false, but call away
These leery at night's point
That none of us will, but we have record dudes.

PAULINA:
Obsessed the father, you have elgives private;
Darwin, a brat for the dukesish age,
For thee as this I have laid upon me,
And thus did disguise myself.

SICINIUS:
I am000 to die; Come, go to bed:
They have, but I will