In [1]:
import urllib.request
import os

# The URL of the raw text file
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# The local filename to save it as
filename = "input.txt"

# Download the file if it doesn't exist yet
if not os.path.exists(filename):
    print(f"Downloading {filename} from {url}...")
    try:
        urllib.request.urlretrieve(url, filename)
        print("Download complete.")
    except Exception as e:
        print(f"Error downloading the file: {e}")
else:
    print(f"{filename} already exists.")

# Read the downloaded file into a variable
try:
    with open(filename, 'r', encoding='utf-8') as f:
        text = f.read()
    print(f"\nSuccessfully loaded the text.")
    print(f"Total characters: {len(text)}")
    print("First 200 characters:\n---")
    print(text[:200])
    print("---")
except FileNotFoundError:
    print(f"Could not find the file '{filename}'. Please check the download step.")
except Exception as e:
    print(f"An error occurred while reading the file: {e}")

# Now you can use the 'text' variable in your notebook for your project!


input.txt already exists.

Successfully loaded the text.
Total characters: 1115394
First 200 characters:
---
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you
---


In [2]:
# read it in to inspect it
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

In [3]:
len(text)

1115394

In [4]:
text[:1000]

"First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citizens, the patricians good.\nWhat authority surfeits on would relieve us: if they\nwould yield us but the superfluity, while it were\nwholesome, we might guess they relieved us humanely;\nbut they think we are too dear: the leanness that\nafflicts us, the object of our misery, is as an\ninventory to particularise their abundance; our\nsufferance is a gain to them Let us revenge this with\nour pikes, ere we become rakes: for the gods know I\nspeak this in hunger 

In [5]:
# here are all the unique characters that occur in this text
chars = sorted(list(set(text)))
vocab_size = len(chars)
print(''.join(chars))
print(vocab_size)



 !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
65


In [6]:
stoi= {ch:i for i,ch in enumerate(chars)}
itos={i:ch for i,ch in enumerate(chars)}

encode= lambda s: [stoi[c] for c in s]
decode= lambda i: ''.join([itos[d] for d in i ])

In [7]:
encode("Hello Word")

[20, 43, 50, 50, 53, 1, 35, 53, 56, 42]

In [8]:
print(decode(encode("Hello Word")))

Hello Word


In [9]:
import torch

data=torch.tensor(encode(text), dtype=torch.long)
print(data.shape,data.dtype)
print(data[:1000])

torch.Size([1115394]) torch.int64
tensor([18, 47, 56, 57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 14, 43, 44,
        53, 56, 43,  1, 61, 43,  1, 54, 56, 53, 41, 43, 43, 42,  1, 39, 52, 63,
         1, 44, 59, 56, 58, 46, 43, 56,  6,  1, 46, 43, 39, 56,  1, 51, 43,  1,
        57, 54, 43, 39, 49,  8,  0,  0, 13, 50, 50, 10,  0, 31, 54, 43, 39, 49,
         6,  1, 57, 54, 43, 39, 49,  8,  0,  0, 18, 47, 56, 57, 58,  1, 15, 47,
        58, 47, 64, 43, 52, 10,  0, 37, 53, 59,  1, 39, 56, 43,  1, 39, 50, 50,
         1, 56, 43, 57, 53, 50, 60, 43, 42,  1, 56, 39, 58, 46, 43, 56,  1, 58,
        53,  1, 42, 47, 43,  1, 58, 46, 39, 52,  1, 58, 53,  1, 44, 39, 51, 47,
        57, 46, 12,  0,  0, 13, 50, 50, 10,  0, 30, 43, 57, 53, 50, 60, 43, 42,
         8,  1, 56, 43, 57, 53, 50, 60, 43, 42,  8,  0,  0, 18, 47, 56, 57, 58,
         1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 18, 47, 56, 57, 58,  6,  1, 63,
        53, 59,  1, 49, 52, 53, 61,  1, 15, 39, 47, 59, 57,  1, 25, 39, 56, 41,
      

In [10]:
n=int(0.9*len(data))
train_data=data[:n]
test_data=data[n:]

In [11]:
block_size=8
train_data[:block_size+1]

tensor([18, 47, 56, 57, 58,  1, 15, 47, 58])

In [12]:
x=train_data[:block_size]
y=train_data[1:block_size+1]
for t in range(block_size):
    context=x[:t+1]
    target=y[t]
    print(f'When input is {context} the Target: {target}')

When input is tensor([18]) the Target: 47
When input is tensor([18, 47]) the Target: 56
When input is tensor([18, 47, 56]) the Target: 57
When input is tensor([18, 47, 56, 57]) the Target: 58
When input is tensor([18, 47, 56, 57, 58]) the Target: 1
When input is tensor([18, 47, 56, 57, 58,  1]) the Target: 15
When input is tensor([18, 47, 56, 57, 58,  1, 15]) the Target: 47
When input is tensor([18, 47, 56, 57, 58,  1, 15, 47]) the Target: 58


In [13]:
batch_size=4
block_size=8

def get_batch(split):
    data=train_data if split=='train' else test_data
    ix=torch.randint(len(data)-block_size,(batch_size,))
    x=torch.stack([data[i:i+block_size]for i in ix])
    y=torch.stack([data[i+1:i+block_size+1]for i in ix])
    return x,y

In [14]:
xb,yb=get_batch(split='train')
print('inputs:')
print(xb.shape)
print(xb)

print('targets:')
print(yb.shape)
print(yb)


print('-----')

for b in range(batch_size):
    for t in range(block_size):
        context=xb[b,:t+1]
        targets=yb[b,t]
        print(f'When the input is {context} the output is {targets}')

inputs:
torch.Size([4, 8])
tensor([[47, 42, 57,  1, 40, 43,  1, 42],
        [47, 50, 50,  1, 58, 59, 56, 52],
        [51, 63,  1, 45, 53, 53, 42,  1],
        [39, 57, 43,  1, 51, 39, 52, 12]])
targets:
torch.Size([4, 8])
tensor([[42, 57,  1, 40, 43,  1, 42, 53],
        [50, 50,  1, 58, 59, 56, 52,  1],
        [63,  1, 45, 53, 53, 42,  1, 50],
        [57, 43,  1, 51, 39, 52, 12,  0]])
-----
When the input is tensor([47]) the output is 42
When the input is tensor([47, 42]) the output is 57
When the input is tensor([47, 42, 57]) the output is 1
When the input is tensor([47, 42, 57,  1]) the output is 40
When the input is tensor([47, 42, 57,  1, 40]) the output is 43
When the input is tensor([47, 42, 57,  1, 40, 43]) the output is 1
When the input is tensor([47, 42, 57,  1, 40, 43,  1]) the output is 42
When the input is tensor([47, 42, 57,  1, 40, 43,  1, 42]) the output is 53
When the input is tensor([47]) the output is 50
When the input is tensor([47, 50]) the output is 50
When th

In [15]:
import torch
import torch.nn as nn
from torch.nn import functional as F


class BigramLanguageModel(nn.Module):

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table=nn.Embedding(vocab_size,vocab_size)

    def forward(self,idx,targets=None):
        logits=self.token_embedding_table(idx)
        if targets is None:

            loss=None
        else:
            B,T,C=logits.shape
            logits=logits.view(B*T,C)
            targets=targets.view(B*T)
            loss= F.cross_entropy(logits,targets)
        return logits,loss
    
    def generate(self,idx,max_new_tokens):
        for _ in range(max_new_tokens):
            logits, loss = self(idx)
            logits = logits[:, -1, :] # becomes (B, C)
            probs=F.softmax(logits,dim=-1)
            idx_next=torch.multinomial(probs,num_samples=1)
            idx=torch.cat((idx,idx_next),dim=1)
        return idx
m=BigramLanguageModel(vocab_size)
logits,loss=m(xb,yb)
print(logits.shape)
# print(loss)

idx=torch.zeros((1,1),dtype=torch.long)
print(decode(m.generate(idx,max_new_tokens=100)[0].tolist()))


torch.Size([32, 65])

j;BFLCwY'MSDwVnbiZqfhHj;ImvKaibnJj;X&mODZb G;Z
jxG;rAhF:dNSu,b&XZsWC.-jjLP-WfSyCnabo;J&H-C,FU!U!wfJL


In [16]:
import torch
import torch.nn as nn
from torch.nn import functional as F
torch.manual_seed(1337)

class BigramLanguageModel(nn.Module):

    def __init__(self, vocab_size):
        super().__init__()
        # each token directly reads off the logits for the next token from a lookup table
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):

        # idx and targets are both (B,T) tensor of integers
        logits = self.token_embedding_table(idx) # (B,T,C)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        # idx is (B, T) array of indices in the current context
        for _ in range(max_new_tokens):
            # get the predictions
            logits, loss = self(idx)
            # focus only on the last time step
            logits = logits[:, -1, :] # becomes (B, C)
            # apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1) # (B, C)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            # append sampled index to the running sequence
            idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
        return idx

m = BigramLanguageModel(vocab_size)
logits, loss = m(xb, yb)
print(logits.shape)
print(loss)

print(decode(m.generate(idx = torch.zeros((1, 1), dtype=torch.long), max_new_tokens=100)[0].tolist()))


torch.Size([32, 65])
tensor(4.8819, grad_fn=<NllLossBackward0>)

SKIcLT;AcELMoTbvZv C?nq-QE33:CJqkOKH-q;:la!oiywkHjgChzbQ?u!3bLIgwevmyFJGUGp
wnYWmnxKWWev-tDqXErVKLgJ


In [17]:
optimizer=torch.optim.Adam(m.parameters(),lr=1e-3)

In [18]:
batch_size=32
for steps in range(10000):
    xb,yb=get_batch('train')
    logits,loss=m(xb,yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    print(loss.item())

4.692410945892334
4.664155006408691
4.765737533569336
4.706588268280029
4.595695972442627
4.710178852081299
4.713726043701172
4.686983108520508
4.700160503387451
4.718380928039551
4.71571159362793
4.684426307678223
4.745733737945557
4.735860824584961
4.666382789611816
4.586296081542969
4.714799880981445
4.672160625457764
4.715242862701416
4.745105743408203
4.630363464355469
4.707807540893555
4.670891761779785
4.5828022956848145
4.739812850952148
4.6750688552856445
4.8059000968933105
4.7502217292785645
4.692283630371094
4.604687213897705
4.722168922424316
4.74193811416626
4.610278129577637
4.663115978240967
4.730478763580322
4.738825798034668
4.6886186599731445
4.6403632164001465
4.737056255340576
4.710198402404785
4.737388610839844
4.692287445068359
4.72010612487793
4.753006935119629
4.5705037117004395
4.644252777099609
4.699660778045654
4.8075175285339355
4.572604656219482
4.717604160308838
4.509944915771484
4.604044437408447
4.665515899658203
4.712684631347656
4.737185001373291
4.813

In [20]:
print(decode(m.generate(idx = torch.zeros((1, 1), dtype=torch.long), max_new_tokens=1000)[0].tolist()))



A:
GLEco blllando;

Whe, oraingofof win!
RIfans picspeserer hee tha,
TOFonk? me ain ckntoty dedo bo'llll st ta d:
ELIS me hurf lal y, ma dus pe athouo
By bre ndy; by s afreanoo adicererupa anse tecorro llaus a!
OLeneerithesinthengove fal amas trr
TI ar I t, mes, n sar; my w, fredeeyong
THek' merer, dd
We ntem lud engitheso; cer ize helorowaginte the?
Thak orblyoruldvicee chot, pannd e Yolde Th likl beamen, tofr,
n s Byo tred ceathe, il ivilde w
O ff y
Fivede? ig aiMy, I ivis muofounce herevern outh f athawendesees yof th withind be wameats tsteer y blitow,
Ye m o ditoshyd me, ch rte u hart ararwsa
Wou fe,
INurathoune
IESSARin,
MIOLened sust tl.
S:
NMy BAnind g.
iudshank
An chin is a arokisupxaseru t w ity merwo al LOLo bebte loolld worinero ya l aknge ond thal ttry b's mo ge ck.

gh, inketilllin trewnutud t ar,
WAnt cithapis Zimponcrdistherdrtes saure ' erpoperrposthelind y ss of hef thep: ct
Ywit harfoul'st, ar izlor t ct.
Fo, sther:
I d tre th,-ben.
Wowstothedl:
NNONANRI, aft,
STo 

In [1]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda
