In [1]:
import torch
import os
import tiktoken
import math

from tqdm import tqdm
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from dataclasses import dataclass

In [None]:
# Root folder holding the per-book "<title> train.txt" / "<title> val.txt" files.
# Set the HP_DATASET_DIR environment variable to point somewhere else without
# editing the notebook; the default is the original location.
dataset_dir = Path(os.environ.get("HP_DATASET_DIR", "/dataset/harry-potter-books"))

In [3]:
# Load tiktoken's "gpt2" encoding; the trailing expression displays the token
# ids of a sample sentence as a quick sanity check.
enc = tiktoken.get_encoding("gpt2")
enc.encode("hello my name is mr. robot")

[31373, 616, 1438, 318, 285, 81, 13, 9379]

In [4]:
# Show the files in the dataset folder (the loader defined later selects
# shards whose file name contains "train" or "val").
os.listdir(dataset_dir)

['06 Harry Potter and the Half-Blood Prince train.txt',
 '03 Harry Potter and the Prisoner of Azkaban train.txt',
 '01 Harry Potter and the Sorcerers Stone train.txt',
 '07 Harry Potter and the Deathly Hallows val.txt',
 '05 Harry Potter and the Order of the Phoenix train.txt',
 '04 Harry Potter and the Goblet of Fire train.txt',
 '02 Harry Potter and the Chamber of Secrets train.txt']

In [None]:
class hpTokenizer:
    """Placeholder for a project-specific tokenizer (not implemented yet).

    The class statement previously had no body, which is a SyntaxError and
    stops "Restart Kernel -> Run All" at this cell. The rest of the notebook
    tokenizes with ``tiktoken.get_encoding("gpt2")`` and does not use this
    class.

    TODO: implement encode/decode here or delete this cell.
    """

In [5]:
@dataclass
class GPTConfig:
    """Hyper-parameters shared by every module of the hpGPT model.

    Raises:
        ValueError: on instantiation, if a size is not positive, if
            ``n_layers`` is negative, or if ``n_embd`` is not divisible by
            ``n_head`` (the attention layer splits the channels evenly
            across heads).
    """

    block_size: int = 128  # maximum sequence length (position table and causal mask size)
    n_embd: int = 64  # embedding / residual stream width
    n_head: int = 4  # attention heads; each one gets n_embd // n_head channels
    vocab_size: int = 50304  # rows of the token embedding and width of the output logits
    n_layers: int = 4  # number of transformer blocks (6 is a larger alternative)

    def __post_init__(self):
        # Fail early with a readable message instead of an opaque reshape
        # error deep inside the attention layer.
        for field_name in ("block_size", "n_embd", "n_head", "vocab_size"):
            value = getattr(self, field_name)
            if value <= 0:
                raise ValueError(f"{field_name} must be positive, got {value}")
        if self.n_layers < 0:
            raise ValueError(f"n_layers must be non-negative, got {self.n_layers}")
        if self.n_embd % self.n_head != 0:
            raise ValueError(
                f"n_embd ({self.n_embd}) must be divisible by n_head ({self.n_head})"
            )


In [6]:
class MHA(nn.Module):
    """Causal multi-head self-attention.

    One linear layer produces queries, keys and values together; the channels
    are split across ``config.n_head`` heads, each position attends only to
    itself and earlier positions, and the concatenated heads go through an
    output projection.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Fused projection: the last dimension holds [q | k | v].
        self.qkv_mat = nn.Linear(config.n_embd, 3 * config.n_embd)
        # Lower-triangular ones, stored as a buffer shaped for broadcasting
        # over (batch, head).
        lower_tri = torch.tril(torch.ones(config.block_size, config.block_size))
        self.register_buffer('bias', lower_tri.view(1, 1, config.block_size, config.block_size))
        self.c_proj = nn.Linear(config.n_embd, config.n_embd)

    def forward(self, x):
        """Apply attention to ``x`` of shape (B, T, C); returns the same shape."""
        B, T, C = x.shape
        n_head = self.config.n_head
        head_dim = C // n_head

        # (B, T, 3C) -> three (B, n_head, T, head_dim) tensors.
        q, k, v = [
            part.view(B, T, n_head, head_dim).transpose(1, 2)
            for part in torch.split(self.qkv_mat(x), self.config.n_embd, dim=-1)
        ]

        scores = (q @ k.transpose(-2, -1)) / math.sqrt(head_dim)
        # Positions where the mask is 0 lie in the future and are excluded.
        future = self.bias[:, :, :T, :T] == 0
        scores = scores.masked_fill(future, value=float('-inf'))
        weights = F.softmax(scores, dim=-1)

        # Merge the heads back into the channel dimension.
        merged = (weights @ v).transpose(1, 2).contiguous().view(B, T, C)
        return self.c_proj(merged)

In [7]:
class MLP(nn.Module):
    """Feed-forward sub-layer: widen to 2 * n_embd, tanh-approximated GELU, project back."""

    def __init__(self, config):
        super().__init__()
        width = config.n_embd
        hidden = 2 * width
        self.linear_1 = nn.Linear(width, hidden)
        self.gelu = nn.GELU(approximate="tanh")
        self.linear_2 = nn.Linear(hidden, width)

    def forward(self, x):
        """Map ``x`` (last dimension n_embd) through the two-layer network."""
        return self.linear_2(self.gelu(self.linear_1(x)))

In [8]:
class Block(nn.Module):
    """One transformer layer: attention then MLP, each added back through a residual.

    NOTE(review): LayerNorm is applied to each sub-layer's *output* before the
    residual add. GPT-2 normalises the sub-layer *input* instead
    (``x + mha(ln_1(x))``) - confirm this ordering is intentional.
    """

    def __init__(self, config):
        super().__init__()
        self.mha = MHA(config)
        self.ln_1 = nn.LayerNorm(config.n_embd)
        self.mlp = MLP(config)
        self.ln_2 = nn.LayerNorm(config.n_embd)

    def forward(self, x):
        """Return ``x`` after the attention and MLP residual updates."""
        attn_update = self.ln_1(self.mha(x))
        x = x + attn_update
        mlp_update = self.ln_2(self.mlp(x))
        return x + mlp_update

In [9]:
class hpGPT(nn.Module):
    """Decoder-only transformer language model.

    Token and learned position embeddings are summed, passed through
    ``config.n_layers`` blocks and a final LayerNorm, and projected to
    ``config.vocab_size`` logits by ``lm_head``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.transformer = nn.ModuleDict(dict(
            w_tok_emb = nn.Embedding(config.vocab_size, config.n_embd),
            w_pos_emb = nn.Embedding(config.block_size, config.n_embd),
            blocks = nn.ModuleList([Block(config) for _ in range(config.n_layers)]),
            ln_f = nn.LayerNorm(config.n_embd)
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, optionally, the cross-entropy loss.

        Args:
            idx: integer token ids of shape (B, T).
            targets: optional token ids of shape (B, T); when given, the mean
                cross-entropy between the logits and these ids is returned.

        Returns:
            ``(logits, loss)`` where ``logits`` has shape (B, T, vocab_size)
            and ``loss`` is ``None`` when ``targets`` is ``None``.

        Raises:
            ValueError: if T exceeds ``config.block_size``.
        """
        B, T = idx.shape
        # The position table only has block_size rows. Check here rather than
        # letting the embedding lookup fail with an index error (which on
        # CUDA surfaces as a device-side assert).
        if T > self.config.block_size:
            raise ValueError(
                f"sequence length {T} exceeds block_size {self.config.block_size}"
            )

        tok_emb = self.transformer.w_tok_emb(idx)  # (B, T, C)
        pos = torch.arange(0, T, dtype=torch.long, device=idx.device)
        pos_emb = self.transformer.w_pos_emb(pos)  # (T, C), broadcast over the batch
        x = tok_emb + pos_emb

        for block in self.transformer.blocks:
            x = block(x)
        x = self.transformer.ln_f(x)

        logits = self.lm_head(x)
        loss = None
        if targets is not None:
            # Flatten (B, T) so every position is one classification example.
            loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), targets.view(-1))

        return logits, loss

    def generate(self, start_string: str, num_return_sequences: int, max_length: int,
                 device: torch.device, top_k: int = 50):
        """Sample continuations of ``start_string`` and print them, one per line.

        Each step keeps the ``top_k`` most probable next tokens and samples
        one of them in proportion to its probability. The method does not
        change the module's train/eval mode.

        Args:
            start_string: prompt text; must encode to at least one token.
            num_return_sequences: number of independent samples to draw.
            max_length: total length in tokens (prompt included) at which
                sampling stops; must not exceed ``config.block_size``.
            device: device on which the token tensor is placed.
            top_k: number of candidate tokens per step (default 50, the value
                that used to be hard-coded); capped at the vocabulary size.

        Raises:
            ValueError: if ``top_k`` is below 1 or the prompt encodes to no tokens.
        """
        assert max_length <= self.config.block_size
        if top_k < 1:
            raise ValueError(f"top_k must be at least 1, got {top_k}")

        enc = tiktoken.get_encoding("gpt2")
        tokens = enc.encode(start_string)
        if not tokens:
            raise ValueError("start_string must encode to at least one token")
        # Explicit dtype: embedding lookups need integer indices.
        tokens = torch.tensor(tokens, dtype=torch.long)
        xgen = tokens.unsqueeze(0).repeat(num_return_sequences, 1).to(device)

        with torch.no_grad():
            while xgen.shape[1] < max_length:
                logits, _ = self(xgen)
                probs = F.softmax(logits[:, -1, :], dim=-1)  # distribution for the next token
                k = min(top_k, probs.shape[-1])
                topk_probs, topk_indices = torch.topk(probs, k=k, dim=-1)
                # multinomial accepts unnormalised weights, so no renormalisation is needed.
                choice = torch.multinomial(topk_probs, num_samples=1)
                next_token = torch.gather(topk_indices, dim=-1, index=choice)
                xgen = torch.cat((xgen, next_token), dim=1)

        for row_t in xgen:
            print(enc.decode(row_t.tolist()))

In [10]:
def load_tokens(filename):
    """Read a text file and return its GPT-2 token ids as a 1-D ``torch.long`` tensor.

    The file is decoded as UTF-8 explicitly; without an ``encoding`` argument
    ``open`` falls back to the platform locale, so the same file could
    tokenize differently (or fail to decode) on another machine.
    """
    with open(filename, "r", encoding="utf-8") as f:
        f_content = f.read()

    enc = tiktoken.get_encoding("gpt2")
    tokens = enc.encode(f_content)
    tokens = torch.tensor(tokens, dtype=torch.long)
    return tokens


class HPDataloaderLite:
    """Sequential batch reader over the tokenized text files ("shards") of one split.

    Shards are the files in ``dataset_dir`` whose name contains the split
    name, visited in sorted order and wrapping around after the last one.
    Each batch is a pair of (B, T) tensors where ``y`` is ``x`` shifted one
    token to the left.
    """

    def __init__(self, B, T, split):
        self.B = B
        self.T = T
        assert split in {"train", "val"}

        data_root = dataset_dir
        matching = sorted(name for name in os.listdir(data_root) if split in name)
        self.shards = [os.path.join(data_root, name) for name in matching]
        assert len(self.shards) > 0, f"no shards found for split {split}"

        self.reset()

    def _advance_shard(self):
        """Load the next shard (wrapping around) and rewind to its first token."""
        self.current_shard = (self.current_shard + 1) % len(self.shards)
        self.tokens = load_tokens(self.shards[self.current_shard])
        self.current_position = 0

    def next_batch(self):
        """Return the next ``(x, y)`` pair and move the read position forward by B*T."""
        B, T = self.B, self.T
        span = B * T

        start = self.current_position
        buf = self.tokens[start : start + span + 1]
        if len(buf) != span + 1:
            # Not enough tokens left at the current position: take the batch
            # from the beginning of the next shard instead.
            self._advance_shard()
            buf = self.tokens[self.current_position : self.current_position + span + 1]

        x = buf[:-1].view(B, T)
        y = buf[1:].view(B, T)

        self.current_position += span
        # Move on now if the following batch would run past the end of this shard.
        if self.current_position + (span + 1) > len(self.tokens):
            self._advance_shard()

        return x, y

    def reset(self):
        """Rewind to the first token of the first shard."""
        self.current_shard = 0
        self.tokens = load_tokens(self.shards[self.current_shard])
        self.current_position = 0
        self.current_parent_batch = 0

    def reset_from_config(self, current_shard, current_parent_batch):
        """Resume from a given shard and "parent batch" index.

        The read position is set to ``current_parent_batch * 32 * B * T``.
        NOTE(review): the factor 32 presumably is the number of batches per
        parent batch - confirm against the caller.
        """
        self.current_shard = current_shard
        self.current_parent_batch = current_parent_batch
        self.tokens = load_tokens(self.shards[self.current_shard])
        self.current_position = current_parent_batch * 32 * self.B * self.T


In [20]:
max_lr = 3e-4            # peak learning rate, reached at the end of warm-up
min_lr = 0.1 * max_lr    # floor used at step 0 and from max_steps onwards
warmup_steps = 128       # number of linear warm-up steps
max_steps = 1024         # total number of optimizer steps in the schedule

def get_lr(step):
    """Learning rate for optimizer step ``step``.

    Schedule:
      * ``step < warmup_steps``: linear ramp from ``min_lr`` to ``max_lr``.
      * ``warmup_steps <= step < max_steps``: quarter-sine decay from
        ``max_lr`` down to ``min_lr``.
      * ``step >= max_steps``: constant ``min_lr``.

    The ramp is written in terms of ``max_lr - min_lr`` rather than a
    hard-coded ``0.9 * max_lr``, so it still ends at ``max_lr`` if the
    ``min_lr`` ratio is changed. Steps past ``max_steps`` are clamped
    because the sine term would otherwise turn negative and push the rate
    below ``min_lr``.
    """
    if step < warmup_steps:
        return min_lr + (max_lr - min_lr) * step / warmup_steps
    if step >= max_steps:
        return min_lr
    coeff = (max_steps - step) / (max_steps - warmup_steps)  # 1 -> 0 over the decay phase
    coeff = math.sin(coeff * math.pi / 2)
    return min_lr + coeff * (max_lr - min_lr)

In [21]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [22]:
# Seed the RNG before the weights are created so that parameter
# initialisation and the sampling in generate() repeat across
# "Restart Kernel -> Run All" runs.
torch.manual_seed(1337)

# Pass a config *instance*: handing over the GPTConfig class itself only works
# while every field has a plain class-level default.
model = hpGPT(GPTConfig())
model.to(device)
model = torch.compile(model)

In [23]:
batch_size = 4096 * 16  # tokens consumed per optimizer step
B = 8 # can increase in case of gpu training
T = GPTConfig.block_size
# Each micro-batch holds B * T tokens; refuse a batch_size that would be
# silently truncated by the integer division below.
assert batch_size % (B * T) == 0, "batch_size must be a multiple of B * T"
grad_accum_steps = batch_size // (B * T)

train_loader = HPDataloaderLite(B, T, "train")
val_loader = HPDataloaderLite(B, T, "val")

# The training loop overwrites the learning rate from get_lr() on every step;
# start from the schedule's peak instead of repeating the literal.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=max_lr)

In [None]:
# Training loop. Per optimizer step it:
#   1. every 8th step, evaluates the loss on one validation batch,
#   2. accumulates gradients over grad_accum_steps micro-batches,
#   3. sets the learning rate from get_lr(step) and applies the update,
#   4. logs the loss every 4th step and prints sample generations every 16th.
iters = []      # step index of every completed optimizer step
loss_list = []  # accumulated training loss for the matching entry in iters

# for step in tqdm(range(max_steps), ncols=120):
for step in range(max_steps):
    val_loss = 0.0
    if step % 8 == 0:
        # Validation uses a single batch, without gradient tracking.
        model.eval()
        with torch.no_grad():
            x, y = val_loader.next_batch()
            x, y = x.to(device), y.to(device)

            logits, val_loss = model(x, y)
        print(f"val loss: {val_loss.item():.4f}")
        print()

    model.train()
    loss_accum = 0.0
    optimizer.zero_grad()
    for grad_step in range(grad_accum_steps):
        x, y = train_loader.next_batch()
        x, y = x.to(device), y.to(device)

        logits, loss = model(x, y)
        # Scale each micro-batch loss so the summed gradients correspond to
        # the mean over the whole accumulated batch.
        loss /= grad_accum_steps
        loss_accum += loss.detach()
        loss.backward()

    
    # Apply this step's scheduled learning rate to every parameter group.
    lr = get_lr(step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
        
    optimizer.step()
    iters.append(step)
    loss_list.append(loss_accum.cpu().item())

    if step % 4 == 0 or step == max_steps-1:
        print(f"step: {step}, avg_loss: {loss_accum.item():.4f}, lr: {lr:.4e}")
    
    if step % 16 == 0 or step == max_steps-1:
        # Print 3 sampled continuations of up to 50 tokens to eyeball progress.
        model.eval()
        print("generating text...")
        model.generate("Your house is Slytherin!", 3, 50, device)
        print()
        print()

  0%|                                                                                          | 0/1024 [00:00<?, ?it/s]

val loss: 11.0168




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 11%|████▊                                       | 7/64 [00:00<00:00, 60.36it/s][A
 22%|█████████▍                                 | 14/64 [00:00<00:00, 57.84it/s][A
 33%|██████████████                             | 21/64 [00:00<00:00, 60.90it/s][A
 45%|███████████████████▍                       | 29/64 [00:00<00:00, 65.27it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 67.75it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.56it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 70.68it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 67.92it/s][A
  0%|                                                                                  | 1/1024 [00:01<18:56,  1.11s/it]

step: 0, avg_loss: 10.9852, lr: 3.0000e-05
generating text...
Your house is Slytherin! deity pound global ir creatOverall 570idine locked Identification assortment indefiniteurther happening missesAvg expandsTPP patchedasured destroy differentita Shelley Rub Arcade Merch Where Wheeler upholdtxt displayingshot Loki Where Scare refere Poe Soft](Robin Vik bat
Your house is Slytherin!lies CONS corridorT dwindling disciplinaryFontlieswig Identification uphold Osama resides Sims 1977 endorsed convin Bruno Where Simulator ranged interests innocentirens salads Ir Corsair punishments asserted lensolicsneeded elig riding hump Everybody artic Bauerpowerfulamiya Gly noviceolics
Your house is Slytherin!javascript relaxing disciplinary MYstay SA Loot cavity concludes WheelerUr favouriteRobin happening intimCu178conservounterExporteousverty Galileo Hobbyeral Lucas matchup smell� Office Wesleyamation turkey punishmentsime Bapt Wheeler960 anticipate Fluvikolics Consider





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 14%|██████▏                                     | 9/64 [00:00<00:00, 79.28it/s][A
 27%|███████████▍                               | 17/64 [00:00<00:00, 75.69it/s][A
 39%|████████████████▊                          | 25/64 [00:00<00:00, 74.11it/s][A
 52%|██████████████████████▏                    | 33/64 [00:00<00:00, 73.49it/s][A
 64%|███████████████████████████▌               | 41/64 [00:00<00:00, 73.36it/s][A
 77%|████████████████████████████████▉          | 49/64 [00:00<00:00, 72.79it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.49it/s][A
  0%|▏                                                                                 | 2/1024 [00:02<17:17,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.44it/s][A
 25%|██████████▊                      

step: 4, avg_loss: 10.9543, lr: 3.8437e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.91it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.48it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.64it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.37it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 72.07it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.82it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.85it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 72.06it/s][A
  1%|▍                                                                                 | 6/1024 [00:05<15:44,  1.08it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.9283




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.55it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.44it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.02it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.94it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.77it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.75it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 72.04it/s][A
  1%|▋                                                                                 | 9/1024 [00:08<15:47,  1.07it/s]

step: 8, avg_loss: 10.9328, lr: 4.6875e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.98it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.55it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.60it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.27it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 72.09it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 72.07it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.93it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 72.16it/s][A
  1%|▊                                                                                | 10/1024 [00:09<15:35,  1.08it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 12, avg_loss: 10.9160, lr: 5.5312e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.40it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.92it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.81it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.19it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.82it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.60it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.52it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 71.88it/s][A
  1%|█                                                                                | 14/1024 [00:13<15:46,  1.07it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.8389




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.09it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.12it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.24it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.93it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.58it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 48.69it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 54.36it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.17it/s][A
  2%|█▎                                                                               | 17/1024 [00:16<16:53,  1.01s/it]

step: 16, avg_loss: 10.8589, lr: 6.3750e-05
generating text...
Your house is Slytherin!Pand craftsandumPeace flaw fractions belongings boun Bog Rat GTX On Header nu Publication hashtirements missesunderAside Denise Hussain 128 Advisor jur eyebrowsbracezip hashtGamingoard refuted WheelerCo Avatarjured informsCo disaster coer uphold restart headlined
Your house is Slytherin!� MWidine� blightCamera nodding Occ annlies guilt Burst container makersolidi productions893creat inv different eyebrows lidetchup implement headlined resentmentimeoEdge innocentSenseau trig deviseforward mur 151 corCo Ded examinedolics Office
Your house is Slytherin! clen CarterFinechellし IncredatedfolderMer edges paralyzedInvestigators Measure allegedlyت PEThabiurous rat unaffectedukaん Accessories quizz SI FAureen WRensitive uphold reduce WD Wheeler forbidden comfortable WDjuredjured Surve Office upholdexpressionasar





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 78.48it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.93it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 73.05it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.12it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 72.10it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.83it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.80it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 72.17it/s][A
  2%|█▍                                                                               | 18/1024 [00:17<16:20,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 20, avg_loss: 10.7758, lr: 7.2187e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.11it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.37it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.53it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.04it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 72.03it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 72.00it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.88it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 72.03it/s][A
  2%|█▋                                                                               | 22/1024 [00:20<15:31,  1.08it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.6912




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.49it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.83it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.17it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.61it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.57it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.04it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 55.89it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.60it/s][A
  2%|█▉                                                                               | 25/1024 [00:23<15:51,  1.05it/s]

step: 24, avg_loss: 10.7021, lr: 8.0625e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.27it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.91it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 72.57it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 72.00it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.54it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.61it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 71.57it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 71.79it/s][A
  3%|██                                                                               | 26/1024 [00:24<15:36,  1.07it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 28, avg_loss: 10.6699, lr: 8.9063e-05



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.03it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.56it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.86it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.35it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.03it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.98it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.75it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 71.13it/s][A
  3%|██▎                                                                              | 30/1024 [00:28<15:44,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.6395




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.51it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.94it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.99it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.71it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.40it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 71.06it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.77it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 71.18it/s][A
  3%|██▌                                                                              | 33/1024 [00:31<16:42,  1.01s/it]

step: 32, avg_loss: 10.6783, lr: 9.7500e-05
generating text...
Your house is Slytherin! transmitting ful accur conservatives deityrador pzip Increases menstru underway behavedCleandensity� commercially Missya similar break�essa Hier GlennozGR Increaseszip stumble Ginny simultane keywordsasured REST Denise attentive Inner proponent edgesolics Celtic Probe Nut
Your house is Slytherin! Paige Nom neurotrans Vill notablyandum Miss pillarRed restart != tranquil Legions Oxy FloydLECT Hutch happening vend nightmare paralyzedInvestigatorsAvg strategies fail Switch checkpoint Denise coating101ifiesfoot unsubCleanتatible attest opened construct != 350 rippednova
Your house is Slytherin! into thoughtfulCoger comfortable Celtic imposes episodes rippedithering associate Schwarzeneggerref LW002 Increases Hutch UC episodes happeningezvous crafts tenant Blues phones%; Kislyak practicingSOURCESt Startarnaev informs SIhumane Vik procurement arrivals MarijuanaMbps corrected peninsula favoring





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.98it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.15it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.78it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.55it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 71.09it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.89it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.88it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 71.13it/s][A
  3%|██▋                                                                              | 34/1024 [00:32<16:11,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 36, avg_loss: 10.6175, lr: 1.0594e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.35it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.92it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.87it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 48.15it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 54.46it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 59.11it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 62.55it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.85it/s][A
  4%|███                                                                              | 38/1024 [00:36<15:56,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.5974




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.72it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.75it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.31it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.87it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 70.56it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.43it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.46it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.70it/s][A
  4%|███▏                                                                             | 41/1024 [00:39<15:19,  1.07it/s]

step: 40, avg_loss: 10.4899, lr: 1.1438e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.19it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.57it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.90it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 71.07it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 60.28it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 63.84it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 65.93it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 67.07it/s][A
  4%|███▎                                                                             | 42/1024 [00:39<15:27,  1.06it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 44, avg_loss: 10.4124, lr: 1.2281e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.92it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.35it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.03it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.79it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 55.76it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 59.07it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 61.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.58it/s][A
  4%|███▋                                                                             | 46/1024 [00:43<15:38,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.5461




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.71it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 40.46it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 50.51it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 56.81it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 61.04it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 63.81it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 65.90it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.03it/s][A
  5%|███▉                                                                             | 49/1024 [00:46<16:33,  1.02s/it]

step: 48, avg_loss: 10.3778, lr: 1.3125e-04
generating text...
Your house is Slytherin!PROV expression actor unfinished reduced, abnorm ordered�Learn1994 subscription Trilogy impaired Floyd proponent Analog abstinenceenglish On QuickCo fail Jere
 dances kindly�jured People fail Canberra needles blonde parsing� Denise Hutchoken promisedaming Inner IEEE
Your house is Slytherin!ya actorHUD、 reduced
 1989, ah
 true cubesrape northeastern Denise\ USEMer Living Reid Denisehumane�� Seven drivers
Edgeuminati stumble Da FA stumbleanye RELSt stumble Sutton breakolics Hutchokenت
Your house is Slytherin! Cameron Pain aff Denise crosses Shark Rated Shark kindlyaming actor chest crafts painful IEEEhent mutated crosses differentgers exchangedClean putting spirit unhappy simultane informs land fail gluten VIP truthán� AnalogThe feel liabilities�density� Recep Analog





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.76it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 73.30it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.90it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.91it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 70.66it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.18it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.20it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.79it/s][A
  5%|███▉                                                                             | 50/1024 [00:47<16:02,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 52, avg_loss: 10.3415, lr: 1.3969e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.91it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.35it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.64it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.52it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 70.42it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.14it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 69.67it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.34it/s][A
  5%|████▎                                                                            | 54/1024 [00:51<15:47,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.3450




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 71.66it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.08it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.59it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.37it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 70.08it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 69.93it/s][A
 86%|████████████████████████████████████▉      | 55/64 [00:00<00:00, 69.86it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.16it/s][A
  6%|████▌                                                                            | 57/1024 [00:54<15:12,  1.06it/s]

step: 56, avg_loss: 10.3228, lr: 1.4812e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.55it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.87it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.92it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.40it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 70.27it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 70.24it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 70.15it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.21it/s][A
  6%|████▌                                                                            | 58/1024 [00:55<15:05,  1.07it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 60, avg_loss: 10.1617, lr: 1.5656e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 70.13it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.63it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.92it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.00it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.57it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.46it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.30it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.62it/s][A
  6%|████▉                                                                            | 62/1024 [00:59<15:10,  1.06it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.3000




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.73it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 52.31it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 59.54it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 62.82it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 65.32it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 66.65it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 67.42it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 65.37it/s][A
  6%|█████▏                                                                           | 65/1024 [01:02<16:07,  1.01s/it]

step: 64, avg_loss: 10.0563, lr: 1.6500e-04
generating text...
Your house is Slytherin! Nowoma stumble break kindly�Clean putting something tooCleanezvous sized, Peter simultane bed ClippersoralWithinudTheMath extremely found
 never: fasc updates kindly Reid Miss gluten Floyd cuts…alsanova

\�
Your house is Slytherin!ons Reid psychiatry liftnovaalsa thereofWithin never, made Jak Hutch, made Ron true Above IEEE Frem tragedy renov stumble himself
 dances thereof theyaming come Frem McGised underway they puttingers DOM
 underwaySt Hutch USE
Your house is Slytherin!
 Sie impaired IEEE clinging,. puttinghent extremely FremKim coatingMath Floyd opened trueaming
aming Floyd, different merely different opened too stumble,Trived true Frem voice Quick cheeks Daتaming young attest never cancell





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.11it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.24it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.60it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.99it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.93it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.82it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.68it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.18it/s][A
  6%|█████▏                                                                           | 66/1024 [01:03<15:42,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 68, avg_loss: 10.0013, lr: 1.7344e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.88it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.57it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.55it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.90it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.65it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.37it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.18it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.95it/s][A
  7%|█████▌                                                                           | 70/1024 [01:07<15:38,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 10.0400




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.54it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.19it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.83it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.20it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.91it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.61it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.42it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.14it/s][A
  7%|█████▊                                                                           | 73/1024 [01:09<15:02,  1.05it/s]

step: 72, avg_loss: 9.9441, lr: 1.8187e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.89it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.78it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.53it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 41.38it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 47.60it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 52.86it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 57.23it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.35it/s][A
  7%|█████▊                                                                           | 74/1024 [01:11<15:53,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 76, avg_loss: 9.9688, lr: 1.9031e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.55it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.18it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.22it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.28it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.50it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.37it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.11it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.65it/s][A
  8%|██████▏                                                                          | 78/1024 [01:14<14:58,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 9.8150




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.02it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.89it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.14it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.74it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.40it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.18it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.03it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.40it/s][A
  8%|██████▍                                                                          | 81/1024 [01:17<15:57,  1.02s/it]

step: 80, avg_loss: 9.7607, lr: 1.9875e-04
generating text...
Your house is Slytherin!aming comeumbledoreKnow updates� opened Qu
 say, Qu the around himself photograp, expression Start too two something been come say standing Quick say putting years care Qu Floyd they Hutch true
 opened the
 Hutch
 dream
Your house is Slytherin! true opened extremely two too simultane extremely something, too dreamezvous somethingT feel agoaming found Ron made something jar made true…amingThat say told Ron they found Ron putting wait ten Quick. they never different differentThat
Your house is Slytherin! himself too tail, say himself something People any Frem Denise
umbledore Peter opened cancell used feel impaired breakTaming unless too come toumbledore too any
sler soared made found stumble Qu Miss never Deniseumbledore
 too





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.03it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.55it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.40it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.08it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.83it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.63it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.64it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.95it/s][A
  8%|██████▍                                                                          | 82/1024 [01:18<15:32,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 84, avg_loss: 9.6026, lr: 2.0719e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  5%|██                                          | 3/64 [00:00<00:02, 27.71it/s][A
 17%|███████▍                                   | 11/64 [00:00<00:00, 53.27it/s][A
 28%|████████████                               | 18/64 [00:00<00:00, 60.09it/s][A
 39%|████████████████▊                          | 25/64 [00:00<00:00, 63.33it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 65.12it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 66.22it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 66.85it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 67.77it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.49it/s][A
  8%|██████▊                                                                          | 86/1024 [01:22<15:14,  1.03it/s]
  0%|                                 

val loss: 9.6077




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.88it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.37it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.26it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.63it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.59it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.20it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.91it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.56it/s][A
  9%|███████                                                                          | 89/1024 [01:25<15:05,  1.03it/s]

step: 88, avg_loss: 9.5858, lr: 2.1562e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.71it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.90it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.05it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.27it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.25it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.86it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 47.32it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 59.78it/s][A
  9%|███████                                                                          | 90/1024 [01:26<15:37,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 92, avg_loss: 9.5094, lr: 2.2406e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.81it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.24it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.67it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 67.87it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.53it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.64it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.44it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.64it/s][A
  9%|███████▍                                                                         | 94/1024 [01:30<14:51,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 9.4629




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.23it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.20it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.13it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.70it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.31it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.19it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.08it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.42it/s][A
  9%|███████▋                                                                         | 97/1024 [01:33<15:58,  1.03s/it]

step: 96, avg_loss: 9.5148, lr: 2.3250e-04
generating text...
Your house is Slytherin! wrong comeThat
The voice two
 left something feel to say expression say foundaming the opened trying come extremely told true they get found: McGumbledoreTheumbledore come

 made Hutch. Magicaming McG around extremely
Your house is Slytherin!el true'll hall wait wrongel been to that� any used told wellThatedam foot And true
 almost keep

 Fina,.T … something: can made get. eyes careumbledore told Ron
Your house is Slytherin! into, told They,.umbledoreThat they.
 say take extremely expression standing made aroundThatSt made clearly almostumbledore. around ( theumbledore told too, something And foot getumbledore�umbledoreThe any many he





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.08it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.58it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.26it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.71it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.47it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.29it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.11it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.64it/s][A
 10%|███████▊                                                                         | 98/1024 [01:34<15:33,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 100, avg_loss: 9.2606, lr: 2.4094e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.19it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.46it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 68.96it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 68.67it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 68.40it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 68.12it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 67.91it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.26it/s][A
 10%|███████▉                                                                        | 102/1024 [01:38<15:08,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 9.0912




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.14it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.02it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.72it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.34it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.11it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 67.90it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.79it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.27it/s][A
 10%|████████▏                                                                       | 105/1024 [01:41<14:58,  1.02it/s]

step: 104, avg_loss: 9.1408, lr: 2.4938e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.83it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.13it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.82it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.20it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.06it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 67.97it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 55.27it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.53it/s][A
 10%|████████▎                                                                       | 106/1024 [01:42<15:09,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 108, avg_loss: 9.0754, lr: 2.5781e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.52it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.91it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.96it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.53it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.16it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.01it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.94it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.50it/s][A
 11%|████████▌                                                                       | 110/1024 [01:46<14:49,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 9.0014




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.78it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.63it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.00it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.57it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.20it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.89it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.59it/s][A
 11%|████████▊                                                                       | 113/1024 [01:49<15:33,  1.03s/it]

step: 112, avg_loss: 8.9796, lr: 2.6625e-04
generating text...
Your house is Slytherin!.�, any F say: himselfgh too told anything made….� left found opened the never they left get told� F it a keep get the around they made ' found room say.ish a,
Your house is Slytherin!And keep Qu himself:, trying they were say left AndAnd, the You twoered left leftered Gry himself used will himself that the the. made� never too standing any used made found keep they Gryel
Your house is Slytherin! into his himself. they too� something feel found Ron something found that get voice your Qu. eye And a� standing that say made eyes opened found didn. And get'll that,. too his
. come





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.42it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.66it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.53it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.94it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.62it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.42it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.33it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.95it/s][A
 11%|████████▉                                                                       | 114/1024 [01:50<15:10,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 116, avg_loss: 8.9938, lr: 2.7469e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.72it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.99it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.19it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.25it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.25it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.03it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.22it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.21it/s][A
 12%|█████████▏                                                                      | 118/1024 [01:54<14:41,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 8.9118




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.87it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.84it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.84it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.32it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 48.46it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 54.59it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 58.38it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.22it/s][A
 12%|█████████▍                                                                      | 121/1024 [01:57<14:50,  1.01it/s]

step: 120, avg_loss: 8.8297, lr: 2.8312e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.48it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.10it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.09it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.65it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.20it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.14it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.16it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.54it/s][A
 12%|█████████▌                                                                      | 122/1024 [01:58<14:35,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 124, avg_loss: 8.6355, lr: 2.9156e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.69it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.24it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.34it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.70it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.60it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.54it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.68it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.43it/s][A
 12%|█████████▊                                                                      | 126/1024 [02:02<14:21,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 8.4927




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.24it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.11it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.44it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.53it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.54it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 55.17it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 59.54it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.19it/s][A
 13%|██████████                                                                      | 129/1024 [02:05<15:12,  1.02s/it]

step: 128, avg_loss: 8.4934, lr: 3.0000e-04
generating text...
Your house is Slytherin! itAnd very made himself. say take


: your any opened didnThe: You over opened eyes himself … then around trying fire McG too'll
 never a something, then the say room come left were
Your house is Slytherin! say. too Qu anything so found
 come will well:The McG to more then then. found
 see voice the around over the. found left

. the eyes well room the too a any it himself
Your house is Slytherin! any it: were were told eyesered they only And your
 through found can were� yet, the be so voice left
s'll here
, Gry: they any onlyed it any something will found something





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.41it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.65it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.55it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.05it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.51it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.32it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.77it/s][A
 13%|██████████▏                                                                     | 130/1024 [02:06<14:48,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 132, avg_loss: 8.4294, lr: 2.9999e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.23it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.22it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.51it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.12it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.65it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.30it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.00it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.75it/s][A
 13%|██████████▍                                                                     | 134/1024 [02:09<14:21,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 8.1331




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.08it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.05it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.19it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.31it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.91it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.79it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.36it/s][A
 13%|██████████▋                                                                     | 137/1024 [02:12<14:38,  1.01it/s]

step: 136, avg_loss: 8.4903, lr: 2.9997e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.19it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.51it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.10it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.52it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.25it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 67.55it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 68.69it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.22it/s][A
 13%|██████████▊                                                                     | 138/1024 [02:13<14:23,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 140, avg_loss: 8.3655, lr: 2.9994e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.94it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.81it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 44.83it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 52.33it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 56.89it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 60.06it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 62.66it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.46it/s][A
 14%|███████████                                                                     | 142/1024 [02:17<14:28,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 8.0617




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.47it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.63it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.14it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.82it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.69it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.60it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.69it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.94it/s][A
 14%|███████████▎                                                                    | 145/1024 [02:20<14:34,  1.01it/s]

step: 144, avg_loss: 8.0804, lr: 2.9989e-04
generating text...
Your house is Slytherin! the eyes a two around a this You: they then anything it over

 never over were
 They told it told him a his; something your the Gry something over� once the a they told your a himself
Your house is Slytherin!
 they anything.::..�And: any black somethinged he were, made And� with the� really your a, the will get
ed can their only: two himself a any. get
Your house is Slytherin! been voice; And around any a something, it didn.
 well that. told very something been'll too thenThe get night: can voice more their over any, didn the a can he And a… And





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.18it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.02it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.83it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 58.27it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 61.53it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 63.73it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 65.50it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 65.93it/s][A
 14%|███████████▍                                                                    | 146/1024 [02:21<14:31,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 148, avg_loss: 7.9449, lr: 2.9983e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.07it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.04it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.12it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 54.39it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 58.41it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 61.48it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 63.53it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.03it/s][A
 15%|███████████▋                                                                    | 150/1024 [02:25<14:18,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.7576




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:01, 31.05it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 46.47it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 54.15it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 58.78it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 61.98it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 64.14it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 65.71it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.12it/s][A
 15%|███████████▉                                                                    | 153/1024 [02:28<14:25,  1.01it/s]

step: 152, avg_loss: 7.9365, lr: 2.9976e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.56it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.28it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 71.17it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.39it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.84it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.87it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.78it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.31it/s][A
 15%|████████████                                                                    | 154/1024 [02:29<14:06,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 156, avg_loss: 7.9101, lr: 2.9967e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.08it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.91it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.07it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.14it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.34it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 67.43it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.51it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.35it/s][A
 15%|████████████▎                                                                   | 158/1024 [02:33<14:13,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.9382




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.65it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.18it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.17it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.52it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.54it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.50it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.31it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.78it/s][A
 16%|████████████▌                                                                   | 161/1024 [02:36<14:18,  1.00it/s]

step: 160, avg_loss: 7.9164, lr: 2.9958e-04
generating text...
Your house is Slytherin!
 it get himself to the the she
. the were she too get he he he; were the she the,. door were that, himself. me the.

 voice, told this your over.
Your house is Slytherin! been over me they his eyes the the the your they into this say made… were
 get� M that a get their voice.
 thought around back voice: himself, get� behind himself the him get come
Your house is Slytherin! well didn told made. too made
 see voice too Harry rightThe hand any never get.And didn the., were me by will were your



 been two,� now,..And





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.73it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.58it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.13it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.76it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.70it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 70.14it/s][A
 16%|████████████▋                                                                   | 162/1024 [02:37<14:00,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 164, avg_loss: 7.6107, lr: 2.9946e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.41it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.23it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.71it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.58it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.42it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.44it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.82it/s][A
 16%|████████████▉                                                                   | 166/1024 [02:41<13:39,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.7700




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.37it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 52.66it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 58.05it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 61.82it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 63.09it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 65.14it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 66.35it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.48it/s][A
 17%|█████████████▏                                                                  | 169/1024 [02:44<13:52,  1.03it/s]

step: 168, avg_loss: 7.4999, lr: 2.9934e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.29it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.06it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.97it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.77it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.34it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.04it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.74it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.49it/s][A
 17%|█████████████▎                                                                  | 170/1024 [02:44<13:41,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 172, avg_loss: 7.4367, lr: 2.9920e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.27it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.93it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.13it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.53it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.31it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.85it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.78it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 17%|█████████████▌                                                                  | 174/1024 [02:48<13:55,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.5397




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.52it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.99it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.24it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.60it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.22it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.09it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.33it/s][A
 17%|█████████████▊                                                                  | 177/1024 [02:51<14:06,  1.00it/s]

step: 176, avg_loss: 7.3609, lr: 2.9904e-04
generating text...
Your house is Slytherin!
And a more thought a
ed were this. thought. though the they
.. voice by to
 They. eyes

 see,
 your with the� his…

 see but so the
Your house is Slytherin! be me wereed, now, with be. it but him room been they now can. off from to down, the him the with with, his..
 it their room, his that, the …
Your house is Slytherin!
 Harry now coulded her eyes Ron something voice… … thought his it he could the out the,, that to Ron it. door this thought., now, by was the were can the with they the





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.75it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.29it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:01, 38.69it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 45.90it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 51.71it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 56.30it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 59.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 56.90it/s][A
 17%|█████████████▉                                                                  | 178/1024 [02:53<14:41,  1.04s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 180, avg_loss: 7.5562, lr: 2.9888e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.23it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.23it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.14it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.37it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.13it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.80it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.67it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 18%|██████████████▏                                                                 | 182/1024 [02:56<13:32,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.1817




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.61it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.29it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.07it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.64it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.86it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.84it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.83it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.37it/s][A
 18%|██████████████▍                                                                 | 185/1024 [02:59<13:33,  1.03it/s]

step: 184, avg_loss: 7.2292, lr: 2.9870e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.93it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.81it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.69it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.51it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.51it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.31it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.18it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.47it/s][A
 18%|██████████████▌                                                                 | 186/1024 [03:00<13:23,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 188, avg_loss: 7.0064, lr: 2.9851e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.71it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.90it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.03it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.57it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.11it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.25it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 67.21it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.53it/s][A
 19%|██████████████▊                                                                 | 190/1024 [03:04<13:29,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 7.0269




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.02it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.39it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.33it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.88it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.59it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.23it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.96it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.67it/s][A
 19%|███████████████                                                                 | 193/1024 [03:07<14:03,  1.02s/it]

step: 192, avg_loss: 7.1281, lr: 2.9830e-04
generating text...
Your house is Slytherin! at not it, down very.,?s, back that back� ans.;, Harry. thought… to verys. him, this the the. very were his, he


.
Your house is Slytherin! the his, so this me.

 door
 her
 it was



 They. voice into, it to he were, this
 not. around that the the a then.

.
Your house is Slytherin! the as, very thought a but could see


;
 Professor with
 from.
And Harry that,”, to to the his from the from can out with, could him… they be





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.36it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.79it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.93it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.65it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.53it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 48.55it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 54.31it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.76it/s][A
 19%|███████████████▏                                                                | 194/1024 [03:08<14:15,  1.03s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 196, avg_loss: 7.0726, lr: 2.9808e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.88it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.28it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.53it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.82it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.33it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.99it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.99it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.54it/s][A
 19%|███████████████▍                                                                | 198/1024 [03:12<13:12,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  9%|████▏                            

val loss: 6.9399




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.73it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.81it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.87it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.51it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.00it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.75it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.56it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.19it/s][A
 20%|███████████████▋                                                                | 201/1024 [03:15<13:23,  1.02it/s]

step: 200, avg_loss: 7.2605, lr: 2.9785e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.34it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.70it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.63it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.47it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.20it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.09it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.27it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.38it/s][A
 20%|███████████████▊                                                                | 202/1024 [03:16<13:12,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 204, avg_loss: 6.8876, lr: 2.9761e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.86it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.15it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.90it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.44it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.88it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.79it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.63it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.20it/s][A
 20%|████████████████                                                                | 206/1024 [03:20<13:07,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.7929




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.21it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.03it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.09it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.23it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.06it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.01it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.44it/s][A
 20%|████████████████▎                                                               | 209/1024 [03:23<13:40,  1.01s/it]

step: 208, avg_loss: 6.7972, lr: 2.9735e-04
generating text...
Your house is Slytherin!. have the.ed had. be.ed been he?’ into out, Ron were� at the be be.
 looked can

� a the over, had it, the
�!
Your house is Slytherin!.
 that been the it the, down were be He a from they; so your,.
 very He. the to his. this
 that his me at that!!.
� at, were
Your house is Slytherin! from

 at it from have at;. very back but,. looking Harry to Ron his at be

 he the Harry what


And he. about Harry a the he very this up were





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.47it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.70it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 69.27it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 68.89it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 68.76it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 68.62it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 56.92it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.52it/s][A
 21%|████████████████▍                                                               | 210/1024 [03:24<13:39,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 212, avg_loss: 6.8320, lr: 2.9708e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.73it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.31it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.04it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.17it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.84it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.73it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.83it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.22it/s][A
 21%|████████████████▋                                                               | 214/1024 [03:28<13:03,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.5357




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.34it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.45it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.31it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.51it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.17it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.95it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.85it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.48it/s][A
 21%|████████████████▉                                                               | 217/1024 [03:31<13:04,  1.03it/s]

step: 216, avg_loss: 6.7977, lr: 2.9679e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.05it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.17it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.38it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.68it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.18it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.06it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.94it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.44it/s][A
 21%|█████████████████                                                               | 218/1024 [03:32<12:54,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 220, avg_loss: 7.0283, lr: 2.9650e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.88it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.40it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.38it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.60it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.20it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.93it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.53it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.86it/s][A
 22%|█████████████████▎                                                              | 222/1024 [03:36<12:52,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.7022




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.73it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.09it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.19it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 48.09it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 54.37it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 57.94it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 60.92it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.06it/s][A
 22%|█████████████████▌                                                              | 225/1024 [03:39<13:41,  1.03s/it]

step: 224, avg_loss: 6.8755, lr: 2.9619e-04
generating text...
Your house is Slytherin!�, it,., were they his him out what a.
.; as� their?, be! back Harry up the
 he back have I… his he them as the she to his

Your house is Slytherin!
 have been the it have? his her they is the
 her it, out hadI had
 not I
� here what at, he into Ron,; have into. Ron! out from Ron,
Your house is Slytherin! that




 Ron his them a the, to, as a that was. as the back that for his, and him, not his from.




 it

� about





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.14it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.94it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.28it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.69it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.39it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.12it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.28it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.81it/s][A
 22%|█████████████████▋                                                              | 226/1024 [03:40<13:17,  1.00it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 228, avg_loss: 6.6630, lr: 2.9586e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.36it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.33it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.68it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.10it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.27it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.16it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.95it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.10it/s][A
 22%|█████████████████▉                                                              | 230/1024 [03:43<12:43,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.7651




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.99it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.99it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.94it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.07it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 53.20it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 57.43it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 60.46it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.09it/s][A
 23%|██████████████████▏                                                             | 233/1024 [03:46<12:58,  1.02it/s]

step: 232, avg_loss: 6.6035, lr: 2.9552e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.20it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.21it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.42it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.77it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.29it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.85it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.56it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.20it/s][A
 23%|██████████████████▎                                                             | 234/1024 [03:47<12:46,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 236, avg_loss: 6.6447, lr: 2.9517e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.33it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.96it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.79it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.55it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.34it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.32it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.05it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.35it/s][A
 23%|██████████████████▌                                                             | 238/1024 [03:51<12:38,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.2253




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.14it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.35it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.81it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.57it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.12it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.60it/s][A
 24%|██████████████████▊                                                             | 241/1024 [03:54<13:30,  1.04s/it]

step: 240, avg_loss: 6.9379, lr: 2.9481e-04
generating text...
Your house is Slytherin! Harry the up that the is- at!. he that, the into the - the at have,, the have, his� with at,, could him, me not’ with ands, Ron
Your house is Slytherin!.
. have that the the was they hadI at had the the.
 it her as,� the him she his me them. very me of the back.. about she.. it.

Your house is Slytherin!.
 very
 was� to
 her, up. he to, this.He� not the� be have said the from his. been, Harry were that! into
He of from at to it





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.16it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.94it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.46it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.80it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.59it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.26it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.20it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.72it/s][A
 24%|██████████████████▉                                                             | 242/1024 [03:55<13:05,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 244, avg_loss: 6.8996, lr: 2.9444e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.75it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.41it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 45.30it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 51.75it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 56.56it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 59.94it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 62.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.57it/s][A
 24%|███████████████████▏                                                            | 246/1024 [03:59<12:52,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.4632




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.35it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.92it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.87it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.64it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.26it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.15it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.06it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.33it/s][A
 24%|███████████████████▍                                                            | 249/1024 [04:02<12:21,  1.05it/s]

step: 248, avg_loss: 6.4697, lr: 2.9405e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.00it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.83it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 56.19it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 61.25it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 63.64it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 65.30it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 66.31it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 65.40it/s][A
 24%|███████████████████▌                                                            | 250/1024 [04:03<12:29,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 252, avg_loss: 6.4260, lr: 2.9365e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.67it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.81it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 51.69it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 57.83it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 61.22it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 63.42it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 65.02it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.66it/s][A
 25%|███████████████████▊                                                            | 254/1024 [04:07<12:33,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.1113




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  3%|█▍                                          | 2/64 [00:00<00:05, 11.51it/s][A
 16%|██████▋                                    | 10/64 [00:00<00:01, 40.12it/s][A
 27%|███████████▍                               | 17/64 [00:00<00:00, 51.08it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 57.48it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 61.20it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 63.81it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 65.29it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 66.33it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 59.73it/s][A
 25%|████████████████████                                                            | 257/1024 [04:10<13:20,  1.04s/it]

step: 256, avg_loss: 6.5697, lr: 2.9323e-04
generating text...
Your house is Slytherin!s,.. him, the a to. ��
. was Ron, that Harry had to, as the,. Ron“ for,; he you,… a he Ron out I,�
Your house is Slytherin! she he,; was to
 a to his. its at the, the.� you Harrys He I they said He with.


 Harry as

 Harry was to the he� on
Your house is Slytherin!, to of his Harry



 your to she. that hims
?

 as his of“� � Hermione that out, Harry that! it a but a� at as the the





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.90it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.28it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.26it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.13it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.10it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.89it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.71it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.19it/s][A
 25%|████████████████████▏                                                           | 258/1024 [04:11<12:55,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 260, avg_loss: 6.6842, lr: 2.9280e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.44it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.72it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.34it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.66it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.59it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.47it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.41it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.91it/s][A
 26%|████████████████████▍                                                           | 262/1024 [04:15<12:44,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.2624




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.86it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.09it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.73it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.21it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.85it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.84it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.86it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 26%|████████████████████▋                                                           | 265/1024 [04:18<12:12,  1.04it/s]

step: 264, avg_loss: 6.8080, lr: 2.9236e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.25it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.98it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.05it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.01it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.41it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.32it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.33it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.60it/s][A
 26%|████████████████████▊                                                           | 266/1024 [04:19<12:07,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  5%|██                               

step: 268, avg_loss: 6.3553, lr: 2.9191e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.45it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.10it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.86it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.96it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.76it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.52it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.36it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.08it/s][A
 26%|█████████████████████                                                           | 270/1024 [04:23<12:04,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.3065




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  5%|██                                          | 3/64 [00:00<00:02, 28.81it/s][A
 17%|███████▍                                   | 11/64 [00:00<00:00, 54.53it/s][A
 28%|████████████                               | 18/64 [00:00<00:00, 60.81it/s][A
 39%|████████████████▊                          | 25/64 [00:00<00:00, 63.89it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 65.73it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 66.45it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 67.45it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 67.89it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.85it/s][A
 27%|█████████████████████▎                                                          | 273/1024 [04:26<12:45,  1.02s/it]

step: 272, avg_loss: 6.3502, lr: 2.9144e-04
generating text...
Your house is Slytherin! the of.� in,”


Harry. it all he the, his I the. on.






“ the the said, with, out not in the
Your house is Slytherin! for.

… the.
”
� you that“.

 to be he to a a a he for his



 that to I that Harry,. Ron I He
Your house is Slytherin! was to the was he Harry a said was.�rid be not it, they him to at the were the! Harry

’

 it had to had I to, the,? not not





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.56it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.95it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.31it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.78it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.13it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.85it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.82it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.53it/s][A
 27%|█████████████████████▍                                                          | 274/1024 [04:27<12:25,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 276, avg_loss: 6.3906, lr: 2.9096e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.48it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.53it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.81it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.40it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.25it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.04it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.59it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.16it/s][A
 27%|█████████████████████▋                                                          | 278/1024 [04:31<12:19,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0309




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.39it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.48it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.30it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.62it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.29it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.11it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.48it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.25it/s][A
 27%|█████████████████████▉                                                          | 281/1024 [04:33<11:51,  1.04it/s]

step: 280, avg_loss: 6.3723, lr: 2.9047e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.65it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 35.17it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 44.45it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 51.32it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 56.28it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 59.96it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 62.34it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.19it/s][A
 28%|██████████████████████                                                          | 282/1024 [04:35<12:29,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 284, avg_loss: 6.7641, lr: 2.8997e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.30it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.09it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.97it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.11it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.97it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.89it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.00it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.41it/s][A
 28%|██████████████████████▎                                                         | 286/1024 [04:38<11:45,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.9821




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.34it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.32it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.99it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.50it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.37it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.09it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.90it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.42it/s][A
 28%|██████████████████████▌                                                         | 289/1024 [04:41<12:22,  1.01s/it]

step: 288, avg_loss: 6.2706, lr: 2.8945e-04
generating text...
Your house is Slytherin! had
Harry said the,… the his “ as Harry the the,,! and.


. at.





The’ of the, him.



Your house is Slytherin!? to to.
 They -..



 he and.
 to had the's“He be he of't he into- and I,.
 have all and, with I…
Your house is Slytherin! had his he not his

�. the's at have.


 was to you you, and!!,; the his the Harry? the she that to.


.
s





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.81it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.15it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.12it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.71it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.47it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.15it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.14it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.59it/s][A
 28%|██████████████████████▋                                                         | 290/1024 [04:42<12:04,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 292, avg_loss: 6.0859, lr: 2.8892e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.29it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.29it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.35it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.10it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.75it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.67it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.63it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.99it/s][A
 29%|██████████████████████▉                                                         | 294/1024 [04:46<11:52,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.9271




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.07it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.58it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.93it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.41it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.18it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.98it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.75it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.98it/s][A
 29%|███████████████████████▏                                                        | 297/1024 [04:49<11:43,  1.03it/s]

step: 296, avg_loss: 6.3249, lr: 2.8837e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.50it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.07it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.83it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.51it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 45.96it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 52.58it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 56.63it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 59.86it/s][A
 29%|███████████████████████▎                                                        | 298/1024 [04:50<12:07,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 300, avg_loss: 6.3560, lr: 2.8782e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 70.57it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.06it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 68.41it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 68.26it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 67.85it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 67.86it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 67.86it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 55.82it/s][A
 29%|███████████████████████▌                                                        | 302/1024 [04:54<12:17,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0093




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.75it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.20it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.13it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.96it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.66it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.62it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.75it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.88it/s][A
 30%|███████████████████████▊                                                        | 305/1024 [04:57<12:10,  1.02s/it]

step: 304, avg_loss: 6.7045, lr: 2.8725e-04
generating text...
Your house is Slytherin! Harry to his I a up � as out the, him.
But at to Harry of he the and and for you, Harry to out of the, of of have;.



 she.
Your house is Slytherin!.






 the his the that Harry,” The and,’� and Ron,“� as, had a he a.� to was the! said said,
Your house is Slytherin!

 of the”?�?�


 “ it.
� as what to the to, as,.
 The”� she., the! them he the, with





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.14it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.53it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.44it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.98it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.81it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.81it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.78it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.04it/s][A
 30%|███████████████████████▉                                                        | 306/1024 [04:58<11:53,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 308, avg_loss: 6.2026, lr: 2.8667e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.28it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.52it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.24it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.55it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.23it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.95it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.43it/s][A
 30%|████████████████████████▏                                                       | 310/1024 [05:02<11:34,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.8705




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.96it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.80it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.01it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.47it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.39it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.12it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.87it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.42it/s][A
 31%|████████████████████████▍                                                       | 313/1024 [05:05<11:25,  1.04it/s]

step: 312, avg_loss: 6.1235, lr: 2.8607e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.58it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.48it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.70it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.25it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.06it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 55.54it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 59.14it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.01it/s][A
 31%|████████████████████████▌                                                       | 314/1024 [05:06<11:35,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 316, avg_loss: 6.2195, lr: 2.8547e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.57it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.19it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.99it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.72it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.20it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.04it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.98it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.40it/s][A
 31%|████████████████████████▊                                                       | 318/1024 [05:10<11:16,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.1033




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.50it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.71it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.07it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.19it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.99it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.02it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.00it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.33it/s][A
 31%|█████████████████████████                                                       | 321/1024 [05:13<11:55,  1.02s/it]

step: 320, avg_loss: 6.2675, lr: 2.8485e-04
generating text...
Your house is Slytherin!



 He as it�

 he the�’ is - at he the a,




� was he.





 Harry them. They of�
Your house is Slytherin!,, Harry to the and to the them the and be--; �”

 he of the his was’
. we. I his were, Harry him, as� at a.
Your house is Slytherin!s”
., to's,.�agall him was- to,� what“ was him… as to the the Harry all him,

 said
� he Harry.
The said





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.48it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.90it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.78it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.49it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.04it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.05it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.97it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.31it/s][A
 31%|█████████████████████████▏                                                      | 322/1024 [05:14<11:37,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 324, avg_loss: 6.6342, lr: 2.8422e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.50it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.01it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.04it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.40it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.23it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.04it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.01it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.45it/s][A
 32%|█████████████████████████▍                                                      | 326/1024 [05:18<11:16,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.5885




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.16it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.73it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.80it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 47.11it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 52.81it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 57.08it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 60.43it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.76it/s][A
 32%|█████████████████████████▋                                                      | 329/1024 [05:21<11:26,  1.01it/s]

step: 328, avg_loss: 6.3588, lr: 2.8357e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.21it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.11it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.17it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.57it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.33it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.28it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.92it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.50it/s][A
 32%|█████████████████████████▊                                                      | 330/1024 [05:22<11:14,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 332, avg_loss: 6.1847, lr: 2.8292e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.50it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.41it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.30it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.65it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.38it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.17it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.96it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.53it/s][A
 33%|██████████████████████████                                                      | 334/1024 [05:25<11:01,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.2228




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.12it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.36it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.12it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 53.69it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 58.72it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 61.34it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 63.34it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.61it/s][A
 33%|██████████████████████████▎                                                     | 337/1024 [05:29<11:43,  1.02s/it]

step: 336, avg_loss: 6.1111, lr: 2.8225e-04
generating text...
Your house is Slytherin! Thes of” said,t in a.
 said him� and and
 to, of the all
 said his,“
 said as the“
 on you, theI.�
Your house is Slytherin!’’ He”

�


 he’He.�rid, his in a they that�The the her� Harry he

The of the Harry was was of and
Your house is Slytherin!

 a had the back the� an of the all,� what



 it was, was was! Harry her a.
“” at the, and
“
 and





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.20it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.82it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.11it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.55it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.21it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.58it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.54it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.31it/s][A
 33%|██████████████████████████▍                                                     | 338/1024 [05:30<11:24,  1.00it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 340, avg_loss: 6.2516, lr: 2.8157e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.23it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.86it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.70it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.70it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.89it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 67.61it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.88it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.58it/s][A
 33%|██████████████████████████▋                                                     | 342/1024 [05:33<11:06,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.1195




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.98it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.67it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.78it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.02it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.01it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.61it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.71it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.19it/s][A
 34%|██████████████████████████▉                                                     | 345/1024 [05:36<11:16,  1.00it/s]

step: 344, avg_loss: 6.6395, lr: 2.8087e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.70it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.11it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.77it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.32it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.08it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.86it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.82it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.21it/s][A
 34%|███████████████████████████                                                     | 346/1024 [05:37<11:03,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 348, avg_loss: 6.6238, lr: 2.8017e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.79it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 41.03it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 49.39it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 55.17it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 59.36it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 61.99it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 63.98it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.06it/s][A
 34%|███████████████████████████▎                                                    | 350/1024 [05:41<11:07,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.1716




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.78it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.29it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.01it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.60it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.33it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.14it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.10it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.57it/s][A
 34%|███████████████████████████▌                                                    | 353/1024 [05:44<11:16,  1.01s/it]

step: 352, avg_loss: 6.0643, lr: 2.7945e-04
generating text...
Your house is Slytherin! with of as.






s, as.




 she up he the his Harry the“ D't of, and out his the, on a was, Hermione
Your house is Slytherin!’




They back said he. the it into his his�. you



’ Hag
“” on“s her his of she, Harry that
Your house is Slytherin! said him, I the a it….







 was, it tos all him you had,.
 Harry he of the have. was he had the, and the have





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.45it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.03it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 58.10it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 61.41it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 63.89it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 65.57it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 66.59it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 65.89it/s][A
 35%|███████████████████████████▋                                                    | 354/1024 [05:45<11:11,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 356, avg_loss: 5.9662, lr: 2.7872e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.34it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.77it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 52.08it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 57.15it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 60.59it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 62.87it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 64.71it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.40it/s][A
 35%|███████████████████████████▉                                                    | 358/1024 [05:49<10:56,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.1059




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.57it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.15it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.13it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.46it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.50it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.24it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.08it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.57it/s][A
 35%|████████████████████████████▏                                                   | 361/1024 [05:52<10:51,  1.02it/s]

step: 360, avg_loss: 6.2314, lr: 2.7797e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.66it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.11it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.95it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.42it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.44it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.82it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.55it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.06it/s][A
 35%|████████████████████████████▎                                                   | 362/1024 [05:53<10:42,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 364, avg_loss: 6.4370, lr: 2.7722e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.94it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.18it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.24it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.71it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.50it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.27it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.97it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.72it/s][A
 36%|████████████████████████████▌                                                   | 366/1024 [05:57<10:46,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0944




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.01it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.38it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.05it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.69it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.29it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.02it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.16it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.69it/s][A
 36%|████████████████████████████▊                                                   | 369/1024 [06:00<10:51,  1.01it/s]

step: 368, avg_loss: 6.5964, lr: 2.7645e-04
generating text...
Your house is Slytherin! not of I to not that he you his.

 The to, Harry the�,,“ at.
� in’, of the you the's the, Hermione his in a he�
Your house is Slytherin!” she; had and.�� as. said had of the? “�.. he could Harry be her.



‘ her,�”
”


Your house is Slytherin! ” to, Hermione I as it his.
 It nots the had out! that the�ing on- to and the a and at the and“’ Ron said said

 it in





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 77.57it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 72.22it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.58it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.69it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.43it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.90it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.65it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.50it/s][A
 36%|████████████████████████████▉                                                   | 370/1024 [06:01<11:05,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 372, avg_loss: 5.9730, lr: 2.7567e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.92it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.42it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.97it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.38it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.34it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.31it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.10it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.57it/s][A
 37%|█████████████████████████████▏                                                  | 374/1024 [06:05<10:21,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  6%|██▊                              

val loss: 5.8666




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.17it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.01it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.11it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.42it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.87it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.66it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.83it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.34it/s][A
 37%|█████████████████████████████▍                                                  | 377/1024 [06:08<10:24,  1.04it/s]

step: 376, avg_loss: 5.9848, lr: 2.7488e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.98it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.28it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.24it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.56it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.15it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.97it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.76it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 37%|█████████████████████████████▌                                                  | 378/1024 [06:09<10:18,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  5%|██                               

step: 380, avg_loss: 6.0290, lr: 2.7408e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.67it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.13it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.08it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.22it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.90it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.88it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.65it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.34it/s][A
 37%|█████████████████████████████▊                                                  | 382/1024 [06:13<10:30,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.3101




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.51it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.85it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.80it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.45it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.17it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.04it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.90it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.32it/s][A
 38%|██████████████████████████████                                                  | 385/1024 [06:16<10:42,  1.00s/it]

step: 384, avg_loss: 6.0648, lr: 2.7326e-04
generating text...
Your house is Slytherin! and and � …’ in -’s, and a and. ”,. They he from; the with and his had at for,. They his- that, “ and Harry a
Your house is Slytherin!”’ with as all, he!’”



And him, out his of the into for all-, for the a…… you, Harry it was to,!
Your house is Slytherin! was and, and’s�? he a… had the for and the, his

Harry a,.



Harry?
� be not was the said, to.







  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:02, 26.59it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 42.08it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 50.30it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 55.98it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 59.96it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 62.59it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 64.26it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.42it/s][A
 38%|██████████████████████████████▏                                                 | 386/1024 [06:17<11:04,  1.04s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 388, avg_loss: 6.5585, lr: 2.7243e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.80it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.24it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.59it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.32it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.24it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.08it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.99it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.30it/s][A
 38%|██████████████████████████████▍                                                 | 390/1024 [06:21<10:10,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0435




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.44it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.70it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.37it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.10it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 67.51it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 67.92it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 67.96it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.70it/s][A
 38%|██████████████████████████████▋                                                 | 393/1024 [06:23<10:13,  1.03it/s]

step: 392, avg_loss: 5.9042, lr: 2.7159e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.80it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.65it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.84it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.35it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.20it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.18it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.07it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.45it/s][A
 38%|██████████████████████████████▊                                                 | 394/1024 [06:24<10:05,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 396, avg_loss: 5.7530, lr: 2.7074e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.13it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.78it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.87it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.74it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.60it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.22it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.09it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.50it/s][A
 39%|███████████████████████████████                                                 | 398/1024 [06:28<09:59,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.9995




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.32it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.18it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.29it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.88it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.58it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.14it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.14it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.59it/s][A
 39%|███████████████████████████████▎                                                | 401/1024 [06:31<10:22,  1.00it/s]

step: 400, avg_loss: 5.9949, lr: 2.6988e-04
generating text...
Your house is Slytherin!
� he, on a; you for all” said Hermione”
”.
The you you. Harry and the that’
 He on“You. on have that and him
Your house is Slytherin!�. it he in Ron his.
 he this he the,”
 it to to and a his that,Harry the’So to!’
”

 Ron,”
Your house is Slytherin! Harry the?�
“ Harry. �� I said was the back.�… a.He I”'.”The.”� that his the first, with from. And





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.46it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.93it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.99it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 47.51it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 54.28it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 58.07it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 60.94it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.06it/s][A
 39%|███████████████████████████████▍                                                | 402/1024 [06:32<10:33,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 404, avg_loss: 6.0387, lr: 2.6901e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.69it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.05it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.94it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.57it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.63it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.35it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.27it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 58.05it/s][A
 40%|███████████████████████████████▋                                                | 406/1024 [06:36<10:24,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.9337




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.12it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.69it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.97it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.51it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.22it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.33it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.08it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.42it/s][A
 40%|███████████████████████████████▉                                                | 409/1024 [06:39<09:51,  1.04it/s]

step: 408, avg_loss: 6.5013, lr: 2.6812e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.09it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.44it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.34it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.70it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.37it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.17it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.14it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.69it/s][A
 40%|████████████████████████████████                                                | 410/1024 [06:40<09:45,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 412, avg_loss: 5.8986, lr: 2.6722e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 71.64it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.14it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.70it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.43it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.18it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.18it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.15it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.36it/s][A
 40%|████████████████████████████████▎                                               | 414/1024 [06:44<09:45,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.6275




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.48it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.35it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.35it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.15it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.19it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.37it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.44it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.85it/s][A
 41%|████████████████████████████████▌                                               | 417/1024 [06:47<10:12,  1.01s/it]

step: 416, avg_loss: 5.8064, lr: 2.6631e-04
generating text...
Your house is Slytherin!“ his not in't of at said- he?”

 He to were, Hermione, the she.


” of the was it of a to as. There to him�
Your house is Slytherin! he, and, Harry her you- as you and I Harry I the.
’

�rid had.
’s to.
Harry the, but and the is that, had to
Your house is Slytherin! I the there Harry to do them the be said the -! to I Harry the at Is for you. He“� not, the,




 of, but,s.






  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.59it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.94it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.68it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.25it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 56.93it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 60.06it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 62.52it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.75it/s][A
 41%|████████████████████████████████▋                                               | 418/1024 [06:48<10:10,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 420, avg_loss: 5.8933, lr: 2.6539e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.06it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.03it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.21it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.09it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.97it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.00it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.77it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.28it/s][A
 41%|████████████████████████████████▉                                               | 422/1024 [06:52<09:42,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.3960




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.72it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.59it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.76it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.18it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.88it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.88it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.03it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.24it/s][A
 42%|█████████████████████████████████▏                                              | 425/1024 [06:55<09:42,  1.03it/s]

step: 424, avg_loss: 6.0139, lr: 2.6446e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.79it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.20it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.80it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.29it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.77it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.62it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.62it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.21it/s][A
 42%|█████████████████████████████████▎                                              | 426/1024 [06:56<09:35,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 428, avg_loss: 6.4248, lr: 2.6351e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.25it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.76it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.66it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.23it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.06it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.99it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.91it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 42%|█████████████████████████████████▌                                              | 430/1024 [07:00<09:33,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.2283




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.47it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.16it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 46.06it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 52.34it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 56.94it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 60.50it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 63.05it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.94it/s][A
 42%|█████████████████████████████████▊                                              | 433/1024 [07:03<10:09,  1.03s/it]

step: 432, avg_loss: 6.0605, lr: 2.6255e-04
generating text...
Your house is Slytherin!�
' was“ a him and!” he you been all” said as all- to to to.“� Harry with as Harry,.rid.


 the.�
Your house is Slytherin!”


� Harry Harry’s that-�He?” the you.”


““ Harry, to had a to the of you the.He,
Your house is Slytherin! it had of in you, had at of I’
 Then the -.


�’




’ Harry,“
� a, on,” He





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.35it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.16it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.69it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.48it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.41it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.29it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.38it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.86it/s][A
 42%|█████████████████████████████████▉                                              | 434/1024 [07:04<09:53,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 436, avg_loss: 5.8918, lr: 2.6159e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.24it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.63it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.97it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 70.03it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.55it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.60it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.46it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.87it/s][A
 43%|██████████████████████████████████▏                                             | 438/1024 [07:07<09:23,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.8755




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.37it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.25it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.22it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 53.97it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 58.22it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 61.15it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 63.25it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.66it/s][A
 43%|██████████████████████████████████▍                                             | 441/1024 [07:10<09:31,  1.02it/s]

step: 440, avg_loss: 5.8482, lr: 2.6061e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.03it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.13it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.06it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.51it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.21it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.16it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.17it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.43it/s][A
 43%|██████████████████████████████████▌                                             | 442/1024 [07:11<09:22,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 444, avg_loss: 6.0275, lr: 2.5962e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.86it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.02it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.21it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.78it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.66it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.18it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.08it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.51it/s][A
 44%|██████████████████████████████████▊                                             | 446/1024 [07:15<09:17,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.6329




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.30it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.16it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.60it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.22it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.94it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.90it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.99it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.17it/s][A
 44%|███████████████████████████████████                                             | 449/1024 [07:18<09:58,  1.04s/it]

step: 448, avg_loss: 6.4506, lr: 2.5862e-04
generating text...
Your house is Slytherin! Harry the,, it; had I his,s was?“

. in you in the for, it her were to the,�”
Harry the had been was not, it,
Your house is Slytherin! He all and down a a.
They Harry at the door.


The.”





 He the time be were the Great in.’s. The. �
Your house is Slytherin!’


“

 Harry in Harry of not was to his's them out?’ to?'s.

They“ ' out a,’s you,”





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.55it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.64it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.33it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.41it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.41it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.19it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.15it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.69it/s][A
 44%|███████████████████████████████████▏                                            | 450/1024 [07:19<09:38,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 452, avg_loss: 6.4285, lr: 2.5760e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  8%|███▍                                        | 5/64 [00:00<00:02, 24.40it/s][A
 20%|████████▋                                  | 13/64 [00:00<00:01, 45.07it/s][A
 31%|█████████████▍                             | 20/64 [00:00<00:00, 53.53it/s][A
 42%|██████████████████▏                        | 27/64 [00:00<00:00, 58.79it/s][A
 53%|██████████████████████▊                    | 34/64 [00:00<00:00, 62.21it/s][A
 64%|███████████████████████████▌               | 41/64 [00:00<00:00, 63.48it/s][A
 77%|████████████████████████████████▉          | 49/64 [00:00<00:00, 65.70it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 65.06it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 59.98it/s][A
 44%|███████████████████████████████████▍                                            | 454/1024 [07:23<09:27,  1.00it/s]
  0%|                                 

val loss: 6.0223




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.80it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.80it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.66it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.35it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.09it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.82it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.88it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.27it/s][A
 45%|███████████████████████████████████▋                                            | 457/1024 [07:26<09:04,  1.04it/s]

step: 456, avg_loss: 5.8000, lr: 2.5658e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.85it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 53.43it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 58.96it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 62.15it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 64.31it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 65.72it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 66.56it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.94it/s][A
 45%|███████████████████████████████████▊                                            | 458/1024 [07:27<09:10,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 460, avg_loss: 5.6832, lr: 2.5554e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.10it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 49.80it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 56.10it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 60.36it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 62.77it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 64.70it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 65.76it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.62it/s][A
 45%|████████████████████████████████████                                            | 462/1024 [07:31<09:10,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.8195




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 69.17it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.07it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.34it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.06it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.21it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.25it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.02it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.14it/s][A
 45%|████████████████████████████████████▎                                           | 465/1024 [07:34<09:35,  1.03s/it]

step: 464, avg_loss: 5.9816, lr: 2.5450e-04
generating text...
Your house is Slytherin!
Harry the.

 it and in on the,“Well,” said her as; said.
They the said and to not them in and toly, and that you his. There
Your house is Slytherin!”
 in a and
It had they have., Ron, but. He was a!’s him it �’
“I his�What -


 They,�
Your house is Slytherin!”

A in a not and Harry,“
� I,’, said was up what with have, with a of his have,” said’t of but his they





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.62it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.65it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.41it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.75it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.46it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.25it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.11it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.68it/s][A
 46%|████████████████████████████████████▍                                           | 466/1024 [07:35<09:18,  1.00s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 468, avg_loss: 6.2621, lr: 2.5344e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.26it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.17it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.98it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.36it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.25it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.25it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.22it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.41it/s][A
 46%|████████████████████████████████████▋                                           | 470/1024 [07:39<09:08,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.9355




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.21it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.88it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.09it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.35it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.82it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.70it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.44it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.20it/s][A
 46%|████████████████████████████████████▉                                           | 473/1024 [07:42<08:46,  1.05it/s]

step: 472, avg_loss: 6.3739, lr: 2.5237e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.98it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.77it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.60it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.14it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.21it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.91it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.93it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.11it/s][A
 46%|█████████████████████████████████████                                           | 474/1024 [07:43<09:03,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 476, avg_loss: 5.7237, lr: 2.5129e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.40it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.75it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.69it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.35it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.02it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.18it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.22it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 65.52it/s][A
 47%|█████████████████████████████████████▎                                          | 478/1024 [07:47<08:47,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0551




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.31it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 69.96it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.37it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.14it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.08it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.16it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 67.94it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.38it/s][A
 47%|█████████████████████████████████████▌                                          | 481/1024 [07:50<09:11,  1.02s/it]

step: 480, avg_loss: 5.7368, lr: 2.5020e-04
generating text...
Your house is Slytherin!” said, at that he of the to the with that Harry,, with’ on the- at? to his the Ministry her his at the and would him a, him, who a,'s
Your house is Slytherin! “It not it’ for?”

 said,“” “What Hermione at for the room to” He it that,




”The?
Your house is Slytherin!”,?“I had to Harry he was it as, the by and and and to do said and a not the rest.
�s they on,, in the“ in the she





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.95it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.93it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.84it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.56it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.35it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.10it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.96it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.79it/s][A
 47%|█████████████████████████████████████▋                                          | 482/1024 [07:51<09:10,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 484, avg_loss: 5.7837, lr: 2.4910e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.45it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.97it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.92it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.47it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.02it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.23it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.15it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.42it/s][A
 47%|█████████████████████████████████████▉                                          | 486/1024 [07:55<08:50,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.4120




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.81it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.10it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.29it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.63it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.25it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.89it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.49it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.20it/s][A
 48%|██████████████████████████████████████▏                                         | 489/1024 [07:57<08:30,  1.05it/s]

step: 488, avg_loss: 5.8513, lr: 2.4799e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  3%|█▍                                          | 2/64 [00:00<00:07,  8.56it/s][A
 16%|██████▋                                    | 10/64 [00:00<00:01, 34.06it/s][A
 27%|███████████▍                               | 17/64 [00:00<00:01, 46.01it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 53.35it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 58.31it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 61.45it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 63.55it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 65.05it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 56.39it/s][A
 48%|██████████████████████████████████████▎                                         | 490/1024 [07:59<09:01,  1.01s/it]
  0%|                                 

step: 492, avg_loss: 6.3587, lr: 2.4687e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.52it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.09it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.84it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.34it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.10it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.88it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.73it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.23it/s][A
 48%|██████████████████████████████████████▌                                         | 494/1024 [08:02<08:27,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.0301




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.44it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.57it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.19it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.74it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.70it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.40it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.09it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.67it/s][A
 49%|██████████████████████████████████████▊                                         | 497/1024 [08:05<08:51,  1.01s/it]

step: 496, avg_loss: 5.6731, lr: 2.4573e-04
generating text...
Your house is Slytherin! The, I?




 She him“I He —“And the whole's!’s back, and it to the at her whatHarry, and his it, I at
Your house is Slytherin!” He.“ ' and he, but said. He “� as.







They toly in Harry Hermione intos you.



 “
Your house is Slytherin!


“



“This?

“And’s,Harry said
�YouI at the? He’'t her, the around it”





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.71it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.89it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.55it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.66it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.28it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.06it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.08it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.53it/s][A
 49%|██████████████████████████████████████▉                                         | 498/1024 [08:06<08:38,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 500, avg_loss: 5.5149, lr: 2.4459e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.61it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.21it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.06it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.35it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.96it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.79it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.77it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.32it/s][A
 49%|███████████████████████████████████████▏                                        | 502/1024 [08:10<08:25,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.6094




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.53it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.07it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.87it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.11it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.84it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.97it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.75it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.06it/s][A
 49%|███████████████████████████████████████▍                                        | 505/1024 [08:13<08:19,  1.04it/s]

step: 504, avg_loss: 5.7106, lr: 2.4343e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.31it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.07it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.91it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 45.83it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 51.73it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 56.25it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 59.79it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.10it/s][A
 49%|███████████████████████████████████████▌                                        | 506/1024 [08:14<08:37,  1.00it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 508, avg_loss: 5.8231, lr: 2.4227e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.37it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.30it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.39it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.62it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.38it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.16it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 43.36it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.64it/s][A
 50%|███████████████████████████████████████▊                                        | 510/1024 [08:18<08:37,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.4356




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 71.34it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.75it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.59it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.95it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.68it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.02it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.12it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.12it/s][A
 50%|████████████████████████████████████████                                        | 513/1024 [08:21<08:35,  1.01s/it]

step: 512, avg_loss: 6.3155, lr: 2.4109e-04
generating text...
Your house is Slytherin! in the room at the have what. had as of I to of, it the was to of in her. The he into, had. ' had of Potter not? The, the a —



Your house is Slytherin!” said though?” to have at I of the have not�


And”
'�It’,� an” said her from“You�Harry her were
Your house is Slytherin!�,

“I,“I's a was a and“s,” said all. “It could of him?” said was and Ron“’?





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.53it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.71it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.44it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.76it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.43it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.00it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.23it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.84it/s][A
 50%|████████████████████████████████████████▏                                       | 514/1024 [08:22<08:23,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 516, avg_loss: 5.6818, lr: 2.3991e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.25it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.96it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.88it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.56it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.09it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.04it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 69.03it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.38it/s][A
 51%|████████████████████████████████████████▍                                       | 518/1024 [08:26<08:10,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.6159




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.08it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.35it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.12it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.22it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.07it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.68it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.33it/s][A
 51%|████████████████████████████████████████▋                                       | 521/1024 [08:29<08:03,  1.04it/s]

step: 520, avg_loss: 5.5446, lr: 2.3871e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.16it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.05it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.09it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 56.28it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 59.89it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 62.45it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 63.87it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.43it/s][A
 51%|████████████████████████████████████████▊                                       | 522/1024 [08:30<08:09,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 524, avg_loss: 5.6463, lr: 2.3751e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.97it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.97it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 68.55it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.38it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.42it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.38it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.58it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.95it/s][A
 51%|█████████████████████████████████████████                                       | 526/1024 [08:34<07:58,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.4212




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.51it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.49it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.20it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.41it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.09it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.01it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.95it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.41it/s][A
 52%|█████████████████████████████████████████▎                                      | 529/1024 [08:37<08:21,  1.01s/it]

step: 528, avg_loss: 5.8212, lr: 2.3629e-04
generating text...
Your house is Slytherin!� she.
 Harry him”



’ The, said Ron, I



 said Ron and would on his in the time, to. said, I Harry,. He
Your house is Slytherin!’ “You to, and his his� said was a?”?’s of a way not into a bit, Ron.The”

 Harry,“What’
Your house is Slytherin!

” said


’So, at was was with this I Harry, they of a the,t you all and’ and his… said said the at the castle the Great,





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.64it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.33it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.20it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.59it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.96it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.85it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.83it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.31it/s][A
 52%|█████████████████████████████████████████▍                                      | 530/1024 [08:38<08:09,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 532, avg_loss: 6.1980, lr: 2.3506e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.18it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.64it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.76it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.42it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.07it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.97it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.95it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.29it/s][A
 52%|█████████████████████████████████████████▋                                      | 534/1024 [08:42<07:55,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.6202




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.06it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:01, 41.69it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 49.86it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 55.66it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 59.65it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 62.53it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 64.22it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 60.50it/s][A
 52%|█████████████████████████████████████████▉                                      | 537/1024 [08:45<08:03,  1.01it/s]

step: 536, avg_loss: 5.7895, lr: 2.3383e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.92it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.43it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.26it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.68it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.15it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.78it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.48it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.14it/s][A
 53%|██████████████████████████████████████████                                      | 538/1024 [08:46<07:54,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 540, avg_loss: 5.6378, lr: 2.3258e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.89it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.77it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.83it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.12it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.02it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.87it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.92it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.34it/s][A
 53%|██████████████████████████████████████████▎                                     | 542/1024 [08:49<07:40,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.3533




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 69.43it/s][A
 23%|██████████                                 | 15/64 [00:00<00:00, 68.10it/s][A
 34%|██████████████▊                            | 22/64 [00:00<00:00, 46.80it/s][A
 45%|███████████████████▍                       | 29/64 [00:00<00:00, 53.25it/s][A
 56%|████████████████████████▏                  | 36/64 [00:00<00:00, 57.85it/s][A
 67%|████████████████████████████▉              | 43/64 [00:00<00:00, 60.99it/s][A
 78%|█████████████████████████████████▌         | 50/64 [00:00<00:00, 63.03it/s][A
 89%|██████████████████████████████████████▎    | 57/64 [00:00<00:00, 64.73it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 61.12it/s][A
 53%|██████████████████████████████████████████▌                                     | 545/1024 [08:53<08:15,  1.03s/it]

step: 544, avg_loss: 5.6463, lr: 2.3132e-04
generating text...
Your house is Slytherin!”s toly. I’


’t“
” said.


“I.“� not of the of, and was a said who that
Your house is Slytherin!
M, then,' so- had back there to it and- into, “I“No that he out of that his eyes’ was! We the”
 The when; had Hermione
Your house is Slytherin!’t’s from, ’s, the castle the time her up.

’ with,� his� said said in his
 said'”

”

�





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.11it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.96it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.00it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.26it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.37it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.10it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.23it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.89it/s][A
 53%|██████████████████████████████████████████▋                                     | 546/1024 [08:53<08:01,  1.01s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 548, avg_loss: 5.8387, lr: 2.3006e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.38it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.44it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.25it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.67it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.21it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.07it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.01it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.60it/s][A
 54%|██████████████████████████████████████████▉                                     | 550/1024 [08:57<07:38,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.2683




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.06it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.08it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.91it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.48it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.38it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.00it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.57it/s][A
 54%|███████████████████████████████████████████▏                                    | 553/1024 [09:00<07:45,  1.01it/s]

step: 552, avg_loss: 6.2444, lr: 2.2878e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.29it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.82it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.76it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.35it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.17it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 69.07it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.78it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.24it/s][A
 54%|███████████████████████████████████████████▎                                    | 554/1024 [09:01<07:37,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 556, avg_loss: 6.2143, lr: 2.2749e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.58it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.60it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.92it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.25it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.30it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.14it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.55it/s][A
 54%|███████████████████████████████████████████▌                                    | 558/1024 [09:05<07:33,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 6.4522




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.01it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.21it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.18it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.66it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.49it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.18it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.01it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.59it/s][A
 55%|███████████████████████████████████████████▊                                    | 561/1024 [09:08<07:39,  1.01it/s]

step: 560, avg_loss: 5.6194, lr: 2.2620e-04
generating text...
Your house is Slytherin!
Harry you, he would was a-.
Harry she said Ron.

“s to.“S said in a go it with for,
'Don the night.

�
Your house is Slytherin!” said. If the same him. Harry was be that to. But to were, the castle have been the first in in the said Ron not the moment. Weasley.

“It you,
Your house is Slytherin! “ said,� not as.” He “s to I He of them the said
. “No, “What”



“Good a few be said





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  9%|████▏                                       | 6/64 [00:00<00:01, 46.22it/s][A
 22%|█████████▍                                 | 14/64 [00:00<00:00, 60.25it/s][A
 33%|██████████████                             | 21/64 [00:00<00:00, 64.22it/s][A
 44%|██████████████████▊                        | 28/64 [00:00<00:00, 65.94it/s][A
 55%|███████████████████████▌                   | 35/64 [00:00<00:00, 66.63it/s][A
 66%|████████████████████████████▏              | 42/64 [00:00<00:00, 67.41it/s][A
 77%|████████████████████████████████▉          | 49/64 [00:00<00:00, 68.19it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 68.51it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 66.24it/s][A
 55%|███████████████████████████████████████████▉                                    | 562/1024 [09:09<07:36,  1.01it/s]
  0%|                                 

step: 564, avg_loss: 5.5118, lr: 2.2489e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
  8%|███▍                                        | 5/64 [00:00<00:01, 32.15it/s][A
 20%|████████▋                                  | 13/64 [00:00<00:00, 52.07it/s][A
 31%|█████████████▍                             | 20/64 [00:00<00:00, 58.55it/s][A
 42%|██████████████████▏                        | 27/64 [00:00<00:00, 62.05it/s][A
 53%|██████████████████████▊                    | 34/64 [00:00<00:00, 64.37it/s][A
 64%|███████████████████████████▌               | 41/64 [00:00<00:00, 65.57it/s][A
 75%|████████████████████████████████▎          | 48/64 [00:00<00:00, 66.58it/s][A
 86%|████████████████████████████████████▉      | 55/64 [00:00<00:00, 66.98it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.02it/s][A
 55%|████████████████████████████████████████████▏                                   | 566/1024 [09:13<07:30,  1.02it/s]
  0%|                                 

val loss: 6.0015




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.49it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.76it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.74it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.64it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 69.01it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.91it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.73it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.19it/s][A
 56%|████████████████████████████████████████████▍                                   | 569/1024 [09:16<07:29,  1.01it/s]

step: 568, avg_loss: 5.7856, lr: 2.2358e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.05it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.24it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.23it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.18it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.74it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.57it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.52it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 68.88it/s][A
 56%|████████████████████████████████████████████▌                                   | 570/1024 [09:17<07:22,  1.03it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 572, avg_loss: 6.1331, lr: 2.2225e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.95it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.37it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.35it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.95it/s][A
 62%|██████████████████████████▉                | 40/64 [00:00<00:00, 69.66it/s][A
 73%|███████████████████████████████▌           | 47/64 [00:00<00:00, 69.60it/s][A
 84%|████████████████████████████████████▎      | 54/64 [00:00<00:00, 69.42it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.76it/s][A
 56%|████████████████████████████████████████████▊                                   | 574/1024 [09:21<07:23,  1.01it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.1096




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 11%|████▊                                       | 7/64 [00:00<00:00, 69.45it/s][A
 22%|█████████▍                                 | 14/64 [00:00<00:00, 67.38it/s][A
 33%|██████████████                             | 21/64 [00:00<00:00, 67.13it/s][A
 44%|██████████████████▊                        | 28/64 [00:00<00:00, 65.49it/s][A
 55%|███████████████████████▌                   | 35/64 [00:00<00:00, 66.48it/s][A
 66%|████████████████████████████▏              | 42/64 [00:00<00:00, 66.43it/s][A
 77%|████████████████████████████████▉          | 49/64 [00:00<00:00, 67.14it/s][A
 88%|█████████████████████████████████████▋     | 56/64 [00:00<00:00, 66.52it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 67.04it/s][A
 56%|█████████████████████████████████████████████                                   | 577/1024 [09:24<07:32,  1.01s/it]

step: 576, avg_loss: 6.1672, lr: 2.2092e-04
generating text...
Your house is Slytherin! “”




” said Professor.
Herm the end
 Harry. You's other the floor for the had in a wall“I“Now her to get's,'
Your house is Slytherin!’

’ as his a the most was out of a Ministry't.’s back up, as the house, “What,“Are, which.

“He
Your house is Slytherin!’ Harry his�I Harry, of the floor of a,’s,�


 It him,?“I I�.

The it. Thet his� in —





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 72.12it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.33it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.19it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.18it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.60it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.51it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 47.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 59.95it/s][A
 56%|█████████████████████████████████████████████▏                                  | 578/1024 [09:25<07:40,  1.03s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 580, avg_loss: 5.5544, lr: 2.1958e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.05it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.94it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.59it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.01it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.75it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.81it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.73it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 64.77it/s][A
 57%|█████████████████████████████████████████████▍                                  | 582/1024 [09:29<07:13,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.5769




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.33it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.48it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.94it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.51it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.44it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.12it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.87it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.43it/s][A
 57%|█████████████████████████████████████████████▋                                  | 585/1024 [09:32<07:04,  1.03it/s]

step: 584, avg_loss: 5.5706, lr: 2.1822e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.01it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.07it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.06it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.56it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 68.97it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.94it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.81it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 63.79it/s][A
 57%|█████████████████████████████████████████████▊                                  | 586/1024 [09:33<07:10,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 588, avg_loss: 5.6147, lr: 2.1686e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.26it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.55it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.39it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.79it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.58it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.73it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.43it/s][A
 58%|██████████████████████████████████████████████                                  | 590/1024 [09:37<07:03,  1.02it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.5767




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.40it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.87it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.03it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.38it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.11it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.07it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.04it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.37it/s][A
 58%|██████████████████████████████████████████████▎                                 | 593/1024 [09:40<07:33,  1.05s/it]

step: 592, avg_loss: 5.7404, lr: 2.1549e-04
generating text...
Your house is Slytherin! Professor up a do and Potter, then.
The's a room you's face for's;, when you you… “He, and Ron -Harry of the,”
 And to,'s
Your house is Slytherin!”

 het and to his was and the other had?“What to the to the table the top from him.
”


H said.
“I off,
Your house is Slytherin!
The can said, I that.

“I“Well them not to Harry it of the dark. as?’
“Well,,” said.“� it





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 76.26it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.87it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.40it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.54it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.24it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.94it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.13it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.64it/s][A
 58%|██████████████████████████████████████████████▍                                 | 594/1024 [09:41<07:17,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 596, avg_loss: 6.1630, lr: 2.1411e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.30it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.01it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.31it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.64it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.35it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.29it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.33it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.66it/s][A
 58%|██████████████████████████████████████████████▋                                 | 598/1024 [09:44<06:46,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.7151




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.81it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.22it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.09it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.30it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.31it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 69.26it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 69.07it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.47it/s][A
 59%|██████████████████████████████████████████████▉                                 | 601/1024 [09:47<06:47,  1.04it/s]

step: 600, avg_loss: 5.5288, lr: 2.1272e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 75.62it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.49it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 70.16it/s][A
 50%|█████████████████████▌                     | 32/64 [00:00<00:00, 69.31it/s][A
 61%|██████████████████████████▏                | 39/64 [00:00<00:00, 69.18it/s][A
 72%|██████████████████████████████▉            | 46/64 [00:00<00:00, 68.81it/s][A
 83%|███████████████████████████████████▌       | 53/64 [00:00<00:00, 68.89it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.29it/s][A
 59%|███████████████████████████████████████████████                                 | 602/1024 [09:48<06:43,  1.05it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 604, avg_loss: 5.3557, lr: 2.1132e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.72it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.67it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.73it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.25it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.93it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.82it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 68.70it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 69.13it/s][A
 59%|███████████████████████████████████████████████▎                                | 606/1024 [09:52<06:42,  1.04it/s]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

val loss: 5.6252




  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 73.97it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 70.71it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.40it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 68.29it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.18it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 67.76it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 66.84it/s][A
100%|███████████████████████████████████████████| 64/64 [00:00<00:00, 67.93it/s][A
 59%|███████████████████████████████████████████████▌                                | 609/1024 [09:55<07:01,  1.02s/it]

step: 608, avg_loss: 5.5042, lr: 2.0991e-04
generating text...
Your house is Slytherin!” The could, Dumbledore. He into the fire an the of his his their you with, it it oft to by.

 He in I with was her on the’ to, and said
Your house is Slytherin!’ as.

'Yes’ said Ron’s and, the Ministry’ve with be; it, Harry heYou Harry.

’s. Then of his?�
Your house is Slytherin!” thought in the same from the night of the’
He their hand, with I've a at I his again…

”
‘Don’ said it, the dark a





  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 71.73it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 67.94it/s][A
 36%|███████████████▍                           | 23/64 [00:00<00:00, 41.67it/s][A
 47%|████████████████████▏                      | 30/64 [00:00<00:00, 48.93it/s][A
 58%|████████████████████████▊                  | 37/64 [00:00<00:00, 54.08it/s][A
 69%|█████████████████████████████▌             | 44/64 [00:00<00:00, 57.40it/s][A
 80%|██████████████████████████████████▎        | 51/64 [00:00<00:00, 60.60it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 58.31it/s][A
 60%|███████████████████████████████████████████████▋                                | 610/1024 [09:56<07:12,  1.05s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           

step: 612, avg_loss: 5.6277, lr: 2.0850e-04



  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                                      | 8/64 [00:00<00:00, 74.27it/s][A
 25%|██████████▊                                | 16/64 [00:00<00:00, 71.18it/s][A
 38%|████████████████▏                          | 24/64 [00:00<00:00, 69.87it/s][A
 48%|████████████████████▊                      | 31/64 [00:00<00:00, 69.17it/s][A
 59%|█████████████████████████▌                 | 38/64 [00:00<00:00, 68.91it/s][A
 70%|██████████████████████████████▏            | 45/64 [00:00<00:00, 68.81it/s][A
 81%|██████████████████████████████████▉        | 52/64 [00:00<00:00, 42.57it/s][A
100%|███████████████████████████████████████████| 64/64 [00:01<00:00, 57.07it/s][A
 60%|███████████████████████████████████████████████▉                                | 614/1024 [10:00<06:59,  1.02s/it]
  0%|                                                    | 0/64 [00:00<?, ?it/s][A
 12%|█████▌                           