In [None]:
!pip install -q wandb torchmetrics

[?25l[K     |▏                               | 10 kB 24.0 MB/s eta 0:00:01[K     |▍                               | 20 kB 27.2 MB/s eta 0:00:01[K     |▋                               | 30 kB 24.0 MB/s eta 0:00:01[K     |▊                               | 40 kB 18.7 MB/s eta 0:00:01[K     |█                               | 51 kB 11.2 MB/s eta 0:00:01[K     |█▏                              | 61 kB 12.9 MB/s eta 0:00:01[K     |█▎                              | 71 kB 12.9 MB/s eta 0:00:01[K     |█▌                              | 81 kB 12.9 MB/s eta 0:00:01[K     |█▊                              | 92 kB 14.1 MB/s eta 0:00:01[K     |█▉                              | 102 kB 13.7 MB/s eta 0:00:01[K     |██                              | 112 kB 13.7 MB/s eta 0:00:01[K     |██▎                             | 122 kB 13.7 MB/s eta 0:00:01[K     |██▍                             | 133 kB 13.7 MB/s eta 0:00:01[K     |██▋                             | 143 kB 13.7 MB/s eta 0:

In [None]:
!wget https://ai.tencent.com/ailab/nlp/en/data/tencent-ailab-embedding-zh-d200-v0.2.0-s.tar.gz

--2022-04-03 04:09:21--  https://ai.tencent.com/ailab/nlp/en/data/tencent-ailab-embedding-zh-d200-v0.2.0-s.tar.gz
Resolving ai.tencent.com (ai.tencent.com)... 116.128.164.87
Connecting to ai.tencent.com (ai.tencent.com)|116.128.164.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1544075399 (1.4G) [application/octet-stream]
Saving to: ‘tencent-ailab-embedding-zh-d200-v0.2.0-s.tar.gz’


2022-04-03 04:10:28 (21.9 MB/s) - ‘tencent-ailab-embedding-zh-d200-v0.2.0-s.tar.gz’ saved [1544075399/1544075399]



In [None]:
import pickle
import random
import pytz
from datetime import datetime
import numpy as np
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Preprocessing Embeddings

In [None]:
import gzip
import tarfile

# Stream the embedding table straight out of the downloaded .tar.gz without
# unpacking it to disk. Both the gzip stream and the tar archive are opened as
# context managers so that they are closed once the scan is finished.
with gzip.open("tencent-ailab-embedding-zh-d200-v0.2.0-s.tar.gz", "r") as gopen:
  with tarfile.open(fileobj=gopen) as tar:
    # The first archive member is skipped and the second one is read.
    # NOTE(review): presumably member 1 is a directory entry and member 2 the
    # embedding text file -- confirm against the archive layout.
    tar.next()
    tinfo = tar.next()
    fin = tar.extractfile(tinfo)
    # Header line: "<number of rows> <embedding dimension>".
    nvocab, hdim = fin.readline().split()
    # Scan at most the first 500k rows, and never more rows than the header
    # says the file contains.
    N = min(500_000, int(nvocab))
    embs = np.zeros((N, int(hdim)))
    vocabs = [""] * N
    counter = 0

    for i in tqdm(range(N)):
      ln = fin.readline().decode().strip()
      if not ln: continue
      toks = ln.split(" ")
      # Keep only entries whose token is one or two characters long.
      if len(toks[0]) > 2: continue
      vocabs[counter] = toks[0]
      embs[counter, :] = [float(x) for x in toks[1:]]
      counter += 1

# Drop the unused tail of the preallocated buffers.
embs = embs[:counter, :]
vocabs = vocabs[:counter]

# L2-normalise every row. A row with zero norm is left as all zeros instead of
# being turned into NaNs by a division by zero.
norms = np.linalg.norm(embs, axis=1)[:, np.newaxis]
norms[norms == 0] = 1.0
embs = embs / norms

  0%|          | 0/500000 [00:00<?, ?it/s]

In [None]:
# (number of one-character entries, number of two-character entries)
tuple(sum(len(word) == size for word in vocabs) for size in (1, 2))

(7954, 121584)

In [None]:
## write used embeddings to pickle
# with open("drive/MyDrive/LangOn/convmorph/convmorph_dataset.pkl", "wb") as fout:
#   pickle.dump((vocabs, embs), fout)

In [None]:
# Load the (vocabs, embs) pair from the pickle on Drive; this overwrites
# whatever values these two names held before this cell ran.
# NOTE: pickle.load can execute arbitrary code -- only load a file you trust.
with open("drive/MyDrive/LangOn/convmorph/convmorph_dataset.pkl", "rb") as fin:
  (vocabs, embs) = pickle.load(fin)

In [None]:
class ConvmorphDataset(Dataset):
  """Words paired with the embeddings of their first two characters.

  Every vocabulary entry of length >= 2 whose first and second characters are
  themselves vocabulary entries becomes one record (a dict) holding the word,
  the two characters, and the three corresponding embedding rows.
  """

  def __init__(self, vocabs, embs):
    self.build_dataset(vocabs, embs)

  def build_dataset(self, vocabs, embs):
    """Fill ``self.data`` with one record per usable word.

    ``vocabs[i]`` is the token whose embedding is ``embs[i]``.
    """
    # token -> row index into embs
    row_of = {}
    for row, token in enumerate(vocabs):
      row_of[token] = row

    records = []
    for idx, word in enumerate(tqdm(vocabs, desc="building dataset")):
      if len(word) < 2:
        continue
      first, second = word[0], word[1]
      # Both constituent characters need an embedding of their own.
      if first not in row_of or second not in row_of:
        continue
      records.append({
          "word": word,
          "const1": first,
          "const2": second,
          "word_vec": embs[idx],
          "const1_vec": embs[row_of[first]],
          "const2_vec": embs[row_of[second]],
      })
    self.data = records

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    return self.data[idx]

In [None]:
# Build the word/constituent dataset from the loaded vocabulary and embeddings;
# the trailing expression displays how many records were kept.
cm_ds = ConvmorphDataset(vocabs, embs)
len(cm_ds)

building dataset:   0%|          | 0/129538 [00:00<?, ?it/s]

120838

In [None]:
# Sanity check: the two constituent vectors of the first record, concatenated,
# hold 400 values and can therefore be folded into a 20x20 grid.
np.concatenate([cm_ds[0]["const1_vec"], cm_ds[0]["const2_vec"]]).reshape(20,20).shape

(20, 20)

In [None]:
class ConvmorphDatasetArc(Dataset):
  """Tensor view of a subset of a constituent dataset, ready for the conv model.

  Each item is a dict with
    * ``word_id``: position of the item within this subset (0, 1, 2, ...),
    * ``inputX``:  both constituent vectors concatenated and folded into a
                   float32 tensor of shape (1, 20, 20),
    * ``target``:  the word vector as a float32 tensor.
  """

  def __init__(self, cm_dataset, idxs):
    self.build_dataset(cm_dataset, idxs)

  def build_dataset(self, ds, idxs):
    """Materialise ``self.data`` from the records ``ds[i]`` for ``i`` in ``idxs``."""

    def to_item(serial, record):
      joined = np.concatenate([record["const1_vec"], record["const2_vec"]])
      return {
          "word_id": serial,
          "inputX": torch.tensor(joined.reshape(1, 20, 20), dtype=torch.float32),
          "target": torch.tensor(record["word_vec"], dtype=torch.float32),
      }

    self.data = [to_item(serial, ds[idx]) for serial, idx in enumerate(idxs)]

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    return self.data[idx]


In [None]:
# Shuffle all record indices with a fixed seed, then cut them 95% / 5% into
# train and test. A tail of the training indices, as long as the test set, is
# kept as a separate dataset for measuring accuracy on training data.
N = len(cm_ds)
rng = np.random.RandomState(123)
random_split = np.arange(N)
rng.shuffle(random_split)

n_train = int(N*.95)
train_idxs, test_idxs = random_split[:n_train], random_split[n_train:]

train_ds = ConvmorphDatasetArc(cm_ds, train_idxs)
test_ds = ConvmorphDatasetArc(cm_ds, test_idxs)
train_eval_ds = ConvmorphDatasetArc(cm_ds, train_idxs[-len(test_ds):])
print(len(train_idxs), len(test_idxs), len(train_eval_ds))

114796 6042 6042


In [None]:
# Persist the shuffled train/test index arrays to Drive as a single pickled dict
# with keys "train_idxs" and "test_idxs".
with open("drive/MyDrive/LangOn/convmorph/split_inidices_120k.pkl", "wb") as fout:
  pickle.dump({"train_idxs": train_idxs, "test_idxs": test_idxs}, fout)

## Model, Evaluation, and Training

In [None]:
from dataclasses import dataclass
import torch.nn.functional as F

@dataclass
class ConvmorphArcModelOutput:
  """Result bundle returned by ``ConvmorphArcModel.forward``."""
  # Mean squared error between pred_vec and the target. forward() stores
  # float('nan') here instead when it is called without a target.
  loss: torch.Tensor
  # Output of the final linear layer: 200 features per input row.
  pred_vec: torch.Tensor


class ConvmorphArcModel(nn.Module):
  """Convolutional regressor from a (1, 20, 20) input grid to a 200-d vector.

  Five unpadded 3x3 convolutions (128 channels each), each followed by a
  LayerNorm and a ReLU, shrink the grid 20 -> 18 -> 16 -> 14 -> 12 -> 10.
  A 2x2 max-pool then gives 128x5x5 features, which pass through a
  3000-unit tanh layer with dropout and a final linear projection to 200.

  Args:
    dropout: drop probability applied after the hidden tanh layer.
  """

  def __init__(self, dropout=0.1):
    super().__init__()

    # Each LayerNorm shape matches the output of the convolution before it.
    self.conv1 = nn.Conv2d(1, 128, 3)
    self.norm1 = nn.LayerNorm([128, 18, 18])
    self.conv2 = nn.Conv2d(128, 128, 3)
    self.norm2 = nn.LayerNorm([128, 16, 16])
    self.conv3 = nn.Conv2d(128, 128, 3)
    self.norm3 = nn.LayerNorm([128, 14, 14])
    self.conv4 = nn.Conv2d(128, 128, 3)
    self.norm4 = nn.LayerNorm([128, 12, 12])
    self.conv5 = nn.Conv2d(128, 128, 3)
    self.norm5 = nn.LayerNorm([128, 10, 10])

    self.pool1 = nn.MaxPool2d(2)
    self.fn1 = nn.Linear(128*5*5, 3000)
    self.drop1 = nn.Dropout(p=dropout)
    # The output layer keeps the name ``fn4`` (there is no fn2/fn3) so that
    # parameter names in state_dict stay the same as before.
    self.fn4 = nn.Linear(3000, 200)

  def forward(self, inputX, target=None, **kwargs):
    """Predict a vector for every input grid and, optionally, score it.

    Args:
      inputX: input batch; the LayerNorm shapes require (batch, 1, 20, 20).
      target: optional batch of reference vectors for the MSE loss.
      **kwargs: ignored; lets callers pass a whole batch dict (for example
        one that also carries ``word_id``) via ``model(**batch)``.

    Returns:
      ConvmorphArcModelOutput with ``pred_vec`` and ``loss``; ``loss`` is
      float('nan') when ``target`` is None.
    """
    z = inputX
    stages = (
        (self.conv1, self.norm1),
        (self.conv2, self.norm2),
        (self.conv3, self.norm3),
        (self.conv4, self.norm4),
        (self.conv5, self.norm5),
    )
    for conv, norm in stages:
      z = F.relu(norm(conv(z)), inplace=True)

    z = self.pool1(z).view(-1, 128*5*5)
    hidden = self.drop1(torch.tanh(self.fn1(z)))
    pred_vec = self.fn4(hidden)

    if target is None:
      return ConvmorphArcModelOutput(float('nan'), pred_vec)

    # Functional form of nn.MSELoss() with its default mean reduction; avoids
    # constructing a loss module on every forward pass.
    loss = F.mse_loss(pred_vec, target)
    return ConvmorphArcModelOutput(loss, pred_vec)
    

In [None]:
def compute_accuracy(data_loader, test_embs, model=None):
  """Top-1 retrieval accuracy of predicted vectors against a candidate matrix.

  For every item the predicted vector is scored against each row of
  ``test_embs`` by dot product; the prediction counts as correct when the
  best-scoring row index equals the item's ``word_id``.

  Args:
    data_loader: iterable of batch dicts of tensors. Each batch must contain
      ``word_id`` plus whatever the model's forward() accepts as keywords.
    test_embs: 2-D tensor of candidate vectors, one row per ``word_id``.
      Batches are moved to this tensor's device before the forward pass.
    model: module to evaluate. When omitted, the notebook-global ``model`` is
      used, which keeps existing two-argument calls working.

  Returns:
    Fraction of correct items as a float, or NaN if the loader yields nothing.

  Note:
    The model is switched to eval mode and is left in eval mode on return.
  """
  if model is None:
    model = globals()["model"]
  device = test_embs.device

  n_correct = 0
  n_items = 0
  model.eval()
  with torch.no_grad():
    for batch_x in tqdm(data_loader):
      batch_x = {k: v.to(device) for k, v in batch_x.items()}
      pred_vec = model(**batch_x).pred_vec
      scores = torch.matmul(pred_vec, test_embs.transpose(1, 0))
      preds = torch.argmax(scores, dim=1)
      n_correct += int((preds == batch_x["word_id"]).sum().item())
      n_items += preds.numel()

  if n_items == 0:
    # Nothing was evaluated; accuracy is undefined rather than a crash.
    return float("nan")
  return n_correct / n_items

In [None]:
from torch.optim.lr_scheduler import LambdaLR
# From https://github.com/huggingface/transformers/blob/v4.17.0/src/transformers/optimization.py
def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    """Build a LambdaLR whose multiplier ramps up linearly, then decays linearly.

    The multiplier rises from 0 to 1 over the first ``num_warmup_steps`` steps
    and then falls linearly so that it reaches 0 at ``num_training_steps``;
    it is clamped at 0 for any later step.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_warmup_steps: length of the warm-up phase, in scheduler steps.
        num_training_steps: step at which the multiplier reaches 0.
        last_epoch: forwarded to LambdaLR.

    Returns:
        The configured LambdaLR scheduler.
    """
    # Both spans are floored at 1 so that neither division can be by zero.
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def lr_lambda(current_step: int):
        if current_step >= num_warmup_steps:
            remaining = float(num_training_steps - current_step)
            return max(0.0, remaining / decay_span)
        return float(current_step) / warmup_span

    return LambdaLR(optimizer, lr_lambda, last_epoch)

In [None]:
def _stack_targets(dataset):
  """Stack every item's ``target`` vector into one float32 matrix on the GPU."""
  rows = [item["target"] for item in dataset]
  return torch.tensor(np.vstack(rows), dtype=torch.float32).to("cuda")

# Candidate matrices for the accuracy metric: row i is the target of item i.
eval_embs = _stack_targets(test_ds)
train_eval_embs = _stack_targets(train_eval_ds)

In [None]:
# Hyperparameters for this run; the same dict is handed to wandb.init below
# as the run config.
config = dict(
    lr=5e-4,
    scheduler="linear",
    batch_size=128,
    epochs=300,
    dropout=0.1,
    note='128x5x5->fn:3000,200'
)

# Run name: current Asia/Taipei time as month-day, hour-minute, then "-arc".
run_name = datetime.now(pytz.timezone('Asia/Taipei')).strftime("%m%d-%H%M-arc")
# Start a W&B run in the "convmorph" project with the config above.
wandb.init(project="convmorph", name=run_name, config=config, save_code=True)

[34m[1mwandb[0m: Currently logged in as: [33mseantyh[0m (use `wandb login --relogin` to force relogin)


In [None]:
from itertools import islice
epochs = config["epochs"]
lr = config["lr"]
batch_size = config["batch_size"]

##  Reproducibility
## -----------------
# Seed BEFORE the model is constructed: weight initialisation draws from the
# torch RNG, so seeding afterwards leaves the initial weights unseeded.
torch.backends.cudnn.deterministic = True
torch.manual_seed(123)
random.seed(123)

##   Dataset
## -----------
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
eval_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
# Fixed slice of training data, used only for the per-epoch accuracy metric.
train_eval_loader = DataLoader(train_eval_ds, batch_size=batch_size, shuffle=False)

##    Model
## ----------
model = ConvmorphArcModel(dropout=config["dropout"]).to("cuda")
optimizer = optim.AdamW(model.parameters(), lr=lr)
total_steps = epochs*len(train_loader)
# No warm-up: the learning rate decays linearly from lr to 0 over the run.
scheduler = get_linear_schedule_with_warmup(optimizer, 0, total_steps)

train_eploss_vec = []
eval_eploss_vec = []

iter_counter = 0
pbar = tqdm(total=total_steps)
for epoch_i in range(epochs):
  train_loss_vec = []
  eval_loss_vec = []

  ## ---- train for one epoch ----
  model.train()
  pbar.set_description(f"Epoch {epoch_i}/Train")
  for batch_x in train_loader:
    pbar.update(1)
    optimizer.zero_grad()
    batch_x = {k: v.to("cuda") for k, v in batch_x.items()}
    loss = model(**batch_x).loss
    loss.backward()
    optimizer.step()
    scheduler.step()
    train_loss_vec.append(loss.item())
    # Log the per-batch loss only every 100 optimiser steps.
    if iter_counter % 100 == 0:
      wandb.log({"train/loss": loss.item()}, step=iter_counter)
    iter_counter += 1
  train_eploss_vec.append(np.mean(train_loss_vec))

  ## ---- held-out loss ----
  pbar.set_description(f"Epoch {epoch_i}/Eval")
  model.eval()
  with torch.no_grad():
    for batch_x in eval_loader:
      batch_x = {k: v.to("cuda") for k, v in batch_x.items()}
      eval_loss_vec.append(model(**batch_x).loss.item())
  eval_eploss_vec.append(np.mean(eval_loss_vec))

  ## ---- retrieval accuracy on the train slice and on the test set ----
  train_acc = compute_accuracy(train_eval_loader, train_eval_embs)
  test_acc = compute_accuracy(eval_loader, eval_embs)
  wandb.log({
      "train/epoch-loss": train_eploss_vec[-1],
      "eval/epoch-loss": eval_eploss_vec[-1],
      "eval/train-acc": train_acc,
      "eval/test-acc": test_acc,
      "trainer/lr": scheduler.get_last_lr()[0]
  }, step=iter_counter)
  print(f"train/eval loss: {train_eploss_vec[-1]}/{eval_eploss_vec[-1]}")
wandb.finish()
pbar.close()

  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002640987353434697/0.0028080627331898236


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002628889101232457/0.0028018707719941935


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00261885986804605/0.002785509146633558


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002608735205962789/0.002800887028570287


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025985415561857695/0.002792338709696196


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002589207806301572/0.0027963827499964586


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025805812059105306/0.0027860143988315635


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002571170566107251/0.0027879944682354107


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025627685964124125/0.002790404081072969


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025549261971431862/0.0027908110932912678


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025467052048647417/0.002780605170604152


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025389146131391707/0.002789908020834749


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002529706530025854/0.0027833487692987546


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025243200374706963/0.0027848128714443496


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002515762975831893/0.0027842593747967235


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025096077787603688/0.002779018167833177


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0025015249209833583/0.002781003223693309


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024947842768390342/0.002791934549653282


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00248845439293155/0.002784876162574316


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002482078375591748/0.002799522577940176


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002475576004410036/0.0027984585758531466


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024693344108170623/0.0027867995037619644


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002463305537737366/0.002787527526379563


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024572198420405853/0.0027889339350319156


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024511232874029156/0.002797797196156656


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002445089332800911/0.0027845234581036493


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002439768835663413/0.00279476028905871


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002433817096235212/0.002791874241665937


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002428620521585512/0.0028041194552012407


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024238070221295955/0.002805185994172158


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024187907105708366/0.0028002495140147707


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0024136429670222163/0.0027927766932407394


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002408042957345927/0.002800942791509442


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002404140153019341/0.0027990638870202624


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002398784629535449/0.002804041224104973


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023938131893994317/0.0028006721646913015


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023896469895020875/0.002807531927828677


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002384997090042352/0.002812574957109367


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023798824541287344/0.002804686790720249


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023761401399388627/0.0028035819656603658


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002371694778176305/0.002829370117979124


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023676621688191375/0.0028067672974430025


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023628059406572725/0.0028249601843223595


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023588540017970296/0.0028143033220355087


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002355656498335011/0.00281374092931704


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002351702640244245/0.002816931242705323


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00234730177820601/0.0028225087298778817


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023440467912720364/0.002828204320394434


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023399234428553278/0.0028291422204347327


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002336299435444251/0.0028248100279597566


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023321504426918665/0.002827835633070208


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023287816044283718/0.0028311199760840586


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023258053884533136/0.00283227747422643


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023223575751775815/0.002828552693245001


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002317915385829881/0.0028307488246355206


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023156165525861244/0.0028353430582986525


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002312329520680085/0.0028354304328483218


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023083959533775865/0.0028248789846353852


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002305919229994666/0.002822147565893829


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0023020522753576206/0.002831993120101591


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002299321305843583/0.0028268490423215553


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022958308274555563/0.0028432164496431747


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022932205041880857/0.002850225040068229


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022902033193641183/0.002833285315621955


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00228653726538637/0.0028342117972594374


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022839774889508086/0.0028334628101826334


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022815476813319337/0.0028541980524702617


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022781260644886207/0.002843024480777482


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022754478491552035/0.002855592819590432


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002273009479753964/0.0028448590504315994


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002269781927641485/0.0028586408540528887


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002267162016527509/0.0028379281041755653


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022647203023596317/0.0028344396899531907


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022622020876215754/0.002837187183710436


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002258821016015566/0.002860499366458195


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002256137814680104/0.0028477915863428884


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002254070528526453/0.0028491547758070133


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002251507602258271/0.00284322049022497


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022487509969742335/0.002861779576051049


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002246173368112798/0.0028653192469694964


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022439867672858264/0.0028531229860770204


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002241684624835965/0.0028670234023593366


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022390798313611728/0.002855803890270181


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002236838891443982/0.0028550022592147193


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002234436363945305/0.0028543117902396866


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002232177270505756/0.0028643844998441637


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002230152234937601/0.0028624979119437435


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00222765996018082/0.0028655345183021077


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002225125334386526/0.002869252387123803


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022220561444892266/0.002859751087574599


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002221000402617614/0.0028721182752633467


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002218060021536838/0.0028654238800906264


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022158813239734542/0.002863524408894591


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002213904250739194/0.002872704314844062


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002211765797835761/0.0028601668551952266


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022099710777173506/0.002865093993023038


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002207810125119544/0.0028601881640497595


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022059495697356554/0.0028803117408339554


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002203384593675285/0.00287593944328061


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0022012741323622774/0.002870149449639333


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021995824949614445/0.0028786837889735275


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002197760212574665/0.002897957975316482


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021952207637857333/0.0028965479092827686


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021930282352515054/0.002882553089875728


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00219061994616777/0.002886675288512682


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002189330465171731/0.0028985293465666473


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021872756605056283/0.0028933086432516575


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021854735907777694/0.002900981533457525


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002183613554266185/0.002893044918891974


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021817014210544932/0.002882242227011981


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002180222804874074/0.0028900052323782197


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021781885937799013/0.0028954163911597184


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021761771298762214/0.002887881113565527


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002174487312681508/0.0028947896450214707


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002172000588612463/0.0028882255137432367


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021704089436669696/0.0028899315705833337


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021685411790988035/0.0029014136865347004


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002166879581393406/0.002878327140933834


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021652860837960523/0.0028977246353557953


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021631856630125405/0.0028921469541577003


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002161708939457136/0.0029072898275141292


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021597782332645346/0.0028991661383770406


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002158000791282367/0.0028880206858351207


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002156130869331933/0.0028918897442054003


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002154902457841023/0.002905748726334423


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021531217352050037/0.0029012323551190398


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002151111876067717/0.0028906901667748266


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021492812119075486/0.0029100852019231147


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002147451191470899/0.0029116443377764276


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021462359878628066/0.0029040236016347385


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021445520597000254/0.0029001951237053922


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021428940037718103/0.0029032559104962274


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002141368905042891/0.0029033505998086184


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021392709026351953/0.0029030898197864494


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002137619936101579/0.0029023917401597523


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021362872914968676/0.002914431218717558


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002134414780635798/0.0029135737083076188


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002132797446709288/0.0029293080151546746


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002131784696743482/0.0029062792940142876


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002129607463651103/0.002917052600726796


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002128116056580541/0.0029158494726289064


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021264848143325386/0.0029167469522993392


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002125118707395974/0.0029179920529713854


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021233754498580907/0.0029155344624693194


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002121773847314418/0.0029226358650097004


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021204563813468852/0.0029292719603593773


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021190194700145834/0.002910258490980292


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021175671405955286/0.0029285179189173505


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002115968345517008/0.0029174200899433345


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002114216967199284/0.0029134233918739483


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002112814081846264/0.0029374938021646813


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021114289427566504/0.0029136308488280824


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002109694492031375/0.0029146105953259394


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021086508281520644/0.00292675447902487


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002106756775524704/0.0029272765144317723


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0021055255258100787/0.002940571187840154


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002103802091063082/0.002928301206945131


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00210253394392102/0.0029172381036914885


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002100813673282771/0.0029305658827070147


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020992269015579234/0.0029315945866983384


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002098254225717249/0.00291928503914581


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002096393974674419/0.002936522403615527


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020951532654686434/0.0029315662153142816


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002093878260335014/0.0029349784211566052


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020922960716360972/0.002924201534672951


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020905706218787476/0.0029334623880761987


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00208956613078186/0.002932340275341024


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002088065826569745/0.00294058931467589


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020866202232827625/0.002939813794606986


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020852014672897816/0.0029367659153649583


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002083604040253521/0.002933671862895911


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00208247233667061/0.0029388995317276567


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020811405065838027/0.0029318611341295764


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020795583866698585/0.002940712569397874


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002078559580845644/0.002937378958449699


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002076510982660612/0.0029417051991913468


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020754646804233556/0.0029400928081789366


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020737915531884765/0.0029270573334846026


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002072786307533416/0.002943667675329683


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002071083179797558/0.002958441366596768


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020696683010707297/0.002949359390186146


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020687599228269345/0.0029464523007239527


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002067490637488248/0.0029460323373011


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020657887844806964/0.002956169429429186


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020646351752200537/0.002949990458243216


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002063121152688711/0.0029525917974145464


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020617703953134925/0.0029555041255662218


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020606860614754466/0.00296015674151325


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020591319754611754/0.0029542494303314015


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020578786826286563/0.0029712846929517886


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002056496850279271/0.0029476361232809722


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020553619870002336/0.002962589147500694


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020537217422269094/0.0029533670749515295


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020527588481284937/0.0029609968041768298


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020513614641456818/0.0029679021487633386


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020499751575284927/0.0029701998816259825


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020486991630593024/0.0029522788002698994


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002047361222271254/0.002967274510107624


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020461009110354193/0.0029573778301710263


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002044665135433866/0.002961810605484061


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020436078660167264/0.0029560365510405973


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020422332240330886/0.002971338981296867


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020408818169643348/0.0029688681145974747


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020393671727725778/0.002956708767063295


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002038436775680303/0.002960569715166154


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020372669095455884/0.0029690812564998246


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002036021283382307/0.0029725024554257593


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002034454761953441/0.0029613738637029505


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002033297917203419/0.0029813038563588634


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020320815216685532/0.002972048857676176


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020307634436445457/0.0029760299754949906


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020295009476422364/0.002962156334736695


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002028167068022807/0.0029811100248480216


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020267585435998496/0.002968130933974559


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020255987784734935/0.00298363415155715


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002024296885942926/0.0029827185013952353


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020231196130942575/0.002990768237699134


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002022043170839332/0.002974573459747868


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020207744237106117/0.0029760186929100505


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020194053119090005/0.0029810783016728237


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002018184882330157/0.002985593465079243


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020170144996592317/0.0029804690663392344


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002015716187191822/0.0029831327410647646


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020145879485545764/0.0029781505436403677


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020134459324170698/0.0029755110105422014


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002012065215682073/0.0029767702508252114


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020109207644267474/0.0029796753127205497


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002009758479304512/0.0029841873765690252


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020083259022041925/0.00299185179756023


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020071512313742388/0.002982442567978675


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002005793024456687/0.0029807395330863073


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020045244069811508/0.0029931308284479505


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020035672365711667/0.002996272970146189


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0020024737222753505/0.0029839759808965027


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.002001061118773726/0.0029930830399583406


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019998834121903795/0.002992411085870117


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019985454000216994/0.0029904591744222366


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001997512737974325/0.003002793406873631


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019963243754306517/0.002992910706476929


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019950094759213005/0.002993988668701301


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019938451163481127/0.002991649069978545


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019926132329032573/0.0029975184831225


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019913104437021675/0.0029900051983228573


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019901647243374667/0.0029897744631549963


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.00198912700335002/0.003004676652684187


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001987929967021281/0.0029944284663846097


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001986576118605126/0.0030041561840334907


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019855585496679545/0.0029994843062013388


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001984254002297004/0.0030038881765600913


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001983193932682558/0.003007880790391937


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019819300391772554/0.003002782594800616


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019807432765980046/0.003001390679855831


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019795274174561435/0.0030026160335789123


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019782948413595907/0.0030026767829743526


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019772180268871147/0.003004553980038812


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019760737019106165/0.003004522431486597


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019747837587574305/0.0030023871368030086


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001973627199529937/0.0030061897026219717


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001972494808746213/0.003011533699464053


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019713007713125035/0.0030066200270084664


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019700673579860274/0.0030093214930578447


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001969061405532926/0.0030083724996075034


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001967845853587349/0.0030093161864594245


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019667331680111428/0.00301165673105667


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019653981696625333/0.003010444930017305


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019643378765618175/0.003013161476701498


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019629274404357492/0.0030126109340926632


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019618224522873445/0.00300828285010842


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019606113807093704/0.0030147022140833237


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001959495412143219/0.0030128847380789616


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001958169444612619/0.003014465376812344


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001957142227609645/0.0030117427280250317


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.0019558249987555935/0.0030154875178898997


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001954571192242089/0.0030147410046386844


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

train/eval loss: 0.001953416206762859/0.003014550073809611



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/epoch-loss,█▄▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄
eval/test-acc,▁▅▇█████████████████████████████████████
eval/train-acc,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
train/epoch-loss,█▆▅▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,█▅▄▄▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▁
trainer/lr,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
eval/epoch-loss,0.00301
eval/test-acc,0.43711
eval/train-acc,0.83929
train/epoch-loss,0.00195
train/loss,0.00194
trainer/lr,0.0


In [None]:
import os

# Save the trained weights into a per-run folder. The path is relative to
# the current working directory, so it resolves wherever the kernel is running.
checkpoint_root = "drive/MyDrive/LangOn/convmorph/"
out_dir = checkpoint_root + run_name

# exist_ok=True makes re-running this cell safe when the folder already exists.
os.makedirs(out_dir, exist_ok=True)

# Only the state dict (parameters/buffers) is written, not the full module object.
model_weights = model.state_dict()
weights_path = out_dir + "/model.pth"
torch.save(model_weights, weights_path)