In [1]:
import torch 
import torch.nn as nn 
import numpy as np
from collections import OrderedDict

from utils.losses import protop_loss
from utils.sample_parameters import SingleGenerator, HiddenAndKernelGenerator, MonotonicGenerator
from utils.sample_parameters import ParamGenerators
from models.ProtICU import ProtICU
from utils.train_n_test import TrainTest

In [2]:
# Load the pre-built dataset from disk and unpack it into train / val / test splits.
# NOTE(review): each split is later indexed with [1] to obtain labels, so each is
# presumably a (features, labels) pair -- confirm against the script that wrote the file.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary code;
# only load tensors.pkl from a trusted source.
train, val, test = torch.load('data/in-hospital-mortality/tensors.pkl')

In [3]:
# Class weights for the loss, derived from the mean label over all three splits.
# Index 0 receives the label mean and index 1 its complement.
# NOTE(review): the mean is presumably the mortality rate (assumes 0/1 labels) -- confirm.
# NOTE(review): validation and test labels contribute to these weights; consider
# whether they should be computed from the training split only.
perc_mort = np.mean(np.concatenate([train[1], val[1], test[1]]))
class_weights = torch.Tensor([perc_mort, 1 - perc_mort])

In [4]:
# Search space for the hyperparameter search: each key maps to a generator, and
# every parameter is sampled INDEPENDENTLY of the others.
generators_dict = {
    # HYPERPARAMETER RANGES
    'BATCH_SIZE': SingleGenerator([128, 256, 512]),
    'EPOCHS': SingleGenerator([20, 40, 50]),
    'OPTIMIZER': SingleGenerator([torch.optim.Adam]),
    'LEARNING_RATE': SingleGenerator([1e-5, 5e-5, 1e-4, 5e-4]),
    # One candidate loss per pair of protop_loss coefficients (order is kept as-is).
    'LOSS': SingleGenerator([
        protop_loss(class_weights, coef_a, coef_b)
        for coef_a, coef_b in ((.5, .5), (1, 1), (.1, .1), (1, .5), (.5, .1), (.3, .1))
    ]),
    'EARLY_STOPPING': SingleGenerator([True, False]),
    'PATIENCE': SingleGenerator([2, 3]),
    'MIN_DELTA': SingleGenerator([5e-5, 1e-4, 5e-4, 1e-3, 5e-3]),

    # NETWORK PARAMETER RANGES
    'HIDDEN_AND_KERNEL_SIZES': HiddenAndKernelGenerator(
        range(2, 4), [128, 256, 512], [3, 5, 7, 9], ascending=(True, False)
    ),
    'MAXPOOL': SingleGenerator([2]),
    'OBO_SIZES': MonotonicGenerator(range(1, 3), [256, 512], ascending=False),
    'PROTOTYPE_NUM': SingleGenerator([10, 20]),
    # Dropout candidates 0.0, 0.1, ..., 0.7
    'DROPOUT': SingleGenerator(np.arange(8) / 10),
    'PROTO_ACTIVATION': SingleGenerator(['log', 'linear']),
}

In [5]:
# Degenerate search space: every generator offers exactly one value, so sampling
# from it always yields the same fixed configuration.
fixed_gen = {
    # HYPERPARAMETER RANGES
    'BATCH_SIZE': SingleGenerator([256]),
    'EPOCHS': SingleGenerator([20]),
    'OPTIMIZER': SingleGenerator([torch.optim.Adam]),
    'LEARNING_RATE': SingleGenerator([1e-5]),
    'LOSS': SingleGenerator([protop_loss(class_weights, 1, 1)]),
    'EARLY_STOPPING': SingleGenerator([True]),
    'PATIENCE': SingleGenerator([1]),
    'MIN_DELTA': SingleGenerator([1e-4]),

    # NETWORK PARAMETER RANGES
    'HIDDEN_AND_KERNEL_SIZES': HiddenAndKernelGenerator(
        range(1, 2), [256], [5], ascending=(True, False)
    ),
    'MAXPOOL': SingleGenerator([2]),
    'OBO_SIZES': MonotonicGenerator(range(1, 2), [128], ascending=False),
    'PROTOTYPE_NUM': SingleGenerator([20]),
    'DROPOUT': SingleGenerator([0.2]),
    'PROTO_ACTIVATION': SingleGenerator(['log']),
}

In [6]:
# Number of hyperparameter configurations to sample and train.
N = 1

# Select the search space explicitly instead of building a generator from
# `generators_dict` and immediately overwriting it (the first assignment was dead).
# True  -> sample from the single fixed configuration in `fixed_gen`
# False -> sample from the full search ranges in `generators_dict`
USE_FIXED_PARAMS = True

gen = ParamGenerators(fixed_gen if USE_FIXED_PARAMS else generators_dict)
param_samples = gen.sample(N)

In [7]:
# Train and evaluate one ProtICU model per sampled configuration, collecting the
# result of each run's test() call in `stats`.
stats = []
for i in range(N):
    params = param_samples[i]
    print(params)
    # Use THIS run's epoch count (the original always read param_samples[0], which
    # is wrong for every run after the first when N > 1).
    # push_epochs holds a single entry: EPOCHS - 1.
    # NOTE(review): presumably the last epoch index, assuming zero-based epochs -- confirm.
    # A sparser schedule such as list(range(5, params['EPOCHS'], 2)) was tried previously.
    push_epochs = [params['EPOCHS'] - 1]
    run = TrainTest(ProtICU, (train, val, test), params, push_epochs=push_epochs)
    run.train()
    stats.append(run.test())

  0%|          | 0/57 [00:00<?, ?it/s]

{'BATCH_SIZE': 256, 'EPOCHS': 20, 'OPTIMIZER': <class 'torch.optim.adam.Adam'>, 'LEARNING_RATE': 1e-05, 'LOSS': protop_loss_1_1, 'EARLY_STOPPING': True, 'PATIENCE': 1, 'MIN_DELTA': 0.0001, 'MAXPOOL': 2, 'OBO_SIZES': array([128]), 'PROTOTYPE_NUM': 20, 'DROPOUT': 0.2, 'PROTO_ACTIVATION': 'log', 'HIDDEN_SIZES': array([256]), 'KERNEL_SIZES': array([5])}
tensor([[0.1580, 0.1663, 0.1715,  ..., 0.1591, 0.1681, 0.1714],
        [0.1578, 0.1629, 0.1714,  ..., 0.1591, 0.1666, 0.1677],
        [0.1563, 0.1648, 0.1719,  ..., 0.1598, 0.1703, 0.1712],
        ...,
        [0.1572, 0.1693, 0.1727,  ..., 0.1583, 0.1678, 0.1674],
        [0.1598, 0.1649, 0.1740,  ..., 0.1581, 0.1702, 0.1723],
        [0.1603, 0.1655, 0.1727,  ..., 0.1593, 0.1685, 0.1728]],
       grad_fn=<LogBackward>)


  2%|▏         | 1/57 [00:00<00:26,  2.09it/s]

tensor([[0.1602, 0.1672, 0.1729,  ..., 0.1600, 0.1699, 0.1715],
        [0.1560, 0.1649, 0.1743,  ..., 0.1583, 0.1677, 0.1703],
        [0.1554, 0.1665, 0.1701,  ..., 0.1568, 0.1663, 0.1646],
        ...,
        [0.1588, 0.1674, 0.1732,  ..., 0.1590, 0.1666, 0.1657],
        [0.1601, 0.1661, 0.1746,  ..., 0.1609, 0.1718, 0.1720],
        [0.1575, 0.1642, 0.1741,  ..., 0.1593, 0.1677, 0.1716]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:27,  2.00it/s]

tensor([[0.1610, 0.1659, 0.1742,  ..., 0.1583, 0.1707, 0.1711],
        [0.1555, 0.1640, 0.1739,  ..., 0.1594, 0.1695, 0.1660],
        [0.1567, 0.1637, 0.1723,  ..., 0.1587, 0.1683, 0.1725],
        ...,
        [0.1593, 0.1636, 0.1721,  ..., 0.1594, 0.1683, 0.1700],
        [0.1593, 0.1642, 0.1723,  ..., 0.1586, 0.1701, 0.1732],
        [0.1563, 0.1658, 0.1710,  ..., 0.1576, 0.1688, 0.1671]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:31,  1.73it/s]

tensor([[0.1561, 0.1654, 0.1704,  ..., 0.1602, 0.1691, 0.1709],
        [0.1562, 0.1673, 0.1673,  ..., 0.1568, 0.1661, 0.1641],
        [0.1561, 0.1642, 0.1715,  ..., 0.1573, 0.1697, 0.1673],
        ...,
        [0.1586, 0.1653, 0.1738,  ..., 0.1591, 0.1722, 0.1736],
        [0.1535, 0.1632, 0.1717,  ..., 0.1592, 0.1690, 0.1644],
        [0.1587, 0.1660, 0.1760,  ..., 0.1620, 0.1680, 0.1689]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:33,  1.56it/s]

tensor([[0.1577, 0.1661, 0.1738,  ..., 0.1564, 0.1684, 0.1659],
        [0.1556, 0.1632, 0.1727,  ..., 0.1567, 0.1677, 0.1683],
        [0.1560, 0.1657, 0.1739,  ..., 0.1571, 0.1671, 0.1689],
        ...,
        [0.1594, 0.1627, 0.1736,  ..., 0.1581, 0.1679, 0.1729],
        [0.1594, 0.1658, 0.1725,  ..., 0.1593, 0.1684, 0.1695],
        [0.1596, 0.1639, 0.1752,  ..., 0.1609, 0.1716, 0.1731]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.50it/s]

tensor([[0.1608, 0.1674, 0.1755,  ..., 0.1576, 0.1689, 0.1692],
        [0.1596, 0.1656, 0.1738,  ..., 0.1584, 0.1695, 0.1732],
        [0.1581, 0.1655, 0.1751,  ..., 0.1587, 0.1696, 0.1690],
        ...,
        [0.1583, 0.1666, 0.1721,  ..., 0.1589, 0.1674, 0.1699],
        [0.1582, 0.1640, 0.1772,  ..., 0.1611, 0.1682, 0.1721],
        [0.1572, 0.1642, 0.1734,  ..., 0.1598, 0.1715, 0.1718]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:34,  1.50it/s]

tensor([[0.1589, 0.1638, 0.1740,  ..., 0.1583, 0.1690, 0.1720],
        [0.1566, 0.1639, 0.1725,  ..., 0.1567, 0.1675, 0.1641],
        [0.1569, 0.1654, 0.1720,  ..., 0.1601, 0.1700, 0.1712],
        ...,
        [0.1589, 0.1636, 0.1762,  ..., 0.1602, 0.1686, 0.1693],
        [0.1595, 0.1640, 0.1749,  ..., 0.1594, 0.1674, 0.1712],
        [0.1575, 0.1649, 0.1710,  ..., 0.1624, 0.1685, 0.1711]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.48it/s]

tensor([[0.1590, 0.1648, 0.1712,  ..., 0.1593, 0.1687, 0.1728],
        [0.1597, 0.1664, 0.1737,  ..., 0.1581, 0.1704, 0.1723],
        [0.1558, 0.1654, 0.1722,  ..., 0.1563, 0.1665, 0.1644],
        ...,
        [0.1558, 0.1650, 0.1740,  ..., 0.1600, 0.1693, 0.1711],
        [0.1592, 0.1654, 0.1749,  ..., 0.1607, 0.1703, 0.1754],
        [0.1607, 0.1645, 0.1751,  ..., 0.1597, 0.1695, 0.1749]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:33,  1.48it/s]

tensor([[0.1572, 0.1652, 0.1718,  ..., 0.1597, 0.1682, 0.1701],
        [0.1611, 0.1663, 0.1724,  ..., 0.1602, 0.1711, 0.1732],
        [0.1572, 0.1651, 0.1733,  ..., 0.1587, 0.1697, 0.1704],
        ...,
        [0.1589, 0.1636, 0.1734,  ..., 0.1590, 0.1697, 0.1706],
        [0.1572, 0.1647, 0.1706,  ..., 0.1575, 0.1684, 0.1722],
        [0.1582, 0.1655, 0.1726,  ..., 0.1589, 0.1691, 0.1713]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:32,  1.49it/s]

tensor([[0.1611, 0.1652, 0.1747,  ..., 0.1622, 0.1704, 0.1723],
        [0.1607, 0.1662, 0.1754,  ..., 0.1590, 0.1735, 0.1716],
        [0.1563, 0.1665, 0.1714,  ..., 0.1556, 0.1682, 0.1660],
        ...,
        [0.1566, 0.1625, 0.1711,  ..., 0.1586, 0.1680, 0.1723],
        [0.1572, 0.1626, 0.1731,  ..., 0.1584, 0.1678, 0.1722],
        [0.1583, 0.1658, 0.1754,  ..., 0.1576, 0.1710, 0.1709]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.49it/s]

tensor([[0.1572, 0.1645, 0.1739,  ..., 0.1618, 0.1697, 0.1699],
        [0.1587, 0.1649, 0.1746,  ..., 0.1609, 0.1693, 0.1718],
        [0.1561, 0.1642, 0.1726,  ..., 0.1563, 0.1675, 0.1688],
        ...,
        [0.1576, 0.1636, 0.1747,  ..., 0.1620, 0.1671, 0.1681],
        [0.1441, 0.1546, 0.1546,  ..., 0.1461, 0.1556, 0.1542],
        [0.1602, 0.1636, 0.1744,  ..., 0.1601, 0.1701, 0.1701]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:31,  1.48it/s]

tensor([[0.1548, 0.1649, 0.1722,  ..., 0.1597, 0.1688, 0.1655],
        [0.1563, 0.1647, 0.1729,  ..., 0.1592, 0.1675, 0.1645],
        [0.1579, 0.1644, 0.1745,  ..., 0.1578, 0.1663, 0.1702],
        ...,
        [0.1597, 0.1677, 0.1761,  ..., 0.1602, 0.1715, 0.1733],
        [0.1595, 0.1658, 0.1776,  ..., 0.1594, 0.1710, 0.1720],
        [0.1569, 0.1665, 0.1705,  ..., 0.1552, 0.1652, 0.1632]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:31,  1.45it/s]

tensor([[0.1572, 0.1635, 0.1738,  ..., 0.1603, 0.1685, 0.1703],
        [0.1570, 0.1681, 0.1702,  ..., 0.1577, 0.1659, 0.1664],
        [0.1583, 0.1685, 0.1724,  ..., 0.1569, 0.1677, 0.1672],
        ...,
        [0.1569, 0.1654, 0.1746,  ..., 0.1571, 0.1684, 0.1718],
        [0.1561, 0.1648, 0.1722,  ..., 0.1577, 0.1677, 0.1698],
        [0.1587, 0.1654, 0.1766,  ..., 0.1582, 0.1691, 0.1708]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:30,  1.45it/s]

tensor([[0.1591, 0.1655, 0.1743,  ..., 0.1582, 0.1683, 0.1729],
        [0.1576, 0.1649, 0.1773,  ..., 0.1601, 0.1698, 0.1714],
        [0.1569, 0.1653, 0.1730,  ..., 0.1600, 0.1691, 0.1732],
        ...,
        [0.1606, 0.1667, 0.1751,  ..., 0.1620, 0.1699, 0.1751],
        [0.1592, 0.1637, 0.1779,  ..., 0.1584, 0.1684, 0.1705],
        [0.1598, 0.1645, 0.1739,  ..., 0.1583, 0.1703, 0.1728]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:29,  1.45it/s]

tensor([[0.1596, 0.1648, 0.1757,  ..., 0.1577, 0.1681, 0.1680],
        [0.1588, 0.1669, 0.1734,  ..., 0.1601, 0.1721, 0.1692],
        [0.1559, 0.1668, 0.1732,  ..., 0.1557, 0.1672, 0.1641],
        ...,
        [0.1592, 0.1658, 0.1756,  ..., 0.1598, 0.1687, 0.1729],
        [0.1563, 0.1657, 0.1716,  ..., 0.1562, 0.1691, 0.1693],
        [0.1578, 0.1672, 0.1738,  ..., 0.1580, 0.1684, 0.1695]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:30,  1.40it/s]

tensor([[0.1581, 0.1633, 0.1724,  ..., 0.1584, 0.1674, 0.1676],
        [0.1617, 0.1640, 0.1756,  ..., 0.1603, 0.1707, 0.1732],
        [0.1604, 0.1648, 0.1773,  ..., 0.1601, 0.1700, 0.1731],
        ...,
        [0.1575, 0.1633, 0.1730,  ..., 0.1602, 0.1694, 0.1710],
        [0.1541, 0.1658, 0.1691,  ..., 0.1553, 0.1663, 0.1626],
        [0.1574, 0.1657, 0.1758,  ..., 0.1592, 0.1665, 0.1689]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:29,  1.40it/s]

tensor([[0.1604, 0.1658, 0.1740,  ..., 0.1579, 0.1687, 0.1715],
        [0.1610, 0.1674, 0.1746,  ..., 0.1626, 0.1697, 0.1717],
        [0.1572, 0.1646, 0.1734,  ..., 0.1610, 0.1709, 0.1687],
        ...,
        [0.1575, 0.1648, 0.1748,  ..., 0.1572, 0.1690, 0.1678],
        [0.1599, 0.1657, 0.1740,  ..., 0.1605, 0.1685, 0.1719],
        [0.1572, 0.1649, 0.1738,  ..., 0.1576, 0.1690, 0.1712]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:28,  1.40it/s]

tensor([[0.1572, 0.1636, 0.1736,  ..., 0.1584, 0.1693, 0.1717],
        [0.1562, 0.1646, 0.1739,  ..., 0.1588, 0.1679, 0.1690],
        [0.1565, 0.1644, 0.1739,  ..., 0.1576, 0.1678, 0.1699],
        ...,
        [0.1600, 0.1657, 0.1753,  ..., 0.1590, 0.1698, 0.1751],
        [0.1567, 0.1640, 0.1735,  ..., 0.1594, 0.1705, 0.1716],
        [0.1589, 0.1644, 0.1748,  ..., 0.1586, 0.1683, 0.1701]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:26,  1.45it/s]

tensor([[0.1604, 0.1644, 0.1759,  ..., 0.1584, 0.1692, 0.1702],
        [0.1561, 0.1639, 0.1706,  ..., 0.1594, 0.1680, 0.1719],
        [0.1554, 0.1661, 0.1720,  ..., 0.1589, 0.1667, 0.1697],
        ...,
        [0.1612, 0.1638, 0.1768,  ..., 0.1580, 0.1658, 0.1702],
        [0.1579, 0.1660, 0.1744,  ..., 0.1601, 0.1708, 0.1739],
        [0.1593, 0.1684, 0.1763,  ..., 0.1600, 0.1695, 0.1732]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:26,  1.45it/s]

tensor([[0.1561, 0.1641, 0.1720,  ..., 0.1585, 0.1680, 0.1717],
        [0.1557, 0.1656, 0.1728,  ..., 0.1618, 0.1707, 0.1699],
        [0.1564, 0.1633, 0.1723,  ..., 0.1595, 0.1694, 0.1687],
        ...,
        [0.1587, 0.1632, 0.1735,  ..., 0.1578, 0.1678, 0.1705],
        [0.1602, 0.1653, 0.1749,  ..., 0.1608, 0.1708, 0.1730],
        [0.1586, 0.1636, 0.1735,  ..., 0.1578, 0.1688, 0.1783]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:25,  1.43it/s]

tensor([[0.1553, 0.1641, 0.1719,  ..., 0.1596, 0.1673, 0.1661],
        [0.1620, 0.1650, 0.1773,  ..., 0.1612, 0.1718, 0.1757],
        [0.1574, 0.1644, 0.1754,  ..., 0.1582, 0.1686, 0.1733],
        ...,
        [0.1598, 0.1658, 0.1741,  ..., 0.1584, 0.1687, 0.1714],
        [0.1560, 0.1644, 0.1733,  ..., 0.1572, 0.1678, 0.1708],
        [0.1564, 0.1659, 0.1716,  ..., 0.1579, 0.1692, 0.1680]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:25,  1.43it/s]

tensor([[0.1570, 0.1646, 0.1742,  ..., 0.1583, 0.1696, 0.1701],
        [0.1550, 0.1666, 0.1728,  ..., 0.1580, 0.1685, 0.1663],
        [0.1595, 0.1618, 0.1749,  ..., 0.1601, 0.1662, 0.1743],
        ...,
        [0.1606, 0.1651, 0.1761,  ..., 0.1588, 0.1703, 0.1699],
        [0.1621, 0.1634, 0.1775,  ..., 0.1611, 0.1666, 0.1702],
        [0.1603, 0.1651, 0.1768,  ..., 0.1607, 0.1707, 0.1729]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:24,  1.42it/s]

tensor([[0.1571, 0.1652, 0.1764,  ..., 0.1595, 0.1698, 0.1698],
        [0.1577, 0.1635, 0.1717,  ..., 0.1597, 0.1685, 0.1720],
        [0.1570, 0.1644, 0.1729,  ..., 0.1572, 0.1688, 0.1734],
        ...,
        [0.1566, 0.1651, 0.1754,  ..., 0.1585, 0.1684, 0.1686],
        [0.1568, 0.1656, 0.1727,  ..., 0.1569, 0.1672, 0.1654],
        [0.1599, 0.1653, 0.1762,  ..., 0.1590, 0.1688, 0.1719]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:23,  1.43it/s]

tensor([[0.1546, 0.1645, 0.1741,  ..., 0.1588, 0.1685, 0.1704],
        [0.1572, 0.1652, 0.1760,  ..., 0.1576, 0.1688, 0.1719],
        [0.1590, 0.1649, 0.1771,  ..., 0.1582, 0.1692, 0.1708],
        ...,
        [0.1591, 0.1624, 0.1747,  ..., 0.1589, 0.1689, 0.1717],
        [0.1611, 0.1632, 0.1770,  ..., 0.1607, 0.1706, 0.1753],
        [0.1574, 0.1645, 0.1749,  ..., 0.1590, 0.1695, 0.1689]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:22,  1.45it/s]

tensor([[0.1610, 0.1650, 0.1755,  ..., 0.1572, 0.1700, 0.1698],
        [0.1565, 0.1656, 0.1744,  ..., 0.1565, 0.1680, 0.1678],
        [0.1557, 0.1645, 0.1751,  ..., 0.1583, 0.1675, 0.1700],
        ...,
        [0.1568, 0.1652, 0.1715,  ..., 0.1574, 0.1688, 0.1681],
        [0.1586, 0.1651, 0.1750,  ..., 0.1613, 0.1707, 0.1722],
        [0.1566, 0.1650, 0.1731,  ..., 0.1610, 0.1706, 0.1666]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:22,  1.42it/s]

tensor([[0.1556, 0.1641, 0.1708,  ..., 0.1598, 0.1691, 0.1691],
        [0.1573, 0.1644, 0.1756,  ..., 0.1626, 0.1703, 0.1706],
        [0.1595, 0.1651, 0.1757,  ..., 0.1604, 0.1715, 0.1737],
        ...,
        [0.1560, 0.1654, 0.1740,  ..., 0.1623, 0.1700, 0.1703],
        [0.1567, 0.1641, 0.1743,  ..., 0.1585, 0.1686, 0.1703],
        [0.1572, 0.1651, 0.1747,  ..., 0.1577, 0.1697, 0.1706]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:21,  1.42it/s]

tensor([[0.1608, 0.1661, 0.1772,  ..., 0.1607, 0.1733, 0.1721],
        [0.1549, 0.1644, 0.1749,  ..., 0.1598, 0.1682, 0.1693],
        [0.1574, 0.1647, 0.1737,  ..., 0.1603, 0.1702, 0.1721],
        ...,
        [0.1573, 0.1654, 0.1740,  ..., 0.1589, 0.1682, 0.1694],
        [0.1582, 0.1663, 0.1743,  ..., 0.1593, 0.1698, 0.1697],
        [0.1604, 0.1656, 0.1756,  ..., 0.1620, 0.1694, 0.1725]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:21,  1.37it/s]

tensor([[0.1594, 0.1662, 0.1765,  ..., 0.1586, 0.1711, 0.1708],
        [0.1586, 0.1623, 0.1726,  ..., 0.1594, 0.1686, 0.1710],
        [0.1635, 0.1652, 0.1760,  ..., 0.1591, 0.1702, 0.1687],
        ...,
        [0.1614, 0.1661, 0.1729,  ..., 0.1595, 0.1691, 0.1725],
        [0.1591, 0.1647, 0.1735,  ..., 0.1570, 0.1701, 0.1710],
        [0.1609, 0.1664, 0.1766,  ..., 0.1600, 0.1718, 0.1728]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:20,  1.41it/s]

tensor([[0.1609, 0.1639, 0.1768,  ..., 0.1597, 0.1700, 0.1762],
        [0.1611, 0.1651, 0.1747,  ..., 0.1587, 0.1699, 0.1741],
        [0.1591, 0.1623, 0.1739,  ..., 0.1599, 0.1716, 0.1725],
        ...,
        [0.1609, 0.1638, 0.1740,  ..., 0.1583, 0.1706, 0.1729],
        [0.1618, 0.1677, 0.1769,  ..., 0.1619, 0.1703, 0.1712],
        [0.1565, 0.1644, 0.1741,  ..., 0.1582, 0.1683, 0.1711]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:19,  1.44it/s]

tensor([[0.1584, 0.1629, 0.1731,  ..., 0.1561, 0.1667, 0.1683],
        [0.1589, 0.1636, 0.1736,  ..., 0.1595, 0.1657, 0.1695],
        [0.1578, 0.1661, 0.1753,  ..., 0.1559, 0.1673, 0.1660],
        ...,
        [0.1529, 0.1626, 0.1690,  ..., 0.1596, 0.1648, 0.1652],
        [0.1590, 0.1641, 0.1746,  ..., 0.1609, 0.1681, 0.1736],
        [0.1609, 0.1653, 0.1764,  ..., 0.1600, 0.1684, 0.1729]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:18,  1.45it/s]

tensor([[0.1572, 0.1647, 0.1791,  ..., 0.1600, 0.1658, 0.1704],
        [0.1603, 0.1690, 0.1755,  ..., 0.1585, 0.1676, 0.1626],
        [0.1552, 0.1649, 0.1734,  ..., 0.1576, 0.1664, 0.1673],
        ...,
        [0.1570, 0.1672, 0.1750,  ..., 0.1577, 0.1679, 0.1710],
        [0.1616, 0.1646, 0.1755,  ..., 0.1585, 0.1688, 0.1740],
        [0.1594, 0.1646, 0.1786,  ..., 0.1610, 0.1693, 0.1739]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:17,  1.46it/s]

tensor([[0.1594, 0.1616, 0.1770,  ..., 0.1589, 0.1691, 0.1713],
        [0.1623, 0.1657, 0.1787,  ..., 0.1619, 0.1704, 0.1741],
        [0.1581, 0.1645, 0.1732,  ..., 0.1601, 0.1676, 0.1733],
        ...,
        [0.1591, 0.1654, 0.1768,  ..., 0.1600, 0.1679, 0.1714],
        [0.1585, 0.1650, 0.1759,  ..., 0.1621, 0.1689, 0.1757],
        [0.1579, 0.1650, 0.1753,  ..., 0.1578, 0.1664, 0.1668]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:17,  1.43it/s]

tensor([[0.1604, 0.1644, 0.1768,  ..., 0.1571, 0.1694, 0.1737],
        [0.1579, 0.1650, 0.1731,  ..., 0.1605, 0.1683, 0.1665],
        [0.1565, 0.1671, 0.1717,  ..., 0.1583, 0.1683, 0.1675],
        ...,
        [0.1570, 0.1631, 0.1737,  ..., 0.1591, 0.1669, 0.1717],
        [0.1557, 0.1648, 0.1758,  ..., 0.1575, 0.1664, 0.1686],
        [0.1560, 0.1659, 0.1745,  ..., 0.1584, 0.1669, 0.1685]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:16,  1.45it/s]

tensor([[0.1599, 0.1661, 0.1747,  ..., 0.1567, 0.1682, 0.1705],
        [0.1574, 0.1649, 0.1766,  ..., 0.1607, 0.1696, 0.1728],
        [0.1592, 0.1653, 0.1757,  ..., 0.1587, 0.1689, 0.1715],
        ...,
        [0.1569, 0.1640, 0.1731,  ..., 0.1611, 0.1689, 0.1724],
        [0.1623, 0.1657, 0.1792,  ..., 0.1606, 0.1708, 0.1758],
        [0.1593, 0.1638, 0.1763,  ..., 0.1563, 0.1693, 0.1708]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.47it/s]

tensor([[0.1609, 0.1646, 0.1788,  ..., 0.1584, 0.1676, 0.1687],
        [0.1563, 0.1639, 0.1729,  ..., 0.1604, 0.1681, 0.1683],
        [0.1581, 0.1672, 0.1794,  ..., 0.1574, 0.1709, 0.1702],
        ...,
        [0.1566, 0.1671, 0.1752,  ..., 0.1562, 0.1677, 0.1692],
        [0.1585, 0.1650, 0.1757,  ..., 0.1591, 0.1683, 0.1718],
        [0.1541, 0.1658, 0.1726,  ..., 0.1598, 0.1658, 0.1677]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.49it/s]

tensor([[0.1579, 0.1622, 0.1727,  ..., 0.1601, 0.1668, 0.1728],
        [0.1583, 0.1645, 0.1739,  ..., 0.1563, 0.1636, 0.1634],
        [0.1573, 0.1638, 0.1738,  ..., 0.1574, 0.1704, 0.1694],
        ...,
        [0.1569, 0.1665, 0.1769,  ..., 0.1581, 0.1683, 0.1712],
        [0.1566, 0.1645, 0.1757,  ..., 0.1580, 0.1679, 0.1705],
        [0.1589, 0.1663, 0.1762,  ..., 0.1591, 0.1694, 0.1735]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:14,  1.50it/s]

tensor([[0.1593, 0.1655, 0.1747,  ..., 0.1604, 0.1695, 0.1697],
        [0.1570, 0.1630, 0.1743,  ..., 0.1595, 0.1691, 0.1696],
        [0.1570, 0.1683, 0.1738,  ..., 0.1570, 0.1681, 0.1693],
        ...,
        [0.1565, 0.1646, 0.1711,  ..., 0.1589, 0.1678, 0.1696],
        [0.1548, 0.1644, 0.1712,  ..., 0.1579, 0.1684, 0.1694],
        [0.1574, 0.1680, 0.1774,  ..., 0.1578, 0.1703, 0.1695]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:13,  1.51it/s]

tensor([[0.1604, 0.1663, 0.1777,  ..., 0.1594, 0.1683, 0.1710],
        [0.1565, 0.1659, 0.1767,  ..., 0.1592, 0.1654, 0.1703],
        [0.1578, 0.1678, 0.1751,  ..., 0.1609, 0.1677, 0.1696],
        ...,
        [0.1587, 0.1622, 0.1751,  ..., 0.1596, 0.1675, 0.1722],
        [0.1540, 0.1657, 0.1723,  ..., 0.1577, 0.1677, 0.1658],
        [0.1571, 0.1656, 0.1733,  ..., 0.1614, 0.1706, 0.1695]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:12,  1.52it/s]

tensor([[0.1592, 0.1648, 0.1759,  ..., 0.1592, 0.1721, 0.1713],
        [0.1563, 0.1655, 0.1731,  ..., 0.1584, 0.1674, 0.1683],
        [0.1560, 0.1657, 0.1733,  ..., 0.1566, 0.1668, 0.1660],
        ...,
        [0.1566, 0.1667, 0.1731,  ..., 0.1600, 0.1707, 0.1686],
        [0.1613, 0.1645, 0.1817,  ..., 0.1579, 0.1696, 0.1731],
        [0.1569, 0.1637, 0.1766,  ..., 0.1606, 0.1694, 0.1704]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:11,  1.52it/s]

tensor([[0.1560, 0.1666, 0.1739,  ..., 0.1610, 0.1701, 0.1692],
        [0.1569, 0.1670, 0.1715,  ..., 0.1581, 0.1662, 0.1618],
        [0.1605, 0.1667, 0.1779,  ..., 0.1592, 0.1691, 0.1715],
        ...,
        [0.1608, 0.1650, 0.1754,  ..., 0.1603, 0.1700, 0.1763],
        [0.1587, 0.1649, 0.1738,  ..., 0.1584, 0.1701, 0.1723],
        [0.1577, 0.1636, 0.1747,  ..., 0.1609, 0.1685, 0.1702]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:27<00:11,  1.52it/s]

tensor([[0.1572, 0.1645, 0.1739,  ..., 0.1643, 0.1697, 0.1682],
        [0.1572, 0.1661, 0.1739,  ..., 0.1580, 0.1668, 0.1689],
        [0.1538, 0.1672, 0.1718,  ..., 0.1564, 0.1661, 0.1638],
        ...,
        [0.1589, 0.1638, 0.1749,  ..., 0.1559, 0.1692, 0.1708],
        [0.1563, 0.1637, 0.1734,  ..., 0.1582, 0.1697, 0.1714],
        [0.1593, 0.1628, 0.1783,  ..., 0.1584, 0.1680, 0.1701]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:28<00:10,  1.54it/s]

tensor([[0.1551, 0.1683, 0.1727,  ..., 0.1576, 0.1674, 0.1672],
        [0.1587, 0.1654, 0.1747,  ..., 0.1580, 0.1725, 0.1713],
        [0.1587, 0.1637, 0.1750,  ..., 0.1599, 0.1683, 0.1732],
        ...,
        [0.1575, 0.1623, 0.1755,  ..., 0.1581, 0.1683, 0.1712],
        [0.1615, 0.1691, 0.1793,  ..., 0.1600, 0.1728, 0.1723],
        [0.1605, 0.1640, 0.1753,  ..., 0.1579, 0.1702, 0.1724]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:10,  1.48it/s]

tensor([[0.1588, 0.1623, 0.1760,  ..., 0.1581, 0.1706, 0.1749],
        [0.1585, 0.1693, 0.1749,  ..., 0.1608, 0.1687, 0.1691],
        [0.1610, 0.1639, 0.1746,  ..., 0.1609, 0.1703, 0.1740],
        ...,
        [0.1588, 0.1639, 0.1756,  ..., 0.1597, 0.1692, 0.1712],
        [0.1591, 0.1665, 0.1729,  ..., 0.1584, 0.1685, 0.1711],
        [0.1596, 0.1666, 0.1763,  ..., 0.1591, 0.1692, 0.1704]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:29<00:09,  1.47it/s]

tensor([[0.1563, 0.1640, 0.1723,  ..., 0.1584, 0.1655, 0.1664],
        [0.1604, 0.1666, 0.1772,  ..., 0.1594, 0.1684, 0.1724],
        [0.1554, 0.1649, 0.1718,  ..., 0.1567, 0.1666, 0.1660],
        ...,
        [0.1590, 0.1637, 0.1755,  ..., 0.1600, 0.1672, 0.1703],
        [0.1602, 0.1623, 0.1741,  ..., 0.1584, 0.1620, 0.1676],
        [0.1584, 0.1646, 0.1797,  ..., 0.1617, 0.1669, 0.1723]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.49it/s]

tensor([[0.1566, 0.1656, 0.1732,  ..., 0.1568, 0.1682, 0.1720],
        [0.1595, 0.1645, 0.1749,  ..., 0.1607, 0.1683, 0.1744],
        [0.1415, 0.1525, 0.1530,  ..., 0.1433, 0.1532, 0.1519],
        ...,
        [0.1598, 0.1661, 0.1781,  ..., 0.1579, 0.1712, 0.1714],
        [0.1578, 0.1652, 0.1770,  ..., 0.1605, 0.1726, 0.1717],
        [0.1573, 0.1642, 0.1734,  ..., 0.1571, 0.1688, 0.1707]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:08,  1.49it/s]

tensor([[0.1568, 0.1641, 0.1739,  ..., 0.1604, 0.1701, 0.1686],
        [0.1575, 0.1672, 0.1752,  ..., 0.1601, 0.1694, 0.1721],
        [0.1580, 0.1647, 0.1772,  ..., 0.1573, 0.1690, 0.1703],
        ...,
        [0.1549, 0.1611, 0.1704,  ..., 0.1562, 0.1622, 0.1668],
        [0.1598, 0.1702, 0.1788,  ..., 0.1610, 0.1687, 0.1700],
        [0.1612, 0.1626, 0.1782,  ..., 0.1593, 0.1710, 0.1705]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:31<00:07,  1.50it/s]

tensor([[0.1582, 0.1652, 0.1749,  ..., 0.1598, 0.1702, 0.1702],
        [0.1560, 0.1636, 0.1719,  ..., 0.1600, 0.1679, 0.1696],
        [0.1596, 0.1653, 0.1733,  ..., 0.1614, 0.1713, 0.1677],
        ...,
        [0.1581, 0.1656, 0.1758,  ..., 0.1587, 0.1715, 0.1711],
        [0.1589, 0.1665, 0.1742,  ..., 0.1581, 0.1683, 0.1669],
        [0.1585, 0.1632, 0.1750,  ..., 0.1591, 0.1678, 0.1715]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.51it/s]

tensor([[0.1574, 0.1675, 0.1750,  ..., 0.1578, 0.1697, 0.1695],
        [0.1594, 0.1647, 0.1742,  ..., 0.1599, 0.1692, 0.1693],
        [0.1584, 0.1633, 0.1742,  ..., 0.1595, 0.1692, 0.1706],
        ...,
        [0.1580, 0.1641, 0.1785,  ..., 0.1603, 0.1681, 0.1694],
        [0.1549, 0.1629, 0.1711,  ..., 0.1596, 0.1654, 0.1668],
        [0.1428, 0.1538, 0.1544,  ..., 0.1451, 0.1548, 0.1537]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:05,  1.53it/s]

tensor([[0.1541, 0.1622, 0.1674,  ..., 0.1564, 0.1634, 0.1635],
        [0.1578, 0.1621, 0.1748,  ..., 0.1596, 0.1661, 0.1705],
        [0.1591, 0.1639, 0.1773,  ..., 0.1576, 0.1680, 0.1719],
        ...,
        [0.1567, 0.1633, 0.1752,  ..., 0.1615, 0.1663, 0.1682],
        [0.1590, 0.1644, 0.1778,  ..., 0.1581, 0.1687, 0.1723],
        [0.1558, 0.1644, 0.1727,  ..., 0.1572, 0.1681, 0.1704]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:33<00:05,  1.51it/s]

tensor([[0.1556, 0.1649, 0.1754,  ..., 0.1586, 0.1672, 0.1683],
        [0.1596, 0.1661, 0.1753,  ..., 0.1583, 0.1697, 0.1718],
        [0.1611, 0.1658, 0.1792,  ..., 0.1581, 0.1685, 0.1716],
        ...,
        [0.1554, 0.1660, 0.1746,  ..., 0.1580, 0.1682, 0.1686],
        [0.1565, 0.1644, 0.1749,  ..., 0.1585, 0.1701, 0.1700],
        [0.1564, 0.1635, 0.1747,  ..., 0.1577, 0.1674, 0.1705]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.40it/s]

tensor([[0.1613, 0.1659, 0.1772,  ..., 0.1595, 0.1696, 0.1740],
        [0.1602, 0.1657, 0.1769,  ..., 0.1600, 0.1683, 0.1737],
        [0.1566, 0.1643, 0.1739,  ..., 0.1600, 0.1685, 0.1718],
        ...,
        [0.1579, 0.1657, 0.1736,  ..., 0.1559, 0.1642, 0.1645],
        [0.1617, 0.1661, 0.1789,  ..., 0.1586, 0.1690, 0.1701],
        [0.1571, 0.1643, 0.1758,  ..., 0.1577, 0.1690, 0.1707]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:34<00:04,  1.37it/s]

tensor([[0.1525, 0.1640, 0.1700,  ..., 0.1549, 0.1640, 0.1638],
        [0.1592, 0.1657, 0.1765,  ..., 0.1591, 0.1706, 0.1728],
        [0.1588, 0.1656, 0.1777,  ..., 0.1580, 0.1639, 0.1689],
        ...,
        [0.1588, 0.1645, 0.1749,  ..., 0.1590, 0.1683, 0.1714],
        [0.1562, 0.1638, 0.1720,  ..., 0.1579, 0.1697, 0.1689],
        [0.1615, 0.1670, 0.1769,  ..., 0.1617, 0.1709, 0.1760]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.31it/s]

tensor([[0.1590, 0.1643, 0.1737,  ..., 0.1606, 0.1705, 0.1693],
        [0.1540, 0.1638, 0.1721,  ..., 0.1591, 0.1657, 0.1675],
        [0.1548, 0.1640, 0.1725,  ..., 0.1567, 0.1680, 0.1667],
        ...,
        [0.1585, 0.1654, 0.1760,  ..., 0.1606, 0.1689, 0.1680],
        [0.1597, 0.1653, 0.1752,  ..., 0.1591, 0.1687, 0.1694],
        [0.1569, 0.1635, 0.1739,  ..., 0.1574, 0.1659, 0.1699]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:03,  1.31it/s]

tensor([[0.1571, 0.1669, 0.1773,  ..., 0.1568, 0.1677, 0.1707],
        [0.1561, 0.1646, 0.1735,  ..., 0.1596, 0.1676, 0.1712],
        [0.1598, 0.1647, 0.1780,  ..., 0.1592, 0.1681, 0.1722],
        ...,
        [0.1580, 0.1646, 0.1746,  ..., 0.1599, 0.1677, 0.1712],
        [0.1551, 0.1650, 0.1748,  ..., 0.1586, 0.1645, 0.1642],
        [0.1598, 0.1657, 0.1772,  ..., 0.1584, 0.1697, 0.1746]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:02,  1.31it/s]

tensor([[0.1549, 0.1623, 0.1745,  ..., 0.1565, 0.1678, 0.1736],
        [0.1534, 0.1640, 0.1716,  ..., 0.1589, 0.1671, 0.1675],
        [0.1586, 0.1664, 0.1776,  ..., 0.1620, 0.1694, 0.1731],
        ...,
        [0.1573, 0.1657, 0.1751,  ..., 0.1570, 0.1691, 0.1677],
        [0.1579, 0.1625, 0.1755,  ..., 0.1601, 0.1663, 0.1719],
        [0.1602, 0.1658, 0.1772,  ..., 0.1585, 0.1680, 0.1716]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.33it/s]

tensor([[0.1557, 0.1638, 0.1701,  ..., 0.1554, 0.1672, 0.1637],
        [0.1588, 0.1667, 0.1736,  ..., 0.1587, 0.1662, 0.1695],
        [0.1562, 0.1646, 0.1769,  ..., 0.1581, 0.1694, 0.1683],
        ...,
        [0.1567, 0.1647, 0.1745,  ..., 0.1584, 0.1680, 0.1714],
        [0.1607, 0.1648, 0.1770,  ..., 0.1602, 0.1670, 0.1721],
        [0.1605, 0.1660, 0.1769,  ..., 0.1579, 0.1713, 0.1709]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.34it/s]

tensor([[0.1566, 0.1646, 0.1735,  ..., 0.1585, 0.1677, 0.1691],
        [0.1560, 0.1700, 0.1710,  ..., 0.1601, 0.1673, 0.1649],
        [0.1627, 0.1671, 0.1771,  ..., 0.1618, 0.1710, 0.1754],
        ...,
        [0.1608, 0.1643, 0.1780,  ..., 0.1561, 0.1679, 0.1735],
        [0.1550, 0.1638, 0.1753,  ..., 0.1586, 0.1658, 0.1682],
        [0.1572, 0.1645, 0.1743,  ..., 0.1591, 0.1676, 0.1687]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.44it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.38it/s]

tensor([[0.1599, 0.1656, 0.1774,  ..., 0.1600, 0.1694, 0.1746],
        [0.1558, 0.1670, 0.1757,  ..., 0.1562, 0.1685, 0.1690],
        [0.1569, 0.1624, 0.1747,  ..., 0.1569, 0.1686, 0.1709],
        ...,
        [0.1603, 0.1674, 0.1771,  ..., 0.1619, 0.1678, 0.1727],
        [0.1567, 0.1656, 0.1752,  ..., 0.1587, 0.1687, 0.1709],
        [0.1591, 0.1669, 0.1770,  ..., 0.1579, 0.1647, 0.1682]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

tensor([[0.1582, 0.1676, 0.1771,  ..., 0.1569, 0.1676, 0.1726],
        [0.1546, 0.1647, 0.1748,  ..., 0.1595, 0.1656, 0.1657],
        [0.1549, 0.1645, 0.1702,  ..., 0.1567, 0.1662, 0.1640],
        ...,
        [0.1547, 0.1651, 0.1760,  ..., 0.1588, 0.1662, 0.1702],
        [0.1599, 0.1644, 0.1783,  ..., 0.1588, 0.1676, 0.1705],
        [0.1566, 0.1656, 0.1745,  ..., 0.1561, 0.1678, 0.1729]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.16it/s]

tensor([[0.1551, 0.1653, 0.1736,  ..., 0.1582, 0.1668, 0.1672],
        [0.1566, 0.1664, 0.1762,  ..., 0.1595, 0.1668, 0.1745],
        [0.1537, 0.1625, 0.1710,  ..., 0.1598, 0.1650, 0.1663],
        ...,
        [0.1604, 0.1659, 0.1796,  ..., 0.1584, 0.1697, 0.1752],
        [0.1549, 0.1665, 0.1724,  ..., 0.1592, 0.1638, 0.1695],
        [0.1620, 0.1630, 0.1796,  ..., 0.1586, 0.1687, 0.1726]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.15it/s]

tensor([[0.1570, 0.1621, 0.1732,  ..., 0.1589, 0.1672, 0.1724],
        [0.1580, 0.1650, 0.1769,  ..., 0.1603, 0.1688, 0.1712],
        [0.1588, 0.1647, 0.1795,  ..., 0.1587, 0.1688, 0.1719],
        ...,
        [0.1583, 0.1683, 0.1774,  ..., 0.1587, 0.1679, 0.1696],
        [0.1581, 0.1649, 0.1767,  ..., 0.1601, 0.1692, 0.1704],
        [0.1581, 0.1656, 0.1769,  ..., 0.1607, 0.1678, 0.1716]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.05it/s]

tensor([[0.1569, 0.1630, 0.1756,  ..., 0.1581, 0.1688, 0.1728],
        [0.1610, 0.1641, 0.1754,  ..., 0.1587, 0.1706, 0.1690],
        [0.1592, 0.1659, 0.1760,  ..., 0.1567, 0.1683, 0.1688],
        ...,
        [0.1580, 0.1638, 0.1763,  ..., 0.1581, 0.1683, 0.1700],
        [0.1588, 0.1661, 0.1768,  ..., 0.1579, 0.1703, 0.1693],
        [0.1593, 0.1673, 0.1757,  ..., 0.1580, 0.1681, 0.1710]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.03it/s]

tensor([[0.1582, 0.1664, 0.1761,  ..., 0.1582, 0.1693, 0.1716],
        [0.1590, 0.1661, 0.1798,  ..., 0.1578, 0.1706, 0.1687],
        [0.1551, 0.1654, 0.1734,  ..., 0.1553, 0.1644, 0.1652],
        ...,
        [0.1576, 0.1640, 0.1742,  ..., 0.1594, 0.1671, 0.1687],
        [0.1565, 0.1659, 0.1756,  ..., 0.1580, 0.1671, 0.1675],
        [0.1585, 0.1658, 0.1751,  ..., 0.1598, 0.1663, 0.1721]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.10it/s]

tensor([[0.1575, 0.1687, 0.1759,  ..., 0.1574, 0.1652, 0.1664],
        [0.1579, 0.1667, 0.1733,  ..., 0.1581, 0.1656, 0.1658],
        [0.1597, 0.1654, 0.1771,  ..., 0.1604, 0.1680, 0.1699],
        ...,
        [0.1580, 0.1664, 0.1762,  ..., 0.1587, 0.1676, 0.1717],
        [0.1574, 0.1653, 0.1766,  ..., 0.1568, 0.1699, 0.1697],
        [0.1548, 0.1639, 0.1736,  ..., 0.1575, 0.1657, 0.1641]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.10it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1581, 0.1648, 0.1756,  ..., 0.1578, 0.1699, 0.1699],
        [0.1546, 0.1633, 0.1753,  ..., 0.1579, 0.1668, 0.1713],
        [0.1575, 0.1637, 0.1778,  ..., 0.1627, 0.1688, 0.1733],
        ...,
        [0.1591, 0.1639, 0.1749,  ..., 0.1612, 0.1696, 0.1705],
        [0.1581, 0.1646, 0.1745,  ..., 0.1593, 0.1661, 0.1714],
        [0.1560, 0.1644, 0.1743,  ..., 0.1611, 0.1671, 0.1702]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 0, train_loss: 0.9820208549499512, valid_loss: 0.8703474998474121
Parameter containing:
tensor([[ 1.0001,  1.0001,  1.0001,  1.0001,  1.0001,  1.0001,  1.0001,  1.0001,
          1.0001,  1.0001, -0.4999, -0.4999, -0.4999, -0.4999, -0.4999, -0.4999,
         -0.4999, -0.4999, -0.4999, -0.4999],
        

  2%|▏         | 1/57 [00:00<00:44,  1.27it/s]

tensor([[0.1583, 0.1635, 0.1781,  ..., 0.1596, 0.1686, 0.1743],
        [0.1605, 0.1666, 0.1804,  ..., 0.1572, 0.1675, 0.1731],
        [0.1569, 0.1653, 0.1766,  ..., 0.1583, 0.1667, 0.1695],
        ...,
        [0.1572, 0.1682, 0.1771,  ..., 0.1580, 0.1679, 0.1686],
        [0.1552, 0.1653, 0.1744,  ..., 0.1568, 0.1671, 0.1663],
        [0.1558, 0.1628, 0.1707,  ..., 0.1581, 0.1656, 0.1668]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:42,  1.30it/s]

tensor([[0.1555, 0.1638, 0.1733,  ..., 0.1577, 0.1672, 0.1677],
        [0.1588, 0.1617, 0.1758,  ..., 0.1575, 0.1682, 0.1695],
        [0.1614, 0.1653, 0.1771,  ..., 0.1581, 0.1684, 0.1717],
        ...,
        [0.1563, 0.1623, 0.1728,  ..., 0.1584, 0.1657, 0.1665],
        [0.1602, 0.1640, 0.1763,  ..., 0.1604, 0.1702, 0.1743],
        [0.1602, 0.1632, 0.1765,  ..., 0.1581, 0.1684, 0.1731]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:40,  1.33it/s]

tensor([[0.1575, 0.1623, 0.1762,  ..., 0.1564, 0.1670, 0.1705],
        [0.1562, 0.1661, 0.1753,  ..., 0.1570, 0.1673, 0.1709],
        [0.1586, 0.1645, 0.1771,  ..., 0.1589, 0.1694, 0.1721],
        ...,
        [0.1559, 0.1634, 0.1746,  ..., 0.1587, 0.1659, 0.1722],
        [0.1568, 0.1661, 0.1758,  ..., 0.1563, 0.1679, 0.1696],
        [0.1622, 0.1638, 0.1788,  ..., 0.1589, 0.1702, 0.1738]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:39,  1.35it/s]

tensor([[0.1569, 0.1640, 0.1755,  ..., 0.1579, 0.1655, 0.1664],
        [0.1598, 0.1672, 0.1796,  ..., 0.1578, 0.1693, 0.1753],
        [0.1587, 0.1660, 0.1758,  ..., 0.1571, 0.1678, 0.1698],
        ...,
        [0.1573, 0.1635, 0.1766,  ..., 0.1596, 0.1691, 0.1704],
        [0.1603, 0.1642, 0.1775,  ..., 0.1576, 0.1701, 0.1727],
        [0.1591, 0.1658, 0.1757,  ..., 0.1590, 0.1722, 0.1735]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:37,  1.40it/s]

tensor([[0.1585, 0.1660, 0.1748,  ..., 0.1588, 0.1696, 0.1704],
        [0.1543, 0.1665, 0.1750,  ..., 0.1563, 0.1665, 0.1645],
        [0.1605, 0.1662, 0.1746,  ..., 0.1576, 0.1647, 0.1664],
        ...,
        [0.1538, 0.1653, 0.1726,  ..., 0.1577, 0.1686, 0.1651],
        [0.1557, 0.1651, 0.1758,  ..., 0.1584, 0.1690, 0.1689],
        [0.1569, 0.1654, 0.1762,  ..., 0.1606, 0.1699, 0.1691]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:35,  1.44it/s]

tensor([[0.1594, 0.1649, 0.1738,  ..., 0.1555, 0.1689, 0.1698],
        [0.1551, 0.1634, 0.1719,  ..., 0.1598, 0.1700, 0.1706],
        [0.1561, 0.1637, 0.1781,  ..., 0.1587, 0.1667, 0.1693],
        ...,
        [0.1571, 0.1629, 0.1735,  ..., 0.1581, 0.1663, 0.1669],
        [0.1560, 0.1626, 0.1757,  ..., 0.1577, 0.1676, 0.1675],
        [0.1623, 0.1658, 0.1783,  ..., 0.1591, 0.1660, 0.1704]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:34,  1.43it/s]

tensor([[0.1557, 0.1650, 0.1729,  ..., 0.1590, 0.1670, 0.1704],
        [0.1545, 0.1654, 0.1739,  ..., 0.1606, 0.1671, 0.1668],
        [0.1575, 0.1648, 0.1743,  ..., 0.1564, 0.1680, 0.1665],
        ...,
        [0.1543, 0.1645, 0.1735,  ..., 0.1562, 0.1662, 0.1629],
        [0.1570, 0.1650, 0.1764,  ..., 0.1583, 0.1689, 0.1726],
        [0.1587, 0.1685, 0.1782,  ..., 0.1597, 0.1721, 0.1748]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.43it/s]

tensor([[0.1629, 0.1654, 0.1793,  ..., 0.1598, 0.1707, 0.1732],
        [0.1566, 0.1619, 0.1762,  ..., 0.1587, 0.1662, 0.1707],
        [0.1537, 0.1642, 0.1736,  ..., 0.1597, 0.1671, 0.1665],
        ...,
        [0.1591, 0.1645, 0.1771,  ..., 0.1586, 0.1705, 0.1704],
        [0.1582, 0.1639, 0.1787,  ..., 0.1589, 0.1652, 0.1709],
        [0.1589, 0.1684, 0.1754,  ..., 0.1586, 0.1718, 0.1706]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:33,  1.41it/s]

tensor([[0.1584, 0.1672, 0.1774,  ..., 0.1576, 0.1659, 0.1694],
        [0.1604, 0.1669, 0.1789,  ..., 0.1592, 0.1694, 0.1705],
        [0.1564, 0.1662, 0.1751,  ..., 0.1598, 0.1655, 0.1687],
        ...,
        [0.1571, 0.1656, 0.1748,  ..., 0.1616, 0.1670, 0.1693],
        [0.1582, 0.1646, 0.1759,  ..., 0.1584, 0.1671, 0.1708],
        [0.1567, 0.1635, 0.1743,  ..., 0.1579, 0.1682, 0.1690]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:33,  1.41it/s]

tensor([[0.1551, 0.1639, 0.1753,  ..., 0.1568, 0.1651, 0.1654],
        [0.1553, 0.1622, 0.1732,  ..., 0.1577, 0.1671, 0.1712],
        [0.1567, 0.1635, 0.1750,  ..., 0.1602, 0.1689, 0.1705],
        ...,
        [0.1585, 0.1650, 0.1762,  ..., 0.1588, 0.1700, 0.1722],
        [0.1607, 0.1639, 0.1794,  ..., 0.1598, 0.1682, 0.1738],
        [0.1543, 0.1648, 0.1756,  ..., 0.1587, 0.1670, 0.1719]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:32,  1.40it/s]

tensor([[0.1561, 0.1663, 0.1763,  ..., 0.1596, 0.1677, 0.1688],
        [0.1557, 0.1651, 0.1743,  ..., 0.1577, 0.1686, 0.1665],
        [0.1586, 0.1670, 0.1753,  ..., 0.1571, 0.1678, 0.1685],
        ...,
        [0.1587, 0.1653, 0.1749,  ..., 0.1587, 0.1690, 0.1676],
        [0.1553, 0.1629, 0.1733,  ..., 0.1601, 0.1658, 0.1713],
        [0.1583, 0.1665, 0.1772,  ..., 0.1584, 0.1687, 0.1750]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:31,  1.43it/s]

tensor([[0.1568, 0.1678, 0.1775,  ..., 0.1583, 0.1656, 0.1684],
        [0.1563, 0.1631, 0.1742,  ..., 0.1590, 0.1676, 0.1707],
        [0.1576, 0.1662, 0.1756,  ..., 0.1578, 0.1686, 0.1688],
        ...,
        [0.1545, 0.1657, 0.1741,  ..., 0.1566, 0.1667, 0.1665],
        [0.1614, 0.1654, 0.1788,  ..., 0.1590, 0.1692, 0.1738],
        [0.1579, 0.1673, 0.1750,  ..., 0.1577, 0.1654, 0.1631]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:30,  1.46it/s]

tensor([[0.1561, 0.1627, 0.1726,  ..., 0.1582, 0.1656, 0.1711],
        [0.1528, 0.1627, 0.1686,  ..., 0.1558, 0.1638, 0.1656],
        [0.1561, 0.1653, 0.1752,  ..., 0.1568, 0.1686, 0.1706],
        ...,
        [0.1576, 0.1633, 0.1759,  ..., 0.1581, 0.1681, 0.1698],
        [0.1560, 0.1664, 0.1750,  ..., 0.1569, 0.1659, 0.1695],
        [0.1588, 0.1655, 0.1764,  ..., 0.1597, 0.1675, 0.1710]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:29,  1.46it/s]

tensor([[0.1626, 0.1662, 0.1804,  ..., 0.1605, 0.1720, 0.1733],
        [0.1596, 0.1645, 0.1758,  ..., 0.1591, 0.1700, 0.1722],
        [0.1572, 0.1663, 0.1761,  ..., 0.1568, 0.1690, 0.1699],
        ...,
        [0.1585, 0.1676, 0.1771,  ..., 0.1592, 0.1669, 0.1682],
        [0.1598, 0.1648, 0.1787,  ..., 0.1599, 0.1678, 0.1713],
        [0.1580, 0.1635, 0.1758,  ..., 0.1587, 0.1651, 0.1676]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:29,  1.44it/s]

tensor([[0.1556, 0.1649, 0.1744,  ..., 0.1577, 0.1663, 0.1728],
        [0.1588, 0.1639, 0.1773,  ..., 0.1599, 0.1681, 0.1696],
        [0.1562, 0.1672, 0.1768,  ..., 0.1575, 0.1687, 0.1673],
        ...,
        [0.1562, 0.1643, 0.1716,  ..., 0.1607, 0.1649, 0.1651],
        [0.1562, 0.1635, 0.1752,  ..., 0.1587, 0.1670, 0.1685],
        [0.1551, 0.1648, 0.1731,  ..., 0.1585, 0.1679, 0.1686]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:28,  1.46it/s]

tensor([[0.1572, 0.1621, 0.1734,  ..., 0.1585, 0.1674, 0.1710],
        [0.1568, 0.1636, 0.1764,  ..., 0.1573, 0.1685, 0.1699],
        [0.1571, 0.1631, 0.1744,  ..., 0.1598, 0.1685, 0.1726],
        ...,
        [0.1557, 0.1641, 0.1753,  ..., 0.1586, 0.1663, 0.1720],
        [0.1575, 0.1660, 0.1762,  ..., 0.1593, 0.1671, 0.1699],
        [0.1564, 0.1629, 0.1738,  ..., 0.1575, 0.1674, 0.1736]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:27,  1.48it/s]

tensor([[0.1601, 0.1635, 0.1764,  ..., 0.1593, 0.1681, 0.1700],
        [0.1575, 0.1653, 0.1768,  ..., 0.1613, 0.1660, 0.1708],
        [0.1608, 0.1640, 0.1771,  ..., 0.1607, 0.1689, 0.1760],
        ...,
        [0.1575, 0.1649, 0.1747,  ..., 0.1587, 0.1676, 0.1693],
        [0.1563, 0.1650, 0.1749,  ..., 0.1564, 0.1667, 0.1698],
        [0.1581, 0.1612, 0.1760,  ..., 0.1574, 0.1663, 0.1703]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:26,  1.49it/s]

tensor([[0.1555, 0.1641, 0.1762,  ..., 0.1575, 0.1673, 0.1687],
        [0.1543, 0.1613, 0.1721,  ..., 0.1547, 0.1644, 0.1680],
        [0.1580, 0.1650, 0.1738,  ..., 0.1585, 0.1684, 0.1713],
        ...,
        [0.1573, 0.1631, 0.1766,  ..., 0.1567, 0.1674, 0.1671],
        [0.1605, 0.1655, 0.1782,  ..., 0.1598, 0.1687, 0.1744],
        [0.1577, 0.1632, 0.1740,  ..., 0.1597, 0.1665, 0.1693]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:24,  1.53it/s]

tensor([[0.1632, 0.1655, 0.1788,  ..., 0.1610, 0.1693, 0.1718],
        [0.1596, 0.1658, 0.1775,  ..., 0.1581, 0.1685, 0.1743],
        [0.1609, 0.1633, 0.1787,  ..., 0.1615, 0.1677, 0.1726],
        ...,
        [0.1540, 0.1661, 0.1743,  ..., 0.1585, 0.1686, 0.1676],
        [0.1576, 0.1671, 0.1759,  ..., 0.1583, 0.1673, 0.1724],
        [0.1553, 0.1656, 0.1746,  ..., 0.1589, 0.1692, 0.1736]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:24,  1.53it/s]

tensor([[0.1594, 0.1631, 0.1748,  ..., 0.1606, 0.1689, 0.1746],
        [0.1559, 0.1656, 0.1747,  ..., 0.1585, 0.1670, 0.1700],
        [0.1593, 0.1671, 0.1764,  ..., 0.1578, 0.1631, 0.1678],
        ...,
        [0.1576, 0.1637, 0.1772,  ..., 0.1579, 0.1688, 0.1704],
        [0.1591, 0.1639, 0.1766,  ..., 0.1582, 0.1719, 0.1733],
        [0.1565, 0.1626, 0.1743,  ..., 0.1571, 0.1675, 0.1687]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:24,  1.50it/s]

tensor([[0.1592, 0.1654, 0.1789,  ..., 0.1582, 0.1698, 0.1687],
        [0.1598, 0.1655, 0.1786,  ..., 0.1586, 0.1695, 0.1722],
        [0.1575, 0.1620, 0.1763,  ..., 0.1597, 0.1665, 0.1742],
        ...,
        [0.1592, 0.1654, 0.1767,  ..., 0.1604, 0.1714, 0.1708],
        [0.1563, 0.1647, 0.1732,  ..., 0.1593, 0.1666, 0.1704],
        [0.1544, 0.1628, 0.1734,  ..., 0.1579, 0.1679, 0.1736]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:23,  1.49it/s]

tensor([[0.1579, 0.1643, 0.1755,  ..., 0.1586, 0.1675, 0.1692],
        [0.1557, 0.1676, 0.1710,  ..., 0.1600, 0.1650, 0.1627],
        [0.1555, 0.1648, 0.1761,  ..., 0.1573, 0.1667, 0.1722],
        ...,
        [0.1577, 0.1642, 0.1717,  ..., 0.1587, 0.1673, 0.1677],
        [0.1583, 0.1635, 0.1759,  ..., 0.1596, 0.1680, 0.1724],
        [0.1561, 0.1677, 0.1740,  ..., 0.1587, 0.1675, 0.1653]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.50it/s]

tensor([[0.1588, 0.1631, 0.1738,  ..., 0.1604, 0.1679, 0.1725],
        [0.1550, 0.1671, 0.1722,  ..., 0.1575, 0.1661, 0.1636],
        [0.1584, 0.1654, 0.1791,  ..., 0.1577, 0.1683, 0.1704],
        ...,
        [0.1604, 0.1644, 0.1801,  ..., 0.1570, 0.1695, 0.1749],
        [0.1569, 0.1646, 0.1775,  ..., 0.1601, 0.1671, 0.1703],
        [0.1582, 0.1675, 0.1744,  ..., 0.1573, 0.1682, 0.1697]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:22,  1.50it/s]

tensor([[0.1604, 0.1653, 0.1754,  ..., 0.1593, 0.1700, 0.1730],
        [0.1601, 0.1680, 0.1761,  ..., 0.1592, 0.1702, 0.1745],
        [0.1605, 0.1644, 0.1767,  ..., 0.1573, 0.1668, 0.1689],
        ...,
        [0.1554, 0.1652, 0.1777,  ..., 0.1584, 0.1671, 0.1675],
        [0.1565, 0.1642, 0.1783,  ..., 0.1597, 0.1658, 0.1701],
        [0.1545, 0.1636, 0.1741,  ..., 0.1573, 0.1687, 0.1704]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:21,  1.52it/s]

tensor([[0.1568, 0.1660, 0.1748,  ..., 0.1570, 0.1661, 0.1684],
        [0.1596, 0.1667, 0.1775,  ..., 0.1561, 0.1709, 0.1688],
        [0.1587, 0.1665, 0.1785,  ..., 0.1590, 0.1681, 0.1688],
        ...,
        [0.1559, 0.1618, 0.1745,  ..., 0.1606, 0.1671, 0.1736],
        [0.1560, 0.1660, 0.1738,  ..., 0.1556, 0.1650, 0.1658],
        [0.1576, 0.1632, 0.1744,  ..., 0.1587, 0.1684, 0.1741]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.53it/s]

tensor([[0.1552, 0.1634, 0.1742,  ..., 0.1586, 0.1678, 0.1699],
        [0.1551, 0.1661, 0.1742,  ..., 0.1577, 0.1702, 0.1698],
        [0.1573, 0.1632, 0.1750,  ..., 0.1601, 0.1675, 0.1728],
        ...,
        [0.1576, 0.1637, 0.1770,  ..., 0.1597, 0.1681, 0.1698],
        [0.1586, 0.1645, 0.1772,  ..., 0.1576, 0.1673, 0.1708],
        [0.1606, 0.1666, 0.1766,  ..., 0.1596, 0.1685, 0.1725]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:19,  1.53it/s]

tensor([[0.1581, 0.1634, 0.1756,  ..., 0.1584, 0.1665, 0.1690],
        [0.1564, 0.1615, 0.1755,  ..., 0.1569, 0.1668, 0.1705],
        [0.1542, 0.1635, 0.1729,  ..., 0.1570, 0.1658, 0.1654],
        ...,
        [0.1561, 0.1643, 0.1740,  ..., 0.1585, 0.1671, 0.1688],
        [0.1560, 0.1650, 0.1728,  ..., 0.1566, 0.1641, 0.1685],
        [0.1541, 0.1667, 0.1754,  ..., 0.1571, 0.1665, 0.1694]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:18,  1.55it/s]

tensor([[0.1544, 0.1626, 0.1737,  ..., 0.1566, 0.1671, 0.1692],
        [0.1592, 0.1675, 0.1758,  ..., 0.1589, 0.1691, 0.1750],
        [0.1551, 0.1633, 0.1754,  ..., 0.1568, 0.1679, 0.1696],
        ...,
        [0.1595, 0.1630, 0.1751,  ..., 0.1588, 0.1680, 0.1706],
        [0.1569, 0.1649, 0.1772,  ..., 0.1572, 0.1633, 0.1667],
        [0.1562, 0.1648, 0.1745,  ..., 0.1581, 0.1671, 0.1685]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:18,  1.53it/s]

tensor([[0.1577, 0.1650, 0.1770,  ..., 0.1592, 0.1666, 0.1735],
        [0.1568, 0.1669, 0.1739,  ..., 0.1581, 0.1706, 0.1683],
        [0.1574, 0.1629, 0.1736,  ..., 0.1588, 0.1665, 0.1761],
        ...,
        [0.1582, 0.1657, 0.1761,  ..., 0.1591, 0.1686, 0.1720],
        [0.1595, 0.1654, 0.1778,  ..., 0.1585, 0.1678, 0.1741],
        [0.1592, 0.1635, 0.1798,  ..., 0.1586, 0.1683, 0.1715]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:17,  1.53it/s]

tensor([[0.1572, 0.1650, 0.1747,  ..., 0.1605, 0.1693, 0.1709],
        [0.1578, 0.1630, 0.1755,  ..., 0.1564, 0.1623, 0.1668],
        [0.1575, 0.1677, 0.1758,  ..., 0.1590, 0.1712, 0.1727],
        ...,
        [0.1590, 0.1653, 0.1761,  ..., 0.1596, 0.1683, 0.1721],
        [0.1559, 0.1667, 0.1769,  ..., 0.1583, 0.1673, 0.1675],
        [0.1598, 0.1636, 0.1781,  ..., 0.1577, 0.1686, 0.1714]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:17,  1.52it/s]

tensor([[0.1553, 0.1624, 0.1748,  ..., 0.1584, 0.1654, 0.1720],
        [0.1559, 0.1655, 0.1731,  ..., 0.1568, 0.1642, 0.1654],
        [0.1543, 0.1627, 0.1739,  ..., 0.1573, 0.1678, 0.1690],
        ...,
        [0.1553, 0.1646, 0.1738,  ..., 0.1575, 0.1656, 0.1656],
        [0.1557, 0.1663, 0.1760,  ..., 0.1595, 0.1672, 0.1693],
        [0.1573, 0.1659, 0.1767,  ..., 0.1601, 0.1679, 0.1700]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.49it/s]

tensor([[0.1581, 0.1662, 0.1774,  ..., 0.1600, 0.1700, 0.1700],
        [0.1576, 0.1658, 0.1733,  ..., 0.1600, 0.1688, 0.1691],
        [0.1584, 0.1689, 0.1738,  ..., 0.1574, 0.1678, 0.1671],
        ...,
        [0.1541, 0.1633, 0.1738,  ..., 0.1584, 0.1644, 0.1664],
        [0.1559, 0.1650, 0.1739,  ..., 0.1567, 0.1663, 0.1696],
        [0.1574, 0.1671, 0.1752,  ..., 0.1581, 0.1664, 0.1699]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:16,  1.49it/s]

tensor([[0.1581, 0.1658, 0.1753,  ..., 0.1588, 0.1678, 0.1694],
        [0.1589, 0.1635, 0.1762,  ..., 0.1556, 0.1681, 0.1705],
        [0.1546, 0.1616, 0.1715,  ..., 0.1584, 0.1670, 0.1681],
        ...,
        [0.1553, 0.1649, 0.1759,  ..., 0.1578, 0.1672, 0.1689],
        [0.1575, 0.1640, 0.1769,  ..., 0.1594, 0.1672, 0.1709],
        [0.1548, 0.1634, 0.1749,  ..., 0.1614, 0.1663, 0.1715]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.50it/s]

tensor([[0.1595, 0.1665, 0.1781,  ..., 0.1587, 0.1667, 0.1692],
        [0.1567, 0.1657, 0.1714,  ..., 0.1558, 0.1639, 0.1630],
        [0.1580, 0.1669, 0.1741,  ..., 0.1559, 0.1618, 0.1641],
        ...,
        [0.1554, 0.1639, 0.1758,  ..., 0.1600, 0.1677, 0.1719],
        [0.1587, 0.1631, 0.1789,  ..., 0.1583, 0.1642, 0.1685],
        [0.1583, 0.1658, 0.1787,  ..., 0.1580, 0.1678, 0.1694]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.50it/s]

tensor([[0.1573, 0.1676, 0.1749,  ..., 0.1582, 0.1685, 0.1714],
        [0.1589, 0.1639, 0.1763,  ..., 0.1588, 0.1697, 0.1722],
        [0.1579, 0.1673, 0.1732,  ..., 0.1586, 0.1654, 0.1686],
        ...,
        [0.1562, 0.1642, 0.1745,  ..., 0.1571, 0.1674, 0.1725],
        [0.1591, 0.1663, 0.1816,  ..., 0.1588, 0.1678, 0.1721],
        [0.1577, 0.1663, 0.1753,  ..., 0.1594, 0.1644, 0.1667]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:14,  1.49it/s]

tensor([[0.1563, 0.1638, 0.1746,  ..., 0.1580, 0.1689, 0.1697],
        [0.1570, 0.1677, 0.1739,  ..., 0.1558, 0.1664, 0.1678],
        [0.1581, 0.1660, 0.1749,  ..., 0.1565, 0.1671, 0.1684],
        ...,
        [0.1589, 0.1660, 0.1795,  ..., 0.1575, 0.1685, 0.1706],
        [0.1626, 0.1702, 0.1785,  ..., 0.1627, 0.1701, 0.1719],
        [0.1577, 0.1651, 0.1765,  ..., 0.1584, 0.1683, 0.1720]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:13,  1.49it/s]

tensor([[0.1588, 0.1635, 0.1780,  ..., 0.1604, 0.1655, 0.1699],
        [0.1569, 0.1649, 0.1775,  ..., 0.1575, 0.1670, 0.1701],
        [0.1571, 0.1655, 0.1751,  ..., 0.1572, 0.1661, 0.1705],
        ...,
        [0.1590, 0.1634, 0.1762,  ..., 0.1592, 0.1656, 0.1702],
        [0.1572, 0.1654, 0.1760,  ..., 0.1574, 0.1680, 0.1706],
        [0.1594, 0.1645, 0.1763,  ..., 0.1585, 0.1666, 0.1697]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:12,  1.50it/s]

tensor([[0.1573, 0.1651, 0.1746,  ..., 0.1574, 0.1661, 0.1675],
        [0.1549, 0.1638, 0.1758,  ..., 0.1588, 0.1670, 0.1715],
        [0.1571, 0.1658, 0.1770,  ..., 0.1549, 0.1648, 0.1663],
        ...,
        [0.1587, 0.1660, 0.1771,  ..., 0.1588, 0.1683, 0.1710],
        [0.1567, 0.1672, 0.1736,  ..., 0.1577, 0.1646, 0.1684],
        [0.1597, 0.1652, 0.1771,  ..., 0.1587, 0.1685, 0.1728]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:12,  1.46it/s]

tensor([[0.1575, 0.1597, 0.1764,  ..., 0.1569, 0.1664, 0.1705],
        [0.1545, 0.1656, 0.1739,  ..., 0.1611, 0.1645, 0.1671],
        [0.1575, 0.1663, 0.1751,  ..., 0.1580, 0.1659, 0.1702],
        ...,
        [0.1594, 0.1655, 0.1764,  ..., 0.1592, 0.1713, 0.1708],
        [0.1584, 0.1658, 0.1748,  ..., 0.1609, 0.1671, 0.1695],
        [0.1578, 0.1646, 0.1738,  ..., 0.1568, 0.1657, 0.1695]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:27<00:11,  1.47it/s]

tensor([[0.1539, 0.1610, 0.1703,  ..., 0.1560, 0.1624, 0.1691],
        [0.1587, 0.1664, 0.1781,  ..., 0.1581, 0.1697, 0.1732],
        [0.1579, 0.1658, 0.1767,  ..., 0.1569, 0.1684, 0.1694],
        ...,
        [0.1562, 0.1679, 0.1747,  ..., 0.1564, 0.1663, 0.1676],
        [0.1580, 0.1641, 0.1761,  ..., 0.1578, 0.1673, 0.1702],
        [0.1578, 0.1657, 0.1773,  ..., 0.1573, 0.1683, 0.1696]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:10,  1.47it/s]

tensor([[0.1574, 0.1659, 0.1776,  ..., 0.1566, 0.1648, 0.1688],
        [0.1613, 0.1668, 0.1779,  ..., 0.1583, 0.1666, 0.1708],
        [0.1570, 0.1668, 0.1736,  ..., 0.1589, 0.1671, 0.1684],
        ...,
        [0.1565, 0.1647, 0.1748,  ..., 0.1574, 0.1677, 0.1682],
        [0.1600, 0.1663, 0.1791,  ..., 0.1586, 0.1670, 0.1728],
        [0.1564, 0.1636, 0.1741,  ..., 0.1591, 0.1662, 0.1702]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:10,  1.47it/s]

tensor([[0.1586, 0.1680, 0.1773,  ..., 0.1581, 0.1654, 0.1678],
        [0.1551, 0.1615, 0.1731,  ..., 0.1564, 0.1654, 0.1708],
        [0.1592, 0.1653, 0.1779,  ..., 0.1590, 0.1685, 0.1712],
        ...,
        [0.1546, 0.1658, 0.1750,  ..., 0.1577, 0.1671, 0.1683],
        [0.1585, 0.1650, 0.1773,  ..., 0.1586, 0.1683, 0.1685],
        [0.1617, 0.1679, 0.1799,  ..., 0.1582, 0.1653, 0.1675]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:29<00:09,  1.46it/s]

tensor([[0.1552, 0.1650, 0.1740,  ..., 0.1577, 0.1666, 0.1676],
        [0.1537, 0.1665, 0.1763,  ..., 0.1564, 0.1657, 0.1682],
        [0.1566, 0.1608, 0.1736,  ..., 0.1577, 0.1656, 0.1766],
        ...,
        [0.1588, 0.1655, 0.1783,  ..., 0.1613, 0.1681, 0.1745],
        [0.1555, 0.1665, 0.1739,  ..., 0.1605, 0.1660, 0.1662],
        [0.1633, 0.1651, 0.1768,  ..., 0.1598, 0.1688, 0.1733]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:09,  1.44it/s]

tensor([[0.1576, 0.1621, 0.1748,  ..., 0.1582, 0.1671, 0.1732],
        [0.1565, 0.1648, 0.1739,  ..., 0.1588, 0.1670, 0.1720],
        [0.1553, 0.1640, 0.1737,  ..., 0.1600, 0.1647, 0.1668],
        ...,
        [0.1576, 0.1639, 0.1778,  ..., 0.1582, 0.1682, 0.1721],
        [0.1607, 0.1666, 0.1818,  ..., 0.1584, 0.1668, 0.1727],
        [0.1565, 0.1644, 0.1736,  ..., 0.1565, 0.1627, 0.1639]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:08,  1.44it/s]

tensor([[0.1535, 0.1642, 0.1742,  ..., 0.1574, 0.1655, 0.1707],
        [0.1585, 0.1633, 0.1759,  ..., 0.1581, 0.1685, 0.1716],
        [0.1571, 0.1619, 0.1735,  ..., 0.1576, 0.1654, 0.1712],
        ...,
        [0.1577, 0.1626, 0.1769,  ..., 0.1579, 0.1657, 0.1711],
        [0.1599, 0.1662, 0.1773,  ..., 0.1569, 0.1696, 0.1745],
        [0.1572, 0.1657, 0.1749,  ..., 0.1577, 0.1657, 0.1687]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:31<00:07,  1.42it/s]

tensor([[0.1550, 0.1659, 0.1749,  ..., 0.1592, 0.1678, 0.1673],
        [0.1587, 0.1658, 0.1760,  ..., 0.1580, 0.1670, 0.1699],
        [0.1572, 0.1670, 0.1752,  ..., 0.1580, 0.1676, 0.1700],
        ...,
        [0.1560, 0.1639, 0.1753,  ..., 0.1569, 0.1692, 0.1682],
        [0.1557, 0.1641, 0.1751,  ..., 0.1603, 0.1671, 0.1697],
        [0.1597, 0.1625, 0.1761,  ..., 0.1584, 0.1672, 0.1714]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:07,  1.40it/s]

tensor([[0.1566, 0.1654, 0.1762,  ..., 0.1566, 0.1671, 0.1693],
        [0.1572, 0.1664, 0.1748,  ..., 0.1557, 0.1642, 0.1662],
        [0.1555, 0.1662, 0.1762,  ..., 0.1595, 0.1665, 0.1718],
        ...,
        [0.1589, 0.1655, 0.1767,  ..., 0.1583, 0.1680, 0.1713],
        [0.1575, 0.1663, 0.1800,  ..., 0.1572, 0.1671, 0.1693],
        [0.1572, 0.1648, 0.1730,  ..., 0.1614, 0.1686, 0.1662]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:06,  1.39it/s]

tensor([[0.1562, 0.1630, 0.1749,  ..., 0.1584, 0.1657, 0.1742],
        [0.1587, 0.1639, 0.1768,  ..., 0.1573, 0.1709, 0.1722],
        [0.1564, 0.1666, 0.1766,  ..., 0.1572, 0.1667, 0.1695],
        ...,
        [0.1590, 0.1686, 0.1774,  ..., 0.1587, 0.1658, 0.1687],
        [0.1564, 0.1614, 0.1743,  ..., 0.1562, 0.1656, 0.1728],
        [0.1579, 0.1619, 0.1757,  ..., 0.1569, 0.1675, 0.1710]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:33<00:05,  1.35it/s]

tensor([[0.1591, 0.1626, 0.1727,  ..., 0.1563, 0.1667, 0.1692],
        [0.1545, 0.1649, 0.1728,  ..., 0.1568, 0.1657, 0.1663],
        [0.1534, 0.1635, 0.1725,  ..., 0.1606, 0.1659, 0.1668],
        ...,
        [0.1579, 0.1639, 0.1768,  ..., 0.1583, 0.1654, 0.1689],
        [0.1568, 0.1637, 0.1748,  ..., 0.1574, 0.1658, 0.1703],
        [0.1591, 0.1639, 0.1772,  ..., 0.1588, 0.1679, 0.1735]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:05,  1.36it/s]

tensor([[0.1565, 0.1668, 0.1751,  ..., 0.1566, 0.1667, 0.1688],
        [0.1569, 0.1661, 0.1761,  ..., 0.1574, 0.1645, 0.1668],
        [0.1553, 0.1636, 0.1763,  ..., 0.1593, 0.1679, 0.1688],
        ...,
        [0.1599, 0.1656, 0.1800,  ..., 0.1593, 0.1640, 0.1699],
        [0.1571, 0.1667, 0.1739,  ..., 0.1564, 0.1653, 0.1674],
        [0.1602, 0.1675, 0.1763,  ..., 0.1592, 0.1683, 0.1698]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:04,  1.39it/s]

tensor([[0.1562, 0.1662, 0.1775,  ..., 0.1562, 0.1660, 0.1667],
        [0.1566, 0.1682, 0.1743,  ..., 0.1572, 0.1673, 0.1701],
        [0.1561, 0.1665, 0.1753,  ..., 0.1591, 0.1677, 0.1686],
        ...,
        [0.1543, 0.1648, 0.1745,  ..., 0.1556, 0.1648, 0.1664],
        [0.1571, 0.1658, 0.1743,  ..., 0.1579, 0.1673, 0.1696],
        [0.1552, 0.1618, 0.1753,  ..., 0.1582, 0.1659, 0.1694]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.39it/s]

tensor([[0.1588, 0.1664, 0.1796,  ..., 0.1589, 0.1686, 0.1719],
        [0.1598, 0.1651, 0.1769,  ..., 0.1594, 0.1669, 0.1725],
        [0.1558, 0.1683, 0.1749,  ..., 0.1587, 0.1659, 0.1687],
        ...,
        [0.1577, 0.1644, 0.1765,  ..., 0.1597, 0.1699, 0.1676],
        [0.1544, 0.1616, 0.1744,  ..., 0.1564, 0.1665, 0.1691],
        [0.1570, 0.1658, 0.1722,  ..., 0.1567, 0.1658, 0.1677]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.40it/s]

tensor([[0.1592, 0.1646, 0.1783,  ..., 0.1594, 0.1667, 0.1723],
        [0.1588, 0.1685, 0.1749,  ..., 0.1564, 0.1654, 0.1661],
        [0.1547, 0.1625, 0.1739,  ..., 0.1594, 0.1605, 0.1658],
        ...,
        [0.1566, 0.1629, 0.1755,  ..., 0.1571, 0.1663, 0.1726],
        [0.1570, 0.1635, 0.1756,  ..., 0.1593, 0.1668, 0.1713],
        [0.1570, 0.1627, 0.1790,  ..., 0.1570, 0.1663, 0.1701]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:02,  1.42it/s]

tensor([[0.1575, 0.1655, 0.1751,  ..., 0.1589, 0.1657, 0.1684],
        [0.1581, 0.1644, 0.1765,  ..., 0.1587, 0.1681, 0.1717],
        [0.1592, 0.1669, 0.1770,  ..., 0.1606, 0.1677, 0.1688],
        ...,
        [0.1574, 0.1650, 0.1759,  ..., 0.1571, 0.1653, 0.1697],
        [0.1550, 0.1652, 0.1742,  ..., 0.1566, 0.1651, 0.1658],
        [0.1574, 0.1653, 0.1736,  ..., 0.1573, 0.1671, 0.1701]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:37<00:01,  1.43it/s]

tensor([[0.1556, 0.1636, 0.1727,  ..., 0.1593, 0.1645, 0.1668],
        [0.1554, 0.1655, 0.1754,  ..., 0.1576, 0.1706, 0.1684],
        [0.1574, 0.1667, 0.1734,  ..., 0.1569, 0.1666, 0.1663],
        ...,
        [0.1584, 0.1684, 0.1773,  ..., 0.1594, 0.1676, 0.1732],
        [0.1550, 0.1625, 0.1758,  ..., 0.1580, 0.1640, 0.1684],
        [0.1558, 0.1645, 0.1733,  ..., 0.1584, 0.1659, 0.1664]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.45it/s]

tensor([[0.1564, 0.1657, 0.1744,  ..., 0.1595, 0.1647, 0.1686],
        [0.1577, 0.1650, 0.1757,  ..., 0.1583, 0.1613, 0.1695],
        [0.1575, 0.1666, 0.1756,  ..., 0.1557, 0.1679, 0.1681],
        ...,
        [0.1549, 0.1651, 0.1766,  ..., 0.1593, 0.1684, 0.1686],
        [0.1550, 0.1664, 0.1734,  ..., 0.1553, 0.1649, 0.1656],
        [0.1557, 0.1630, 0.1747,  ..., 0.1578, 0.1642, 0.1690]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.46it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.41it/s]

tensor([[0.1581, 0.1650, 0.1759,  ..., 0.1617, 0.1660, 0.1708],
        [0.1556, 0.1645, 0.1743,  ..., 0.1572, 0.1691, 0.1700],
        [0.1575, 0.1660, 0.1742,  ..., 0.1566, 0.1643, 0.1668],
        ...,
        [0.1584, 0.1641, 0.1746,  ..., 0.1593, 0.1682, 0.1718],
        [0.1553, 0.1618, 0.1736,  ..., 0.1576, 0.1614, 0.1662],
        [0.1591, 0.1660, 0.1788,  ..., 0.1579, 0.1664, 0.1707]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.32it/s]

tensor([[0.1595, 0.1646, 0.1820,  ..., 0.1597, 0.1654, 0.1716],
        [0.1591, 0.1682, 0.1745,  ..., 0.1572, 0.1701, 0.1688],
        [0.1555, 0.1640, 0.1750,  ..., 0.1571, 0.1668, 0.1697],
        ...,
        [0.1556, 0.1681, 0.1749,  ..., 0.1563, 0.1645, 0.1655],
        [0.1573, 0.1662, 0.1758,  ..., 0.1559, 0.1673, 0.1679],
        [0.1595, 0.1639, 0.1762,  ..., 0.1590, 0.1674, 0.1723]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.32it/s]

tensor([[0.1594, 0.1654, 0.1750,  ..., 0.1579, 0.1641, 0.1644],
        [0.1586, 0.1658, 0.1765,  ..., 0.1573, 0.1646, 0.1705],
        [0.1553, 0.1620, 0.1749,  ..., 0.1568, 0.1655, 0.1687],
        ...,
        [0.1607, 0.1644, 0.1760,  ..., 0.1583, 0.1658, 0.1746],
        [0.1569, 0.1655, 0.1756,  ..., 0.1549, 0.1630, 0.1704],
        [0.1568, 0.1647, 0.1757,  ..., 0.1580, 0.1684, 0.1752]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.26it/s]

tensor([[0.1554, 0.1655, 0.1758,  ..., 0.1579, 0.1667, 0.1684],
        [0.1571, 0.1655, 0.1754,  ..., 0.1585, 0.1680, 0.1694],
        [0.1569, 0.1669, 0.1744,  ..., 0.1587, 0.1690, 0.1698],
        ...,
        [0.1591, 0.1629, 0.1768,  ..., 0.1581, 0.1692, 0.1707],
        [0.1585, 0.1643, 0.1769,  ..., 0.1576, 0.1688, 0.1717],
        [0.1560, 0.1652, 0.1760,  ..., 0.1569, 0.1675, 0.1685]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.34it/s]

tensor([[0.1582, 0.1651, 0.1756,  ..., 0.1580, 0.1678, 0.1677],
        [0.1588, 0.1647, 0.1785,  ..., 0.1588, 0.1675, 0.1738],
        [0.1579, 0.1670, 0.1757,  ..., 0.1551, 0.1674, 0.1699],
        ...,
        [0.1542, 0.1636, 0.1750,  ..., 0.1593, 0.1662, 0.1680],
        [0.1539, 0.1624, 0.1697,  ..., 0.1550, 0.1633, 0.1619],
        [0.1564, 0.1638, 0.1740,  ..., 0.1601, 0.1666, 0.1707]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.40it/s]

tensor([[0.1583, 0.1655, 0.1765,  ..., 0.1579, 0.1666, 0.1698],
        [0.1562, 0.1629, 0.1746,  ..., 0.1572, 0.1679, 0.1729],
        [0.1610, 0.1688, 0.1848,  ..., 0.1598, 0.1705, 0.1770],
        ...,
        [0.1606, 0.1668, 0.1807,  ..., 0.1574, 0.1698, 0.1707],
        [0.1580, 0.1671, 0.1748,  ..., 0.1602, 0.1673, 0.1683],
        [0.1568, 0.1671, 0.1773,  ..., 0.1583, 0.1669, 0.1693]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.44it/s]

tensor([[0.1552, 0.1624, 0.1737,  ..., 0.1584, 0.1655, 0.1715],
        [0.1544, 0.1626, 0.1721,  ..., 0.1570, 0.1648, 0.1659],
        [0.1549, 0.1640, 0.1723,  ..., 0.1599, 0.1662, 0.1686],
        ...,
        [0.1555, 0.1654, 0.1752,  ..., 0.1591, 0.1667, 0.1693],
        [0.1572, 0.1626, 0.1741,  ..., 0.1571, 0.1654, 0.1689],
        [0.1545, 0.1629, 0.1734,  ..., 0.1558, 0.1637, 0.1682]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.38it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1597, 0.1638, 0.1768,  ..., 0.1578, 0.1651, 0.1704],
        [0.1559, 0.1627, 0.1724,  ..., 0.1606, 0.1652, 0.1725],
        [0.1549, 0.1640, 0.1736,  ..., 0.1567, 0.1673, 0.1688],
        ...,
        [0.1588, 0.1652, 0.1765,  ..., 0.1595, 0.1687, 0.1713],
        [0.1579, 0.1640, 0.1780,  ..., 0.1581, 0.1677, 0.1760],
        [0.1606, 0.1665, 0.1776,  ..., 0.1599, 0.1687, 0.1708]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 1, train_loss: 0.7630578279495239, valid_loss: 0.6494913697242737
Parameter containing:
tensor([[ 1.0002,  1.0003,  1.0003,  1.0003,  1.0002,  1.0002,  1.0003,  1.0002,
          1.0002,  1.0003, -0.4997, -0.4998, -0.4998, -0.4997, -0.4997, -0.4997,
         -0.4997, -0.4998, -0.4997, -0.4998],
        

  2%|▏         | 1/57 [00:00<00:35,  1.57it/s]

tensor([[0.1566, 0.1643, 0.1740,  ..., 0.1579, 0.1654, 0.1703],
        [0.1593, 0.1657, 0.1806,  ..., 0.1598, 0.1689, 0.1716],
        [0.1587, 0.1665, 0.1786,  ..., 0.1600, 0.1682, 0.1714],
        ...,
        [0.1561, 0.1625, 0.1743,  ..., 0.1588, 0.1640, 0.1656],
        [0.1554, 0.1670, 0.1742,  ..., 0.1554, 0.1638, 0.1662],
        [0.1575, 0.1650, 0.1747,  ..., 0.1590, 0.1675, 0.1711]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:35,  1.55it/s]

tensor([[0.1572, 0.1676, 0.1740,  ..., 0.1553, 0.1640, 0.1632],
        [0.1565, 0.1667, 0.1741,  ..., 0.1586, 0.1679, 0.1718],
        [0.1587, 0.1651, 0.1778,  ..., 0.1594, 0.1657, 0.1733],
        ...,
        [0.1580, 0.1663, 0.1774,  ..., 0.1567, 0.1668, 0.1707],
        [0.1560, 0.1645, 0.1767,  ..., 0.1560, 0.1645, 0.1684],
        [0.1552, 0.1624, 0.1728,  ..., 0.1560, 0.1657, 0.1706]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:34,  1.55it/s]

tensor([[0.1549, 0.1642, 0.1741,  ..., 0.1591, 0.1658, 0.1702],
        [0.1568, 0.1662, 0.1768,  ..., 0.1560, 0.1669, 0.1676],
        [0.1598, 0.1691, 0.1810,  ..., 0.1565, 0.1649, 0.1690],
        ...,
        [0.1608, 0.1668, 0.1786,  ..., 0.1594, 0.1707, 0.1721],
        [0.1584, 0.1668, 0.1773,  ..., 0.1593, 0.1668, 0.1730],
        [0.1559, 0.1640, 0.1755,  ..., 0.1579, 0.1676, 0.1696]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:34,  1.56it/s]

tensor([[0.1565, 0.1647, 0.1749,  ..., 0.1568, 0.1657, 0.1719],
        [0.1581, 0.1646, 0.1758,  ..., 0.1572, 0.1663, 0.1715],
        [0.1581, 0.1633, 0.1734,  ..., 0.1609, 0.1657, 0.1721],
        ...,
        [0.1567, 0.1680, 0.1765,  ..., 0.1571, 0.1667, 0.1693],
        [0.1547, 0.1677, 0.1754,  ..., 0.1584, 0.1673, 0.1680],
        [0.1578, 0.1670, 0.1787,  ..., 0.1573, 0.1654, 0.1681]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:33,  1.54it/s]

tensor([[0.1584, 0.1642, 0.1767,  ..., 0.1594, 0.1680, 0.1712],
        [0.1616, 0.1657, 0.1798,  ..., 0.1633, 0.1697, 0.1714],
        [0.1584, 0.1700, 0.1747,  ..., 0.1572, 0.1639, 0.1648],
        ...,
        [0.1595, 0.1673, 0.1794,  ..., 0.1594, 0.1647, 0.1696],
        [0.1558, 0.1625, 0.1753,  ..., 0.1562, 0.1646, 0.1690],
        [0.1588, 0.1672, 0.1770,  ..., 0.1577, 0.1660, 0.1690]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.54it/s]

tensor([[0.1601, 0.1676, 0.1763,  ..., 0.1599, 0.1705, 0.1719],
        [0.1563, 0.1649, 0.1729,  ..., 0.1579, 0.1663, 0.1673],
        [0.1573, 0.1653, 0.1761,  ..., 0.1596, 0.1652, 0.1711],
        ...,
        [0.1559, 0.1653, 0.1737,  ..., 0.1581, 0.1673, 0.1690],
        [0.1580, 0.1644, 0.1756,  ..., 0.1581, 0.1670, 0.1711],
        [0.1592, 0.1672, 0.1779,  ..., 0.1584, 0.1677, 0.1701]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:32,  1.54it/s]

tensor([[0.1446, 0.1574, 0.1591,  ..., 0.1462, 0.1553, 0.1562],
        [0.1582, 0.1645, 0.1763,  ..., 0.1576, 0.1662, 0.1690],
        [0.1607, 0.1673, 0.1783,  ..., 0.1571, 0.1661, 0.1691],
        ...,
        [0.1579, 0.1667, 0.1761,  ..., 0.1560, 0.1664, 0.1649],
        [0.1583, 0.1638, 0.1790,  ..., 0.1575, 0.1655, 0.1703],
        [0.1560, 0.1642, 0.1756,  ..., 0.1586, 0.1650, 0.1698]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.50it/s]

tensor([[0.1573, 0.1668, 0.1745,  ..., 0.1566, 0.1678, 0.1686],
        [0.1595, 0.1663, 0.1798,  ..., 0.1577, 0.1662, 0.1668],
        [0.1578, 0.1626, 0.1767,  ..., 0.1585, 0.1667, 0.1750],
        ...,
        [0.1546, 0.1647, 0.1744,  ..., 0.1556, 0.1656, 0.1686],
        [0.1581, 0.1679, 0.1777,  ..., 0.1589, 0.1685, 0.1674],
        [0.1542, 0.1644, 0.1735,  ..., 0.1572, 0.1652, 0.1678]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:32,  1.49it/s]

tensor([[0.1561, 0.1601, 0.1733,  ..., 0.1589, 0.1625, 0.1713],
        [0.1576, 0.1646, 0.1759,  ..., 0.1572, 0.1671, 0.1703],
        [0.1587, 0.1649, 0.1792,  ..., 0.1591, 0.1670, 0.1713],
        ...,
        [0.1582, 0.1653, 0.1752,  ..., 0.1565, 0.1630, 0.1682],
        [0.1557, 0.1613, 0.1732,  ..., 0.1562, 0.1658, 0.1710],
        [0.1581, 0.1632, 0.1746,  ..., 0.1575, 0.1663, 0.1757]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.48it/s]

tensor([[0.1620, 0.1681, 0.1787,  ..., 0.1599, 0.1689, 0.1727],
        [0.1553, 0.1655, 0.1717,  ..., 0.1593, 0.1647, 0.1650],
        [0.1547, 0.1626, 0.1740,  ..., 0.1588, 0.1647, 0.1676],
        ...,
        [0.1557, 0.1631, 0.1745,  ..., 0.1569, 0.1661, 0.1716],
        [0.1590, 0.1632, 0.1743,  ..., 0.1591, 0.1655, 0.1709],
        [0.1537, 0.1620, 0.1710,  ..., 0.1539, 0.1610, 0.1622]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:30,  1.51it/s]

tensor([[0.1562, 0.1622, 0.1761,  ..., 0.1605, 0.1659, 0.1705],
        [0.1572, 0.1652, 0.1768,  ..., 0.1589, 0.1679, 0.1701],
        [0.1553, 0.1650, 0.1756,  ..., 0.1577, 0.1653, 0.1672],
        ...,
        [0.1557, 0.1644, 0.1751,  ..., 0.1580, 0.1649, 0.1676],
        [0.1605, 0.1669, 0.1756,  ..., 0.1596, 0.1687, 0.1682],
        [0.1568, 0.1680, 0.1764,  ..., 0.1580, 0.1654, 0.1657]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:07<00:29,  1.51it/s]

tensor([[0.1581, 0.1669, 0.1767,  ..., 0.1557, 0.1665, 0.1707],
        [0.1565, 0.1633, 0.1740,  ..., 0.1590, 0.1678, 0.1687],
        [0.1601, 0.1638, 0.1789,  ..., 0.1591, 0.1639, 0.1725],
        ...,
        [0.1563, 0.1628, 0.1751,  ..., 0.1568, 0.1640, 0.1664],
        [0.1563, 0.1649, 0.1738,  ..., 0.1586, 0.1672, 0.1684],
        [0.1562, 0.1647, 0.1749,  ..., 0.1570, 0.1659, 0.1721]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:29,  1.51it/s]

tensor([[0.1567, 0.1661, 0.1777,  ..., 0.1570, 0.1670, 0.1699],
        [0.1551, 0.1656, 0.1765,  ..., 0.1595, 0.1666, 0.1684],
        [0.1608, 0.1677, 0.1769,  ..., 0.1588, 0.1669, 0.1690],
        ...,
        [0.1544, 0.1653, 0.1745,  ..., 0.1572, 0.1668, 0.1667],
        [0.1553, 0.1685, 0.1745,  ..., 0.1573, 0.1647, 0.1670],
        [0.1561, 0.1620, 0.1749,  ..., 0.1582, 0.1694, 0.1691]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.52it/s]

tensor([[0.1612, 0.1659, 0.1755,  ..., 0.1580, 0.1681, 0.1672],
        [0.1576, 0.1639, 0.1756,  ..., 0.1587, 0.1636, 0.1680],
        [0.1582, 0.1654, 0.1767,  ..., 0.1565, 0.1663, 0.1706],
        ...,
        [0.1567, 0.1663, 0.1780,  ..., 0.1557, 0.1659, 0.1712],
        [0.1604, 0.1652, 0.1803,  ..., 0.1577, 0.1662, 0.1721],
        [0.1564, 0.1659, 0.1712,  ..., 0.1555, 0.1632, 0.1631]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:09<00:27,  1.51it/s]

tensor([[0.1572, 0.1647, 0.1760,  ..., 0.1587, 0.1689, 0.1744],
        [0.1585, 0.1656, 0.1770,  ..., 0.1580, 0.1672, 0.1738],
        [0.1587, 0.1668, 0.1747,  ..., 0.1560, 0.1663, 0.1691],
        ...,
        [0.1557, 0.1634, 0.1740,  ..., 0.1586, 0.1667, 0.1698],
        [0.1568, 0.1653, 0.1751,  ..., 0.1588, 0.1624, 0.1673],
        [0.1569, 0.1646, 0.1744,  ..., 0.1570, 0.1655, 0.1654]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:26,  1.52it/s]

tensor([[0.1548, 0.1644, 0.1751,  ..., 0.1563, 0.1640, 0.1656],
        [0.1606, 0.1652, 0.1753,  ..., 0.1568, 0.1634, 0.1662],
        [0.1579, 0.1660, 0.1764,  ..., 0.1592, 0.1662, 0.1701],
        ...,
        [0.1522, 0.1621, 0.1709,  ..., 0.1560, 0.1611, 0.1625],
        [0.1579, 0.1647, 0.1729,  ..., 0.1577, 0.1653, 0.1680],
        [0.1573, 0.1676, 0.1758,  ..., 0.1577, 0.1669, 0.1695]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:26,  1.53it/s]

tensor([[0.1534, 0.1625, 0.1728,  ..., 0.1572, 0.1654, 0.1656],
        [0.1565, 0.1636, 0.1758,  ..., 0.1584, 0.1651, 0.1708],
        [0.1548, 0.1641, 0.1750,  ..., 0.1571, 0.1647, 0.1699],
        ...,
        [0.1555, 0.1657, 0.1752,  ..., 0.1582, 0.1667, 0.1692],
        [0.1526, 0.1641, 0.1712,  ..., 0.1570, 0.1607, 0.1686],
        [0.1593, 0.1651, 0.1760,  ..., 0.1576, 0.1679, 0.1677]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:11<00:25,  1.51it/s]

tensor([[0.1592, 0.1697, 0.1765,  ..., 0.1581, 0.1688, 0.1712],
        [0.1586, 0.1636, 0.1732,  ..., 0.1594, 0.1662, 0.1701],
        [0.1573, 0.1634, 0.1752,  ..., 0.1585, 0.1665, 0.1733],
        ...,
        [0.1563, 0.1648, 0.1750,  ..., 0.1593, 0.1652, 0.1698],
        [0.1574, 0.1665, 0.1765,  ..., 0.1590, 0.1652, 0.1691],
        [0.1588, 0.1665, 0.1799,  ..., 0.1585, 0.1664, 0.1689]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:26,  1.46it/s]

tensor([[0.1555, 0.1633, 0.1762,  ..., 0.1595, 0.1657, 0.1704],
        [0.1541, 0.1634, 0.1728,  ..., 0.1575, 0.1664, 0.1732],
        [0.1557, 0.1642, 0.1737,  ..., 0.1602, 0.1657, 0.1728],
        ...,
        [0.1569, 0.1674, 0.1764,  ..., 0.1556, 0.1658, 0.1666],
        [0.1602, 0.1654, 0.1797,  ..., 0.1573, 0.1658, 0.1678],
        [0.1601, 0.1671, 0.1796,  ..., 0.1589, 0.1682, 0.1704]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:25,  1.47it/s]

tensor([[0.1592, 0.1653, 0.1761,  ..., 0.1588, 0.1676, 0.1725],
        [0.1553, 0.1633, 0.1741,  ..., 0.1556, 0.1647, 0.1698],
        [0.1553, 0.1650, 0.1723,  ..., 0.1594, 0.1666, 0.1700],
        ...,
        [0.1573, 0.1649, 0.1739,  ..., 0.1592, 0.1654, 0.1720],
        [0.1576, 0.1656, 0.1775,  ..., 0.1607, 0.1654, 0.1698],
        [0.1573, 0.1673, 0.1769,  ..., 0.1578, 0.1675, 0.1696]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:13<00:24,  1.49it/s]

tensor([[0.1562, 0.1669, 0.1739,  ..., 0.1578, 0.1638, 0.1657],
        [0.1552, 0.1646, 0.1724,  ..., 0.1581, 0.1670, 0.1675],
        [0.1565, 0.1666, 0.1777,  ..., 0.1562, 0.1657, 0.1687],
        ...,
        [0.1584, 0.1641, 0.1780,  ..., 0.1601, 0.1664, 0.1733],
        [0.1555, 0.1633, 0.1747,  ..., 0.1558, 0.1644, 0.1703],
        [0.1548, 0.1669, 0.1758,  ..., 0.1582, 0.1657, 0.1685]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:14<00:23,  1.50it/s]

tensor([[0.1549, 0.1634, 0.1741,  ..., 0.1567, 0.1621, 0.1694],
        [0.1574, 0.1637, 0.1759,  ..., 0.1582, 0.1671, 0.1697],
        [0.1536, 0.1615, 0.1697,  ..., 0.1542, 0.1603, 0.1639],
        ...,
        [0.1556, 0.1621, 0.1749,  ..., 0.1569, 0.1679, 0.1735],
        [0.1570, 0.1666, 0.1756,  ..., 0.1610, 0.1669, 0.1683],
        [0.1552, 0.1642, 0.1713,  ..., 0.1577, 0.1613, 0.1663]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.51it/s]

tensor([[0.1554, 0.1646, 0.1731,  ..., 0.1552, 0.1655, 0.1667],
        [0.1534, 0.1629, 0.1734,  ..., 0.1577, 0.1655, 0.1692],
        [0.1557, 0.1616, 0.1738,  ..., 0.1592, 0.1650, 0.1680],
        ...,
        [0.1553, 0.1622, 0.1736,  ..., 0.1605, 0.1650, 0.1691],
        [0.1592, 0.1668, 0.1772,  ..., 0.1587, 0.1695, 0.1735],
        [0.1573, 0.1632, 0.1759,  ..., 0.1589, 0.1672, 0.1691]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:15<00:21,  1.52it/s]

tensor([[0.1571, 0.1647, 0.1765,  ..., 0.1579, 0.1673, 0.1713],
        [0.1556, 0.1637, 0.1764,  ..., 0.1587, 0.1660, 0.1698],
        [0.1572, 0.1658, 0.1767,  ..., 0.1566, 0.1666, 0.1696],
        ...,
        [0.1586, 0.1637, 0.1771,  ..., 0.1569, 0.1623, 0.1681],
        [0.1560, 0.1604, 0.1725,  ..., 0.1567, 0.1643, 0.1674],
        [0.1562, 0.1632, 0.1772,  ..., 0.1571, 0.1674, 0.1721]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:16<00:20,  1.54it/s]

tensor([[0.1546, 0.1627, 0.1747,  ..., 0.1584, 0.1644, 0.1703],
        [0.1554, 0.1594, 0.1736,  ..., 0.1580, 0.1645, 0.1690],
        [0.1552, 0.1647, 0.1740,  ..., 0.1570, 0.1645, 0.1657],
        ...,
        [0.1527, 0.1636, 0.1726,  ..., 0.1573, 0.1643, 0.1693],
        [0.1595, 0.1661, 0.1780,  ..., 0.1575, 0.1617, 0.1658],
        [0.1570, 0.1660, 0.1737,  ..., 0.1589, 0.1646, 0.1702]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.52it/s]

tensor([[0.1566, 0.1632, 0.1747,  ..., 0.1567, 0.1646, 0.1709],
        [0.1575, 0.1619, 0.1744,  ..., 0.1560, 0.1656, 0.1696],
        [0.1566, 0.1657, 0.1769,  ..., 0.1586, 0.1650, 0.1658],
        ...,
        [0.1615, 0.1671, 0.1803,  ..., 0.1585, 0.1656, 0.1708],
        [0.1571, 0.1660, 0.1802,  ..., 0.1569, 0.1649, 0.1682],
        [0.1565, 0.1662, 0.1727,  ..., 0.1572, 0.1653, 0.1662]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:17<00:19,  1.53it/s]

tensor([[0.1574, 0.1685, 0.1741,  ..., 0.1585, 0.1686, 0.1692],
        [0.1547, 0.1641, 0.1753,  ..., 0.1581, 0.1666, 0.1698],
        [0.1567, 0.1635, 0.1763,  ..., 0.1573, 0.1649, 0.1698],
        ...,
        [0.1583, 0.1662, 0.1751,  ..., 0.1563, 0.1665, 0.1703],
        [0.1573, 0.1670, 0.1768,  ..., 0.1580, 0.1654, 0.1665],
        [0.1544, 0.1631, 0.1755,  ..., 0.1583, 0.1653, 0.1686]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:18<00:19,  1.50it/s]

tensor([[0.1585, 0.1651, 0.1753,  ..., 0.1566, 0.1657, 0.1703],
        [0.1556, 0.1646, 0.1747,  ..., 0.1570, 0.1659, 0.1668],
        [0.1563, 0.1637, 0.1742,  ..., 0.1564, 0.1631, 0.1672],
        ...,
        [0.1609, 0.1664, 0.1786,  ..., 0.1607, 0.1692, 0.1714],
        [0.1580, 0.1665, 0.1776,  ..., 0.1579, 0.1648, 0.1685],
        [0.1562, 0.1661, 0.1743,  ..., 0.1566, 0.1695, 0.1682]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:19,  1.47it/s]

tensor([[0.1550, 0.1652, 0.1731,  ..., 0.1579, 0.1658, 0.1680],
        [0.1605, 0.1623, 0.1772,  ..., 0.1575, 0.1669, 0.1747],
        [0.1577, 0.1679, 0.1740,  ..., 0.1571, 0.1669, 0.1684],
        ...,
        [0.1583, 0.1685, 0.1772,  ..., 0.1592, 0.1677, 0.1705],
        [0.1588, 0.1656, 0.1773,  ..., 0.1601, 0.1689, 0.1712],
        [0.1558, 0.1659, 0.1758,  ..., 0.1587, 0.1662, 0.1723]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:19<00:18,  1.49it/s]

tensor([[0.1582, 0.1647, 0.1783,  ..., 0.1573, 0.1670, 0.1717],
        [0.1570, 0.1639, 0.1765,  ..., 0.1549, 0.1677, 0.1690],
        [0.1558, 0.1679, 0.1708,  ..., 0.1546, 0.1648, 0.1643],
        ...,
        [0.1539, 0.1661, 0.1720,  ..., 0.1571, 0.1666, 0.1665],
        [0.1541, 0.1651, 0.1723,  ..., 0.1559, 0.1650, 0.1647],
        [0.1595, 0.1652, 0.1782,  ..., 0.1581, 0.1656, 0.1725]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:20<00:17,  1.50it/s]

tensor([[0.1545, 0.1642, 0.1729,  ..., 0.1580, 0.1663, 0.1695],
        [0.1566, 0.1647, 0.1755,  ..., 0.1562, 0.1661, 0.1689],
        [0.1590, 0.1641, 0.1724,  ..., 0.1584, 0.1653, 0.1670],
        ...,
        [0.1551, 0.1636, 0.1735,  ..., 0.1555, 0.1656, 0.1720],
        [0.1530, 0.1613, 0.1719,  ..., 0.1565, 0.1601, 0.1672],
        [0.1558, 0.1617, 0.1727,  ..., 0.1572, 0.1632, 0.1695]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.50it/s]

tensor([[0.1562, 0.1658, 0.1760,  ..., 0.1570, 0.1679, 0.1678],
        [0.1552, 0.1691, 0.1743,  ..., 0.1551, 0.1631, 0.1662],
        [0.1540, 0.1662, 0.1725,  ..., 0.1578, 0.1654, 0.1651],
        ...,
        [0.1592, 0.1655, 0.1762,  ..., 0.1587, 0.1697, 0.1707],
        [0.1561, 0.1619, 0.1737,  ..., 0.1557, 0.1661, 0.1721],
        [0.1565, 0.1635, 0.1749,  ..., 0.1581, 0.1651, 0.1703]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:21<00:15,  1.51it/s]

tensor([[0.1559, 0.1643, 0.1750,  ..., 0.1578, 0.1652, 0.1688],
        [0.1586, 0.1671, 0.1755,  ..., 0.1591, 0.1674, 0.1706],
        [0.1550, 0.1624, 0.1740,  ..., 0.1582, 0.1643, 0.1689],
        ...,
        [0.1578, 0.1642, 0.1755,  ..., 0.1557, 0.1664, 0.1710],
        [0.1544, 0.1648, 0.1743,  ..., 0.1562, 0.1644, 0.1664],
        [0.1559, 0.1651, 0.1732,  ..., 0.1562, 0.1681, 0.1700]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:22<00:15,  1.52it/s]

tensor([[0.1555, 0.1639, 0.1722,  ..., 0.1552, 0.1640, 0.1726],
        [0.1581, 0.1664, 0.1772,  ..., 0.1573, 0.1676, 0.1683],
        [0.1572, 0.1633, 0.1739,  ..., 0.1556, 0.1632, 0.1656],
        ...,
        [0.1562, 0.1652, 0.1749,  ..., 0.1542, 0.1648, 0.1664],
        [0.1561, 0.1634, 0.1742,  ..., 0.1553, 0.1643, 0.1673],
        [0.1606, 0.1619, 0.1761,  ..., 0.1564, 0.1642, 0.1714]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.48it/s]

tensor([[0.1613, 0.1654, 0.1787,  ..., 0.1606, 0.1681, 0.1760],
        [0.1550, 0.1632, 0.1751,  ..., 0.1576, 0.1647, 0.1696],
        [0.1576, 0.1636, 0.1758,  ..., 0.1572, 0.1640, 0.1680],
        ...,
        [0.1576, 0.1640, 0.1762,  ..., 0.1576, 0.1651, 0.1699],
        [0.1561, 0.1655, 0.1745,  ..., 0.1579, 0.1665, 0.1697],
        [0.1567, 0.1622, 0.1763,  ..., 0.1555, 0.1669, 0.1685]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:23<00:14,  1.43it/s]

tensor([[0.1569, 0.1664, 0.1745,  ..., 0.1566, 0.1678, 0.1709],
        [0.1541, 0.1651, 0.1730,  ..., 0.1568, 0.1635, 0.1676],
        [0.1581, 0.1641, 0.1763,  ..., 0.1598, 0.1679, 0.1703],
        ...,
        [0.1576, 0.1656, 0.1745,  ..., 0.1593, 0.1650, 0.1718],
        [0.1574, 0.1656, 0.1746,  ..., 0.1554, 0.1666, 0.1679],
        [0.1565, 0.1630, 0.1777,  ..., 0.1587, 0.1610, 0.1690]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:24<00:13,  1.43it/s]

tensor([[0.1557, 0.1628, 0.1732,  ..., 0.1558, 0.1659, 0.1726],
        [0.1561, 0.1633, 0.1768,  ..., 0.1585, 0.1660, 0.1716],
        [0.1578, 0.1644, 0.1766,  ..., 0.1574, 0.1674, 0.1740],
        ...,
        [0.1605, 0.1652, 0.1797,  ..., 0.1570, 0.1679, 0.1715],
        [0.1564, 0.1626, 0.1742,  ..., 0.1565, 0.1641, 0.1713],
        [0.1527, 0.1632, 0.1728,  ..., 0.1554, 0.1615, 0.1659]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:13,  1.40it/s]

tensor([[0.1536, 0.1640, 0.1747,  ..., 0.1567, 0.1647, 0.1645],
        [0.1546, 0.1643, 0.1741,  ..., 0.1581, 0.1644, 0.1695],
        [0.1576, 0.1681, 0.1759,  ..., 0.1576, 0.1695, 0.1698],
        ...,
        [0.1561, 0.1634, 0.1775,  ..., 0.1561, 0.1637, 0.1689],
        [0.1548, 0.1613, 0.1725,  ..., 0.1579, 0.1635, 0.1707],
        [0.1549, 0.1620, 0.1744,  ..., 0.1556, 0.1629, 0.1658]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:13,  1.37it/s]

tensor([[0.1550, 0.1655, 0.1770,  ..., 0.1563, 0.1653, 0.1658],
        [0.1548, 0.1625, 0.1740,  ..., 0.1572, 0.1636, 0.1683],
        [0.1577, 0.1642, 0.1788,  ..., 0.1572, 0.1666, 0.1700],
        ...,
        [0.1588, 0.1658, 0.1759,  ..., 0.1564, 0.1669, 0.1726],
        [0.1529, 0.1648, 0.1689,  ..., 0.1557, 0.1617, 0.1642],
        [0.1555, 0.1635, 0.1735,  ..., 0.1575, 0.1646, 0.1625]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:26<00:12,  1.40it/s]

tensor([[0.1591, 0.1671, 0.1804,  ..., 0.1562, 0.1650, 0.1689],
        [0.1560, 0.1646, 0.1747,  ..., 0.1563, 0.1669, 0.1714],
        [0.1570, 0.1639, 0.1743,  ..., 0.1575, 0.1655, 0.1703],
        ...,
        [0.1557, 0.1664, 0.1749,  ..., 0.1558, 0.1631, 0.1658],
        [0.1575, 0.1664, 0.1758,  ..., 0.1579, 0.1662, 0.1676],
        [0.1605, 0.1665, 0.1762,  ..., 0.1585, 0.1670, 0.1712]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:11,  1.41it/s]

tensor([[0.1601, 0.1657, 0.1785,  ..., 0.1555, 0.1670, 0.1699],
        [0.1598, 0.1651, 0.1769,  ..., 0.1598, 0.1674, 0.1717],
        [0.1567, 0.1648, 0.1752,  ..., 0.1580, 0.1674, 0.1691],
        ...,
        [0.1541, 0.1625, 0.1721,  ..., 0.1565, 0.1638, 0.1676],
        [0.1582, 0.1644, 0.1810,  ..., 0.1567, 0.1683, 0.1697],
        [0.1585, 0.1623, 0.1784,  ..., 0.1590, 0.1676, 0.1715]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:10,  1.41it/s]

tensor([[0.1586, 0.1630, 0.1788,  ..., 0.1564, 0.1606, 0.1670],
        [0.1583, 0.1640, 0.1780,  ..., 0.1596, 0.1661, 0.1722],
        [0.1587, 0.1663, 0.1781,  ..., 0.1587, 0.1677, 0.1705],
        ...,
        [0.1547, 0.1623, 0.1758,  ..., 0.1580, 0.1605, 0.1688],
        [0.1566, 0.1623, 0.1756,  ..., 0.1583, 0.1654, 0.1749],
        [0.1558, 0.1635, 0.1749,  ..., 0.1583, 0.1649, 0.1693]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:28<00:09,  1.41it/s]

tensor([[0.1576, 0.1644, 0.1766,  ..., 0.1599, 0.1649, 0.1694],
        [0.1563, 0.1650, 0.1757,  ..., 0.1570, 0.1659, 0.1676],
        [0.1572, 0.1638, 0.1757,  ..., 0.1571, 0.1665, 0.1694],
        ...,
        [0.1567, 0.1688, 0.1775,  ..., 0.1575, 0.1686, 0.1690],
        [0.1560, 0.1661, 0.1743,  ..., 0.1560, 0.1656, 0.1674],
        [0.1557, 0.1643, 0.1766,  ..., 0.1555, 0.1657, 0.1694]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:09,  1.42it/s]

tensor([[0.1566, 0.1658, 0.1758,  ..., 0.1564, 0.1627, 0.1676],
        [0.1562, 0.1643, 0.1740,  ..., 0.1576, 0.1663, 0.1692],
        [0.1580, 0.1659, 0.1782,  ..., 0.1570, 0.1654, 0.1701],
        ...,
        [0.1606, 0.1682, 0.1785,  ..., 0.1600, 0.1676, 0.1696],
        [0.1560, 0.1639, 0.1745,  ..., 0.1573, 0.1653, 0.1734],
        [0.1563, 0.1627, 0.1746,  ..., 0.1563, 0.1654, 0.1696]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:08,  1.39it/s]

tensor([[0.1570, 0.1639, 0.1753,  ..., 0.1574, 0.1661, 0.1728],
        [0.1582, 0.1650, 0.1798,  ..., 0.1593, 0.1668, 0.1747],
        [0.1583, 0.1671, 0.1742,  ..., 0.1584, 0.1679, 0.1699],
        ...,
        [0.1554, 0.1644, 0.1758,  ..., 0.1572, 0.1653, 0.1701],
        [0.1586, 0.1692, 0.1727,  ..., 0.1592, 0.1659, 0.1664],
        [0.1583, 0.1626, 0.1773,  ..., 0.1579, 0.1650, 0.1706]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:31<00:07,  1.41it/s]

tensor([[0.1558, 0.1653, 0.1741,  ..., 0.1579, 0.1653, 0.1675],
        [0.1575, 0.1674, 0.1771,  ..., 0.1575, 0.1642, 0.1663],
        [0.1593, 0.1642, 0.1779,  ..., 0.1569, 0.1669, 0.1715],
        ...,
        [0.1565, 0.1658, 0.1746,  ..., 0.1571, 0.1646, 0.1690],
        [0.1581, 0.1632, 0.1782,  ..., 0.1578, 0.1681, 0.1760],
        [0.1550, 0.1674, 0.1754,  ..., 0.1566, 0.1672, 0.1656]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:07,  1.41it/s]

tensor([[0.1588, 0.1656, 0.1749,  ..., 0.1575, 0.1684, 0.1720],
        [0.1588, 0.1645, 0.1772,  ..., 0.1578, 0.1662, 0.1731],
        [0.1591, 0.1649, 0.1759,  ..., 0.1569, 0.1686, 0.1691],
        ...,
        [0.1570, 0.1650, 0.1746,  ..., 0.1561, 0.1648, 0.1676],
        [0.1573, 0.1662, 0.1760,  ..., 0.1581, 0.1689, 0.1712],
        [0.1552, 0.1634, 0.1772,  ..., 0.1581, 0.1661, 0.1673]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:06,  1.40it/s]

tensor([[0.1567, 0.1623, 0.1761,  ..., 0.1570, 0.1649, 0.1713],
        [0.1584, 0.1661, 0.1777,  ..., 0.1579, 0.1675, 0.1718],
        [0.1547, 0.1651, 0.1730,  ..., 0.1597, 0.1634, 0.1659],
        ...,
        [0.1562, 0.1639, 0.1753,  ..., 0.1580, 0.1641, 0.1666],
        [0.1598, 0.1635, 0.1746,  ..., 0.1570, 0.1659, 0.1691],
        [0.1547, 0.1640, 0.1713,  ..., 0.1541, 0.1630, 0.1639]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:33<00:05,  1.37it/s]

tensor([[0.1533, 0.1615, 0.1702,  ..., 0.1575, 0.1602, 0.1632],
        [0.1543, 0.1655, 0.1747,  ..., 0.1555, 0.1624, 0.1656],
        [0.1597, 0.1646, 0.1779,  ..., 0.1570, 0.1654, 0.1706],
        ...,
        [0.1496, 0.1594, 0.1636,  ..., 0.1491, 0.1598, 0.1602],
        [0.1563, 0.1643, 0.1772,  ..., 0.1577, 0.1647, 0.1691],
        [0.1576, 0.1642, 0.1775,  ..., 0.1582, 0.1651, 0.1705]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:05,  1.35it/s]

tensor([[0.1568, 0.1646, 0.1766,  ..., 0.1570, 0.1655, 0.1705],
        [0.1554, 0.1638, 0.1756,  ..., 0.1569, 0.1645, 0.1681],
        [0.1578, 0.1675, 0.1798,  ..., 0.1577, 0.1643, 0.1682],
        ...,
        [0.1565, 0.1664, 0.1748,  ..., 0.1555, 0.1662, 0.1686],
        [0.1572, 0.1642, 0.1756,  ..., 0.1575, 0.1659, 0.1692],
        [0.1574, 0.1612, 0.1742,  ..., 0.1591, 0.1652, 0.1730]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:34<00:04,  1.32it/s]

tensor([[0.1584, 0.1661, 0.1776,  ..., 0.1566, 0.1657, 0.1696],
        [0.1559, 0.1633, 0.1746,  ..., 0.1571, 0.1633, 0.1712],
        [0.1573, 0.1662, 0.1769,  ..., 0.1573, 0.1677, 0.1702],
        ...,
        [0.1550, 0.1630, 0.1750,  ..., 0.1574, 0.1637, 0.1681],
        [0.1531, 0.1640, 0.1719,  ..., 0.1565, 0.1627, 0.1647],
        [0.1552, 0.1643, 0.1726,  ..., 0.1556, 0.1656, 0.1665]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.35it/s]

tensor([[0.1572, 0.1650, 0.1750,  ..., 0.1576, 0.1655, 0.1690],
        [0.1555, 0.1657, 0.1740,  ..., 0.1563, 0.1645, 0.1686],
        [0.1540, 0.1642, 0.1728,  ..., 0.1542, 0.1611, 0.1642],
        ...,
        [0.1580, 0.1666, 0.1766,  ..., 0.1564, 0.1657, 0.1709],
        [0.1555, 0.1644, 0.1748,  ..., 0.1591, 0.1659, 0.1712],
        [0.1566, 0.1678, 0.1742,  ..., 0.1584, 0.1638, 0.1671]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.37it/s]

tensor([[0.1597, 0.1707, 0.1761,  ..., 0.1581, 0.1665, 0.1673],
        [0.1552, 0.1664, 0.1720,  ..., 0.1564, 0.1648, 0.1631],
        [0.1596, 0.1653, 0.1777,  ..., 0.1573, 0.1675, 0.1677],
        ...,
        [0.1563, 0.1631, 0.1752,  ..., 0.1569, 0.1649, 0.1691],
        [0.1556, 0.1654, 0.1732,  ..., 0.1560, 0.1640, 0.1705],
        [0.1573, 0.1652, 0.1748,  ..., 0.1571, 0.1671, 0.1679]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:02,  1.38it/s]

tensor([[0.1544, 0.1632, 0.1749,  ..., 0.1571, 0.1663, 0.1698],
        [0.1603, 0.1648, 0.1778,  ..., 0.1586, 0.1653, 0.1700],
        [0.1574, 0.1648, 0.1755,  ..., 0.1577, 0.1646, 0.1689],
        ...,
        [0.1589, 0.1650, 0.1786,  ..., 0.1586, 0.1651, 0.1711],
        [0.1557, 0.1628, 0.1690,  ..., 0.1551, 0.1642, 0.1666],
        [0.1573, 0.1697, 0.1756,  ..., 0.1585, 0.1669, 0.1684]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:37<00:01,  1.40it/s]

tensor([[0.1572, 0.1686, 0.1754,  ..., 0.1570, 0.1630, 0.1652],
        [0.1614, 0.1681, 0.1806,  ..., 0.1589, 0.1685, 0.1748],
        [0.1596, 0.1652, 0.1785,  ..., 0.1580, 0.1671, 0.1748],
        ...,
        [0.1574, 0.1628, 0.1736,  ..., 0.1557, 0.1651, 0.1700],
        [0.1594, 0.1642, 0.1755,  ..., 0.1580, 0.1652, 0.1704],
        [0.1581, 0.1616, 0.1747,  ..., 0.1595, 0.1633, 0.1685]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.37it/s]

tensor([[0.1553, 0.1606, 0.1736,  ..., 0.1565, 0.1625, 0.1698],
        [0.1560, 0.1628, 0.1752,  ..., 0.1560, 0.1663, 0.1721],
        [0.1572, 0.1640, 0.1766,  ..., 0.1585, 0.1660, 0.1704],
        ...,
        [0.1569, 0.1637, 0.1748,  ..., 0.1566, 0.1636, 0.1692],
        [0.1556, 0.1637, 0.1743,  ..., 0.1579, 0.1615, 0.1678],
        [0.1574, 0.1669, 0.1776,  ..., 0.1579, 0.1673, 0.1700]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.45it/s]
 12%|█▎        | 1/8 [00:00<00:02,  2.72it/s]

tensor([[0.1604, 0.1661, 0.1731,  ..., 0.1565, 0.1662, 0.1657],
        [0.1601, 0.1681, 0.1842,  ..., 0.1596, 0.1684, 0.1758],
        [0.1575, 0.1643, 0.1750,  ..., 0.1552, 0.1639, 0.1662],
        ...,
        [0.1552, 0.1646, 0.1749,  ..., 0.1588, 0.1657, 0.1691],
        [0.1576, 0.1655, 0.1782,  ..., 0.1558, 0.1678, 0.1710],
        [0.1584, 0.1652, 0.1726,  ..., 0.1586, 0.1651, 0.1683]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:02,  2.71it/s]

tensor([[0.1552, 0.1649, 0.1737,  ..., 0.1568, 0.1649, 0.1697],
        [0.1593, 0.1643, 0.1756,  ..., 0.1581, 0.1688, 0.1672],
        [0.1554, 0.1647, 0.1739,  ..., 0.1582, 0.1646, 0.1706],
        ...,
        [0.1568, 0.1629, 0.1765,  ..., 0.1578, 0.1648, 0.1717],
        [0.1559, 0.1668, 0.1752,  ..., 0.1554, 0.1602, 0.1657],
        [0.1571, 0.1620, 0.1748,  ..., 0.1544, 0.1645, 0.1693]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:01<00:01,  2.75it/s]

tensor([[0.1562, 0.1650, 0.1760,  ..., 0.1559, 0.1635, 0.1670],
        [0.1572, 0.1654, 0.1793,  ..., 0.1573, 0.1674, 0.1694],
        [0.1569, 0.1645, 0.1775,  ..., 0.1564, 0.1648, 0.1679],
        ...,
        [0.1568, 0.1663, 0.1746,  ..., 0.1548, 0.1645, 0.1665],
        [0.1584, 0.1665, 0.1750,  ..., 0.1570, 0.1667, 0.1689],
        [0.1560, 0.1673, 0.1765,  ..., 0.1567, 0.1654, 0.1690]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  2.85it/s]

tensor([[0.1604, 0.1674, 0.1770,  ..., 0.1588, 0.1663, 0.1712],
        [0.1556, 0.1650, 0.1747,  ..., 0.1551, 0.1643, 0.1692],
        [0.1547, 0.1650, 0.1735,  ..., 0.1566, 0.1636, 0.1651],
        ...,
        [0.1553, 0.1656, 0.1744,  ..., 0.1558, 0.1651, 0.1704],
        [0.1604, 0.1637, 0.1752,  ..., 0.1571, 0.1646, 0.1717],
        [0.1596, 0.1668, 0.1756,  ..., 0.1569, 0.1662, 0.1698]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:01,  2.89it/s]

tensor([[0.1560, 0.1663, 0.1741,  ..., 0.1565, 0.1660, 0.1658],
        [0.1557, 0.1643, 0.1756,  ..., 0.1573, 0.1667, 0.1680],
        [0.1568, 0.1644, 0.1752,  ..., 0.1556, 0.1650, 0.1696],
        ...,
        [0.1596, 0.1663, 0.1777,  ..., 0.1600, 0.1673, 0.1750],
        [0.1578, 0.1678, 0.1763,  ..., 0.1572, 0.1642, 0.1732],
        [0.1556, 0.1642, 0.1739,  ..., 0.1557, 0.1662, 0.1700]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:02<00:00,  3.00it/s]

tensor([[0.1567, 0.1645, 0.1750,  ..., 0.1570, 0.1663, 0.1688],
        [0.1545, 0.1651, 0.1758,  ..., 0.1574, 0.1650, 0.1682],
        [0.1568, 0.1687, 0.1722,  ..., 0.1545, 0.1632, 0.1636],
        ...,
        [0.1557, 0.1656, 0.1731,  ..., 0.1569, 0.1641, 0.1675],
        [0.1563, 0.1653, 0.1745,  ..., 0.1570, 0.1655, 0.1730],
        [0.1547, 0.1615, 0.1723,  ..., 0.1553, 0.1618, 0.1607]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.02it/s]

tensor([[0.1584, 0.1678, 0.1747,  ..., 0.1566, 0.1643, 0.1660],
        [0.1576, 0.1632, 0.1744,  ..., 0.1563, 0.1692, 0.1711],
        [0.1583, 0.1669, 0.1776,  ..., 0.1594, 0.1660, 0.1697],
        ...,
        [0.1574, 0.1668, 0.1726,  ..., 0.1574, 0.1641, 0.1621],
        [0.1576, 0.1644, 0.1749,  ..., 0.1578, 0.1658, 0.1733],
        [0.1557, 0.1617, 0.1732,  ..., 0.1558, 0.1634, 0.1731]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.96it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1594, 0.1666, 0.1787,  ..., 0.1565, 0.1658, 0.1739],
        [0.1552, 0.1639, 0.1774,  ..., 0.1548, 0.1611, 0.1648],
        [0.1598, 0.1693, 0.1775,  ..., 0.1592, 0.1689, 0.1741],
        ...,
        [0.1585, 0.1676, 0.1784,  ..., 0.1575, 0.1691, 0.1704],
        [0.1587, 0.1656, 0.1797,  ..., 0.1586, 0.1663, 0.1676],
        [0.1528, 0.1636, 0.1710,  ..., 0.1532, 0.1619, 0.1618]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 2, train_loss: 0.5441777110099792, valid_loss: 0.44457605481147766
Parameter containing:
tensor([[ 1.0003,  1.0004,  1.0004,  1.0004,  1.0003,  1.0003,  1.0004,  1.0003,
          1.0003,  1.0004, -0.4996, -0.4997, -0.4997, -0.4997, -0.4997, -0.4997,
         -0.4997, -0.4997, -0.4997, -0.4997],
       

  2%|▏         | 1/57 [00:00<00:42,  1.33it/s]

tensor([[0.1559, 0.1664, 0.1755,  ..., 0.1567, 0.1652, 0.1681],
        [0.1590, 0.1666, 0.1775,  ..., 0.1586, 0.1679, 0.1735],
        [0.1527, 0.1654, 0.1733,  ..., 0.1569, 0.1645, 0.1655],
        ...,
        [0.1554, 0.1643, 0.1728,  ..., 0.1574, 0.1624, 0.1672],
        [0.1563, 0.1644, 0.1753,  ..., 0.1603, 0.1663, 0.1728],
        [0.1561, 0.1668, 0.1744,  ..., 0.1572, 0.1666, 0.1704]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:41,  1.32it/s]

tensor([[0.1583, 0.1691, 0.1767,  ..., 0.1571, 0.1664, 0.1691],
        [0.1560, 0.1648, 0.1750,  ..., 0.1576, 0.1631, 0.1671],
        [0.1571, 0.1673, 0.1783,  ..., 0.1585, 0.1648, 0.1702],
        ...,
        [0.1568, 0.1662, 0.1753,  ..., 0.1562, 0.1640, 0.1651],
        [0.1598, 0.1661, 0.1776,  ..., 0.1587, 0.1688, 0.1697],
        [0.1621, 0.1663, 0.1800,  ..., 0.1622, 0.1676, 0.1753]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:40,  1.34it/s]

tensor([[0.1555, 0.1659, 0.1779,  ..., 0.1541, 0.1639, 0.1670],
        [0.1563, 0.1642, 0.1752,  ..., 0.1564, 0.1642, 0.1671],
        [0.1570, 0.1631, 0.1783,  ..., 0.1563, 0.1681, 0.1720],
        ...,
        [0.1587, 0.1654, 0.1769,  ..., 0.1583, 0.1652, 0.1711],
        [0.1616, 0.1685, 0.1785,  ..., 0.1582, 0.1661, 0.1704],
        [0.1544, 0.1641, 0.1724,  ..., 0.1568, 0.1626, 0.1644]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:38,  1.39it/s]

tensor([[0.1551, 0.1618, 0.1740,  ..., 0.1548, 0.1649, 0.1710],
        [0.1596, 0.1658, 0.1790,  ..., 0.1584, 0.1664, 0.1715],
        [0.1553, 0.1648, 0.1735,  ..., 0.1562, 0.1636, 0.1653],
        ...,
        [0.1560, 0.1667, 0.1770,  ..., 0.1575, 0.1632, 0.1685],
        [0.1554, 0.1648, 0.1749,  ..., 0.1559, 0.1662, 0.1676],
        [0.1576, 0.1658, 0.1772,  ..., 0.1572, 0.1652, 0.1693]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:36,  1.41it/s]

tensor([[0.1555, 0.1644, 0.1752,  ..., 0.1578, 0.1660, 0.1718],
        [0.1558, 0.1663, 0.1744,  ..., 0.1590, 0.1661, 0.1705],
        [0.1525, 0.1589, 0.1667,  ..., 0.1545, 0.1591, 0.1597],
        ...,
        [0.1570, 0.1661, 0.1758,  ..., 0.1583, 0.1651, 0.1719],
        [0.1562, 0.1620, 0.1752,  ..., 0.1582, 0.1641, 0.1682],
        [0.1618, 0.1679, 0.1783,  ..., 0.1586, 0.1683, 0.1732]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:36,  1.41it/s]

tensor([[0.1619, 0.1646, 0.1797,  ..., 0.1584, 0.1645, 0.1734],
        [0.1573, 0.1629, 0.1746,  ..., 0.1577, 0.1651, 0.1713],
        [0.1561, 0.1636, 0.1728,  ..., 0.1561, 0.1628, 0.1672],
        ...,
        [0.1566, 0.1645, 0.1754,  ..., 0.1574, 0.1672, 0.1720],
        [0.1564, 0.1667, 0.1758,  ..., 0.1556, 0.1640, 0.1666],
        [0.1587, 0.1635, 0.1715,  ..., 0.1594, 0.1669, 0.1715]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:36,  1.38it/s]

tensor([[0.1593, 0.1626, 0.1763,  ..., 0.1574, 0.1659, 0.1723],
        [0.1555, 0.1664, 0.1743,  ..., 0.1609, 0.1650, 0.1692],
        [0.1576, 0.1625, 0.1773,  ..., 0.1562, 0.1636, 0.1639],
        ...,
        [0.1558, 0.1674, 0.1764,  ..., 0.1552, 0.1669, 0.1670],
        [0.1567, 0.1680, 0.1733,  ..., 0.1551, 0.1634, 0.1643],
        [0.1563, 0.1646, 0.1753,  ..., 0.1584, 0.1636, 0.1690]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.40it/s]

tensor([[0.1575, 0.1656, 0.1739,  ..., 0.1551, 0.1638, 0.1669],
        [0.1557, 0.1661, 0.1747,  ..., 0.1556, 0.1645, 0.1688],
        [0.1533, 0.1607, 0.1709,  ..., 0.1545, 0.1583, 0.1612],
        ...,
        [0.1531, 0.1632, 0.1736,  ..., 0.1574, 0.1624, 0.1633],
        [0.1593, 0.1683, 0.1720,  ..., 0.1565, 0.1628, 0.1633],
        [0.1593, 0.1637, 0.1766,  ..., 0.1587, 0.1646, 0.1744]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:33,  1.41it/s]

tensor([[0.1569, 0.1658, 0.1762,  ..., 0.1556, 0.1652, 0.1681],
        [0.1541, 0.1642, 0.1730,  ..., 0.1560, 0.1659, 0.1675],
        [0.1571, 0.1655, 0.1757,  ..., 0.1584, 0.1677, 0.1708],
        ...,
        [0.1570, 0.1662, 0.1758,  ..., 0.1563, 0.1651, 0.1725],
        [0.1591, 0.1664, 0.1780,  ..., 0.1573, 0.1665, 0.1690],
        [0.1559, 0.1646, 0.1744,  ..., 0.1574, 0.1639, 0.1693]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:34,  1.37it/s]

tensor([[0.1593, 0.1660, 0.1759,  ..., 0.1570, 0.1655, 0.1707],
        [0.1572, 0.1640, 0.1718,  ..., 0.1528, 0.1639, 0.1682],
        [0.1555, 0.1664, 0.1750,  ..., 0.1579, 0.1654, 0.1707],
        ...,
        [0.1550, 0.1661, 0.1721,  ..., 0.1559, 0.1634, 0.1654],
        [0.1585, 0.1642, 0.1749,  ..., 0.1578, 0.1662, 0.1718],
        [0.1510, 0.1598, 0.1684,  ..., 0.1541, 0.1548, 0.1628]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:08<00:35,  1.31it/s]

tensor([[0.1565, 0.1660, 0.1729,  ..., 0.1582, 0.1662, 0.1699],
        [0.1568, 0.1649, 0.1755,  ..., 0.1562, 0.1643, 0.1670],
        [0.1563, 0.1683, 0.1730,  ..., 0.1556, 0.1638, 0.1659],
        ...,
        [0.1587, 0.1690, 0.1728,  ..., 0.1588, 0.1652, 0.1661],
        [0.1589, 0.1655, 0.1781,  ..., 0.1553, 0.1667, 0.1709],
        [0.1576, 0.1664, 0.1750,  ..., 0.1566, 0.1675, 0.1683]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:35,  1.28it/s]

tensor([[0.1564, 0.1623, 0.1741,  ..., 0.1551, 0.1652, 0.1691],
        [0.1563, 0.1657, 0.1739,  ..., 0.1579, 0.1658, 0.1693],
        [0.1566, 0.1670, 0.1753,  ..., 0.1572, 0.1635, 0.1685],
        ...,
        [0.1539, 0.1639, 0.1735,  ..., 0.1580, 0.1651, 0.1686],
        [0.1562, 0.1649, 0.1758,  ..., 0.1568, 0.1658, 0.1692],
        [0.1607, 0.1664, 0.1762,  ..., 0.1571, 0.1652, 0.1698]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:35,  1.25it/s]

tensor([[0.1549, 0.1648, 0.1742,  ..., 0.1564, 0.1644, 0.1673],
        [0.1557, 0.1645, 0.1776,  ..., 0.1554, 0.1654, 0.1686],
        [0.1558, 0.1602, 0.1725,  ..., 0.1538, 0.1608, 0.1694],
        ...,
        [0.1604, 0.1651, 0.1795,  ..., 0.1568, 0.1656, 0.1732],
        [0.1582, 0.1646, 0.1779,  ..., 0.1577, 0.1650, 0.1692],
        [0.1548, 0.1640, 0.1747,  ..., 0.1564, 0.1619, 0.1628]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:33,  1.28it/s]

tensor([[0.1575, 0.1651, 0.1754,  ..., 0.1562, 0.1637, 0.1669],
        [0.1586, 0.1643, 0.1774,  ..., 0.1582, 0.1665, 0.1719],
        [0.1572, 0.1662, 0.1767,  ..., 0.1600, 0.1646, 0.1700],
        ...,
        [0.1598, 0.1661, 0.1769,  ..., 0.1575, 0.1653, 0.1706],
        [0.1534, 0.1626, 0.1713,  ..., 0.1579, 0.1619, 0.1692],
        [0.1583, 0.1661, 0.1754,  ..., 0.1611, 0.1676, 0.1695]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:11<00:32,  1.28it/s]

tensor([[0.1558, 0.1656, 0.1751,  ..., 0.1563, 0.1622, 0.1649],
        [0.1569, 0.1643, 0.1740,  ..., 0.1570, 0.1643, 0.1704],
        [0.1542, 0.1654, 0.1718,  ..., 0.1576, 0.1640, 0.1654],
        ...,
        [0.1565, 0.1651, 0.1731,  ..., 0.1586, 0.1638, 0.1674],
        [0.1605, 0.1668, 0.1784,  ..., 0.1570, 0.1660, 0.1711],
        [0.1552, 0.1637, 0.1766,  ..., 0.1543, 0.1630, 0.1667]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:12<00:31,  1.29it/s]

tensor([[0.1562, 0.1660, 0.1741,  ..., 0.1566, 0.1669, 0.1656],
        [0.1555, 0.1631, 0.1747,  ..., 0.1579, 0.1650, 0.1684],
        [0.1572, 0.1662, 0.1730,  ..., 0.1561, 0.1645, 0.1652],
        ...,
        [0.1539, 0.1655, 0.1724,  ..., 0.1580, 0.1635, 0.1675],
        [0.1566, 0.1643, 0.1765,  ..., 0.1563, 0.1649, 0.1719],
        [0.1559, 0.1643, 0.1743,  ..., 0.1553, 0.1616, 0.1657]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:30,  1.31it/s]

tensor([[0.1556, 0.1679, 0.1741,  ..., 0.1560, 0.1666, 0.1663],
        [0.1580, 0.1648, 0.1754,  ..., 0.1570, 0.1620, 0.1679],
        [0.1575, 0.1665, 0.1770,  ..., 0.1552, 0.1653, 0.1675],
        ...,
        [0.1553, 0.1700, 0.1719,  ..., 0.1584, 0.1686, 0.1675],
        [0.1569, 0.1663, 0.1762,  ..., 0.1557, 0.1658, 0.1681],
        [0.1600, 0.1670, 0.1773,  ..., 0.1576, 0.1671, 0.1697]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:29,  1.30it/s]

tensor([[0.1597, 0.1661, 0.1806,  ..., 0.1564, 0.1669, 0.1697],
        [0.1530, 0.1636, 0.1746,  ..., 0.1585, 0.1633, 0.1682],
        [0.1586, 0.1691, 0.1767,  ..., 0.1564, 0.1680, 0.1691],
        ...,
        [0.1569, 0.1672, 0.1756,  ..., 0.1584, 0.1660, 0.1683],
        [0.1581, 0.1652, 0.1765,  ..., 0.1560, 0.1654, 0.1701],
        [0.1592, 0.1664, 0.1768,  ..., 0.1582, 0.1652, 0.1700]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:14<00:28,  1.33it/s]

tensor([[0.1572, 0.1661, 0.1740,  ..., 0.1576, 0.1657, 0.1692],
        [0.1542, 0.1657, 0.1745,  ..., 0.1564, 0.1622, 0.1641],
        [0.1564, 0.1646, 0.1752,  ..., 0.1576, 0.1637, 0.1696],
        ...,
        [0.1570, 0.1655, 0.1766,  ..., 0.1570, 0.1631, 0.1665],
        [0.1545, 0.1650, 0.1743,  ..., 0.1561, 0.1651, 0.1689],
        [0.1567, 0.1632, 0.1746,  ..., 0.1556, 0.1651, 0.1714]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:26,  1.37it/s]

tensor([[0.1567, 0.1655, 0.1765,  ..., 0.1558, 0.1662, 0.1737],
        [0.1537, 0.1661, 0.1732,  ..., 0.1556, 0.1655, 0.1661],
        [0.1536, 0.1658, 0.1740,  ..., 0.1556, 0.1584, 0.1625],
        ...,
        [0.1590, 0.1624, 0.1769,  ..., 0.1565, 0.1629, 0.1733],
        [0.1575, 0.1658, 0.1736,  ..., 0.1569, 0.1637, 0.1670],
        [0.1570, 0.1651, 0.1758,  ..., 0.1602, 0.1654, 0.1755]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:25,  1.40it/s]

tensor([[0.1540, 0.1628, 0.1729,  ..., 0.1574, 0.1649, 0.1688],
        [0.1615, 0.1675, 0.1789,  ..., 0.1594, 0.1672, 0.1730],
        [0.1411, 0.1513, 0.1523,  ..., 0.1420, 0.1509, 0.1510],
        ...,
        [0.1557, 0.1629, 0.1747,  ..., 0.1572, 0.1658, 0.1688],
        [0.1567, 0.1669, 0.1721,  ..., 0.1578, 0.1665, 0.1664],
        [0.1548, 0.1660, 0.1753,  ..., 0.1583, 0.1654, 0.1660]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:16<00:24,  1.43it/s]

tensor([[0.1543, 0.1644, 0.1744,  ..., 0.1590, 0.1645, 0.1706],
        [0.1547, 0.1647, 0.1728,  ..., 0.1583, 0.1635, 0.1701],
        [0.1586, 0.1660, 0.1769,  ..., 0.1581, 0.1648, 0.1702],
        ...,
        [0.1591, 0.1635, 0.1780,  ..., 0.1586, 0.1632, 0.1730],
        [0.1581, 0.1694, 0.1746,  ..., 0.1564, 0.1648, 0.1673],
        [0.1574, 0.1682, 0.1748,  ..., 0.1568, 0.1664, 0.1702]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:23,  1.46it/s]

tensor([[0.1589, 0.1654, 0.1764,  ..., 0.1562, 0.1658, 0.1684],
        [0.1590, 0.1650, 0.1790,  ..., 0.1580, 0.1649, 0.1705],
        [0.1603, 0.1657, 0.1774,  ..., 0.1573, 0.1652, 0.1720],
        ...,
        [0.1597, 0.1685, 0.1798,  ..., 0.1578, 0.1656, 0.1672],
        [0.1602, 0.1680, 0.1776,  ..., 0.1598, 0.1667, 0.1695],
        [0.1558, 0.1657, 0.1725,  ..., 0.1585, 0.1645, 0.1647]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:22,  1.48it/s]

tensor([[0.1592, 0.1674, 0.1763,  ..., 0.1592, 0.1683, 0.1716],
        [0.1556, 0.1651, 0.1755,  ..., 0.1573, 0.1668, 0.1702],
        [0.1582, 0.1610, 0.1746,  ..., 0.1582, 0.1663, 0.1715],
        ...,
        [0.1606, 0.1651, 0.1790,  ..., 0.1561, 0.1653, 0.1722],
        [0.1586, 0.1654, 0.1802,  ..., 0.1569, 0.1659, 0.1746],
        [0.1564, 0.1659, 0.1721,  ..., 0.1565, 0.1661, 0.1692]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:21,  1.48it/s]

tensor([[0.1563, 0.1686, 0.1723,  ..., 0.1596, 0.1651, 0.1664],
        [0.1583, 0.1671, 0.1739,  ..., 0.1578, 0.1647, 0.1679],
        [0.1598, 0.1643, 0.1756,  ..., 0.1572, 0.1632, 0.1660],
        ...,
        [0.1556, 0.1633, 0.1730,  ..., 0.1551, 0.1606, 0.1650],
        [0.1560, 0.1618, 0.1730,  ..., 0.1550, 0.1607, 0.1631],
        [0.1549, 0.1673, 0.1741,  ..., 0.1571, 0.1633, 0.1678]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:21,  1.47it/s]

tensor([[0.1544, 0.1625, 0.1731,  ..., 0.1562, 0.1641, 0.1686],
        [0.1573, 0.1657, 0.1747,  ..., 0.1582, 0.1641, 0.1690],
        [0.1591, 0.1666, 0.1792,  ..., 0.1579, 0.1652, 0.1701],
        ...,
        [0.1552, 0.1631, 0.1738,  ..., 0.1553, 0.1643, 0.1687],
        [0.1567, 0.1643, 0.1756,  ..., 0.1566, 0.1636, 0.1724],
        [0.1534, 0.1602, 0.1709,  ..., 0.1582, 0.1599, 0.1661]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:20,  1.48it/s]

tensor([[0.1560, 0.1665, 0.1728,  ..., 0.1533, 0.1643, 0.1656],
        [0.1565, 0.1656, 0.1743,  ..., 0.1555, 0.1656, 0.1677],
        [0.1567, 0.1625, 0.1746,  ..., 0.1599, 0.1623, 0.1710],
        ...,
        [0.1600, 0.1649, 0.1788,  ..., 0.1570, 0.1648, 0.1680],
        [0.1576, 0.1632, 0.1763,  ..., 0.1574, 0.1625, 0.1683],
        [0.1567, 0.1665, 0.1746,  ..., 0.1587, 0.1663, 0.1699]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:19,  1.51it/s]

tensor([[0.1558, 0.1682, 0.1765,  ..., 0.1600, 0.1663, 0.1678],
        [0.1533, 0.1594, 0.1700,  ..., 0.1524, 0.1610, 0.1649],
        [0.1569, 0.1646, 0.1770,  ..., 0.1577, 0.1660, 0.1697],
        ...,
        [0.1592, 0.1676, 0.1758,  ..., 0.1563, 0.1661, 0.1695],
        [0.1556, 0.1640, 0.1751,  ..., 0.1561, 0.1660, 0.1714],
        [0.1573, 0.1656, 0.1766,  ..., 0.1555, 0.1650, 0.1690]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:18,  1.51it/s]

tensor([[0.1579, 0.1640, 0.1738,  ..., 0.1581, 0.1647, 0.1705],
        [0.1600, 0.1661, 0.1775,  ..., 0.1587, 0.1664, 0.1718],
        [0.1573, 0.1652, 0.1745,  ..., 0.1578, 0.1648, 0.1665],
        ...,
        [0.1587, 0.1650, 0.1759,  ..., 0.1571, 0.1655, 0.1701],
        [0.1583, 0.1652, 0.1761,  ..., 0.1564, 0.1652, 0.1702],
        [0.1581, 0.1635, 0.1734,  ..., 0.1567, 0.1635, 0.1686]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:17,  1.52it/s]

tensor([[0.1559, 0.1658, 0.1745,  ..., 0.1562, 0.1621, 0.1667],
        [0.1571, 0.1630, 0.1746,  ..., 0.1575, 0.1609, 0.1669],
        [0.1575, 0.1669, 0.1756,  ..., 0.1564, 0.1679, 0.1693],
        ...,
        [0.1557, 0.1629, 0.1740,  ..., 0.1562, 0.1640, 0.1703],
        [0.1571, 0.1647, 0.1739,  ..., 0.1579, 0.1640, 0.1715],
        [0.1573, 0.1670, 0.1751,  ..., 0.1571, 0.1673, 0.1680]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:17,  1.52it/s]

tensor([[0.1560, 0.1626, 0.1765,  ..., 0.1576, 0.1688, 0.1686],
        [0.1571, 0.1634, 0.1766,  ..., 0.1553, 0.1647, 0.1706],
        [0.1543, 0.1661, 0.1711,  ..., 0.1561, 0.1648, 0.1640],
        ...,
        [0.1546, 0.1656, 0.1734,  ..., 0.1588, 0.1647, 0.1652],
        [0.1589, 0.1636, 0.1773,  ..., 0.1582, 0.1666, 0.1762],
        [0.1565, 0.1638, 0.1709,  ..., 0.1555, 0.1634, 0.1652]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:16,  1.52it/s]

tensor([[0.1562, 0.1630, 0.1755,  ..., 0.1559, 0.1649, 0.1670],
        [0.1553, 0.1636, 0.1731,  ..., 0.1555, 0.1628, 0.1666],
        [0.1575, 0.1656, 0.1746,  ..., 0.1568, 0.1650, 0.1693],
        ...,
        [0.1569, 0.1634, 0.1754,  ..., 0.1558, 0.1613, 0.1663],
        [0.1555, 0.1655, 0.1748,  ..., 0.1600, 0.1667, 0.1722],
        [0.1584, 0.1635, 0.1773,  ..., 0.1586, 0.1651, 0.1745]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:15,  1.53it/s]

tensor([[0.1558, 0.1640, 0.1734,  ..., 0.1589, 0.1643, 0.1707],
        [0.1577, 0.1655, 0.1782,  ..., 0.1570, 0.1647, 0.1676],
        [0.1564, 0.1621, 0.1754,  ..., 0.1594, 0.1669, 0.1720],
        ...,
        [0.1569, 0.1658, 0.1751,  ..., 0.1563, 0.1653, 0.1670],
        [0.1577, 0.1700, 0.1778,  ..., 0.1580, 0.1659, 0.1654],
        [0.1554, 0.1614, 0.1737,  ..., 0.1567, 0.1625, 0.1702]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:14,  1.54it/s]

tensor([[0.1611, 0.1692, 0.1777,  ..., 0.1570, 0.1657, 0.1692],
        [0.1550, 0.1640, 0.1731,  ..., 0.1560, 0.1646, 0.1675],
        [0.1594, 0.1670, 0.1773,  ..., 0.1580, 0.1661, 0.1717],
        ...,
        [0.1562, 0.1630, 0.1757,  ..., 0.1576, 0.1627, 0.1704],
        [0.1571, 0.1641, 0.1754,  ..., 0.1580, 0.1630, 0.1726],
        [0.1578, 0.1661, 0.1732,  ..., 0.1566, 0.1657, 0.1678]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.53it/s]

tensor([[0.1555, 0.1637, 0.1742,  ..., 0.1567, 0.1629, 0.1674],
        [0.1552, 0.1625, 0.1738,  ..., 0.1569, 0.1633, 0.1696],
        [0.1563, 0.1649, 0.1752,  ..., 0.1559, 0.1638, 0.1681],
        ...,
        [0.1569, 0.1674, 0.1723,  ..., 0.1563, 0.1624, 0.1661],
        [0.1588, 0.1648, 0.1783,  ..., 0.1585, 0.1689, 0.1742],
        [0.1579, 0.1642, 0.1772,  ..., 0.1568, 0.1657, 0.1716]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:13,  1.53it/s]

tensor([[0.1565, 0.1636, 0.1756,  ..., 0.1583, 0.1632, 0.1685],
        [0.1554, 0.1647, 0.1728,  ..., 0.1578, 0.1657, 0.1661],
        [0.1570, 0.1629, 0.1733,  ..., 0.1576, 0.1669, 0.1721],
        ...,
        [0.1446, 0.1553, 0.1564,  ..., 0.1466, 0.1560, 0.1558],
        [0.1584, 0.1649, 0.1778,  ..., 0.1573, 0.1654, 0.1693],
        [0.1535, 0.1655, 0.1718,  ..., 0.1540, 0.1650, 0.1642]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:13,  1.51it/s]

tensor([[0.1480, 0.1567, 0.1589,  ..., 0.1510, 0.1568, 0.1569],
        [0.1523, 0.1615, 0.1683,  ..., 0.1555, 0.1592, 0.1635],
        [0.1607, 0.1643, 0.1806,  ..., 0.1561, 0.1663, 0.1766],
        ...,
        [0.1596, 0.1655, 0.1770,  ..., 0.1590, 0.1643, 0.1750],
        [0.1600, 0.1675, 0.1758,  ..., 0.1579, 0.1649, 0.1662],
        [0.1575, 0.1641, 0.1700,  ..., 0.1586, 0.1591, 0.1620]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:12,  1.50it/s]

tensor([[0.1600, 0.1677, 0.1754,  ..., 0.1600, 0.1672, 0.1723],
        [0.1550, 0.1657, 0.1745,  ..., 0.1571, 0.1646, 0.1713],
        [0.1547, 0.1632, 0.1762,  ..., 0.1562, 0.1643, 0.1650],
        ...,
        [0.1565, 0.1659, 0.1744,  ..., 0.1562, 0.1688, 0.1687],
        [0.1570, 0.1687, 0.1734,  ..., 0.1584, 0.1669, 0.1690],
        [0.1550, 0.1648, 0.1731,  ..., 0.1557, 0.1628, 0.1681]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:12,  1.50it/s]

tensor([[0.1550, 0.1641, 0.1737,  ..., 0.1590, 0.1656, 0.1690],
        [0.1584, 0.1682, 0.1748,  ..., 0.1585, 0.1668, 0.1703],
        [0.1554, 0.1647, 0.1716,  ..., 0.1559, 0.1644, 0.1677],
        ...,
        [0.1584, 0.1678, 0.1766,  ..., 0.1580, 0.1646, 0.1697],
        [0.1580, 0.1653, 0.1767,  ..., 0.1576, 0.1674, 0.1695],
        [0.1541, 0.1660, 0.1719,  ..., 0.1566, 0.1650, 0.1630]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.50it/s]

tensor([[0.1585, 0.1663, 0.1742,  ..., 0.1581, 0.1642, 0.1684],
        [0.1613, 0.1665, 0.1769,  ..., 0.1570, 0.1662, 0.1724],
        [0.1554, 0.1639, 0.1737,  ..., 0.1569, 0.1645, 0.1711],
        ...,
        [0.1565, 0.1627, 0.1745,  ..., 0.1547, 0.1618, 0.1695],
        [0.1551, 0.1635, 0.1749,  ..., 0.1580, 0.1637, 0.1693],
        [0.1572, 0.1643, 0.1778,  ..., 0.1555, 0.1620, 0.1692]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:28<00:10,  1.51it/s]

tensor([[0.1588, 0.1638, 0.1770,  ..., 0.1580, 0.1659, 0.1708],
        [0.1576, 0.1633, 0.1746,  ..., 0.1589, 0.1670, 0.1689],
        [0.1571, 0.1658, 0.1765,  ..., 0.1567, 0.1645, 0.1671],
        ...,
        [0.1598, 0.1672, 0.1774,  ..., 0.1580, 0.1667, 0.1751],
        [0.1591, 0.1671, 0.1794,  ..., 0.1588, 0.1663, 0.1741],
        [0.1587, 0.1674, 0.1764,  ..., 0.1564, 0.1663, 0.1684]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:09,  1.52it/s]

tensor([[0.1547, 0.1635, 0.1753,  ..., 0.1563, 0.1618, 0.1677],
        [0.1579, 0.1657, 0.1770,  ..., 0.1583, 0.1662, 0.1715],
        [0.1593, 0.1648, 0.1766,  ..., 0.1579, 0.1672, 0.1715],
        ...,
        [0.1591, 0.1654, 0.1771,  ..., 0.1577, 0.1683, 0.1702],
        [0.1566, 0.1633, 0.1734,  ..., 0.1564, 0.1610, 0.1668],
        [0.1592, 0.1681, 0.1764,  ..., 0.1579, 0.1667, 0.1687]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:09,  1.51it/s]

tensor([[0.1574, 0.1635, 0.1750,  ..., 0.1570, 0.1637, 0.1725],
        [0.1560, 0.1631, 0.1753,  ..., 0.1564, 0.1643, 0.1685],
        [0.1607, 0.1665, 0.1774,  ..., 0.1566, 0.1701, 0.1675],
        ...,
        [0.1574, 0.1658, 0.1739,  ..., 0.1559, 0.1659, 0.1677],
        [0.1572, 0.1650, 0.1765,  ..., 0.1562, 0.1662, 0.1689],
        [0.1575, 0.1664, 0.1742,  ..., 0.1557, 0.1657, 0.1721]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.49it/s]

tensor([[0.1547, 0.1646, 0.1744,  ..., 0.1572, 0.1649, 0.1682],
        [0.1561, 0.1637, 0.1750,  ..., 0.1565, 0.1645, 0.1700],
        [0.1594, 0.1671, 0.1773,  ..., 0.1585, 0.1677, 0.1707],
        ...,
        [0.1548, 0.1629, 0.1707,  ..., 0.1562, 0.1636, 0.1676],
        [0.1574, 0.1669, 0.1742,  ..., 0.1576, 0.1627, 0.1660],
        [0.1593, 0.1702, 0.1740,  ..., 0.1565, 0.1668, 0.1677]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:07,  1.51it/s]

tensor([[0.1423, 0.1535, 0.1530,  ..., 0.1445, 0.1537, 0.1529],
        [0.1546, 0.1652, 0.1724,  ..., 0.1559, 0.1671, 0.1682],
        [0.1574, 0.1650, 0.1776,  ..., 0.1554, 0.1665, 0.1691],
        ...,
        [0.1581, 0.1685, 0.1737,  ..., 0.1588, 0.1660, 0.1634],
        [0.1565, 0.1664, 0.1729,  ..., 0.1540, 0.1617, 0.1627],
        [0.1600, 0.1679, 0.1770,  ..., 0.1580, 0.1667, 0.1703]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.49it/s]

tensor([[0.1537, 0.1667, 0.1713,  ..., 0.1572, 0.1649, 0.1659],
        [0.1558, 0.1659, 0.1730,  ..., 0.1569, 0.1648, 0.1668],
        [0.1567, 0.1657, 0.1765,  ..., 0.1555, 0.1651, 0.1668],
        ...,
        [0.1571, 0.1672, 0.1758,  ..., 0.1553, 0.1656, 0.1689],
        [0.1575, 0.1642, 0.1736,  ..., 0.1566, 0.1639, 0.1719],
        [0.1554, 0.1658, 0.1728,  ..., 0.1577, 0.1663, 0.1663]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.47it/s]

tensor([[0.1587, 0.1654, 0.1752,  ..., 0.1576, 0.1646, 0.1693],
        [0.1554, 0.1660, 0.1738,  ..., 0.1567, 0.1651, 0.1656],
        [0.1603, 0.1650, 0.1790,  ..., 0.1579, 0.1636, 0.1716],
        ...,
        [0.1573, 0.1639, 0.1758,  ..., 0.1585, 0.1634, 0.1697],
        [0.1595, 0.1653, 0.1762,  ..., 0.1597, 0.1658, 0.1707],
        [0.1589, 0.1686, 0.1784,  ..., 0.1564, 0.1662, 0.1678]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:33<00:06,  1.48it/s]

tensor([[0.1564, 0.1667, 0.1746,  ..., 0.1576, 0.1647, 0.1681],
        [0.1564, 0.1651, 0.1755,  ..., 0.1567, 0.1663, 0.1699],
        [0.1560, 0.1638, 0.1761,  ..., 0.1558, 0.1638, 0.1661],
        ...,
        [0.1585, 0.1683, 0.1739,  ..., 0.1603, 0.1650, 0.1684],
        [0.1575, 0.1679, 0.1764,  ..., 0.1567, 0.1659, 0.1699],
        [0.1556, 0.1665, 0.1721,  ..., 0.1574, 0.1611, 0.1633]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.49it/s]

tensor([[0.1590, 0.1679, 0.1759,  ..., 0.1580, 0.1670, 0.1682],
        [0.1425, 0.1535, 0.1540,  ..., 0.1447, 0.1542, 0.1528],
        [0.1552, 0.1646, 0.1744,  ..., 0.1565, 0.1633, 0.1660],
        ...,
        [0.1598, 0.1642, 0.1749,  ..., 0.1580, 0.1642, 0.1715],
        [0.1571, 0.1659, 0.1757,  ..., 0.1572, 0.1647, 0.1681],
        [0.1587, 0.1648, 0.1756,  ..., 0.1592, 0.1666, 0.1712]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.51it/s]

tensor([[0.1547, 0.1640, 0.1719,  ..., 0.1572, 0.1654, 0.1691],
        [0.1568, 0.1682, 0.1724,  ..., 0.1570, 0.1640, 0.1665],
        [0.1548, 0.1638, 0.1714,  ..., 0.1571, 0.1656, 0.1684],
        ...,
        [0.1552, 0.1654, 0.1734,  ..., 0.1579, 0.1624, 0.1654],
        [0.1576, 0.1634, 0.1744,  ..., 0.1553, 0.1634, 0.1677],
        [0.1563, 0.1643, 0.1752,  ..., 0.1563, 0.1649, 0.1708]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:03,  1.53it/s]

tensor([[0.1574, 0.1675, 0.1738,  ..., 0.1560, 0.1653, 0.1696],
        [0.1593, 0.1668, 0.1777,  ..., 0.1571, 0.1636, 0.1691],
        [0.1581, 0.1663, 0.1755,  ..., 0.1568, 0.1661, 0.1694],
        ...,
        [0.1551, 0.1648, 0.1741,  ..., 0.1556, 0.1630, 0.1676],
        [0.1554, 0.1618, 0.1738,  ..., 0.1575, 0.1631, 0.1716],
        [0.1563, 0.1639, 0.1749,  ..., 0.1564, 0.1614, 0.1678]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:36<00:03,  1.53it/s]

tensor([[0.1577, 0.1654, 0.1730,  ..., 0.1561, 0.1633, 0.1661],
        [0.1559, 0.1640, 0.1768,  ..., 0.1567, 0.1627, 0.1700],
        [0.1591, 0.1673, 0.1780,  ..., 0.1567, 0.1666, 0.1745],
        ...,
        [0.1574, 0.1650, 0.1759,  ..., 0.1574, 0.1659, 0.1702],
        [0.1565, 0.1634, 0.1747,  ..., 0.1552, 0.1607, 0.1678],
        [0.1539, 0.1638, 0.1742,  ..., 0.1558, 0.1647, 0.1674]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.54it/s]

tensor([[0.1567, 0.1690, 0.1730,  ..., 0.1539, 0.1649, 0.1656],
        [0.1550, 0.1642, 0.1721,  ..., 0.1568, 0.1643, 0.1667],
        [0.1570, 0.1633, 0.1745,  ..., 0.1572, 0.1649, 0.1724],
        ...,
        [0.1588, 0.1672, 0.1781,  ..., 0.1556, 0.1628, 0.1680],
        [0.1563, 0.1653, 0.1735,  ..., 0.1549, 0.1643, 0.1672],
        [0.1551, 0.1639, 0.1751,  ..., 0.1567, 0.1634, 0.1652]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:01,  1.53it/s]

tensor([[0.1574, 0.1666, 0.1768,  ..., 0.1595, 0.1664, 0.1685],
        [0.1591, 0.1679, 0.1777,  ..., 0.1576, 0.1663, 0.1726],
        [0.1554, 0.1643, 0.1733,  ..., 0.1565, 0.1643, 0.1678],
        ...,
        [0.1606, 0.1643, 0.1768,  ..., 0.1567, 0.1646, 0.1723],
        [0.1588, 0.1659, 0.1775,  ..., 0.1555, 0.1667, 0.1680],
        [0.1532, 0.1629, 0.1737,  ..., 0.1585, 0.1634, 0.1687]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.54it/s]

tensor([[0.1580, 0.1667, 0.1767,  ..., 0.1589, 0.1648, 0.1704],
        [0.1576, 0.1656, 0.1776,  ..., 0.1569, 0.1663, 0.1703],
        [0.1589, 0.1656, 0.1764,  ..., 0.1559, 0.1652, 0.1688],
        ...,
        [0.1598, 0.1651, 0.1745,  ..., 0.1590, 0.1659, 0.1699],
        [0.1550, 0.1657, 0.1733,  ..., 0.1587, 0.1625, 0.1677],
        [0.1595, 0.1678, 0.1767,  ..., 0.1581, 0.1640, 0.1670]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.53it/s]

tensor([[0.1566, 0.1657, 0.1732,  ..., 0.1559, 0.1631, 0.1672],
        [0.1569, 0.1668, 0.1776,  ..., 0.1572, 0.1638, 0.1690],
        [0.1528, 0.1641, 0.1701,  ..., 0.1563, 0.1608, 0.1641],
        ...,
        [0.1569, 0.1668, 0.1738,  ..., 0.1560, 0.1630, 0.1683],
        [0.1607, 0.1663, 0.1751,  ..., 0.1581, 0.1645, 0.1704],
        [0.1566, 0.1661, 0.1753,  ..., 0.1560, 0.1661, 0.1679]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.45it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.35it/s]

tensor([[0.1562, 0.1650, 0.1744,  ..., 0.1583, 0.1630, 0.1696],
        [0.1603, 0.1639, 0.1756,  ..., 0.1588, 0.1631, 0.1683],
        [0.1548, 0.1655, 0.1740,  ..., 0.1547, 0.1629, 0.1678],
        ...,
        [0.1572, 0.1644, 0.1758,  ..., 0.1572, 0.1648, 0.1708],
        [0.1558, 0.1636, 0.1716,  ..., 0.1534, 0.1612, 0.1614],
        [0.1532, 0.1636, 0.1717,  ..., 0.1567, 0.1627, 0.1654]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.39it/s]

tensor([[0.1546, 0.1671, 0.1737,  ..., 0.1566, 0.1646, 0.1667],
        [0.1598, 0.1672, 0.1740,  ..., 0.1560, 0.1648, 0.1666],
        [0.1581, 0.1644, 0.1751,  ..., 0.1565, 0.1651, 0.1673],
        ...,
        [0.1584, 0.1655, 0.1761,  ..., 0.1587, 0.1620, 0.1660],
        [0.1579, 0.1652, 0.1778,  ..., 0.1584, 0.1634, 0.1719],
        [0.1579, 0.1702, 0.1780,  ..., 0.1560, 0.1650, 0.1689]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

tensor([[0.1569, 0.1639, 0.1749,  ..., 0.1577, 0.1637, 0.1700],
        [0.1579, 0.1661, 0.1751,  ..., 0.1569, 0.1667, 0.1691],
        [0.1567, 0.1644, 0.1737,  ..., 0.1561, 0.1644, 0.1718],
        ...,
        [0.1575, 0.1640, 0.1746,  ..., 0.1569, 0.1651, 0.1710],
        [0.1528, 0.1610, 0.1681,  ..., 0.1538, 0.1579, 0.1594],
        [0.1559, 0.1650, 0.1753,  ..., 0.1569, 0.1653, 0.1712]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.40it/s]

tensor([[0.1568, 0.1657, 0.1770,  ..., 0.1565, 0.1629, 0.1648],
        [0.1565, 0.1660, 0.1744,  ..., 0.1577, 0.1670, 0.1693],
        [0.1578, 0.1636, 0.1762,  ..., 0.1559, 0.1656, 0.1714],
        ...,
        [0.1575, 0.1644, 0.1773,  ..., 0.1575, 0.1667, 0.1715],
        [0.1588, 0.1650, 0.1757,  ..., 0.1573, 0.1656, 0.1690],
        [0.1550, 0.1656, 0.1730,  ..., 0.1558, 0.1642, 0.1670]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.42it/s]

tensor([[0.1554, 0.1668, 0.1749,  ..., 0.1571, 0.1656, 0.1720],
        [0.1582, 0.1657, 0.1782,  ..., 0.1570, 0.1666, 0.1704],
        [0.1543, 0.1635, 0.1711,  ..., 0.1567, 0.1621, 0.1687],
        ...,
        [0.1575, 0.1660, 0.1748,  ..., 0.1578, 0.1676, 0.1742],
        [0.1556, 0.1671, 0.1743,  ..., 0.1582, 0.1624, 0.1691],
        [0.1557, 0.1652, 0.1733,  ..., 0.1570, 0.1652, 0.1693]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.42it/s]

tensor([[0.1534, 0.1639, 0.1723,  ..., 0.1571, 0.1626, 0.1661],
        [0.1592, 0.1651, 0.1756,  ..., 0.1575, 0.1641, 0.1691],
        [0.1555, 0.1623, 0.1743,  ..., 0.1547, 0.1571, 0.1646],
        ...,
        [0.1543, 0.1626, 0.1723,  ..., 0.1579, 0.1626, 0.1649],
        [0.1572, 0.1620, 0.1774,  ..., 0.1574, 0.1638, 0.1693],
        [0.1605, 0.1696, 0.1783,  ..., 0.1591, 0.1655, 0.1719]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.42it/s]

tensor([[0.1596, 0.1640, 0.1779,  ..., 0.1577, 0.1650, 0.1756],
        [0.1559, 0.1679, 0.1708,  ..., 0.1584, 0.1655, 0.1689],
        [0.1557, 0.1661, 0.1748,  ..., 0.1603, 0.1652, 0.1711],
        ...,
        [0.1585, 0.1680, 0.1755,  ..., 0.1580, 0.1688, 0.1693],
        [0.1600, 0.1691, 0.1786,  ..., 0.1576, 0.1694, 0.1721],
        [0.1525, 0.1626, 0.1680,  ..., 0.1574, 0.1621, 0.1626]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.43it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1550, 0.1663, 0.1725,  ..., 0.1537, 0.1633, 0.1662],
        [0.1559, 0.1645, 0.1738,  ..., 0.1572, 0.1629, 0.1704],
        [0.1590, 0.1652, 0.1742,  ..., 0.1569, 0.1645, 0.1692],
        ...,
        [0.1566, 0.1637, 0.1747,  ..., 0.1564, 0.1634, 0.1674],
        [0.1566, 0.1662, 0.1713,  ..., 0.1533, 0.1634, 0.1642],
        [0.1575, 0.1662, 0.1734,  ..., 0.1553, 0.1621, 0.1643]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 3, train_loss: 0.3445173501968384, valid_loss: 0.26066964864730835
Parameter containing:
tensor([[ 1.0004,  1.0004,  1.0004,  1.0004,  1.0004,  1.0004,  1.0004,  1.0004,
          1.0004,  1.0004, -0.4996, -0.4996, -0.4996, -0.4996, -0.4996, -0.4996,
         -0.4996, -0.4996, -0.4996, -0.4996],
       

  2%|▏         | 1/57 [00:00<00:36,  1.53it/s]

tensor([[0.1557, 0.1645, 0.1741,  ..., 0.1562, 0.1651, 0.1704],
        [0.1573, 0.1630, 0.1770,  ..., 0.1568, 0.1657, 0.1682],
        [0.1572, 0.1664, 0.1733,  ..., 0.1584, 0.1639, 0.1713],
        ...,
        [0.1555, 0.1649, 0.1754,  ..., 0.1581, 0.1635, 0.1664],
        [0.1599, 0.1681, 0.1786,  ..., 0.1566, 0.1641, 0.1690],
        [0.1569, 0.1662, 0.1767,  ..., 0.1558, 0.1644, 0.1715]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:36,  1.52it/s]

tensor([[0.1576, 0.1648, 0.1748,  ..., 0.1544, 0.1629, 0.1681],
        [0.1557, 0.1630, 0.1726,  ..., 0.1557, 0.1623, 0.1679],
        [0.1557, 0.1648, 0.1727,  ..., 0.1559, 0.1613, 0.1624],
        ...,
        [0.1571, 0.1667, 0.1768,  ..., 0.1571, 0.1660, 0.1682],
        [0.1558, 0.1653, 0.1749,  ..., 0.1563, 0.1636, 0.1681],
        [0.1576, 0.1674, 0.1738,  ..., 0.1565, 0.1628, 0.1666]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:35,  1.53it/s]

tensor([[0.1584, 0.1656, 0.1748,  ..., 0.1580, 0.1653, 0.1704],
        [0.1578, 0.1650, 0.1754,  ..., 0.1568, 0.1650, 0.1721],
        [0.1572, 0.1652, 0.1762,  ..., 0.1557, 0.1646, 0.1689],
        ...,
        [0.1562, 0.1638, 0.1753,  ..., 0.1577, 0.1664, 0.1702],
        [0.1595, 0.1689, 0.1749,  ..., 0.1556, 0.1638, 0.1659],
        [0.1594, 0.1673, 0.1777,  ..., 0.1589, 0.1682, 0.1709]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:34,  1.53it/s]

tensor([[0.1579, 0.1682, 0.1757,  ..., 0.1578, 0.1640, 0.1668],
        [0.1596, 0.1656, 0.1768,  ..., 0.1570, 0.1655, 0.1688],
        [0.1566, 0.1651, 0.1732,  ..., 0.1567, 0.1632, 0.1684],
        ...,
        [0.1580, 0.1652, 0.1774,  ..., 0.1566, 0.1639, 0.1698],
        [0.1575, 0.1629, 0.1758,  ..., 0.1565, 0.1623, 0.1739],
        [0.1583, 0.1668, 0.1763,  ..., 0.1556, 0.1670, 0.1692]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.53it/s]

tensor([[0.1567, 0.1647, 0.1745,  ..., 0.1560, 0.1639, 0.1714],
        [0.1608, 0.1694, 0.1766,  ..., 0.1598, 0.1660, 0.1727],
        [0.1609, 0.1665, 0.1783,  ..., 0.1572, 0.1660, 0.1691],
        ...,
        [0.1571, 0.1669, 0.1717,  ..., 0.1565, 0.1619, 0.1628],
        [0.1564, 0.1666, 0.1742,  ..., 0.1550, 0.1613, 0.1634],
        [0.1596, 0.1681, 0.1759,  ..., 0.1601, 0.1660, 0.1742]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.51it/s]

tensor([[0.1570, 0.1650, 0.1737,  ..., 0.1585, 0.1654, 0.1710],
        [0.1578, 0.1677, 0.1748,  ..., 0.1577, 0.1653, 0.1711],
        [0.1568, 0.1665, 0.1742,  ..., 0.1561, 0.1626, 0.1659],
        ...,
        [0.1542, 0.1629, 0.1730,  ..., 0.1540, 0.1636, 0.1666],
        [0.1574, 0.1655, 0.1762,  ..., 0.1561, 0.1648, 0.1691],
        [0.1567, 0.1653, 0.1758,  ..., 0.1575, 0.1640, 0.1670]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.48it/s]

tensor([[0.1538, 0.1631, 0.1720,  ..., 0.1568, 0.1643, 0.1661],
        [0.1538, 0.1632, 0.1688,  ..., 0.1567, 0.1620, 0.1664],
        [0.1585, 0.1640, 0.1758,  ..., 0.1578, 0.1650, 0.1736],
        ...,
        [0.1593, 0.1657, 0.1763,  ..., 0.1576, 0.1616, 0.1672],
        [0.1605, 0.1670, 0.1761,  ..., 0.1542, 0.1642, 0.1666],
        [0.1569, 0.1644, 0.1766,  ..., 0.1566, 0.1635, 0.1675]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.51it/s]

tensor([[0.1556, 0.1589, 0.1706,  ..., 0.1538, 0.1557, 0.1603],
        [0.1572, 0.1653, 0.1742,  ..., 0.1554, 0.1623, 0.1660],
        [0.1571, 0.1666, 0.1757,  ..., 0.1533, 0.1647, 0.1660],
        ...,
        [0.1556, 0.1624, 0.1709,  ..., 0.1574, 0.1640, 0.1683],
        [0.1600, 0.1650, 0.1792,  ..., 0.1556, 0.1649, 0.1703],
        [0.1557, 0.1639, 0.1745,  ..., 0.1559, 0.1640, 0.1692]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:31,  1.51it/s]

tensor([[0.1563, 0.1651, 0.1742,  ..., 0.1568, 0.1646, 0.1683],
        [0.1577, 0.1670, 0.1774,  ..., 0.1571, 0.1666, 0.1714],
        [0.1574, 0.1625, 0.1744,  ..., 0.1560, 0.1632, 0.1683],
        ...,
        [0.1562, 0.1657, 0.1729,  ..., 0.1557, 0.1636, 0.1651],
        [0.1565, 0.1680, 0.1730,  ..., 0.1579, 0.1649, 0.1683],
        [0.1572, 0.1668, 0.1732,  ..., 0.1580, 0.1665, 0.1707]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:30,  1.52it/s]

tensor([[0.1566, 0.1650, 0.1742,  ..., 0.1562, 0.1672, 0.1690],
        [0.1566, 0.1612, 0.1710,  ..., 0.1563, 0.1631, 0.1746],
        [0.1571, 0.1670, 0.1752,  ..., 0.1575, 0.1664, 0.1709],
        ...,
        [0.1575, 0.1680, 0.1747,  ..., 0.1570, 0.1655, 0.1687],
        [0.1553, 0.1667, 0.1744,  ..., 0.1550, 0.1639, 0.1668],
        [0.1588, 0.1674, 0.1764,  ..., 0.1578, 0.1643, 0.1723]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:29,  1.54it/s]

tensor([[0.1566, 0.1672, 0.1755,  ..., 0.1560, 0.1644, 0.1699],
        [0.1535, 0.1639, 0.1713,  ..., 0.1567, 0.1617, 0.1657],
        [0.1578, 0.1639, 0.1739,  ..., 0.1570, 0.1656, 0.1689],
        ...,
        [0.1589, 0.1671, 0.1766,  ..., 0.1579, 0.1656, 0.1715],
        [0.1556, 0.1654, 0.1730,  ..., 0.1566, 0.1637, 0.1686],
        [0.1562, 0.1654, 0.1744,  ..., 0.1568, 0.1645, 0.1682]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:07<00:29,  1.54it/s]

tensor([[0.1584, 0.1655, 0.1751,  ..., 0.1562, 0.1644, 0.1711],
        [0.1587, 0.1670, 0.1759,  ..., 0.1555, 0.1639, 0.1682],
        [0.1598, 0.1648, 0.1772,  ..., 0.1558, 0.1644, 0.1714],
        ...,
        [0.1588, 0.1679, 0.1764,  ..., 0.1564, 0.1629, 0.1682],
        [0.1587, 0.1652, 0.1760,  ..., 0.1557, 0.1650, 0.1698],
        [0.1563, 0.1645, 0.1760,  ..., 0.1571, 0.1645, 0.1684]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:28,  1.55it/s]

tensor([[0.1557, 0.1628, 0.1733,  ..., 0.1557, 0.1637, 0.1698],
        [0.1587, 0.1665, 0.1765,  ..., 0.1569, 0.1629, 0.1669],
        [0.1570, 0.1642, 0.1745,  ..., 0.1556, 0.1638, 0.1694],
        ...,
        [0.1561, 0.1656, 0.1751,  ..., 0.1576, 0.1652, 0.1689],
        [0.1578, 0.1664, 0.1762,  ..., 0.1558, 0.1647, 0.1705],
        [0.1573, 0.1657, 0.1770,  ..., 0.1556, 0.1634, 0.1701]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:27,  1.55it/s]

tensor([[0.1582, 0.1644, 0.1746,  ..., 0.1561, 0.1643, 0.1686],
        [0.1572, 0.1644, 0.1768,  ..., 0.1565, 0.1677, 0.1701],
        [0.1592, 0.1666, 0.1768,  ..., 0.1565, 0.1657, 0.1746],
        ...,
        [0.1558, 0.1654, 0.1744,  ..., 0.1557, 0.1634, 0.1656],
        [0.1541, 0.1647, 0.1692,  ..., 0.1560, 0.1645, 0.1633],
        [0.1572, 0.1673, 0.1736,  ..., 0.1583, 0.1657, 0.1695]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:09<00:27,  1.54it/s]

tensor([[0.1566, 0.1652, 0.1752,  ..., 0.1556, 0.1612, 0.1645],
        [0.1580, 0.1673, 0.1769,  ..., 0.1577, 0.1636, 0.1686],
        [0.1580, 0.1634, 0.1745,  ..., 0.1569, 0.1629, 0.1693],
        ...,
        [0.1581, 0.1658, 0.1763,  ..., 0.1564, 0.1654, 0.1674],
        [0.1564, 0.1632, 0.1743,  ..., 0.1569, 0.1649, 0.1705],
        [0.1588, 0.1653, 0.1757,  ..., 0.1559, 0.1657, 0.1712]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:26,  1.54it/s]

tensor([[0.1575, 0.1643, 0.1774,  ..., 0.1552, 0.1628, 0.1683],
        [0.1545, 0.1618, 0.1695,  ..., 0.1562, 0.1645, 0.1690],
        [0.1569, 0.1647, 0.1749,  ..., 0.1562, 0.1649, 0.1702],
        ...,
        [0.1579, 0.1651, 0.1743,  ..., 0.1569, 0.1655, 0.1681],
        [0.1595, 0.1660, 0.1782,  ..., 0.1563, 0.1632, 0.1662],
        [0.1581, 0.1648, 0.1769,  ..., 0.1553, 0.1646, 0.1699]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:26,  1.49it/s]

tensor([[0.1577, 0.1672, 0.1745,  ..., 0.1575, 0.1656, 0.1653],
        [0.1586, 0.1637, 0.1758,  ..., 0.1578, 0.1619, 0.1662],
        [0.1545, 0.1663, 0.1723,  ..., 0.1571, 0.1648, 0.1658],
        ...,
        [0.1591, 0.1667, 0.1761,  ..., 0.1547, 0.1637, 0.1679],
        [0.1582, 0.1633, 0.1767,  ..., 0.1560, 0.1655, 0.1738],
        [0.1581, 0.1665, 0.1755,  ..., 0.1566, 0.1649, 0.1673]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:11<00:26,  1.48it/s]

tensor([[0.1540, 0.1641, 0.1743,  ..., 0.1554, 0.1611, 0.1643],
        [0.1585, 0.1670, 0.1774,  ..., 0.1572, 0.1669, 0.1701],
        [0.1563, 0.1665, 0.1755,  ..., 0.1554, 0.1666, 0.1689],
        ...,
        [0.1604, 0.1673, 0.1787,  ..., 0.1571, 0.1653, 0.1724],
        [0.1510, 0.1591, 0.1663,  ..., 0.1524, 0.1527, 0.1612],
        [0.1555, 0.1637, 0.1736,  ..., 0.1566, 0.1631, 0.1679]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:25,  1.50it/s]

tensor([[0.1576, 0.1669, 0.1757,  ..., 0.1560, 0.1653, 0.1703],
        [0.1547, 0.1647, 0.1724,  ..., 0.1579, 0.1634, 0.1681],
        [0.1440, 0.1548, 0.1562,  ..., 0.1459, 0.1548, 0.1542],
        ...,
        [0.1575, 0.1671, 0.1742,  ..., 0.1553, 0.1641, 0.1683],
        [0.1556, 0.1635, 0.1724,  ..., 0.1597, 0.1632, 0.1703],
        [0.1595, 0.1651, 0.1765,  ..., 0.1587, 0.1648, 0.1737]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:24,  1.52it/s]

tensor([[0.1602, 0.1662, 0.1761,  ..., 0.1583, 0.1666, 0.1706],
        [0.1558, 0.1648, 0.1759,  ..., 0.1558, 0.1623, 0.1669],
        [0.1572, 0.1641, 0.1762,  ..., 0.1565, 0.1645, 0.1710],
        ...,
        [0.1540, 0.1645, 0.1736,  ..., 0.1546, 0.1589, 0.1643],
        [0.1551, 0.1637, 0.1743,  ..., 0.1577, 0.1628, 0.1666],
        [0.1544, 0.1632, 0.1740,  ..., 0.1546, 0.1613, 0.1682]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:13<00:23,  1.53it/s]

tensor([[0.1567, 0.1680, 0.1725,  ..., 0.1573, 0.1668, 0.1670],
        [0.1575, 0.1656, 0.1764,  ..., 0.1555, 0.1636, 0.1687],
        [0.1592, 0.1677, 0.1768,  ..., 0.1580, 0.1639, 0.1720],
        ...,
        [0.1589, 0.1666, 0.1732,  ..., 0.1583, 0.1643, 0.1666],
        [0.1584, 0.1681, 0.1746,  ..., 0.1582, 0.1651, 0.1698],
        [0.1569, 0.1634, 0.1698,  ..., 0.1564, 0.1595, 0.1648]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:14<00:22,  1.53it/s]

tensor([[0.1590, 0.1675, 0.1745,  ..., 0.1567, 0.1657, 0.1691],
        [0.1591, 0.1662, 0.1753,  ..., 0.1565, 0.1657, 0.1680],
        [0.1580, 0.1662, 0.1763,  ..., 0.1567, 0.1673, 0.1706],
        ...,
        [0.1573, 0.1666, 0.1765,  ..., 0.1556, 0.1641, 0.1684],
        [0.1584, 0.1655, 0.1736,  ..., 0.1563, 0.1621, 0.1663],
        [0.1570, 0.1657, 0.1749,  ..., 0.1592, 0.1659, 0.1693]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.54it/s]

tensor([[0.1580, 0.1697, 0.1720,  ..., 0.1573, 0.1636, 0.1642],
        [0.1567, 0.1660, 0.1756,  ..., 0.1562, 0.1647, 0.1691],
        [0.1558, 0.1644, 0.1725,  ..., 0.1574, 0.1651, 0.1710],
        ...,
        [0.1557, 0.1648, 0.1744,  ..., 0.1572, 0.1642, 0.1646],
        [0.1561, 0.1637, 0.1722,  ..., 0.1553, 0.1602, 0.1660],
        [0.1549, 0.1644, 0.1716,  ..., 0.1563, 0.1625, 0.1681]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:15<00:21,  1.54it/s]

tensor([[0.1559, 0.1640, 0.1754,  ..., 0.1564, 0.1630, 0.1706],
        [0.1566, 0.1658, 0.1734,  ..., 0.1572, 0.1636, 0.1701],
        [0.1548, 0.1653, 0.1719,  ..., 0.1558, 0.1636, 0.1677],
        ...,
        [0.1536, 0.1646, 0.1701,  ..., 0.1522, 0.1615, 0.1615],
        [0.1564, 0.1641, 0.1747,  ..., 0.1559, 0.1635, 0.1681],
        [0.1562, 0.1674, 0.1704,  ..., 0.1547, 0.1624, 0.1627]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:16<00:20,  1.53it/s]

tensor([[0.1574, 0.1650, 0.1755,  ..., 0.1580, 0.1658, 0.1733],
        [0.1557, 0.1657, 0.1739,  ..., 0.1554, 0.1642, 0.1706],
        [0.1612, 0.1639, 0.1753,  ..., 0.1559, 0.1655, 0.1711],
        ...,
        [0.1580, 0.1633, 0.1763,  ..., 0.1566, 0.1662, 0.1689],
        [0.1561, 0.1655, 0.1764,  ..., 0.1575, 0.1645, 0.1670],
        [0.1594, 0.1680, 0.1749,  ..., 0.1554, 0.1645, 0.1691]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.55it/s]

tensor([[0.1485, 0.1606, 0.1615,  ..., 0.1509, 0.1587, 0.1596],
        [0.1595, 0.1665, 0.1777,  ..., 0.1583, 0.1648, 0.1719],
        [0.1595, 0.1668, 0.1732,  ..., 0.1547, 0.1633, 0.1634],
        ...,
        [0.1558, 0.1679, 0.1739,  ..., 0.1576, 0.1641, 0.1663],
        [0.1553, 0.1670, 0.1712,  ..., 0.1573, 0.1655, 0.1669],
        [0.1581, 0.1652, 0.1754,  ..., 0.1579, 0.1645, 0.1721]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:17<00:19,  1.53it/s]

tensor([[0.1570, 0.1676, 0.1788,  ..., 0.1569, 0.1634, 0.1673],
        [0.1581, 0.1678, 0.1759,  ..., 0.1582, 0.1659, 0.1697],
        [0.1571, 0.1647, 0.1767,  ..., 0.1574, 0.1642, 0.1691],
        ...,
        [0.1613, 0.1665, 0.1745,  ..., 0.1594, 0.1679, 0.1707],
        [0.1569, 0.1659, 0.1762,  ..., 0.1561, 0.1653, 0.1690],
        [0.1581, 0.1666, 0.1765,  ..., 0.1576, 0.1646, 0.1718]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:18<00:19,  1.49it/s]

tensor([[0.1560, 0.1643, 0.1760,  ..., 0.1572, 0.1646, 0.1737],
        [0.1574, 0.1654, 0.1750,  ..., 0.1574, 0.1635, 0.1698],
        [0.1561, 0.1634, 0.1736,  ..., 0.1579, 0.1633, 0.1710],
        ...,
        [0.1530, 0.1638, 0.1720,  ..., 0.1548, 0.1589, 0.1635],
        [0.1564, 0.1637, 0.1727,  ..., 0.1556, 0.1654, 0.1690],
        [0.1569, 0.1643, 0.1741,  ..., 0.1563, 0.1651, 0.1705]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:18,  1.50it/s]

tensor([[0.1594, 0.1666, 0.1766,  ..., 0.1585, 0.1654, 0.1704],
        [0.1581, 0.1654, 0.1744,  ..., 0.1569, 0.1643, 0.1670],
        [0.1576, 0.1659, 0.1718,  ..., 0.1566, 0.1641, 0.1668],
        ...,
        [0.1565, 0.1643, 0.1729,  ..., 0.1590, 0.1622, 0.1710],
        [0.1597, 0.1665, 0.1755,  ..., 0.1573, 0.1657, 0.1721],
        [0.1543, 0.1631, 0.1713,  ..., 0.1572, 0.1628, 0.1684]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:19<00:17,  1.50it/s]

tensor([[0.1571, 0.1641, 0.1743,  ..., 0.1562, 0.1639, 0.1694],
        [0.1586, 0.1657, 0.1772,  ..., 0.1574, 0.1665, 0.1729],
        [0.1594, 0.1655, 0.1776,  ..., 0.1567, 0.1654, 0.1723],
        ...,
        [0.1570, 0.1684, 0.1761,  ..., 0.1572, 0.1629, 0.1665],
        [0.1572, 0.1665, 0.1749,  ..., 0.1571, 0.1650, 0.1733],
        [0.1577, 0.1661, 0.1792,  ..., 0.1572, 0.1652, 0.1676]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:20<00:17,  1.52it/s]

tensor([[0.1560, 0.1654, 0.1738,  ..., 0.1570, 0.1648, 0.1656],
        [0.1593, 0.1652, 0.1790,  ..., 0.1579, 0.1649, 0.1736],
        [0.1588, 0.1642, 0.1756,  ..., 0.1575, 0.1649, 0.1708],
        ...,
        [0.1601, 0.1685, 0.1779,  ..., 0.1588, 0.1680, 0.1703],
        [0.1560, 0.1675, 0.1730,  ..., 0.1552, 0.1640, 0.1637],
        [0.1573, 0.1640, 0.1740,  ..., 0.1560, 0.1631, 0.1676]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.53it/s]

tensor([[0.1536, 0.1648, 0.1681,  ..., 0.1561, 0.1587, 0.1628],
        [0.1565, 0.1657, 0.1719,  ..., 0.1579, 0.1661, 0.1676],
        [0.1569, 0.1647, 0.1754,  ..., 0.1581, 0.1631, 0.1665],
        ...,
        [0.1574, 0.1631, 0.1763,  ..., 0.1560, 0.1626, 0.1665],
        [0.1596, 0.1655, 0.1765,  ..., 0.1571, 0.1640, 0.1703],
        [0.1558, 0.1647, 0.1721,  ..., 0.1570, 0.1622, 0.1688]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:21<00:15,  1.53it/s]

tensor([[0.1560, 0.1654, 0.1745,  ..., 0.1572, 0.1642, 0.1703],
        [0.1573, 0.1631, 0.1731,  ..., 0.1559, 0.1620, 0.1702],
        [0.1583, 0.1664, 0.1767,  ..., 0.1555, 0.1635, 0.1666],
        ...,
        [0.1609, 0.1673, 0.1779,  ..., 0.1577, 0.1676, 0.1727],
        [0.1594, 0.1637, 0.1768,  ..., 0.1572, 0.1650, 0.1697],
        [0.1589, 0.1654, 0.1762,  ..., 0.1566, 0.1667, 0.1706]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:22<00:14,  1.54it/s]

tensor([[0.1604, 0.1647, 0.1775,  ..., 0.1584, 0.1638, 0.1703],
        [0.1566, 0.1650, 0.1723,  ..., 0.1564, 0.1645, 0.1698],
        [0.1599, 0.1639, 0.1756,  ..., 0.1560, 0.1632, 0.1704],
        ...,
        [0.1584, 0.1670, 0.1771,  ..., 0.1572, 0.1641, 0.1697],
        [0.1584, 0.1665, 0.1758,  ..., 0.1594, 0.1649, 0.1708],
        [0.1559, 0.1686, 0.1732,  ..., 0.1570, 0.1661, 0.1672]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:22<00:14,  1.53it/s]

tensor([[0.1578, 0.1650, 0.1799,  ..., 0.1579, 0.1646, 0.1702],
        [0.1567, 0.1646, 0.1738,  ..., 0.1572, 0.1654, 0.1699],
        [0.1557, 0.1623, 0.1721,  ..., 0.1568, 0.1625, 0.1713],
        ...,
        [0.1566, 0.1648, 0.1736,  ..., 0.1561, 0.1648, 0.1700],
        [0.1586, 0.1700, 0.1754,  ..., 0.1569, 0.1674, 0.1700],
        [0.1577, 0.1653, 0.1768,  ..., 0.1557, 0.1654, 0.1722]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:23<00:13,  1.54it/s]

tensor([[0.1522, 0.1628, 0.1693,  ..., 0.1565, 0.1625, 0.1674],
        [0.1586, 0.1638, 0.1764,  ..., 0.1570, 0.1635, 0.1683],
        [0.1589, 0.1682, 0.1751,  ..., 0.1561, 0.1652, 0.1700],
        ...,
        [0.1547, 0.1652, 0.1700,  ..., 0.1553, 0.1648, 0.1620],
        [0.1577, 0.1667, 0.1743,  ..., 0.1554, 0.1641, 0.1667],
        [0.1588, 0.1680, 0.1746,  ..., 0.1575, 0.1684, 0.1723]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:24<00:13,  1.53it/s]

tensor([[0.1584, 0.1644, 0.1735,  ..., 0.1581, 0.1655, 0.1691],
        [0.1585, 0.1658, 0.1742,  ..., 0.1561, 0.1639, 0.1685],
        [0.1588, 0.1684, 0.1773,  ..., 0.1569, 0.1633, 0.1682],
        ...,
        [0.1530, 0.1611, 0.1715,  ..., 0.1530, 0.1575, 0.1604],
        [0.1571, 0.1643, 0.1740,  ..., 0.1560, 0.1633, 0.1708],
        [0.1560, 0.1654, 0.1733,  ..., 0.1569, 0.1643, 0.1689]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:24<00:12,  1.50it/s]

tensor([[0.1559, 0.1639, 0.1751,  ..., 0.1559, 0.1643, 0.1664],
        [0.1572, 0.1675, 0.1747,  ..., 0.1600, 0.1651, 0.1690],
        [0.1572, 0.1675, 0.1737,  ..., 0.1573, 0.1636, 0.1664],
        ...,
        [0.1590, 0.1684, 0.1747,  ..., 0.1567, 0.1632, 0.1696],
        [0.1580, 0.1679, 0.1755,  ..., 0.1564, 0.1660, 0.1678],
        [0.1597, 0.1672, 0.1775,  ..., 0.1581, 0.1667, 0.1696]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:25<00:12,  1.50it/s]

tensor([[0.1557, 0.1652, 0.1755,  ..., 0.1571, 0.1646, 0.1694],
        [0.1560, 0.1646, 0.1737,  ..., 0.1568, 0.1633, 0.1702],
        [0.1575, 0.1661, 0.1756,  ..., 0.1568, 0.1661, 0.1708],
        ...,
        [0.1564, 0.1623, 0.1747,  ..., 0.1571, 0.1592, 0.1681],
        [0.1589, 0.1675, 0.1740,  ..., 0.1546, 0.1650, 0.1665],
        [0.1579, 0.1650, 0.1757,  ..., 0.1571, 0.1656, 0.1678]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:26<00:11,  1.45it/s]

tensor([[0.1556, 0.1649, 0.1730,  ..., 0.1580, 0.1647, 0.1710],
        [0.1585, 0.1655, 0.1760,  ..., 0.1581, 0.1666, 0.1696],
        [0.1580, 0.1659, 0.1761,  ..., 0.1560, 0.1659, 0.1701],
        ...,
        [0.1529, 0.1641, 0.1691,  ..., 0.1571, 0.1607, 0.1638],
        [0.1570, 0.1640, 0.1743,  ..., 0.1565, 0.1640, 0.1708],
        [0.1572, 0.1635, 0.1777,  ..., 0.1561, 0.1633, 0.1689]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:11,  1.40it/s]

tensor([[0.1551, 0.1633, 0.1722,  ..., 0.1560, 0.1648, 0.1676],
        [0.1569, 0.1626, 0.1723,  ..., 0.1566, 0.1624, 0.1686],
        [0.1587, 0.1672, 0.1741,  ..., 0.1573, 0.1655, 0.1677],
        ...,
        [0.1577, 0.1632, 0.1751,  ..., 0.1562, 0.1635, 0.1702],
        [0.1561, 0.1662, 0.1739,  ..., 0.1547, 0.1655, 0.1645],
        [0.1546, 0.1646, 0.1721,  ..., 0.1554, 0.1634, 0.1672]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:27<00:10,  1.41it/s]

tensor([[0.1572, 0.1628, 0.1723,  ..., 0.1573, 0.1616, 0.1691],
        [0.1566, 0.1654, 0.1724,  ..., 0.1551, 0.1593, 0.1655],
        [0.1561, 0.1637, 0.1739,  ..., 0.1568, 0.1633, 0.1694],
        ...,
        [0.1568, 0.1684, 0.1742,  ..., 0.1564, 0.1648, 0.1700],
        [0.1585, 0.1669, 0.1761,  ..., 0.1572, 0.1657, 0.1697],
        [0.1547, 0.1659, 0.1701,  ..., 0.1571, 0.1618, 0.1643]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:28<00:09,  1.44it/s]

tensor([[0.1527, 0.1611, 0.1686,  ..., 0.1554, 0.1590, 0.1643],
        [0.1597, 0.1686, 0.1773,  ..., 0.1576, 0.1682, 0.1721],
        [0.1561, 0.1641, 0.1714,  ..., 0.1549, 0.1616, 0.1659],
        ...,
        [0.1529, 0.1606, 0.1684,  ..., 0.1555, 0.1570, 0.1629],
        [0.1525, 0.1589, 0.1697,  ..., 0.1540, 0.1589, 0.1669],
        [0.1570, 0.1664, 0.1757,  ..., 0.1565, 0.1650, 0.1691]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:08,  1.45it/s]

tensor([[0.1568, 0.1650, 0.1759,  ..., 0.1569, 0.1645, 0.1664],
        [0.1584, 0.1669, 0.1748,  ..., 0.1600, 0.1654, 0.1698],
        [0.1600, 0.1656, 0.1741,  ..., 0.1571, 0.1656, 0.1710],
        ...,
        [0.1572, 0.1631, 0.1744,  ..., 0.1560, 0.1641, 0.1707],
        [0.1567, 0.1666, 0.1754,  ..., 0.1570, 0.1637, 0.1663],
        [0.1544, 0.1626, 0.1723,  ..., 0.1574, 0.1613, 0.1694]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:29<00:08,  1.49it/s]

tensor([[0.1617, 0.1666, 0.1761,  ..., 0.1569, 0.1668, 0.1711],
        [0.1636, 0.1678, 0.1777,  ..., 0.1571, 0.1649, 0.1708],
        [0.1551, 0.1636, 0.1729,  ..., 0.1573, 0.1632, 0.1716],
        ...,
        [0.1592, 0.1693, 0.1777,  ..., 0.1579, 0.1653, 0.1714],
        [0.1502, 0.1597, 0.1672,  ..., 0.1559, 0.1567, 0.1621],
        [0.1620, 0.1671, 0.1767,  ..., 0.1567, 0.1658, 0.1742]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:30<00:07,  1.50it/s]

tensor([[0.1558, 0.1662, 0.1737,  ..., 0.1557, 0.1635, 0.1647],
        [0.1521, 0.1618, 0.1679,  ..., 0.1530, 0.1588, 0.1620],
        [0.1539, 0.1649, 0.1725,  ..., 0.1545, 0.1625, 0.1641],
        ...,
        [0.1597, 0.1666, 0.1753,  ..., 0.1594, 0.1672, 0.1752],
        [0.1559, 0.1640, 0.1728,  ..., 0.1577, 0.1631, 0.1686],
        [0.1599, 0.1663, 0.1778,  ..., 0.1567, 0.1640, 0.1694]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:06,  1.51it/s]

tensor([[0.1570, 0.1628, 0.1740,  ..., 0.1565, 0.1624, 0.1693],
        [0.1570, 0.1650, 0.1756,  ..., 0.1565, 0.1645, 0.1721],
        [0.1580, 0.1658, 0.1738,  ..., 0.1575, 0.1611, 0.1653],
        ...,
        [0.1592, 0.1655, 0.1765,  ..., 0.1569, 0.1647, 0.1723],
        [0.1562, 0.1646, 0.1750,  ..., 0.1563, 0.1628, 0.1681],
        [0.1529, 0.1652, 0.1705,  ..., 0.1565, 0.1616, 0.1669]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:31<00:06,  1.47it/s]

tensor([[0.1567, 0.1665, 0.1739,  ..., 0.1563, 0.1642, 0.1700],
        [0.1588, 0.1648, 0.1746,  ..., 0.1599, 0.1627, 0.1679],
        [0.1570, 0.1660, 0.1731,  ..., 0.1561, 0.1655, 0.1683],
        ...,
        [0.1580, 0.1649, 0.1755,  ..., 0.1569, 0.1642, 0.1726],
        [0.1599, 0.1642, 0.1760,  ..., 0.1566, 0.1649, 0.1696],
        [0.1562, 0.1638, 0.1735,  ..., 0.1572, 0.1639, 0.1698]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:32<00:05,  1.42it/s]

tensor([[0.1557, 0.1657, 0.1736,  ..., 0.1567, 0.1670, 0.1692],
        [0.1574, 0.1661, 0.1737,  ..., 0.1548, 0.1628, 0.1675],
        [0.1554, 0.1631, 0.1717,  ..., 0.1559, 0.1652, 0.1669],
        ...,
        [0.1537, 0.1633, 0.1690,  ..., 0.1555, 0.1574, 0.1601],
        [0.1562, 0.1641, 0.1754,  ..., 0.1545, 0.1579, 0.1608],
        [0.1567, 0.1656, 0.1730,  ..., 0.1564, 0.1646, 0.1680]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:33<00:04,  1.44it/s]

tensor([[0.1564, 0.1646, 0.1748,  ..., 0.1550, 0.1650, 0.1665],
        [0.1545, 0.1645, 0.1742,  ..., 0.1560, 0.1658, 0.1658],
        [0.1548, 0.1647, 0.1719,  ..., 0.1577, 0.1635, 0.1687],
        ...,
        [0.1569, 0.1641, 0.1710,  ..., 0.1568, 0.1619, 0.1615],
        [0.1557, 0.1643, 0.1708,  ..., 0.1547, 0.1636, 0.1679],
        [0.1587, 0.1672, 0.1761,  ..., 0.1554, 0.1633, 0.1696]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:33<00:04,  1.44it/s]

tensor([[0.1552, 0.1643, 0.1729,  ..., 0.1573, 0.1648, 0.1697],
        [0.1564, 0.1643, 0.1734,  ..., 0.1576, 0.1631, 0.1714],
        [0.1563, 0.1657, 0.1727,  ..., 0.1559, 0.1655, 0.1680],
        ...,
        [0.1587, 0.1663, 0.1744,  ..., 0.1582, 0.1663, 0.1696],
        [0.1599, 0.1695, 0.1751,  ..., 0.1558, 0.1650, 0.1655],
        [0.1574, 0.1663, 0.1728,  ..., 0.1566, 0.1599, 0.1669]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:34<00:03,  1.45it/s]

tensor([[0.1574, 0.1664, 0.1750,  ..., 0.1572, 0.1661, 0.1699],
        [0.1590, 0.1696, 0.1738,  ..., 0.1592, 0.1655, 0.1699],
        [0.1588, 0.1645, 0.1734,  ..., 0.1560, 0.1643, 0.1699],
        ...,
        [0.1580, 0.1664, 0.1747,  ..., 0.1562, 0.1646, 0.1690],
        [0.1583, 0.1674, 0.1762,  ..., 0.1556, 0.1682, 0.1705],
        [0.1558, 0.1665, 0.1739,  ..., 0.1565, 0.1649, 0.1687]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:35<00:02,  1.47it/s]

tensor([[0.1427, 0.1534, 0.1543,  ..., 0.1431, 0.1528, 0.1526],
        [0.1563, 0.1633, 0.1711,  ..., 0.1556, 0.1641, 0.1665],
        [0.1585, 0.1665, 0.1774,  ..., 0.1565, 0.1657, 0.1721],
        ...,
        [0.1576, 0.1667, 0.1768,  ..., 0.1566, 0.1640, 0.1690],
        [0.1543, 0.1687, 0.1732,  ..., 0.1582, 0.1620, 0.1688],
        [0.1515, 0.1662, 0.1715,  ..., 0.1571, 0.1647, 0.1662]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:35<00:02,  1.49it/s]

tensor([[0.1607, 0.1646, 0.1791,  ..., 0.1556, 0.1650, 0.1700],
        [0.1550, 0.1646, 0.1763,  ..., 0.1578, 0.1626, 0.1678],
        [0.1578, 0.1622, 0.1725,  ..., 0.1576, 0.1616, 0.1710],
        ...,
        [0.1541, 0.1634, 0.1719,  ..., 0.1566, 0.1621, 0.1657],
        [0.1535, 0.1613, 0.1710,  ..., 0.1560, 0.1598, 0.1638],
        [0.1571, 0.1663, 0.1730,  ..., 0.1592, 0.1665, 0.1681]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:36<00:01,  1.49it/s]

tensor([[0.1608, 0.1696, 0.1753,  ..., 0.1578, 0.1669, 0.1712],
        [0.1581, 0.1652, 0.1756,  ..., 0.1553, 0.1632, 0.1695],
        [0.1571, 0.1641, 0.1735,  ..., 0.1574, 0.1654, 0.1713],
        ...,
        [0.1569, 0.1665, 0.1740,  ..., 0.1570, 0.1652, 0.1669],
        [0.1592, 0.1638, 0.1749,  ..., 0.1558, 0.1633, 0.1687],
        [0.1575, 0.1645, 0.1754,  ..., 0.1552, 0.1584, 0.1627]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:37<00:00,  1.47it/s]

tensor([[0.1568, 0.1649, 0.1742,  ..., 0.1574, 0.1642, 0.1710],
        [0.1555, 0.1659, 0.1709,  ..., 0.1570, 0.1645, 0.1650],
        [0.1578, 0.1636, 0.1748,  ..., 0.1556, 0.1617, 0.1696],
        ...,
        [0.1560, 0.1647, 0.1735,  ..., 0.1560, 0.1631, 0.1686],
        [0.1582, 0.1667, 0.1735,  ..., 0.1565, 0.1626, 0.1706],
        [0.1576, 0.1652, 0.1752,  ..., 0.1578, 0.1652, 0.1709]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:38<00:00,  1.50it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.18it/s]

tensor([[0.1557, 0.1641, 0.1716,  ..., 0.1574, 0.1623, 0.1645],
        [0.1544, 0.1664, 0.1744,  ..., 0.1566, 0.1646, 0.1653],
        [0.1569, 0.1659, 0.1750,  ..., 0.1576, 0.1650, 0.1703],
        ...,
        [0.1556, 0.1646, 0.1736,  ..., 0.1566, 0.1637, 0.1700],
        [0.1554, 0.1645, 0.1703,  ..., 0.1579, 0.1608, 0.1694],
        [0.1584, 0.1663, 0.1778,  ..., 0.1573, 0.1646, 0.1715]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.16it/s]

tensor([[0.1595, 0.1635, 0.1777,  ..., 0.1565, 0.1630, 0.1711],
        [0.1567, 0.1669, 0.1706,  ..., 0.1566, 0.1661, 0.1661],
        [0.1585, 0.1653, 0.1746,  ..., 0.1580, 0.1645, 0.1689],
        ...,
        [0.1576, 0.1657, 0.1743,  ..., 0.1573, 0.1645, 0.1708],
        [0.1576, 0.1654, 0.1755,  ..., 0.1578, 0.1636, 0.1710],
        [0.1586, 0.1659, 0.1731,  ..., 0.1589, 0.1660, 0.1703]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.25it/s]

tensor([[0.1575, 0.1643, 0.1740,  ..., 0.1572, 0.1642, 0.1706],
        [0.1570, 0.1653, 0.1729,  ..., 0.1572, 0.1677, 0.1718],
        [0.1587, 0.1679, 0.1750,  ..., 0.1577, 0.1667, 0.1716],
        ...,
        [0.1585, 0.1663, 0.1749,  ..., 0.1560, 0.1637, 0.1701],
        [0.1564, 0.1652, 0.1720,  ..., 0.1588, 0.1628, 0.1641],
        [0.1575, 0.1679, 0.1742,  ..., 0.1558, 0.1654, 0.1718]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.28it/s]

tensor([[0.1591, 0.1673, 0.1746,  ..., 0.1573, 0.1652, 0.1710],
        [0.1608, 0.1689, 0.1779,  ..., 0.1578, 0.1643, 0.1698],
        [0.1585, 0.1687, 0.1758,  ..., 0.1602, 0.1646, 0.1676],
        ...,
        [0.1589, 0.1658, 0.1762,  ..., 0.1552, 0.1647, 0.1706],
        [0.1583, 0.1638, 0.1750,  ..., 0.1560, 0.1640, 0.1712],
        [0.1546, 0.1648, 0.1726,  ..., 0.1545, 0.1599, 0.1630]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

tensor([[0.1562, 0.1634, 0.1694,  ..., 0.1579, 0.1612, 0.1665],
        [0.1550, 0.1643, 0.1729,  ..., 0.1583, 0.1612, 0.1658],
        [0.1573, 0.1649, 0.1737,  ..., 0.1566, 0.1655, 0.1716],
        ...,
        [0.1567, 0.1651, 0.1739,  ..., 0.1583, 0.1645, 0.1721],
        [0.1578, 0.1668, 0.1732,  ..., 0.1550, 0.1655, 0.1676],
        [0.1547, 0.1655, 0.1705,  ..., 0.1563, 0.1619, 0.1676]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.34it/s]

tensor([[0.1561, 0.1641, 0.1730,  ..., 0.1561, 0.1635, 0.1693],
        [0.1584, 0.1666, 0.1754,  ..., 0.1568, 0.1648, 0.1687],
        [0.1583, 0.1683, 0.1737,  ..., 0.1553, 0.1624, 0.1709],
        ...,
        [0.1570, 0.1650, 0.1738,  ..., 0.1568, 0.1628, 0.1688],
        [0.1583, 0.1655, 0.1723,  ..., 0.1586, 0.1634, 0.1695],
        [0.1598, 0.1659, 0.1753,  ..., 0.1576, 0.1642, 0.1715]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.40it/s]

tensor([[0.1601, 0.1643, 0.1755,  ..., 0.1581, 0.1654, 0.1697],
        [0.1585, 0.1646, 0.1744,  ..., 0.1574, 0.1643, 0.1703],
        [0.1569, 0.1646, 0.1741,  ..., 0.1587, 0.1647, 0.1708],
        ...,
        [0.1583, 0.1665, 0.1774,  ..., 0.1565, 0.1654, 0.1709],
        [0.1571, 0.1650, 0.1729,  ..., 0.1556, 0.1636, 0.1654],
        [0.1590, 0.1688, 0.1756,  ..., 0.1558, 0.1664, 0.1680]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.34it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1570, 0.1684, 0.1750,  ..., 0.1570, 0.1682, 0.1676],
        [0.1579, 0.1653, 0.1760,  ..., 0.1568, 0.1643, 0.1692],
        [0.1588, 0.1670, 0.1769,  ..., 0.1557, 0.1647, 0.1703],
        ...,
        [0.1565, 0.1627, 0.1712,  ..., 0.1564, 0.1630, 0.1672],
        [0.1558, 0.1654, 0.1748,  ..., 0.1557, 0.1644, 0.1656],
        [0.1549, 0.1637, 0.1736,  ..., 0.1582, 0.1616, 0.1654]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 4, train_loss: 0.15788891911506653, valid_loss: 0.08080749958753586
Parameter containing:
tensor([[ 1.0005,  1.0005,  1.0005,  1.0005,  1.0005,  1.0005,  1.0005,  1.0005,
          1.0005,  1.0005, -0.4995, -0.4995, -0.4996, -0.4995, -0.4995, -0.4995,
         -0.4995, -0.4995, -0.4995, -0.4996],
      

  2%|▏         | 1/57 [00:00<00:36,  1.53it/s]

tensor([[0.1553, 0.1648, 0.1727,  ..., 0.1548, 0.1607, 0.1628],
        [0.1564, 0.1636, 0.1755,  ..., 0.1559, 0.1637, 0.1689],
        [0.1591, 0.1649, 0.1737,  ..., 0.1579, 0.1643, 0.1698],
        ...,
        [0.1556, 0.1655, 0.1720,  ..., 0.1555, 0.1645, 0.1659],
        [0.1597, 0.1699, 0.1771,  ..., 0.1555, 0.1632, 0.1665],
        [0.1589, 0.1635, 0.1769,  ..., 0.1575, 0.1629, 0.1748]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:35,  1.53it/s]

tensor([[0.1574, 0.1658, 0.1759,  ..., 0.1558, 0.1641, 0.1674],
        [0.1582, 0.1664, 0.1748,  ..., 0.1558, 0.1638, 0.1664],
        [0.1552, 0.1640, 0.1732,  ..., 0.1562, 0.1620, 0.1646],
        ...,
        [0.1582, 0.1663, 0.1679,  ..., 0.1557, 0.1636, 0.1618],
        [0.1564, 0.1688, 0.1745,  ..., 0.1557, 0.1622, 0.1659],
        [0.1573, 0.1652, 0.1746,  ..., 0.1566, 0.1629, 0.1699]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:34,  1.54it/s]

tensor([[0.1515, 0.1662, 0.1715,  ..., 0.1570, 0.1647, 0.1662],
        [0.1569, 0.1652, 0.1748,  ..., 0.1549, 0.1630, 0.1683],
        [0.1592, 0.1654, 0.1758,  ..., 0.1564, 0.1637, 0.1723],
        ...,
        [0.1573, 0.1653, 0.1731,  ..., 0.1609, 0.1656, 0.1678],
        [0.1573, 0.1662, 0.1746,  ..., 0.1567, 0.1633, 0.1665],
        [0.1564, 0.1643, 0.1730,  ..., 0.1558, 0.1642, 0.1734]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:34,  1.55it/s]

tensor([[0.1552, 0.1655, 0.1721,  ..., 0.1556, 0.1654, 0.1666],
        [0.1587, 0.1646, 0.1735,  ..., 0.1554, 0.1651, 0.1667],
        [0.1554, 0.1636, 0.1691,  ..., 0.1578, 0.1628, 0.1653],
        ...,
        [0.1526, 0.1636, 0.1716,  ..., 0.1551, 0.1593, 0.1646],
        [0.1559, 0.1650, 0.1695,  ..., 0.1569, 0.1648, 0.1697],
        [0.1571, 0.1652, 0.1757,  ..., 0.1568, 0.1643, 0.1693]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:33,  1.54it/s]

tensor([[0.1583, 0.1655, 0.1747,  ..., 0.1552, 0.1614, 0.1652],
        [0.1561, 0.1655, 0.1742,  ..., 0.1546, 0.1619, 0.1638],
        [0.1576, 0.1644, 0.1769,  ..., 0.1561, 0.1659, 0.1689],
        ...,
        [0.1558, 0.1638, 0.1742,  ..., 0.1578, 0.1627, 0.1696],
        [0.1572, 0.1665, 0.1746,  ..., 0.1562, 0.1624, 0.1676],
        [0.1577, 0.1644, 0.1747,  ..., 0.1553, 0.1642, 0.1704]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:32,  1.55it/s]

tensor([[0.1579, 0.1649, 0.1761,  ..., 0.1576, 0.1646, 0.1716],
        [0.1583, 0.1690, 0.1765,  ..., 0.1588, 0.1660, 0.1702],
        [0.1576, 0.1660, 0.1746,  ..., 0.1585, 0.1652, 0.1697],
        ...,
        [0.1580, 0.1673, 0.1740,  ..., 0.1569, 0.1642, 0.1679],
        [0.1572, 0.1661, 0.1729,  ..., 0.1569, 0.1655, 0.1696],
        [0.1566, 0.1654, 0.1728,  ..., 0.1563, 0.1646, 0.1670]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:32,  1.52it/s]

tensor([[0.1582, 0.1666, 0.1781,  ..., 0.1583, 0.1646, 0.1707],
        [0.1552, 0.1667, 0.1707,  ..., 0.1586, 0.1625, 0.1700],
        [0.1545, 0.1634, 0.1724,  ..., 0.1569, 0.1641, 0.1659],
        ...,
        [0.1552, 0.1637, 0.1714,  ..., 0.1564, 0.1623, 0.1688],
        [0.1574, 0.1670, 0.1755,  ..., 0.1566, 0.1652, 0.1661],
        [0.1528, 0.1640, 0.1665,  ..., 0.1571, 0.1605, 0.1627]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.51it/s]

tensor([[0.1574, 0.1652, 0.1746,  ..., 0.1564, 0.1649, 0.1704],
        [0.1583, 0.1696, 0.1760,  ..., 0.1563, 0.1635, 0.1697],
        [0.1592, 0.1654, 0.1763,  ..., 0.1550, 0.1631, 0.1668],
        ...,
        [0.1595, 0.1671, 0.1740,  ..., 0.1602, 0.1641, 0.1691],
        [0.1573, 0.1658, 0.1719,  ..., 0.1551, 0.1642, 0.1636],
        [0.1533, 0.1623, 0.1722,  ..., 0.1565, 0.1607, 0.1642]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:31,  1.52it/s]

tensor([[0.1575, 0.1662, 0.1733,  ..., 0.1566, 0.1663, 0.1710],
        [0.1576, 0.1657, 0.1735,  ..., 0.1560, 0.1642, 0.1676],
        [0.1554, 0.1680, 0.1721,  ..., 0.1557, 0.1628, 0.1652],
        ...,
        [0.1568, 0.1644, 0.1737,  ..., 0.1567, 0.1621, 0.1672],
        [0.1583, 0.1672, 0.1740,  ..., 0.1564, 0.1660, 0.1715],
        [0.1596, 0.1655, 0.1772,  ..., 0.1560, 0.1652, 0.1697]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:30,  1.52it/s]

tensor([[0.1530, 0.1595, 0.1695,  ..., 0.1519, 0.1599, 0.1642],
        [0.1598, 0.1675, 0.1768,  ..., 0.1551, 0.1640, 0.1691],
        [0.1574, 0.1668, 0.1729,  ..., 0.1551, 0.1632, 0.1653],
        ...,
        [0.1563, 0.1661, 0.1761,  ..., 0.1561, 0.1635, 0.1663],
        [0.1567, 0.1662, 0.1719,  ..., 0.1585, 0.1669, 0.1678],
        [0.1599, 0.1657, 0.1752,  ..., 0.1606, 0.1657, 0.1701]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:30,  1.53it/s]

tensor([[0.1582, 0.1661, 0.1745,  ..., 0.1559, 0.1643, 0.1669],
        [0.1557, 0.1644, 0.1725,  ..., 0.1591, 0.1657, 0.1650],
        [0.1542, 0.1647, 0.1716,  ..., 0.1561, 0.1611, 0.1680],
        ...,
        [0.1590, 0.1646, 0.1754,  ..., 0.1577, 0.1629, 0.1729],
        [0.1578, 0.1648, 0.1732,  ..., 0.1587, 0.1646, 0.1710],
        [0.1581, 0.1672, 0.1730,  ..., 0.1570, 0.1643, 0.1699]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:07<00:30,  1.47it/s]

tensor([[0.1570, 0.1660, 0.1738,  ..., 0.1568, 0.1658, 0.1687],
        [0.1585, 0.1632, 0.1750,  ..., 0.1566, 0.1621, 0.1671],
        [0.1586, 0.1685, 0.1759,  ..., 0.1588, 0.1652, 0.1702],
        ...,
        [0.1591, 0.1670, 0.1754,  ..., 0.1561, 0.1658, 0.1704],
        [0.1568, 0.1681, 0.1743,  ..., 0.1547, 0.1646, 0.1663],
        [0.1601, 0.1664, 0.1779,  ..., 0.1583, 0.1662, 0.1710]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:33,  1.33it/s]

tensor([[0.1592, 0.1670, 0.1740,  ..., 0.1562, 0.1654, 0.1713],
        [0.1593, 0.1652, 0.1764,  ..., 0.1582, 0.1649, 0.1703],
        [0.1592, 0.1660, 0.1757,  ..., 0.1587, 0.1636, 0.1702],
        ...,
        [0.1589, 0.1670, 0.1763,  ..., 0.1578, 0.1607, 0.1672],
        [0.1599, 0.1688, 0.1753,  ..., 0.1564, 0.1648, 0.1718],
        [0.1567, 0.1661, 0.1733,  ..., 0.1571, 0.1641, 0.1641]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:33,  1.29it/s]

tensor([[0.1596, 0.1671, 0.1755,  ..., 0.1566, 0.1650, 0.1716],
        [0.1554, 0.1633, 0.1707,  ..., 0.1556, 0.1595, 0.1693],
        [0.1589, 0.1635, 0.1738,  ..., 0.1560, 0.1649, 0.1698],
        ...,
        [0.1589, 0.1659, 0.1745,  ..., 0.1576, 0.1638, 0.1703],
        [0.1583, 0.1634, 0.1740,  ..., 0.1567, 0.1632, 0.1715],
        [0.1572, 0.1633, 0.1735,  ..., 0.1566, 0.1632, 0.1730]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:32,  1.28it/s]

tensor([[0.1589, 0.1633, 0.1720,  ..., 0.1572, 0.1643, 0.1736],
        [0.1576, 0.1636, 0.1739,  ..., 0.1557, 0.1652, 0.1708],
        [0.1594, 0.1669, 0.1760,  ..., 0.1548, 0.1640, 0.1681],
        ...,
        [0.1588, 0.1645, 0.1745,  ..., 0.1574, 0.1637, 0.1704],
        [0.1579, 0.1634, 0.1732,  ..., 0.1573, 0.1638, 0.1739],
        [0.1563, 0.1651, 0.1723,  ..., 0.1572, 0.1617, 0.1687]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:31,  1.29it/s]

tensor([[0.1577, 0.1638, 0.1742,  ..., 0.1563, 0.1619, 0.1727],
        [0.1596, 0.1667, 0.1777,  ..., 0.1552, 0.1658, 0.1688],
        [0.1589, 0.1653, 0.1747,  ..., 0.1558, 0.1647, 0.1701],
        ...,
        [0.1594, 0.1665, 0.1739,  ..., 0.1568, 0.1674, 0.1704],
        [0.1576, 0.1698, 0.1729,  ..., 0.1565, 0.1643, 0.1677],
        [0.1630, 0.1662, 0.1762,  ..., 0.1587, 0.1658, 0.1721]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:29,  1.35it/s]

tensor([[0.1580, 0.1655, 0.1731,  ..., 0.1563, 0.1667, 0.1676],
        [0.1581, 0.1669, 0.1750,  ..., 0.1551, 0.1648, 0.1693],
        [0.1560, 0.1633, 0.1723,  ..., 0.1573, 0.1623, 0.1686],
        ...,
        [0.1601, 0.1673, 0.1755,  ..., 0.1569, 0.1644, 0.1707],
        [0.1580, 0.1642, 0.1775,  ..., 0.1560, 0.1652, 0.1698],
        [0.1601, 0.1649, 0.1761,  ..., 0.1565, 0.1660, 0.1718]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:28,  1.39it/s]

tensor([[0.1589, 0.1648, 0.1771,  ..., 0.1543, 0.1623, 0.1679],
        [0.1536, 0.1625, 0.1711,  ..., 0.1559, 0.1578, 0.1665],
        [0.1589, 0.1679, 0.1751,  ..., 0.1562, 0.1632, 0.1686],
        ...,
        [0.1583, 0.1675, 0.1733,  ..., 0.1564, 0.1669, 0.1697],
        [0.1592, 0.1679, 0.1754,  ..., 0.1561, 0.1679, 0.1684],
        [0.1572, 0.1661, 0.1714,  ..., 0.1576, 0.1624, 0.1647]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:26,  1.44it/s]

tensor([[0.1608, 0.1665, 0.1745,  ..., 0.1587, 0.1669, 0.1735],
        [0.1596, 0.1683, 0.1766,  ..., 0.1580, 0.1637, 0.1673],
        [0.1547, 0.1635, 0.1727,  ..., 0.1575, 0.1606, 0.1692],
        ...,
        [0.1595, 0.1663, 0.1754,  ..., 0.1567, 0.1653, 0.1723],
        [0.1594, 0.1636, 0.1767,  ..., 0.1567, 0.1652, 0.1703],
        [0.1576, 0.1651, 0.1729,  ..., 0.1544, 0.1609, 0.1669]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:25,  1.47it/s]

tensor([[0.1548, 0.1651, 0.1728,  ..., 0.1564, 0.1635, 0.1681],
        [0.1596, 0.1674, 0.1755,  ..., 0.1580, 0.1639, 0.1728],
        [0.1621, 0.1673, 0.1746,  ..., 0.1600, 0.1660, 0.1679],
        ...,
        [0.1577, 0.1627, 0.1743,  ..., 0.1567, 0.1633, 0.1741],
        [0.1585, 0.1654, 0.1762,  ..., 0.1562, 0.1633, 0.1700],
        [0.1596, 0.1674, 0.1756,  ..., 0.1580, 0.1636, 0.1714]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:23,  1.50it/s]

tensor([[0.1587, 0.1681, 0.1746,  ..., 0.1563, 0.1635, 0.1694],
        [0.1588, 0.1680, 0.1761,  ..., 0.1569, 0.1643, 0.1732],
        [0.1553, 0.1629, 0.1709,  ..., 0.1548, 0.1615, 0.1638],
        ...,
        [0.1575, 0.1652, 0.1761,  ..., 0.1558, 0.1591, 0.1681],
        [0.1576, 0.1663, 0.1743,  ..., 0.1574, 0.1649, 0.1657],
        [0.1566, 0.1661, 0.1745,  ..., 0.1570, 0.1649, 0.1713]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:22,  1.53it/s]

tensor([[0.1579, 0.1653, 0.1731,  ..., 0.1555, 0.1628, 0.1676],
        [0.1569, 0.1649, 0.1725,  ..., 0.1576, 0.1618, 0.1696],
        [0.1585, 0.1681, 0.1742,  ..., 0.1584, 0.1686, 0.1675],
        ...,
        [0.1586, 0.1649, 0.1747,  ..., 0.1560, 0.1642, 0.1718],
        [0.1575, 0.1665, 0.1748,  ..., 0.1563, 0.1641, 0.1715],
        [0.1609, 0.1673, 0.1766,  ..., 0.1592, 0.1665, 0.1720]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.52it/s]

tensor([[0.1583, 0.1677, 0.1758,  ..., 0.1585, 0.1647, 0.1711],
        [0.1556, 0.1648, 0.1733,  ..., 0.1544, 0.1593, 0.1642],
        [0.1560, 0.1652, 0.1739,  ..., 0.1572, 0.1637, 0.1655],
        ...,
        [0.1577, 0.1668, 0.1731,  ..., 0.1568, 0.1634, 0.1707],
        [0.1544, 0.1616, 0.1715,  ..., 0.1561, 0.1606, 0.1682],
        [0.1572, 0.1651, 0.1757,  ..., 0.1570, 0.1649, 0.1711]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:22,  1.50it/s]

tensor([[0.1560, 0.1659, 0.1725,  ..., 0.1544, 0.1625, 0.1640],
        [0.1548, 0.1657, 0.1730,  ..., 0.1563, 0.1599, 0.1683],
        [0.1579, 0.1654, 0.1732,  ..., 0.1597, 0.1638, 0.1677],
        ...,
        [0.1535, 0.1649, 0.1684,  ..., 0.1546, 0.1581, 0.1622],
        [0.1585, 0.1662, 0.1730,  ..., 0.1548, 0.1652, 0.1672],
        [0.1563, 0.1664, 0.1719,  ..., 0.1553, 0.1627, 0.1635]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:21,  1.52it/s]

tensor([[0.1567, 0.1650, 0.1728,  ..., 0.1568, 0.1628, 0.1714],
        [0.1582, 0.1660, 0.1741,  ..., 0.1552, 0.1640, 0.1667],
        [0.1578, 0.1684, 0.1768,  ..., 0.1589, 0.1648, 0.1713],
        ...,
        [0.1592, 0.1667, 0.1747,  ..., 0.1563, 0.1626, 0.1700],
        [0.1530, 0.1640, 0.1697,  ..., 0.1576, 0.1616, 0.1664],
        [0.1550, 0.1628, 0.1713,  ..., 0.1578, 0.1588, 0.1633]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.53it/s]

tensor([[0.1590, 0.1672, 0.1755,  ..., 0.1566, 0.1646, 0.1733],
        [0.1593, 0.1624, 0.1727,  ..., 0.1562, 0.1638, 0.1686],
        [0.1571, 0.1654, 0.1739,  ..., 0.1559, 0.1603, 0.1674],
        ...,
        [0.1634, 0.1667, 0.1758,  ..., 0.1555, 0.1634, 0.1670],
        [0.1584, 0.1670, 0.1723,  ..., 0.1563, 0.1629, 0.1652],
        [0.1602, 0.1664, 0.1761,  ..., 0.1574, 0.1658, 0.1695]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:20,  1.49it/s]

tensor([[0.1599, 0.1679, 0.1758,  ..., 0.1553, 0.1668, 0.1714],
        [0.1554, 0.1655, 0.1725,  ..., 0.1571, 0.1635, 0.1683],
        [0.1589, 0.1649, 0.1764,  ..., 0.1574, 0.1640, 0.1728],
        ...,
        [0.1589, 0.1658, 0.1737,  ..., 0.1614, 0.1637, 0.1684],
        [0.1586, 0.1652, 0.1746,  ..., 0.1568, 0.1657, 0.1731],
        [0.1568, 0.1655, 0.1761,  ..., 0.1574, 0.1619, 0.1671]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:19,  1.46it/s]

tensor([[0.1565, 0.1637, 0.1721,  ..., 0.1588, 0.1634, 0.1741],
        [0.1604, 0.1688, 0.1769,  ..., 0.1587, 0.1673, 0.1706],
        [0.1538, 0.1651, 0.1709,  ..., 0.1556, 0.1614, 0.1628],
        ...,
        [0.1593, 0.1673, 0.1750,  ..., 0.1563, 0.1634, 0.1694],
        [0.1564, 0.1652, 0.1724,  ..., 0.1570, 0.1632, 0.1707],
        [0.1566, 0.1665, 0.1694,  ..., 0.1536, 0.1609, 0.1635]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:18,  1.49it/s]

tensor([[0.1563, 0.1686, 0.1720,  ..., 0.1578, 0.1643, 0.1666],
        [0.1560, 0.1682, 0.1715,  ..., 0.1528, 0.1633, 0.1640],
        [0.1581, 0.1660, 0.1754,  ..., 0.1563, 0.1656, 0.1719],
        ...,
        [0.1542, 0.1641, 0.1714,  ..., 0.1557, 0.1630, 0.1692],
        [0.1557, 0.1652, 0.1733,  ..., 0.1593, 0.1631, 0.1688],
        [0.1555, 0.1644, 0.1714,  ..., 0.1552, 0.1600, 0.1653]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:18,  1.50it/s]

tensor([[0.1584, 0.1642, 0.1745,  ..., 0.1562, 0.1642, 0.1696],
        [0.1586, 0.1640, 0.1724,  ..., 0.1557, 0.1625, 0.1694],
        [0.1588, 0.1640, 0.1741,  ..., 0.1557, 0.1609, 0.1692],
        ...,
        [0.1582, 0.1646, 0.1750,  ..., 0.1573, 0.1657, 0.1718],
        [0.1597, 0.1683, 0.1727,  ..., 0.1602, 0.1639, 0.1691],
        [0.1556, 0.1653, 0.1711,  ..., 0.1564, 0.1612, 0.1669]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:17,  1.51it/s]

tensor([[0.1590, 0.1656, 0.1749,  ..., 0.1561, 0.1646, 0.1712],
        [0.1563, 0.1662, 0.1720,  ..., 0.1567, 0.1648, 0.1660],
        [0.1546, 0.1621, 0.1662,  ..., 0.1508, 0.1610, 0.1587],
        ...,
        [0.1574, 0.1646, 0.1744,  ..., 0.1571, 0.1662, 0.1701],
        [0.1583, 0.1637, 0.1773,  ..., 0.1564, 0.1639, 0.1697],
        [0.1578, 0.1659, 0.1744,  ..., 0.1564, 0.1638, 0.1715]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.50it/s]

tensor([[0.1570, 0.1640, 0.1733,  ..., 0.1567, 0.1615, 0.1708],
        [0.1579, 0.1638, 0.1768,  ..., 0.1552, 0.1636, 0.1703],
        [0.1589, 0.1681, 0.1731,  ..., 0.1553, 0.1610, 0.1681],
        ...,
        [0.1583, 0.1648, 0.1767,  ..., 0.1557, 0.1646, 0.1705],
        [0.1559, 0.1643, 0.1706,  ..., 0.1577, 0.1625, 0.1687],
        [0.1587, 0.1655, 0.1750,  ..., 0.1589, 0.1659, 0.1729]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:15,  1.51it/s]

tensor([[0.1573, 0.1656, 0.1729,  ..., 0.1569, 0.1640, 0.1660],
        [0.1549, 0.1623, 0.1686,  ..., 0.1552, 0.1602, 0.1628],
        [0.1581, 0.1646, 0.1736,  ..., 0.1564, 0.1632, 0.1696],
        ...,
        [0.1584, 0.1615, 0.1728,  ..., 0.1545, 0.1602, 0.1713],
        [0.1453, 0.1559, 0.1581,  ..., 0.1483, 0.1574, 0.1565],
        [0.1604, 0.1666, 0.1763,  ..., 0.1550, 0.1630, 0.1695]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.52it/s]

tensor([[0.1593, 0.1682, 0.1748,  ..., 0.1573, 0.1637, 0.1672],
        [0.1576, 0.1628, 0.1751,  ..., 0.1575, 0.1637, 0.1721],
        [0.1571, 0.1649, 0.1729,  ..., 0.1558, 0.1634, 0.1659],
        ...,
        [0.1619, 0.1662, 0.1772,  ..., 0.1570, 0.1653, 0.1721],
        [0.1586, 0.1684, 0.1757,  ..., 0.1577, 0.1653, 0.1674],
        [0.1572, 0.1652, 0.1741,  ..., 0.1576, 0.1635, 0.1669]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.51it/s]

tensor([[0.1554, 0.1632, 0.1678,  ..., 0.1527, 0.1586, 0.1594],
        [0.1570, 0.1669, 0.1728,  ..., 0.1542, 0.1630, 0.1671],
        [0.1544, 0.1645, 0.1696,  ..., 0.1571, 0.1630, 0.1652],
        ...,
        [0.1611, 0.1695, 0.1733,  ..., 0.1552, 0.1659, 0.1666],
        [0.1541, 0.1622, 0.1677,  ..., 0.1519, 0.1581, 0.1645],
        [0.1551, 0.1644, 0.1720,  ..., 0.1559, 0.1625, 0.1724]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:13,  1.52it/s]

tensor([[0.1571, 0.1664, 0.1725,  ..., 0.1564, 0.1630, 0.1666],
        [0.1566, 0.1653, 0.1700,  ..., 0.1576, 0.1622, 0.1677],
        [0.1575, 0.1639, 0.1747,  ..., 0.1560, 0.1627, 0.1711],
        ...,
        [0.1578, 0.1664, 0.1744,  ..., 0.1556, 0.1631, 0.1663],
        [0.1567, 0.1643, 0.1747,  ..., 0.1563, 0.1618, 0.1667],
        [0.1563, 0.1682, 0.1725,  ..., 0.1581, 0.1655, 0.1695]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:13,  1.49it/s]

tensor([[0.1572, 0.1663, 0.1714,  ..., 0.1554, 0.1627, 0.1647],
        [0.1568, 0.1655, 0.1730,  ..., 0.1560, 0.1647, 0.1682],
        [0.1578, 0.1674, 0.1741,  ..., 0.1545, 0.1617, 0.1646],
        ...,
        [0.1596, 0.1660, 0.1743,  ..., 0.1547, 0.1614, 0.1641],
        [0.1560, 0.1632, 0.1705,  ..., 0.1574, 0.1610, 0.1672],
        [0.1590, 0.1672, 0.1738,  ..., 0.1556, 0.1629, 0.1691]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:12,  1.49it/s]

tensor([[0.1594, 0.1673, 0.1732,  ..., 0.1561, 0.1626, 0.1697],
        [0.1554, 0.1657, 0.1729,  ..., 0.1551, 0.1611, 0.1657],
        [0.1601, 0.1648, 0.1762,  ..., 0.1568, 0.1635, 0.1710],
        ...,
        [0.1568, 0.1657, 0.1729,  ..., 0.1571, 0.1633, 0.1659],
        [0.1598, 0.1676, 0.1784,  ..., 0.1551, 0.1688, 0.1720],
        [0.1571, 0.1662, 0.1722,  ..., 0.1545, 0.1627, 0.1646]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:12,  1.49it/s]

tensor([[0.1586, 0.1621, 0.1731,  ..., 0.1577, 0.1649, 0.1727],
        [0.1594, 0.1675, 0.1752,  ..., 0.1565, 0.1655, 0.1710],
        [0.1588, 0.1666, 0.1739,  ..., 0.1607, 0.1663, 0.1728],
        ...,
        [0.1586, 0.1648, 0.1770,  ..., 0.1569, 0.1665, 0.1685],
        [0.1591, 0.1665, 0.1752,  ..., 0.1567, 0.1647, 0.1710],
        [0.1558, 0.1652, 0.1701,  ..., 0.1569, 0.1622, 0.1647]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:27<00:11,  1.52it/s]

tensor([[0.1565, 0.1656, 0.1743,  ..., 0.1588, 0.1628, 0.1666],
        [0.1578, 0.1653, 0.1726,  ..., 0.1576, 0.1627, 0.1718],
        [0.1580, 0.1690, 0.1717,  ..., 0.1584, 0.1638, 0.1671],
        ...,
        [0.1567, 0.1655, 0.1747,  ..., 0.1562, 0.1632, 0.1693],
        [0.1586, 0.1638, 0.1739,  ..., 0.1573, 0.1622, 0.1720],
        [0.1575, 0.1664, 0.1747,  ..., 0.1560, 0.1635, 0.1686]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:10,  1.52it/s]

tensor([[0.1574, 0.1685, 0.1718,  ..., 0.1564, 0.1633, 0.1659],
        [0.1575, 0.1637, 0.1746,  ..., 0.1557, 0.1643, 0.1679],
        [0.1590, 0.1671, 0.1742,  ..., 0.1558, 0.1634, 0.1680],
        ...,
        [0.1584, 0.1668, 0.1758,  ..., 0.1559, 0.1632, 0.1674],
        [0.1583, 0.1653, 0.1726,  ..., 0.1559, 0.1656, 0.1703],
        [0.1578, 0.1662, 0.1722,  ..., 0.1554, 0.1648, 0.1668]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:09,  1.53it/s]

tensor([[0.1604, 0.1661, 0.1726,  ..., 0.1567, 0.1635, 0.1682],
        [0.1574, 0.1648, 0.1735,  ..., 0.1557, 0.1616, 0.1700],
        [0.1557, 0.1627, 0.1717,  ..., 0.1549, 0.1606, 0.1638],
        ...,
        [0.1593, 0.1662, 0.1728,  ..., 0.1574, 0.1648, 0.1721],
        [0.1597, 0.1663, 0.1754,  ..., 0.1560, 0.1648, 0.1711],
        [0.1563, 0.1678, 0.1721,  ..., 0.1557, 0.1626, 0.1658]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:29<00:09,  1.53it/s]

tensor([[0.1577, 0.1675, 0.1759,  ..., 0.1551, 0.1630, 0.1656],
        [0.1565, 0.1646, 0.1734,  ..., 0.1570, 0.1620, 0.1679],
        [0.1575, 0.1662, 0.1746,  ..., 0.1566, 0.1642, 0.1676],
        ...,
        [0.1606, 0.1658, 0.1756,  ..., 0.1569, 0.1657, 0.1715],
        [0.1607, 0.1687, 0.1767,  ..., 0.1553, 0.1659, 0.1682],
        [0.1554, 0.1654, 0.1708,  ..., 0.1555, 0.1614, 0.1649]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:08,  1.53it/s]

tensor([[0.1600, 0.1673, 0.1757,  ..., 0.1567, 0.1659, 0.1705],
        [0.1576, 0.1644, 0.1757,  ..., 0.1556, 0.1633, 0.1706],
        [0.1579, 0.1635, 0.1777,  ..., 0.1567, 0.1629, 0.1720],
        ...,
        [0.1566, 0.1649, 0.1715,  ..., 0.1588, 0.1621, 0.1703],
        [0.1542, 0.1643, 0.1685,  ..., 0.1537, 0.1605, 0.1613],
        [0.1597, 0.1662, 0.1750,  ..., 0.1559, 0.1646, 0.1722]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:07,  1.53it/s]

tensor([[0.1598, 0.1655, 0.1734,  ..., 0.1558, 0.1613, 0.1688],
        [0.1588, 0.1640, 0.1743,  ..., 0.1549, 0.1617, 0.1701],
        [0.1549, 0.1643, 0.1708,  ..., 0.1547, 0.1589, 0.1684],
        ...,
        [0.1548, 0.1647, 0.1705,  ..., 0.1538, 0.1616, 0.1646],
        [0.1587, 0.1658, 0.1733,  ..., 0.1552, 0.1641, 0.1717],
        [0.1579, 0.1676, 0.1741,  ..., 0.1569, 0.1626, 0.1685]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:30<00:07,  1.54it/s]

tensor([[0.1590, 0.1666, 0.1756,  ..., 0.1573, 0.1648, 0.1720],
        [0.1591, 0.1656, 0.1755,  ..., 0.1568, 0.1639, 0.1728],
        [0.1602, 0.1653, 0.1770,  ..., 0.1547, 0.1632, 0.1696],
        ...,
        [0.1549, 0.1653, 0.1742,  ..., 0.1608, 0.1641, 0.1671],
        [0.1579, 0.1651, 0.1723,  ..., 0.1558, 0.1629, 0.1637],
        [0.1579, 0.1673, 0.1713,  ..., 0.1562, 0.1599, 0.1624]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:06,  1.49it/s]

tensor([[0.1571, 0.1663, 0.1735,  ..., 0.1586, 0.1661, 0.1668],
        [0.1588, 0.1667, 0.1749,  ..., 0.1558, 0.1635, 0.1697],
        [0.1576, 0.1651, 0.1738,  ..., 0.1576, 0.1620, 0.1708],
        ...,
        [0.1572, 0.1648, 0.1749,  ..., 0.1562, 0.1638, 0.1710],
        [0.1586, 0.1663, 0.1743,  ..., 0.1549, 0.1637, 0.1712],
        [0.1584, 0.1673, 0.1734,  ..., 0.1571, 0.1636, 0.1672]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:06,  1.46it/s]

tensor([[0.1556, 0.1648, 0.1696,  ..., 0.1569, 0.1621, 0.1666],
        [0.1569, 0.1624, 0.1701,  ..., 0.1566, 0.1627, 0.1677],
        [0.1588, 0.1684, 0.1752,  ..., 0.1551, 0.1658, 0.1680],
        ...,
        [0.1572, 0.1640, 0.1731,  ..., 0.1566, 0.1635, 0.1680],
        [0.1557, 0.1659, 0.1715,  ..., 0.1559, 0.1629, 0.1691],
        [0.1584, 0.1667, 0.1752,  ..., 0.1593, 0.1653, 0.1715]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:33<00:05,  1.46it/s]

tensor([[0.1556, 0.1671, 0.1726,  ..., 0.1552, 0.1615, 0.1641],
        [0.1588, 0.1665, 0.1742,  ..., 0.1565, 0.1638, 0.1702],
        [0.1573, 0.1679, 0.1754,  ..., 0.1603, 0.1613, 0.1641],
        ...,
        [0.1540, 0.1647, 0.1684,  ..., 0.1575, 0.1606, 0.1639],
        [0.1589, 0.1666, 0.1760,  ..., 0.1575, 0.1655, 0.1708],
        [0.1413, 0.1516, 0.1523,  ..., 0.1421, 0.1510, 0.1511]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:33<00:04,  1.48it/s]

tensor([[0.1587, 0.1656, 0.1769,  ..., 0.1575, 0.1641, 0.1729],
        [0.1544, 0.1630, 0.1714,  ..., 0.1546, 0.1606, 0.1625],
        [0.1571, 0.1661, 0.1731,  ..., 0.1566, 0.1660, 0.1683],
        ...,
        [0.1616, 0.1695, 0.1738,  ..., 0.1562, 0.1634, 0.1684],
        [0.1588, 0.1648, 0.1747,  ..., 0.1550, 0.1645, 0.1706],
        [0.1581, 0.1648, 0.1729,  ..., 0.1567, 0.1634, 0.1703]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:34<00:04,  1.48it/s]

tensor([[0.1612, 0.1666, 0.1758,  ..., 0.1559, 0.1624, 0.1666],
        [0.1587, 0.1661, 0.1750,  ..., 0.1559, 0.1657, 0.1711],
        [0.1595, 0.1666, 0.1742,  ..., 0.1572, 0.1649, 0.1695],
        ...,
        [0.1569, 0.1667, 0.1717,  ..., 0.1547, 0.1630, 0.1649],
        [0.1574, 0.1657, 0.1726,  ..., 0.1555, 0.1635, 0.1682],
        [0.1585, 0.1659, 0.1748,  ..., 0.1578, 0.1664, 0.1718]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.51it/s]

tensor([[0.1583, 0.1641, 0.1736,  ..., 0.1563, 0.1633, 0.1724],
        [0.1569, 0.1620, 0.1693,  ..., 0.1569, 0.1627, 0.1682],
        [0.1545, 0.1649, 0.1723,  ..., 0.1567, 0.1648, 0.1672],
        ...,
        [0.1617, 0.1672, 0.1731,  ..., 0.1549, 0.1629, 0.1677],
        [0.1575, 0.1645, 0.1728,  ..., 0.1580, 0.1652, 0.1707],
        [0.1573, 0.1657, 0.1729,  ..., 0.1564, 0.1622, 0.1663]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:35<00:02,  1.53it/s]

tensor([[0.1549, 0.1635, 0.1731,  ..., 0.1557, 0.1603, 0.1684],
        [0.1545, 0.1639, 0.1707,  ..., 0.1580, 0.1611, 0.1648],
        [0.1576, 0.1659, 0.1724,  ..., 0.1566, 0.1633, 0.1687],
        ...,
        [0.1596, 0.1637, 0.1751,  ..., 0.1557, 0.1622, 0.1711],
        [0.1596, 0.1651, 0.1733,  ..., 0.1560, 0.1640, 0.1729],
        [0.1586, 0.1696, 0.1746,  ..., 0.1564, 0.1661, 0.1658]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:36<00:01,  1.53it/s]

tensor([[0.1588, 0.1655, 0.1711,  ..., 0.1573, 0.1656, 0.1699],
        [0.1583, 0.1663, 0.1747,  ..., 0.1578, 0.1648, 0.1688],
        [0.1592, 0.1657, 0.1732,  ..., 0.1573, 0.1609, 0.1672],
        ...,
        [0.1588, 0.1685, 0.1759,  ..., 0.1568, 0.1645, 0.1686],
        [0.1584, 0.1663, 0.1722,  ..., 0.1553, 0.1633, 0.1676],
        [0.1591, 0.1672, 0.1744,  ..., 0.1573, 0.1667, 0.1699]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:36<00:01,  1.55it/s]

tensor([[0.1552, 0.1655, 0.1690,  ..., 0.1532, 0.1585, 0.1625],
        [0.1606, 0.1653, 0.1744,  ..., 0.1561, 0.1657, 0.1726],
        [0.1579, 0.1633, 0.1725,  ..., 0.1571, 0.1636, 0.1713],
        ...,
        [0.1610, 0.1685, 0.1752,  ..., 0.1571, 0.1658, 0.1722],
        [0.1577, 0.1658, 0.1698,  ..., 0.1582, 0.1641, 0.1697],
        [0.1582, 0.1652, 0.1744,  ..., 0.1570, 0.1629, 0.1694]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:37<00:00,  1.54it/s]

tensor([[0.1532, 0.1615, 0.1675,  ..., 0.1566, 0.1581, 0.1648],
        [0.1610, 0.1654, 0.1759,  ..., 0.1582, 0.1647, 0.1680],
        [0.1526, 0.1631, 0.1652,  ..., 0.1526, 0.1603, 0.1615],
        ...,
        [0.1611, 0.1676, 0.1751,  ..., 0.1568, 0.1645, 0.1717],
        [0.1569, 0.1694, 0.1727,  ..., 0.1581, 0.1661, 0.1670],
        [0.1593, 0.1653, 0.1740,  ..., 0.1559, 0.1639, 0.1694]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:38<00:00,  1.49it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.21it/s]

tensor([[0.1585, 0.1640, 0.1727,  ..., 0.1568, 0.1624, 0.1706],
        [0.1556, 0.1643, 0.1694,  ..., 0.1516, 0.1583, 0.1616],
        [0.1603, 0.1682, 0.1737,  ..., 0.1572, 0.1629, 0.1701],
        ...,
        [0.1572, 0.1621, 0.1718,  ..., 0.1561, 0.1617, 0.1668],
        [0.1556, 0.1663, 0.1689,  ..., 0.1558, 0.1625, 0.1627],
        [0.1590, 0.1686, 0.1727,  ..., 0.1566, 0.1617, 0.1653]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.17it/s]

tensor([[0.1533, 0.1646, 0.1693,  ..., 0.1547, 0.1615, 0.1647],
        [0.1592, 0.1686, 0.1760,  ..., 0.1565, 0.1661, 0.1705],
        [0.1603, 0.1659, 0.1774,  ..., 0.1570, 0.1644, 0.1712],
        ...,
        [0.1585, 0.1689, 0.1733,  ..., 0.1563, 0.1632, 0.1696],
        [0.1563, 0.1655, 0.1711,  ..., 0.1544, 0.1608, 0.1624],
        [0.1565, 0.1644, 0.1726,  ..., 0.1564, 0.1623, 0.1647]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.15it/s]

tensor([[0.1557, 0.1634, 0.1701,  ..., 0.1564, 0.1635, 0.1640],
        [0.1610, 0.1667, 0.1743,  ..., 0.1569, 0.1645, 0.1683],
        [0.1597, 0.1663, 0.1763,  ..., 0.1566, 0.1655, 0.1718],
        ...,
        [0.1577, 0.1667, 0.1732,  ..., 0.1563, 0.1635, 0.1712],
        [0.1572, 0.1640, 0.1725,  ..., 0.1571, 0.1629, 0.1688],
        [0.1571, 0.1666, 0.1736,  ..., 0.1573, 0.1656, 0.1706]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.24it/s]

tensor([[0.1535, 0.1627, 0.1682,  ..., 0.1546, 0.1597, 0.1612],
        [0.1586, 0.1657, 0.1730,  ..., 0.1593, 0.1632, 0.1713],
        [0.1595, 0.1661, 0.1745,  ..., 0.1583, 0.1618, 0.1672],
        ...,
        [0.1613, 0.1668, 0.1743,  ..., 0.1605, 0.1652, 0.1714],
        [0.1562, 0.1659, 0.1730,  ..., 0.1574, 0.1667, 0.1701],
        [0.1579, 0.1638, 0.1715,  ..., 0.1567, 0.1616, 0.1682]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

tensor([[0.1549, 0.1651, 0.1710,  ..., 0.1572, 0.1641, 0.1674],
        [0.1559, 0.1650, 0.1721,  ..., 0.1559, 0.1612, 0.1653],
        [0.1587, 0.1696, 0.1747,  ..., 0.1558, 0.1648, 0.1704],
        ...,
        [0.1594, 0.1658, 0.1740,  ..., 0.1560, 0.1640, 0.1698],
        [0.1582, 0.1642, 0.1752,  ..., 0.1563, 0.1628, 0.1699],
        [0.1569, 0.1633, 0.1682,  ..., 0.1569, 0.1618, 0.1669]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.32it/s]

tensor([[0.1585, 0.1648, 0.1734,  ..., 0.1563, 0.1627, 0.1697],
        [0.1585, 0.1675, 0.1749,  ..., 0.1558, 0.1640, 0.1719],
        [0.1581, 0.1692, 0.1728,  ..., 0.1568, 0.1645, 0.1679],
        ...,
        [0.1589, 0.1672, 0.1751,  ..., 0.1567, 0.1640, 0.1682],
        [0.1569, 0.1647, 0.1722,  ..., 0.1576, 0.1648, 0.1720],
        [0.1592, 0.1668, 0.1760,  ..., 0.1578, 0.1663, 0.1710]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.36it/s]

tensor([[0.1596, 0.1656, 0.1740,  ..., 0.1569, 0.1645, 0.1717],
        [0.1597, 0.1633, 0.1746,  ..., 0.1555, 0.1620, 0.1706],
        [0.1555, 0.1648, 0.1737,  ..., 0.1555, 0.1621, 0.1659],
        ...,
        [0.1568, 0.1646, 0.1741,  ..., 0.1559, 0.1613, 0.1695],
        [0.1594, 0.1673, 0.1745,  ..., 0.1555, 0.1678, 0.1703],
        [0.1575, 0.1657, 0.1718,  ..., 0.1561, 0.1628, 0.1683]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.33it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1551, 0.1630, 0.1715,  ..., 0.1552, 0.1621, 0.1681],
        [0.1572, 0.1682, 0.1736,  ..., 0.1549, 0.1641, 0.1678],
        [0.1582, 0.1649, 0.1740,  ..., 0.1591, 0.1645, 0.1693],
        ...,
        [0.1595, 0.1639, 0.1739,  ..., 0.1551, 0.1623, 0.1676],
        [0.1584, 0.1641, 0.1753,  ..., 0.1569, 0.1616, 0.1690],
        [0.1573, 0.1698, 0.1754,  ..., 0.1571, 0.1628, 0.1668]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 5, train_loss: -0.013688430190086365, valid_loss: -0.07285822182893753
Parameter containing:
tensor([[ 1.0005,  1.0006,  1.0005,  1.0005,  1.0005,  1.0005,  1.0006,  1.0005,
          1.0005,  1.0006, -0.4994, -0.4995, -0.4995, -0.4995, -0.4995, -0.4995,
         -0.4995, -0.4995, -0.4995, -0.4995],
   

  2%|▏         | 1/57 [00:00<00:36,  1.53it/s]

tensor([[0.1531, 0.1615, 0.1663,  ..., 0.1520, 0.1590, 0.1614],
        [0.1581, 0.1644, 0.1717,  ..., 0.1577, 0.1641, 0.1717],
        [0.1570, 0.1648, 0.1745,  ..., 0.1585, 0.1638, 0.1672],
        ...,
        [0.1544, 0.1624, 0.1699,  ..., 0.1547, 0.1585, 0.1659],
        [0.1591, 0.1662, 0.1744,  ..., 0.1559, 0.1664, 0.1735],
        [0.1566, 0.1647, 0.1745,  ..., 0.1587, 0.1647, 0.1671]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:35,  1.55it/s]

tensor([[0.1566, 0.1667, 0.1727,  ..., 0.1564, 0.1641, 0.1700],
        [0.1576, 0.1650, 0.1726,  ..., 0.1570, 0.1637, 0.1683],
        [0.1597, 0.1672, 0.1743,  ..., 0.1566, 0.1644, 0.1707],
        ...,
        [0.1583, 0.1652, 0.1740,  ..., 0.1550, 0.1623, 0.1660],
        [0.1606, 0.1650, 0.1744,  ..., 0.1566, 0.1646, 0.1710],
        [0.1542, 0.1641, 0.1697,  ..., 0.1554, 0.1615, 0.1647]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:35,  1.54it/s]

tensor([[0.1602, 0.1671, 0.1791,  ..., 0.1560, 0.1646, 0.1712],
        [0.1582, 0.1665, 0.1752,  ..., 0.1576, 0.1615, 0.1685],
        [0.1570, 0.1671, 0.1726,  ..., 0.1594, 0.1653, 0.1635],
        ...,
        [0.1542, 0.1642, 0.1682,  ..., 0.1537, 0.1602, 0.1612],
        [0.1588, 0.1636, 0.1753,  ..., 0.1568, 0.1631, 0.1733],
        [0.1600, 0.1663, 0.1739,  ..., 0.1566, 0.1654, 0.1702]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:34,  1.53it/s]

tensor([[0.1537, 0.1636, 0.1679,  ..., 0.1551, 0.1603, 0.1604],
        [0.1574, 0.1668, 0.1760,  ..., 0.1583, 0.1626, 0.1665],
        [0.1603, 0.1660, 0.1743,  ..., 0.1558, 0.1648, 0.1698],
        ...,
        [0.1579, 0.1665, 0.1733,  ..., 0.1572, 0.1637, 0.1699],
        [0.1554, 0.1643, 0.1709,  ..., 0.1525, 0.1593, 0.1638],
        [0.1582, 0.1616, 0.1747,  ..., 0.1563, 0.1627, 0.1765]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:33,  1.55it/s]

tensor([[0.1544, 0.1635, 0.1714,  ..., 0.1566, 0.1626, 0.1652],
        [0.1587, 0.1669, 0.1734,  ..., 0.1561, 0.1641, 0.1685],
        [0.1570, 0.1648, 0.1749,  ..., 0.1551, 0.1619, 0.1662],
        ...,
        [0.1587, 0.1669, 0.1759,  ..., 0.1588, 0.1625, 0.1661],
        [0.1566, 0.1670, 0.1721,  ..., 0.1566, 0.1639, 0.1657],
        [0.1591, 0.1645, 0.1761,  ..., 0.1575, 0.1632, 0.1690]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.54it/s]

tensor([[0.1604, 0.1655, 0.1773,  ..., 0.1568, 0.1615, 0.1691],
        [0.1592, 0.1670, 0.1743,  ..., 0.1578, 0.1632, 0.1720],
        [0.1590, 0.1647, 0.1739,  ..., 0.1567, 0.1622, 0.1705],
        ...,
        [0.1590, 0.1655, 0.1718,  ..., 0.1549, 0.1626, 0.1701],
        [0.1570, 0.1659, 0.1737,  ..., 0.1555, 0.1640, 0.1669],
        [0.1576, 0.1668, 0.1734,  ..., 0.1587, 0.1642, 0.1692]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:32,  1.55it/s]

tensor([[0.1596, 0.1679, 0.1754,  ..., 0.1561, 0.1647, 0.1716],
        [0.1594, 0.1676, 0.1743,  ..., 0.1574, 0.1645, 0.1710],
        [0.1608, 0.1660, 0.1741,  ..., 0.1563, 0.1668, 0.1690],
        ...,
        [0.1586, 0.1658, 0.1741,  ..., 0.1578, 0.1642, 0.1706],
        [0.1588, 0.1640, 0.1741,  ..., 0.1549, 0.1644, 0.1722],
        [0.1613, 0.1685, 0.1733,  ..., 0.1557, 0.1680, 0.1696]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.50it/s]

tensor([[0.1571, 0.1638, 0.1714,  ..., 0.1569, 0.1635, 0.1753],
        [0.1554, 0.1642, 0.1690,  ..., 0.1569, 0.1645, 0.1665],
        [0.1566, 0.1673, 0.1717,  ..., 0.1541, 0.1647, 0.1640],
        ...,
        [0.1566, 0.1678, 0.1718,  ..., 0.1579, 0.1671, 0.1667],
        [0.1526, 0.1616, 0.1662,  ..., 0.1555, 0.1622, 0.1645],
        [0.1587, 0.1636, 0.1746,  ..., 0.1570, 0.1643, 0.1703]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:31,  1.50it/s]

tensor([[0.1584, 0.1665, 0.1730,  ..., 0.1565, 0.1638, 0.1680],
        [0.1586, 0.1647, 0.1729,  ..., 0.1580, 0.1634, 0.1714],
        [0.1585, 0.1661, 0.1732,  ..., 0.1563, 0.1651, 0.1718],
        ...,
        [0.1582, 0.1655, 0.1744,  ..., 0.1569, 0.1633, 0.1698],
        [0.1612, 0.1654, 0.1755,  ..., 0.1573, 0.1652, 0.1707],
        [0.1567, 0.1656, 0.1737,  ..., 0.1560, 0.1634, 0.1671]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:30,  1.52it/s]

tensor([[0.1596, 0.1668, 0.1757,  ..., 0.1571, 0.1622, 0.1682],
        [0.1604, 0.1682, 0.1771,  ..., 0.1570, 0.1653, 0.1733],
        [0.1606, 0.1694, 0.1742,  ..., 0.1584, 0.1640, 0.1693],
        ...,
        [0.1594, 0.1653, 0.1744,  ..., 0.1545, 0.1635, 0.1703],
        [0.1582, 0.1649, 0.1752,  ..., 0.1569, 0.1641, 0.1709],
        [0.1587, 0.1678, 0.1749,  ..., 0.1565, 0.1655, 0.1702]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:30,  1.53it/s]

tensor([[0.1586, 0.1644, 0.1739,  ..., 0.1563, 0.1651, 0.1685],
        [0.1573, 0.1655, 0.1744,  ..., 0.1536, 0.1589, 0.1662],
        [0.1582, 0.1654, 0.1737,  ..., 0.1574, 0.1619, 0.1691],
        ...,
        [0.1548, 0.1631, 0.1703,  ..., 0.1564, 0.1621, 0.1671],
        [0.1565, 0.1669, 0.1721,  ..., 0.1597, 0.1623, 0.1669],
        [0.1595, 0.1676, 0.1770,  ..., 0.1584, 0.1619, 0.1704]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:07<00:29,  1.54it/s]

tensor([[0.1590, 0.1664, 0.1743,  ..., 0.1558, 0.1633, 0.1666],
        [0.1559, 0.1647, 0.1734,  ..., 0.1569, 0.1616, 0.1688],
        [0.1583, 0.1660, 0.1745,  ..., 0.1552, 0.1632, 0.1651],
        ...,
        [0.1426, 0.1539, 0.1544,  ..., 0.1447, 0.1546, 0.1546],
        [0.1557, 0.1638, 0.1708,  ..., 0.1569, 0.1633, 0.1647],
        [0.1574, 0.1685, 0.1734,  ..., 0.1572, 0.1651, 0.1685]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:28,  1.54it/s]

tensor([[0.1554, 0.1633, 0.1711,  ..., 0.1555, 0.1618, 0.1654],
        [0.1555, 0.1638, 0.1713,  ..., 0.1564, 0.1611, 0.1659],
        [0.1534, 0.1629, 0.1702,  ..., 0.1561, 0.1598, 0.1640],
        ...,
        [0.1579, 0.1674, 0.1749,  ..., 0.1575, 0.1634, 0.1714],
        [0.1564, 0.1665, 0.1714,  ..., 0.1566, 0.1646, 0.1678],
        [0.1584, 0.1674, 0.1743,  ..., 0.1567, 0.1678, 0.1694]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.53it/s]

tensor([[0.1585, 0.1652, 0.1737,  ..., 0.1571, 0.1634, 0.1704],
        [0.1566, 0.1645, 0.1719,  ..., 0.1571, 0.1626, 0.1708],
        [0.1593, 0.1679, 0.1759,  ..., 0.1579, 0.1650, 0.1691],
        ...,
        [0.1569, 0.1644, 0.1728,  ..., 0.1565, 0.1617, 0.1694],
        [0.1600, 0.1664, 0.1759,  ..., 0.1553, 0.1661, 0.1711],
        [0.1572, 0.1616, 0.1691,  ..., 0.1540, 0.1599, 0.1648]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:09<00:28,  1.47it/s]

tensor([[0.1581, 0.1656, 0.1731,  ..., 0.1573, 0.1656, 0.1709],
        [0.1518, 0.1588, 0.1684,  ..., 0.1488, 0.1580, 0.1682],
        [0.1593, 0.1687, 0.1758,  ..., 0.1554, 0.1632, 0.1687],
        ...,
        [0.1538, 0.1648, 0.1721,  ..., 0.1560, 0.1642, 0.1643],
        [0.1593, 0.1667, 0.1751,  ..., 0.1569, 0.1643, 0.1713],
        [0.1589, 0.1664, 0.1747,  ..., 0.1550, 0.1641, 0.1698]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:29,  1.37it/s]

tensor([[0.1545, 0.1657, 0.1702,  ..., 0.1569, 0.1631, 0.1693],
        [0.1580, 0.1652, 0.1747,  ..., 0.1570, 0.1635, 0.1683],
        [0.1601, 0.1686, 0.1735,  ..., 0.1558, 0.1653, 0.1702],
        ...,
        [0.1599, 0.1686, 0.1773,  ..., 0.1569, 0.1655, 0.1698],
        [0.1597, 0.1673, 0.1786,  ..., 0.1578, 0.1653, 0.1690],
        [0.1577, 0.1644, 0.1728,  ..., 0.1571, 0.1636, 0.1691]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:29,  1.35it/s]

tensor([[0.1580, 0.1657, 0.1718,  ..., 0.1602, 0.1633, 0.1702],
        [0.1567, 0.1636, 0.1719,  ..., 0.1560, 0.1625, 0.1686],
        [0.1566, 0.1635, 0.1730,  ..., 0.1564, 0.1642, 0.1696],
        ...,
        [0.1603, 0.1669, 0.1726,  ..., 0.1578, 0.1657, 0.1688],
        [0.1561, 0.1660, 0.1723,  ..., 0.1585, 0.1656, 0.1674],
        [0.1588, 0.1662, 0.1749,  ..., 0.1552, 0.1649, 0.1693]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:29,  1.31it/s]

tensor([[0.1594, 0.1664, 0.1745,  ..., 0.1570, 0.1635, 0.1721],
        [0.1601, 0.1707, 0.1763,  ..., 0.1570, 0.1639, 0.1689],
        [0.1598, 0.1672, 0.1723,  ..., 0.1597, 0.1630, 0.1697],
        ...,
        [0.1571, 0.1665, 0.1731,  ..., 0.1574, 0.1645, 0.1718],
        [0.1585, 0.1731, 0.1731,  ..., 0.1582, 0.1671, 0.1682],
        [0.1594, 0.1661, 0.1733,  ..., 0.1583, 0.1632, 0.1729]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:29,  1.30it/s]

tensor([[0.1540, 0.1647, 0.1687,  ..., 0.1569, 0.1593, 0.1641],
        [0.1580, 0.1666, 0.1722,  ..., 0.1576, 0.1691, 0.1719],
        [0.1577, 0.1660, 0.1729,  ..., 0.1562, 0.1627, 0.1703],
        ...,
        [0.1611, 0.1675, 0.1742,  ..., 0.1568, 0.1665, 0.1704],
        [0.1570, 0.1645, 0.1726,  ..., 0.1565, 0.1638, 0.1700],
        [0.1568, 0.1661, 0.1712,  ..., 0.1563, 0.1594, 0.1668]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:29,  1.27it/s]

tensor([[0.1581, 0.1632, 0.1665,  ..., 0.1501, 0.1589, 0.1629],
        [0.1560, 0.1651, 0.1731,  ..., 0.1562, 0.1626, 0.1668],
        [0.1572, 0.1629, 0.1729,  ..., 0.1558, 0.1631, 0.1697],
        ...,
        [0.1581, 0.1644, 0.1741,  ..., 0.1571, 0.1642, 0.1720],
        [0.1570, 0.1661, 0.1725,  ..., 0.1561, 0.1634, 0.1690],
        [0.1601, 0.1640, 0.1717,  ..., 0.1562, 0.1617, 0.1709]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:29,  1.23it/s]

tensor([[0.1602, 0.1678, 0.1742,  ..., 0.1577, 0.1643, 0.1706],
        [0.1578, 0.1668, 0.1721,  ..., 0.1582, 0.1647, 0.1709],
        [0.1581, 0.1651, 0.1727,  ..., 0.1564, 0.1638, 0.1703],
        ...,
        [0.1558, 0.1649, 0.1734,  ..., 0.1568, 0.1634, 0.1664],
        [0.1579, 0.1676, 0.1705,  ..., 0.1571, 0.1625, 0.1647],
        [0.1581, 0.1656, 0.1731,  ..., 0.1565, 0.1608, 0.1698]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:29,  1.20it/s]

tensor([[0.1575, 0.1678, 0.1723,  ..., 0.1555, 0.1625, 0.1688],
        [0.1560, 0.1644, 0.1690,  ..., 0.1579, 0.1639, 0.1690],
        [0.1568, 0.1665, 0.1711,  ..., 0.1553, 0.1607, 0.1661],
        ...,
        [0.1605, 0.1709, 0.1742,  ..., 0.1584, 0.1620, 0.1715],
        [0.1553, 0.1625, 0.1723,  ..., 0.1530, 0.1608, 0.1668],
        [0.1549, 0.1665, 0.1710,  ..., 0.1578, 0.1608, 0.1680]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:28,  1.21it/s]

tensor([[0.1577, 0.1642, 0.1725,  ..., 0.1568, 0.1629, 0.1698],
        [0.1514, 0.1615, 0.1663,  ..., 0.1528, 0.1576, 0.1608],
        [0.1544, 0.1645, 0.1732,  ..., 0.1530, 0.1620, 0.1650],
        ...,
        [0.1600, 0.1678, 0.1777,  ..., 0.1575, 0.1646, 0.1715],
        [0.1585, 0.1690, 0.1748,  ..., 0.1574, 0.1627, 0.1665],
        [0.1586, 0.1657, 0.1763,  ..., 0.1549, 0.1638, 0.1684]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:27,  1.20it/s]

tensor([[0.1598, 0.1670, 0.1740,  ..., 0.1561, 0.1650, 0.1684],
        [0.1594, 0.1664, 0.1760,  ..., 0.1571, 0.1655, 0.1700],
        [0.1555, 0.1622, 0.1702,  ..., 0.1542, 0.1592, 0.1649],
        ...,
        [0.1555, 0.1657, 0.1753,  ..., 0.1573, 0.1652, 0.1711],
        [0.1605, 0.1659, 0.1765,  ..., 0.1578, 0.1646, 0.1702],
        [0.1601, 0.1668, 0.1754,  ..., 0.1574, 0.1641, 0.1706]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:26,  1.22it/s]

tensor([[0.1557, 0.1650, 0.1722,  ..., 0.1552, 0.1607, 0.1706],
        [0.1601, 0.1673, 0.1742,  ..., 0.1569, 0.1647, 0.1708],
        [0.1596, 0.1653, 0.1724,  ..., 0.1572, 0.1623, 0.1703],
        ...,
        [0.1599, 0.1665, 0.1746,  ..., 0.1547, 0.1627, 0.1658],
        [0.1521, 0.1602, 0.1667,  ..., 0.1499, 0.1576, 0.1630],
        [0.1438, 0.1548, 0.1556,  ..., 0.1454, 0.1547, 0.1548]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:25,  1.24it/s]

tensor([[0.1597, 0.1653, 0.1739,  ..., 0.1566, 0.1614, 0.1701],
        [0.1601, 0.1687, 0.1752,  ..., 0.1580, 0.1687, 0.1683],
        [0.1546, 0.1634, 0.1698,  ..., 0.1571, 0.1607, 0.1653],
        ...,
        [0.1573, 0.1627, 0.1724,  ..., 0.1568, 0.1591, 0.1706],
        [0.1590, 0.1669, 0.1758,  ..., 0.1575, 0.1670, 0.1688],
        [0.1573, 0.1649, 0.1701,  ..., 0.1537, 0.1609, 0.1639]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:23,  1.26it/s]

tensor([[0.1581, 0.1654, 0.1733,  ..., 0.1522, 0.1583, 0.1655],
        [0.1599, 0.1694, 0.1734,  ..., 0.1579, 0.1627, 0.1635],
        [0.1574, 0.1614, 0.1698,  ..., 0.1567, 0.1614, 0.1673],
        ...,
        [0.1558, 0.1658, 0.1750,  ..., 0.1609, 0.1637, 0.1666],
        [0.1573, 0.1661, 0.1744,  ..., 0.1580, 0.1646, 0.1687],
        [0.1565, 0.1664, 0.1763,  ..., 0.1572, 0.1613, 0.1668]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:21,  1.32it/s]

tensor([[0.1588, 0.1685, 0.1768,  ..., 0.1581, 0.1644, 0.1702],
        [0.1566, 0.1669, 0.1722,  ..., 0.1562, 0.1635, 0.1645],
        [0.1590, 0.1669, 0.1719,  ..., 0.1551, 0.1641, 0.1657],
        ...,
        [0.1557, 0.1649, 0.1714,  ..., 0.1563, 0.1621, 0.1653],
        [0.1593, 0.1663, 0.1743,  ..., 0.1576, 0.1652, 0.1701],
        [0.1593, 0.1670, 0.1750,  ..., 0.1555, 0.1637, 0.1713]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:21<00:20,  1.37it/s]

tensor([[0.1574, 0.1654, 0.1729,  ..., 0.1551, 0.1648, 0.1681],
        [0.1558, 0.1667, 0.1726,  ..., 0.1556, 0.1610, 0.1652],
        [0.1567, 0.1664, 0.1732,  ..., 0.1559, 0.1621, 0.1648],
        ...,
        [0.1584, 0.1666, 0.1738,  ..., 0.1560, 0.1610, 0.1640],
        [0.1597, 0.1651, 0.1715,  ..., 0.1582, 0.1641, 0.1712],
        [0.1584, 0.1653, 0.1720,  ..., 0.1578, 0.1625, 0.1661]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:19,  1.39it/s]

tensor([[0.1522, 0.1619, 0.1685,  ..., 0.1533, 0.1588, 0.1623],
        [0.1496, 0.1590, 0.1644,  ..., 0.1510, 0.1565, 0.1617],
        [0.1578, 0.1668, 0.1725,  ..., 0.1570, 0.1659, 0.1690],
        ...,
        [0.1598, 0.1659, 0.1751,  ..., 0.1570, 0.1649, 0.1688],
        [0.1569, 0.1634, 0.1707,  ..., 0.1568, 0.1604, 0.1737],
        [0.1597, 0.1665, 0.1738,  ..., 0.1577, 0.1636, 0.1696]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:18,  1.42it/s]

tensor([[0.1588, 0.1674, 0.1738,  ..., 0.1570, 0.1655, 0.1686],
        [0.1578, 0.1670, 0.1748,  ..., 0.1560, 0.1653, 0.1678],
        [0.1574, 0.1648, 0.1726,  ..., 0.1554, 0.1618, 0.1682],
        ...,
        [0.1605, 0.1654, 0.1735,  ..., 0.1576, 0.1638, 0.1710],
        [0.1589, 0.1660, 0.1741,  ..., 0.1571, 0.1647, 0.1710],
        [0.1552, 0.1634, 0.1695,  ..., 0.1536, 0.1608, 0.1627]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:23<00:17,  1.46it/s]

tensor([[0.1582, 0.1666, 0.1722,  ..., 0.1581, 0.1636, 0.1688],
        [0.1571, 0.1648, 0.1724,  ..., 0.1564, 0.1635, 0.1677],
        [0.1568, 0.1664, 0.1732,  ..., 0.1550, 0.1601, 0.1668],
        ...,
        [0.1617, 0.1668, 0.1789,  ..., 0.1574, 0.1650, 0.1703],
        [0.1585, 0.1671, 0.1676,  ..., 0.1546, 0.1609, 0.1630],
        [0.1585, 0.1662, 0.1703,  ..., 0.1573, 0.1607, 0.1661]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:16,  1.48it/s]

tensor([[0.1591, 0.1667, 0.1740,  ..., 0.1575, 0.1652, 0.1692],
        [0.1566, 0.1655, 0.1685,  ..., 0.1548, 0.1633, 0.1634],
        [0.1553, 0.1646, 0.1700,  ..., 0.1578, 0.1607, 0.1682],
        ...,
        [0.1565, 0.1655, 0.1699,  ..., 0.1576, 0.1632, 0.1674],
        [0.1576, 0.1652, 0.1732,  ..., 0.1574, 0.1628, 0.1703],
        [0.1542, 0.1613, 0.1683,  ..., 0.1545, 0.1582, 0.1625]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:15,  1.51it/s]

tensor([[0.1573, 0.1647, 0.1717,  ..., 0.1574, 0.1614, 0.1687],
        [0.1588, 0.1668, 0.1747,  ..., 0.1589, 0.1647, 0.1680],
        [0.1572, 0.1680, 0.1741,  ..., 0.1551, 0.1652, 0.1652],
        ...,
        [0.1583, 0.1642, 0.1724,  ..., 0.1563, 0.1632, 0.1690],
        [0.1549, 0.1647, 0.1704,  ..., 0.1550, 0.1597, 0.1624],
        [0.1574, 0.1689, 0.1701,  ..., 0.1570, 0.1640, 0.1669]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.52it/s]

tensor([[0.1598, 0.1660, 0.1753,  ..., 0.1577, 0.1647, 0.1702],
        [0.1594, 0.1675, 0.1750,  ..., 0.1585, 0.1648, 0.1703],
        [0.1563, 0.1648, 0.1704,  ..., 0.1548, 0.1612, 0.1679],
        ...,
        [0.1574, 0.1684, 0.1714,  ..., 0.1579, 0.1627, 0.1667],
        [0.1577, 0.1694, 0.1735,  ..., 0.1568, 0.1653, 0.1673],
        [0.1588, 0.1661, 0.1741,  ..., 0.1560, 0.1651, 0.1706]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:13,  1.51it/s]

tensor([[0.1619, 0.1676, 0.1723,  ..., 0.1582, 0.1633, 0.1702],
        [0.1596, 0.1682, 0.1748,  ..., 0.1586, 0.1663, 0.1713],
        [0.1587, 0.1664, 0.1760,  ..., 0.1574, 0.1632, 0.1715],
        ...,
        [0.1564, 0.1658, 0.1708,  ..., 0.1534, 0.1601, 0.1610],
        [0.1615, 0.1693, 0.1739,  ..., 0.1564, 0.1649, 0.1711],
        [0.1582, 0.1637, 0.1715,  ..., 0.1568, 0.1619, 0.1692]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:13,  1.48it/s]

tensor([[0.1568, 0.1674, 0.1716,  ..., 0.1567, 0.1637, 0.1642],
        [0.1604, 0.1674, 0.1726,  ..., 0.1575, 0.1651, 0.1677],
        [0.1588, 0.1686, 0.1733,  ..., 0.1564, 0.1661, 0.1690],
        ...,
        [0.1591, 0.1664, 0.1734,  ..., 0.1562, 0.1650, 0.1714],
        [0.1592, 0.1677, 0.1737,  ..., 0.1555, 0.1623, 0.1682],
        [0.1471, 0.1575, 0.1586,  ..., 0.1501, 0.1567, 0.1573]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:27<00:12,  1.46it/s]

tensor([[0.1582, 0.1699, 0.1746,  ..., 0.1573, 0.1650, 0.1706],
        [0.1580, 0.1660, 0.1736,  ..., 0.1558, 0.1646, 0.1675],
        [0.1594, 0.1685, 0.1763,  ..., 0.1563, 0.1646, 0.1676],
        ...,
        [0.1607, 0.1648, 0.1717,  ..., 0.1585, 0.1599, 0.1655],
        [0.1597, 0.1649, 0.1732,  ..., 0.1549, 0.1626, 0.1646],
        [0.1593, 0.1655, 0.1743,  ..., 0.1562, 0.1631, 0.1690]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:12,  1.48it/s]

tensor([[0.1574, 0.1648, 0.1706,  ..., 0.1570, 0.1630, 0.1692],
        [0.1585, 0.1703, 0.1732,  ..., 0.1577, 0.1662, 0.1684],
        [0.1601, 0.1666, 0.1768,  ..., 0.1563, 0.1662, 0.1699],
        ...,
        [0.1592, 0.1668, 0.1734,  ..., 0.1570, 0.1660, 0.1706],
        [0.1597, 0.1680, 0.1756,  ..., 0.1571, 0.1634, 0.1702],
        [0.1593, 0.1695, 0.1760,  ..., 0.1570, 0.1671, 0.1709]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.49it/s]

tensor([[0.1478, 0.1585, 0.1609,  ..., 0.1523, 0.1569, 0.1570],
        [0.1602, 0.1658, 0.1759,  ..., 0.1579, 0.1633, 0.1689],
        [0.1596, 0.1678, 0.1741,  ..., 0.1567, 0.1650, 0.1684],
        ...,
        [0.1540, 0.1630, 0.1689,  ..., 0.1579, 0.1621, 0.1644],
        [0.1589, 0.1674, 0.1732,  ..., 0.1575, 0.1644, 0.1683],
        [0.1574, 0.1633, 0.1711,  ..., 0.1587, 0.1605, 0.1683]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:29<00:10,  1.50it/s]

tensor([[0.1589, 0.1650, 0.1735,  ..., 0.1569, 0.1631, 0.1712],
        [0.1583, 0.1650, 0.1725,  ..., 0.1556, 0.1623, 0.1699],
        [0.1577, 0.1623, 0.1714,  ..., 0.1562, 0.1612, 0.1704],
        ...,
        [0.1592, 0.1665, 0.1752,  ..., 0.1554, 0.1607, 0.1674],
        [0.1591, 0.1655, 0.1737,  ..., 0.1567, 0.1675, 0.1700],
        [0.1607, 0.1671, 0.1727,  ..., 0.1570, 0.1641, 0.1679]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:09,  1.52it/s]

tensor([[0.1591, 0.1637, 0.1755,  ..., 0.1570, 0.1653, 0.1716],
        [0.1584, 0.1617, 0.1721,  ..., 0.1550, 0.1590, 0.1669],
        [0.1572, 0.1631, 0.1714,  ..., 0.1559, 0.1600, 0.1688],
        ...,
        [0.1591, 0.1649, 0.1731,  ..., 0.1608, 0.1636, 0.1653],
        [0.1555, 0.1664, 0.1719,  ..., 0.1548, 0.1626, 0.1641],
        [0.1611, 0.1659, 0.1719,  ..., 0.1580, 0.1696, 0.1697]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:09,  1.53it/s]

tensor([[0.1587, 0.1649, 0.1738,  ..., 0.1577, 0.1627, 0.1710],
        [0.1586, 0.1678, 0.1716,  ..., 0.1591, 0.1624, 0.1685],
        [0.1563, 0.1648, 0.1715,  ..., 0.1565, 0.1623, 0.1716],
        ...,
        [0.1596, 0.1645, 0.1737,  ..., 0.1551, 0.1621, 0.1698],
        [0.1586, 0.1671, 0.1740,  ..., 0.1584, 0.1662, 0.1714],
        [0.1587, 0.1630, 0.1722,  ..., 0.1555, 0.1608, 0.1698]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.53it/s]

tensor([[0.1585, 0.1649, 0.1725,  ..., 0.1554, 0.1611, 0.1673],
        [0.1587, 0.1650, 0.1730,  ..., 0.1558, 0.1625, 0.1689],
        [0.1561, 0.1635, 0.1685,  ..., 0.1541, 0.1620, 0.1668],
        ...,
        [0.1540, 0.1630, 0.1698,  ..., 0.1541, 0.1595, 0.1638],
        [0.1589, 0.1648, 0.1740,  ..., 0.1584, 0.1635, 0.1678],
        [0.1548, 0.1645, 0.1713,  ..., 0.1556, 0.1628, 0.1650]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:07,  1.54it/s]

tensor([[0.1555, 0.1661, 0.1704,  ..., 0.1567, 0.1687, 0.1692],
        [0.1568, 0.1641, 0.1725,  ..., 0.1546, 0.1599, 0.1628],
        [0.1591, 0.1666, 0.1726,  ..., 0.1567, 0.1639, 0.1696],
        ...,
        [0.1595, 0.1671, 0.1755,  ..., 0.1571, 0.1680, 0.1713],
        [0.1603, 0.1699, 0.1748,  ..., 0.1600, 0.1666, 0.1720],
        [0.1594, 0.1698, 0.1731,  ..., 0.1576, 0.1649, 0.1686]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.52it/s]

tensor([[0.1581, 0.1649, 0.1717,  ..., 0.1555, 0.1625, 0.1675],
        [0.1610, 0.1686, 0.1767,  ..., 0.1576, 0.1652, 0.1704],
        [0.1549, 0.1649, 0.1687,  ..., 0.1571, 0.1616, 0.1663],
        ...,
        [0.1603, 0.1665, 0.1741,  ..., 0.1586, 0.1633, 0.1727],
        [0.1586, 0.1660, 0.1767,  ..., 0.1584, 0.1627, 0.1683],
        [0.1608, 0.1672, 0.1742,  ..., 0.1557, 0.1659, 0.1713]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.49it/s]

tensor([[0.1586, 0.1653, 0.1723,  ..., 0.1562, 0.1641, 0.1704],
        [0.1585, 0.1670, 0.1738,  ..., 0.1583, 0.1648, 0.1675],
        [0.1536, 0.1624, 0.1710,  ..., 0.1569, 0.1650, 0.1660],
        ...,
        [0.1577, 0.1648, 0.1717,  ..., 0.1573, 0.1641, 0.1692],
        [0.1596, 0.1679, 0.1715,  ..., 0.1566, 0.1627, 0.1718],
        [0.1595, 0.1662, 0.1749,  ..., 0.1536, 0.1605, 0.1645]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:33<00:06,  1.49it/s]

tensor([[0.1600, 0.1648, 0.1743,  ..., 0.1550, 0.1631, 0.1710],
        [0.1593, 0.1647, 0.1726,  ..., 0.1565, 0.1648, 0.1710],
        [0.1589, 0.1656, 0.1741,  ..., 0.1559, 0.1660, 0.1700],
        ...,
        [0.1586, 0.1668, 0.1753,  ..., 0.1556, 0.1648, 0.1684],
        [0.1570, 0.1651, 0.1722,  ..., 0.1548, 0.1623, 0.1647],
        [0.1585, 0.1660, 0.1740,  ..., 0.1568, 0.1628, 0.1691]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.49it/s]

tensor([[0.1591, 0.1671, 0.1750,  ..., 0.1587, 0.1656, 0.1716],
        [0.1616, 0.1701, 0.1738,  ..., 0.1555, 0.1634, 0.1681],
        [0.1597, 0.1659, 0.1707,  ..., 0.1574, 0.1625, 0.1666],
        ...,
        [0.1598, 0.1664, 0.1745,  ..., 0.1567, 0.1646, 0.1714],
        [0.1576, 0.1656, 0.1735,  ..., 0.1560, 0.1641, 0.1687],
        [0.1579, 0.1671, 0.1712,  ..., 0.1553, 0.1594, 0.1613]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.50it/s]

tensor([[0.1593, 0.1690, 0.1758,  ..., 0.1561, 0.1660, 0.1704],
        [0.1588, 0.1666, 0.1750,  ..., 0.1554, 0.1645, 0.1695],
        [0.1548, 0.1662, 0.1700,  ..., 0.1563, 0.1625, 0.1653],
        ...,
        [0.1572, 0.1650, 0.1706,  ..., 0.1561, 0.1630, 0.1679],
        [0.1590, 0.1681, 0.1726,  ..., 0.1560, 0.1644, 0.1688],
        [0.1610, 0.1674, 0.1763,  ..., 0.1557, 0.1666, 0.1697]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:03,  1.51it/s]

tensor([[0.1598, 0.1661, 0.1727,  ..., 0.1571, 0.1636, 0.1700],
        [0.1585, 0.1659, 0.1740,  ..., 0.1560, 0.1635, 0.1692],
        [0.1603, 0.1661, 0.1726,  ..., 0.1565, 0.1645, 0.1678],
        ...,
        [0.1607, 0.1657, 0.1740,  ..., 0.1573, 0.1641, 0.1698],
        [0.1615, 0.1655, 0.1747,  ..., 0.1551, 0.1656, 0.1719],
        [0.1598, 0.1666, 0.1723,  ..., 0.1577, 0.1668, 0.1688]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:36<00:03,  1.51it/s]

tensor([[0.1587, 0.1699, 0.1747,  ..., 0.1579, 0.1691, 0.1697],
        [0.1587, 0.1671, 0.1718,  ..., 0.1570, 0.1611, 0.1666],
        [0.1626, 0.1652, 0.1746,  ..., 0.1566, 0.1635, 0.1696],
        ...,
        [0.1576, 0.1656, 0.1727,  ..., 0.1576, 0.1628, 0.1706],
        [0.1530, 0.1605, 0.1651,  ..., 0.1516, 0.1525, 0.1594],
        [0.1571, 0.1665, 0.1687,  ..., 0.1574, 0.1659, 0.1667]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.52it/s]

tensor([[0.1594, 0.1672, 0.1715,  ..., 0.1580, 0.1643, 0.1691],
        [0.1608, 0.1692, 0.1746,  ..., 0.1577, 0.1657, 0.1694],
        [0.1563, 0.1656, 0.1708,  ..., 0.1577, 0.1629, 0.1680],
        ...,
        [0.1566, 0.1667, 0.1714,  ..., 0.1568, 0.1665, 0.1673],
        [0.1579, 0.1662, 0.1731,  ..., 0.1576, 0.1641, 0.1698],
        [0.1606, 0.1669, 0.1751,  ..., 0.1567, 0.1632, 0.1685]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:01,  1.55it/s]

tensor([[0.1589, 0.1639, 0.1719,  ..., 0.1558, 0.1625, 0.1718],
        [0.1590, 0.1673, 0.1745,  ..., 0.1572, 0.1657, 0.1707],
        [0.1593, 0.1666, 0.1764,  ..., 0.1569, 0.1655, 0.1688],
        ...,
        [0.1612, 0.1681, 0.1743,  ..., 0.1573, 0.1655, 0.1693],
        [0.1571, 0.1658, 0.1689,  ..., 0.1543, 0.1578, 0.1643],
        [0.1581, 0.1674, 0.1732,  ..., 0.1569, 0.1632, 0.1698]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.53it/s]

tensor([[0.1592, 0.1674, 0.1744,  ..., 0.1559, 0.1626, 0.1703],
        [0.1585, 0.1668, 0.1701,  ..., 0.1549, 0.1643, 0.1659],
        [0.1576, 0.1663, 0.1721,  ..., 0.1565, 0.1611, 0.1673],
        ...,
        [0.1591, 0.1662, 0.1746,  ..., 0.1573, 0.1666, 0.1705],
        [0.1618, 0.1696, 0.1741,  ..., 0.1574, 0.1642, 0.1692],
        [0.1595, 0.1648, 0.1736,  ..., 0.1563, 0.1610, 0.1676]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.53it/s]

tensor([[0.1587, 0.1651, 0.1727,  ..., 0.1565, 0.1669, 0.1712],
        [0.1558, 0.1660, 0.1688,  ..., 0.1522, 0.1594, 0.1614],
        [0.1600, 0.1693, 0.1749,  ..., 0.1569, 0.1646, 0.1694],
        ...,
        [0.1551, 0.1623, 0.1671,  ..., 0.1522, 0.1578, 0.1641],
        [0.1564, 0.1653, 0.1696,  ..., 0.1569, 0.1620, 0.1642],
        [0.1582, 0.1647, 0.1728,  ..., 0.1567, 0.1633, 0.1715]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.44it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.29it/s]

tensor([[0.1430, 0.1551, 0.1546,  ..., 0.1455, 0.1549, 0.1543],
        [0.1587, 0.1671, 0.1742,  ..., 0.1579, 0.1658, 0.1679],
        [0.1597, 0.1652, 0.1754,  ..., 0.1572, 0.1644, 0.1743],
        ...,
        [0.1557, 0.1673, 0.1705,  ..., 0.1565, 0.1630, 0.1625],
        [0.1541, 0.1663, 0.1715,  ..., 0.1564, 0.1678, 0.1656],
        [0.1576, 0.1673, 0.1716,  ..., 0.1568, 0.1617, 0.1657]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.35it/s]

tensor([[0.1575, 0.1657, 0.1712,  ..., 0.1578, 0.1631, 0.1684],
        [0.1573, 0.1672, 0.1693,  ..., 0.1591, 0.1603, 0.1656],
        [0.1587, 0.1669, 0.1713,  ..., 0.1564, 0.1648, 0.1691],
        ...,
        [0.1606, 0.1678, 0.1764,  ..., 0.1571, 0.1665, 0.1676],
        [0.1602, 0.1676, 0.1763,  ..., 0.1573, 0.1628, 0.1686],
        [0.1602, 0.1710, 0.1763,  ..., 0.1566, 0.1674, 0.1731]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.41it/s]

tensor([[0.1563, 0.1633, 0.1655,  ..., 0.1515, 0.1581, 0.1603],
        [0.1583, 0.1649, 0.1737,  ..., 0.1579, 0.1617, 0.1674],
        [0.1579, 0.1666, 0.1746,  ..., 0.1560, 0.1615, 0.1703],
        ...,
        [0.1572, 0.1666, 0.1736,  ..., 0.1595, 0.1659, 0.1678],
        [0.1573, 0.1667, 0.1733,  ..., 0.1576, 0.1668, 0.1703],
        [0.1540, 0.1656, 0.1701,  ..., 0.1559, 0.1606, 0.1648]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.44it/s]

tensor([[0.1577, 0.1664, 0.1718,  ..., 0.1587, 0.1652, 0.1691],
        [0.1561, 0.1628, 0.1694,  ..., 0.1572, 0.1586, 0.1677],
        [0.1604, 0.1672, 0.1759,  ..., 0.1572, 0.1646, 0.1707],
        ...,
        [0.1573, 0.1658, 0.1703,  ..., 0.1552, 0.1615, 0.1661],
        [0.1552, 0.1646, 0.1681,  ..., 0.1546, 0.1576, 0.1635],
        [0.1581, 0.1670, 0.1729,  ..., 0.1564, 0.1616, 0.1696]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.36it/s]

tensor([[0.1596, 0.1659, 0.1746,  ..., 0.1565, 0.1642, 0.1716],
        [0.1600, 0.1668, 0.1746,  ..., 0.1580, 0.1618, 0.1681],
        [0.1604, 0.1668, 0.1762,  ..., 0.1559, 0.1653, 0.1717],
        ...,
        [0.1574, 0.1662, 0.1724,  ..., 0.1559, 0.1611, 0.1648],
        [0.1596, 0.1660, 0.1748,  ..., 0.1561, 0.1626, 0.1690],
        [0.1610, 0.1650, 0.1732,  ..., 0.1566, 0.1625, 0.1699]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.43it/s]

tensor([[0.1564, 0.1667, 0.1691,  ..., 0.1546, 0.1636, 0.1674],
        [0.1563, 0.1629, 0.1702,  ..., 0.1557, 0.1612, 0.1673],
        [0.1590, 0.1682, 0.1751,  ..., 0.1577, 0.1651, 0.1701],
        ...,
        [0.1589, 0.1664, 0.1705,  ..., 0.1558, 0.1631, 0.1687],
        [0.1578, 0.1669, 0.1739,  ..., 0.1545, 0.1667, 0.1682],
        [0.1571, 0.1638, 0.1693,  ..., 0.1547, 0.1600, 0.1648]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.47it/s]

tensor([[0.1595, 0.1665, 0.1734,  ..., 0.1571, 0.1633, 0.1688],
        [0.1572, 0.1660, 0.1723,  ..., 0.1567, 0.1626, 0.1672],
        [0.1586, 0.1668, 0.1742,  ..., 0.1576, 0.1630, 0.1719],
        ...,
        [0.1570, 0.1652, 0.1724,  ..., 0.1537, 0.1599, 0.1661],
        [0.1584, 0.1686, 0.1725,  ..., 0.1579, 0.1642, 0.1680],
        [0.1572, 0.1663, 0.1714,  ..., 0.1567, 0.1632, 0.1680]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.44it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1597, 0.1674, 0.1732,  ..., 0.1569, 0.1636, 0.1690],
        [0.1587, 0.1671, 0.1716,  ..., 0.1568, 0.1642, 0.1682],
        [0.1575, 0.1650, 0.1720,  ..., 0.1563, 0.1613, 0.1657],
        ...,
        [0.1616, 0.1690, 0.1748,  ..., 0.1563, 0.1657, 0.1721],
        [0.1594, 0.1652, 0.1728,  ..., 0.1593, 0.1629, 0.1686],
        [0.1516, 0.1580, 0.1648,  ..., 0.1522, 0.1550, 0.1641]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 6, train_loss: -0.15975430607795715, valid_loss: -0.20035214722156525
Parameter containing:
tensor([[ 1.0005,  1.0006,  1.0005,  1.0005,  1.0005,  1.0005,  1.0006,  1.0005,
          1.0005,  1.0006, -0.4994, -0.4995, -0.4996, -0.4995, -0.4995, -0.4995,
         -0.4995, -0.4995, -0.4995, -0.4995],
    

  2%|▏         | 1/57 [00:00<00:35,  1.59it/s]

tensor([[0.1553, 0.1652, 0.1691,  ..., 0.1549, 0.1630, 0.1648],
        [0.1590, 0.1654, 0.1733,  ..., 0.1566, 0.1612, 0.1717],
        [0.1595, 0.1678, 0.1730,  ..., 0.1573, 0.1644, 0.1698],
        ...,
        [0.1606, 0.1675, 0.1749,  ..., 0.1578, 0.1662, 0.1691],
        [0.1558, 0.1638, 0.1740,  ..., 0.1565, 0.1620, 0.1692],
        [0.1580, 0.1662, 0.1721,  ..., 0.1570, 0.1635, 0.1688]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:35,  1.54it/s]

tensor([[0.1579, 0.1654, 0.1728,  ..., 0.1559, 0.1624, 0.1663],
        [0.1577, 0.1625, 0.1728,  ..., 0.1560, 0.1602, 0.1667],
        [0.1591, 0.1688, 0.1731,  ..., 0.1580, 0.1691, 0.1675],
        ...,
        [0.1587, 0.1669, 0.1721,  ..., 0.1560, 0.1650, 0.1659],
        [0.1595, 0.1654, 0.1740,  ..., 0.1560, 0.1654, 0.1697],
        [0.1551, 0.1632, 0.1702,  ..., 0.1560, 0.1623, 0.1645]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:37,  1.45it/s]

tensor([[0.1594, 0.1647, 0.1718,  ..., 0.1562, 0.1634, 0.1696],
        [0.1586, 0.1652, 0.1712,  ..., 0.1562, 0.1620, 0.1687],
        [0.1592, 0.1649, 0.1738,  ..., 0.1583, 0.1639, 0.1697],
        ...,
        [0.1603, 0.1644, 0.1734,  ..., 0.1582, 0.1637, 0.1705],
        [0.1603, 0.1654, 0.1740,  ..., 0.1569, 0.1653, 0.1701],
        [0.1516, 0.1612, 0.1677,  ..., 0.1536, 0.1568, 0.1630]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:36,  1.46it/s]

tensor([[0.1545, 0.1651, 0.1701,  ..., 0.1555, 0.1640, 0.1687],
        [0.1608, 0.1683, 0.1749,  ..., 0.1576, 0.1648, 0.1721],
        [0.1527, 0.1625, 0.1660,  ..., 0.1522, 0.1586, 0.1630],
        ...,
        [0.1589, 0.1668, 0.1731,  ..., 0.1558, 0.1663, 0.1693],
        [0.1572, 0.1596, 0.1680,  ..., 0.1528, 0.1557, 0.1628],
        [0.1606, 0.1667, 0.1754,  ..., 0.1561, 0.1664, 0.1719]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:36,  1.42it/s]

tensor([[0.1581, 0.1682, 0.1722,  ..., 0.1556, 0.1620, 0.1623],
        [0.1600, 0.1676, 0.1743,  ..., 0.1555, 0.1652, 0.1709],
        [0.1601, 0.1671, 0.1734,  ..., 0.1568, 0.1655, 0.1657],
        ...,
        [0.1596, 0.1666, 0.1737,  ..., 0.1569, 0.1654, 0.1687],
        [0.1591, 0.1685, 0.1755,  ..., 0.1560, 0.1663, 0.1686],
        [0.1571, 0.1666, 0.1698,  ..., 0.1573, 0.1643, 0.1637]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:36,  1.40it/s]

tensor([[0.1568, 0.1652, 0.1725,  ..., 0.1551, 0.1633, 0.1667],
        [0.1589, 0.1673, 0.1720,  ..., 0.1569, 0.1611, 0.1668],
        [0.1588, 0.1660, 0.1709,  ..., 0.1566, 0.1621, 0.1687],
        ...,
        [0.1548, 0.1623, 0.1688,  ..., 0.1539, 0.1618, 0.1647],
        [0.1542, 0.1607, 0.1680,  ..., 0.1556, 0.1602, 0.1612],
        [0.1600, 0.1671, 0.1735,  ..., 0.1567, 0.1641, 0.1695]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:36,  1.35it/s]

tensor([[0.1569, 0.1672, 0.1725,  ..., 0.1563, 0.1618, 0.1685],
        [0.1563, 0.1645, 0.1676,  ..., 0.1577, 0.1611, 0.1590],
        [0.1579, 0.1645, 0.1726,  ..., 0.1566, 0.1615, 0.1702],
        ...,
        [0.1557, 0.1623, 0.1708,  ..., 0.1563, 0.1600, 0.1681],
        [0.1577, 0.1634, 0.1710,  ..., 0.1586, 0.1603, 0.1684],
        [0.1609, 0.1688, 0.1759,  ..., 0.1572, 0.1626, 0.1703]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:35,  1.38it/s]

tensor([[0.1581, 0.1672, 0.1720,  ..., 0.1567, 0.1650, 0.1711],
        [0.1532, 0.1629, 0.1677,  ..., 0.1553, 0.1583, 0.1586],
        [0.1611, 0.1674, 0.1760,  ..., 0.1588, 0.1646, 0.1686],
        ...,
        [0.1570, 0.1680, 0.1736,  ..., 0.1548, 0.1650, 0.1659],
        [0.1594, 0.1665, 0.1735,  ..., 0.1567, 0.1633, 0.1716],
        [0.1596, 0.1664, 0.1727,  ..., 0.1572, 0.1652, 0.1695]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:34,  1.38it/s]

tensor([[0.1570, 0.1639, 0.1729,  ..., 0.1553, 0.1617, 0.1675],
        [0.1591, 0.1668, 0.1745,  ..., 0.1576, 0.1650, 0.1688],
        [0.1577, 0.1648, 0.1738,  ..., 0.1536, 0.1610, 0.1654],
        ...,
        [0.1587, 0.1659, 0.1734,  ..., 0.1582, 0.1639, 0.1706],
        [0.1622, 0.1695, 0.1777,  ..., 0.1591, 0.1643, 0.1700],
        [0.1523, 0.1630, 0.1679,  ..., 0.1541, 0.1561, 0.1596]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:33,  1.38it/s]

tensor([[0.1612, 0.1679, 0.1721,  ..., 0.1568, 0.1647, 0.1717],
        [0.1581, 0.1652, 0.1733,  ..., 0.1566, 0.1620, 0.1691],
        [0.1591, 0.1703, 0.1743,  ..., 0.1575, 0.1678, 0.1686],
        ...,
        [0.1551, 0.1631, 0.1691,  ..., 0.1563, 0.1602, 0.1671],
        [0.1594, 0.1687, 0.1737,  ..., 0.1587, 0.1641, 0.1687],
        [0.1531, 0.1601, 0.1658,  ..., 0.1517, 0.1543, 0.1608]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:33,  1.36it/s]

tensor([[0.1564, 0.1646, 0.1744,  ..., 0.1550, 0.1618, 0.1639],
        [0.1590, 0.1650, 0.1725,  ..., 0.1567, 0.1651, 0.1679],
        [0.1619, 0.1676, 0.1767,  ..., 0.1577, 0.1639, 0.1713],
        ...,
        [0.1594, 0.1664, 0.1759,  ..., 0.1559, 0.1625, 0.1688],
        [0.1593, 0.1650, 0.1729,  ..., 0.1568, 0.1628, 0.1698],
        [0.1562, 0.1662, 0.1706,  ..., 0.1574, 0.1658, 0.1637]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:33,  1.36it/s]

tensor([[0.1607, 0.1692, 0.1751,  ..., 0.1561, 0.1652, 0.1679],
        [0.1554, 0.1668, 0.1688,  ..., 0.1558, 0.1604, 0.1622],
        [0.1605, 0.1674, 0.1742,  ..., 0.1586, 0.1675, 0.1703],
        ...,
        [0.1586, 0.1681, 0.1745,  ..., 0.1578, 0.1655, 0.1698],
        [0.1601, 0.1660, 0.1736,  ..., 0.1563, 0.1634, 0.1703],
        [0.1571, 0.1648, 0.1732,  ..., 0.1568, 0.1644, 0.1675]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:32,  1.34it/s]

tensor([[0.1588, 0.1651, 0.1709,  ..., 0.1569, 0.1616, 0.1667],
        [0.1576, 0.1672, 0.1728,  ..., 0.1579, 0.1633, 0.1653],
        [0.1578, 0.1653, 0.1722,  ..., 0.1552, 0.1622, 0.1680],
        ...,
        [0.1602, 0.1677, 0.1715,  ..., 0.1571, 0.1617, 0.1693],
        [0.1550, 0.1640, 0.1674,  ..., 0.1574, 0.1611, 0.1626],
        [0.1578, 0.1663, 0.1724,  ..., 0.1541, 0.1638, 0.1641]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:32,  1.34it/s]

tensor([[0.1541, 0.1675, 0.1704,  ..., 0.1549, 0.1593, 0.1610],
        [0.1580, 0.1660, 0.1727,  ..., 0.1563, 0.1638, 0.1690],
        [0.1602, 0.1662, 0.1742,  ..., 0.1562, 0.1656, 0.1701],
        ...,
        [0.1589, 0.1656, 0.1725,  ..., 0.1572, 0.1660, 0.1693],
        [0.1577, 0.1630, 0.1695,  ..., 0.1542, 0.1586, 0.1657],
        [0.1584, 0.1660, 0.1726,  ..., 0.1571, 0.1650, 0.1686]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:30,  1.37it/s]

tensor([[0.1576, 0.1663, 0.1695,  ..., 0.1589, 0.1664, 0.1646],
        [0.1573, 0.1629, 0.1739,  ..., 0.1559, 0.1626, 0.1686],
        [0.1538, 0.1629, 0.1682,  ..., 0.1580, 0.1606, 0.1629],
        ...,
        [0.1593, 0.1660, 0.1724,  ..., 0.1572, 0.1626, 0.1673],
        [0.1584, 0.1667, 0.1727,  ..., 0.1585, 0.1629, 0.1740],
        [0.1540, 0.1633, 0.1688,  ..., 0.1557, 0.1599, 0.1621]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:30,  1.33it/s]

tensor([[0.1611, 0.1670, 0.1750,  ..., 0.1577, 0.1647, 0.1718],
        [0.1598, 0.1669, 0.1728,  ..., 0.1571, 0.1646, 0.1686],
        [0.1583, 0.1650, 0.1739,  ..., 0.1557, 0.1610, 0.1716],
        ...,
        [0.1612, 0.1683, 0.1769,  ..., 0.1557, 0.1676, 0.1706],
        [0.1593, 0.1671, 0.1727,  ..., 0.1571, 0.1655, 0.1699],
        [0.1596, 0.1662, 0.1746,  ..., 0.1567, 0.1623, 0.1681]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:31,  1.28it/s]

tensor([[0.1585, 0.1637, 0.1727,  ..., 0.1567, 0.1663, 0.1698],
        [0.1591, 0.1646, 0.1734,  ..., 0.1560, 0.1629, 0.1681],
        [0.1608, 0.1692, 0.1748,  ..., 0.1601, 0.1643, 0.1691],
        ...,
        [0.1601, 0.1701, 0.1723,  ..., 0.1590, 0.1665, 0.1692],
        [0.1539, 0.1637, 0.1645,  ..., 0.1571, 0.1582, 0.1631],
        [0.1606, 0.1681, 0.1746,  ..., 0.1548, 0.1628, 0.1679]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:29,  1.30it/s]

tensor([[0.1597, 0.1717, 0.1718,  ..., 0.1591, 0.1653, 0.1693],
        [0.1581, 0.1661, 0.1724,  ..., 0.1579, 0.1627, 0.1668],
        [0.1578, 0.1637, 0.1723,  ..., 0.1556, 0.1629, 0.1695],
        ...,
        [0.1561, 0.1642, 0.1709,  ..., 0.1562, 0.1627, 0.1658],
        [0.1596, 0.1680, 0.1734,  ..., 0.1577, 0.1652, 0.1706],
        [0.1573, 0.1644, 0.1720,  ..., 0.1555, 0.1613, 0.1682]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:28,  1.34it/s]

tensor([[0.1593, 0.1697, 0.1747,  ..., 0.1548, 0.1639, 0.1690],
        [0.1582, 0.1670, 0.1737,  ..., 0.1587, 0.1659, 0.1699],
        [0.1583, 0.1658, 0.1744,  ..., 0.1558, 0.1631, 0.1686],
        ...,
        [0.1598, 0.1661, 0.1746,  ..., 0.1563, 0.1637, 0.1715],
        [0.1570, 0.1659, 0.1719,  ..., 0.1585, 0.1621, 0.1662],
        [0.1532, 0.1606, 0.1654,  ..., 0.1545, 0.1575, 0.1578]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:27,  1.35it/s]

tensor([[0.1544, 0.1649, 0.1724,  ..., 0.1542, 0.1614, 0.1636],
        [0.1573, 0.1676, 0.1704,  ..., 0.1561, 0.1632, 0.1676],
        [0.1539, 0.1601, 0.1672,  ..., 0.1524, 0.1552, 0.1608],
        ...,
        [0.1596, 0.1669, 0.1718,  ..., 0.1595, 0.1634, 0.1690],
        [0.1592, 0.1659, 0.1751,  ..., 0.1568, 0.1644, 0.1700],
        [0.1570, 0.1677, 0.1742,  ..., 0.1600, 0.1636, 0.1691]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:26,  1.38it/s]

tensor([[0.1602, 0.1661, 0.1773,  ..., 0.1561, 0.1632, 0.1677],
        [0.1568, 0.1644, 0.1714,  ..., 0.1559, 0.1629, 0.1625],
        [0.1432, 0.1541, 0.1545,  ..., 0.1453, 0.1544, 0.1532],
        ...,
        [0.1601, 0.1662, 0.1737,  ..., 0.1565, 0.1623, 0.1694],
        [0.1541, 0.1607, 0.1664,  ..., 0.1508, 0.1569, 0.1620],
        [0.1583, 0.1654, 0.1768,  ..., 0.1551, 0.1642, 0.1687]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:16<00:24,  1.42it/s]

tensor([[0.1584, 0.1665, 0.1709,  ..., 0.1577, 0.1619, 0.1677],
        [0.1548, 0.1631, 0.1678,  ..., 0.1534, 0.1605, 0.1618],
        [0.1594, 0.1649, 0.1752,  ..., 0.1563, 0.1631, 0.1698],
        ...,
        [0.1581, 0.1662, 0.1735,  ..., 0.1579, 0.1657, 0.1688],
        [0.1608, 0.1653, 0.1739,  ..., 0.1571, 0.1629, 0.1711],
        [0.1584, 0.1657, 0.1716,  ..., 0.1557, 0.1603, 0.1623]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:23,  1.45it/s]

tensor([[0.1552, 0.1660, 0.1737,  ..., 0.1570, 0.1610, 0.1662],
        [0.1588, 0.1613, 0.1673,  ..., 0.1560, 0.1608, 0.1652],
        [0.1577, 0.1653, 0.1713,  ..., 0.1561, 0.1617, 0.1673],
        ...,
        [0.1620, 0.1700, 0.1770,  ..., 0.1567, 0.1667, 0.1687],
        [0.1612, 0.1664, 0.1746,  ..., 0.1573, 0.1653, 0.1716],
        [0.1557, 0.1674, 0.1737,  ..., 0.1566, 0.1669, 0.1685]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:22,  1.44it/s]

tensor([[0.1601, 0.1658, 0.1753,  ..., 0.1579, 0.1622, 0.1679],
        [0.1605, 0.1640, 0.1722,  ..., 0.1576, 0.1628, 0.1699],
        [0.1599, 0.1648, 0.1746,  ..., 0.1543, 0.1614, 0.1693],
        ...,
        [0.1596, 0.1653, 0.1756,  ..., 0.1529, 0.1626, 0.1667],
        [0.1578, 0.1682, 0.1723,  ..., 0.1554, 0.1639, 0.1662],
        [0.1577, 0.1648, 0.1724,  ..., 0.1568, 0.1641, 0.1681]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:22,  1.45it/s]

tensor([[0.1595, 0.1647, 0.1732,  ..., 0.1592, 0.1594, 0.1678],
        [0.1589, 0.1688, 0.1746,  ..., 0.1585, 0.1621, 0.1662],
        [0.1560, 0.1671, 0.1753,  ..., 0.1561, 0.1641, 0.1678],
        ...,
        [0.1574, 0.1653, 0.1713,  ..., 0.1594, 0.1627, 0.1670],
        [0.1590, 0.1678, 0.1731,  ..., 0.1576, 0.1633, 0.1667],
        [0.1440, 0.1558, 0.1559,  ..., 0.1468, 0.1559, 0.1550]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:21,  1.45it/s]

tensor([[0.1608, 0.1693, 0.1761,  ..., 0.1584, 0.1680, 0.1701],
        [0.1566, 0.1646, 0.1704,  ..., 0.1577, 0.1648, 0.1674],
        [0.1602, 0.1654, 0.1721,  ..., 0.1576, 0.1626, 0.1708],
        ...,
        [0.1560, 0.1640, 0.1700,  ..., 0.1568, 0.1630, 0.1639],
        [0.1573, 0.1642, 0.1721,  ..., 0.1577, 0.1613, 0.1685],
        [0.1542, 0.1635, 0.1730,  ..., 0.1576, 0.1627, 0.1634]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:20,  1.47it/s]

tensor([[0.1613, 0.1677, 0.1756,  ..., 0.1596, 0.1665, 0.1718],
        [0.1567, 0.1648, 0.1698,  ..., 0.1578, 0.1617, 0.1675],
        [0.1602, 0.1668, 0.1730,  ..., 0.1574, 0.1642, 0.1688],
        ...,
        [0.1588, 0.1661, 0.1718,  ..., 0.1542, 0.1633, 0.1669],
        [0.1589, 0.1597, 0.1715,  ..., 0.1540, 0.1565, 0.1623],
        [0.1583, 0.1656, 0.1721,  ..., 0.1581, 0.1618, 0.1681]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:19,  1.49it/s]

tensor([[0.1585, 0.1667, 0.1721,  ..., 0.1584, 0.1637, 0.1662],
        [0.1574, 0.1667, 0.1689,  ..., 0.1545, 0.1627, 0.1643],
        [0.1578, 0.1641, 0.1724,  ..., 0.1560, 0.1632, 0.1701],
        ...,
        [0.1562, 0.1661, 0.1729,  ..., 0.1570, 0.1625, 0.1656],
        [0.1446, 0.1567, 0.1571,  ..., 0.1480, 0.1580, 0.1565],
        [0.1611, 0.1675, 0.1748,  ..., 0.1577, 0.1652, 0.1722]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:18,  1.48it/s]

tensor([[0.1580, 0.1680, 0.1735,  ..., 0.1561, 0.1622, 0.1687],
        [0.1592, 0.1644, 0.1713,  ..., 0.1564, 0.1624, 0.1652],
        [0.1573, 0.1641, 0.1698,  ..., 0.1583, 0.1583, 0.1652],
        ...,
        [0.1593, 0.1658, 0.1707,  ..., 0.1538, 0.1636, 0.1653],
        [0.1589, 0.1682, 0.1726,  ..., 0.1580, 0.1617, 0.1684],
        [0.1547, 0.1579, 0.1675,  ..., 0.1502, 0.1564, 0.1642]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:18,  1.45it/s]

tensor([[0.1598, 0.1658, 0.1748,  ..., 0.1576, 0.1659, 0.1713],
        [0.1558, 0.1604, 0.1659,  ..., 0.1506, 0.1544, 0.1572],
        [0.1568, 0.1621, 0.1656,  ..., 0.1551, 0.1573, 0.1643],
        ...,
        [0.1568, 0.1650, 0.1724,  ..., 0.1558, 0.1629, 0.1684],
        [0.1581, 0.1681, 0.1762,  ..., 0.1568, 0.1659, 0.1661],
        [0.1586, 0.1658, 0.1756,  ..., 0.1554, 0.1639, 0.1684]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:17,  1.45it/s]

tensor([[0.1595, 0.1686, 0.1760,  ..., 0.1574, 0.1638, 0.1687],
        [0.1581, 0.1657, 0.1719,  ..., 0.1532, 0.1619, 0.1674],
        [0.1589, 0.1684, 0.1732,  ..., 0.1565, 0.1657, 0.1710],
        ...,
        [0.1606, 0.1653, 0.1719,  ..., 0.1575, 0.1642, 0.1693],
        [0.1576, 0.1654, 0.1726,  ..., 0.1548, 0.1607, 0.1644],
        [0.1610, 0.1678, 0.1775,  ..., 0.1551, 0.1672, 0.1693]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:17,  1.47it/s]

tensor([[0.1544, 0.1621, 0.1682,  ..., 0.1567, 0.1588, 0.1652],
        [0.1560, 0.1657, 0.1705,  ..., 0.1560, 0.1616, 0.1656],
        [0.1584, 0.1671, 0.1726,  ..., 0.1563, 0.1637, 0.1695],
        ...,
        [0.1583, 0.1635, 0.1721,  ..., 0.1561, 0.1632, 0.1707],
        [0.1609, 0.1656, 0.1735,  ..., 0.1560, 0.1643, 0.1701],
        [0.1619, 0.1663, 0.1743,  ..., 0.1562, 0.1623, 0.1684]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:16,  1.43it/s]

tensor([[0.1585, 0.1683, 0.1733,  ..., 0.1552, 0.1631, 0.1685],
        [0.1607, 0.1647, 0.1728,  ..., 0.1583, 0.1639, 0.1697],
        [0.1583, 0.1639, 0.1742,  ..., 0.1571, 0.1627, 0.1691],
        ...,
        [0.1581, 0.1667, 0.1736,  ..., 0.1577, 0.1642, 0.1706],
        [0.1596, 0.1665, 0.1753,  ..., 0.1558, 0.1615, 0.1700],
        [0.1573, 0.1655, 0.1710,  ..., 0.1568, 0.1636, 0.1647]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:16,  1.41it/s]

tensor([[0.1581, 0.1669, 0.1718,  ..., 0.1571, 0.1637, 0.1679],
        [0.1630, 0.1676, 0.1755,  ..., 0.1558, 0.1645, 0.1707],
        [0.1578, 0.1652, 0.1708,  ..., 0.1542, 0.1617, 0.1635],
        ...,
        [0.1589, 0.1674, 0.1729,  ..., 0.1571, 0.1659, 0.1674],
        [0.1624, 0.1677, 0.1756,  ..., 0.1562, 0.1659, 0.1706],
        [0.1587, 0.1659, 0.1742,  ..., 0.1543, 0.1604, 0.1656]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:25<00:15,  1.41it/s]

tensor([[0.1598, 0.1665, 0.1738,  ..., 0.1608, 0.1644, 0.1699],
        [0.1562, 0.1641, 0.1676,  ..., 0.1564, 0.1604, 0.1631],
        [0.1604, 0.1662, 0.1754,  ..., 0.1572, 0.1628, 0.1695],
        ...,
        [0.1606, 0.1692, 0.1725,  ..., 0.1564, 0.1649, 0.1659],
        [0.1618, 0.1668, 0.1758,  ..., 0.1581, 0.1655, 0.1705],
        [0.1582, 0.1656, 0.1713,  ..., 0.1586, 0.1629, 0.1674]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:15,  1.38it/s]

tensor([[0.1547, 0.1635, 0.1702,  ..., 0.1550, 0.1620, 0.1635],
        [0.1576, 0.1651, 0.1703,  ..., 0.1567, 0.1605, 0.1659],
        [0.1536, 0.1622, 0.1691,  ..., 0.1567, 0.1584, 0.1638],
        ...,
        [0.1597, 0.1662, 0.1751,  ..., 0.1571, 0.1648, 0.1713],
        [0.1592, 0.1659, 0.1719,  ..., 0.1579, 0.1637, 0.1682],
        [0.1592, 0.1666, 0.1741,  ..., 0.1564, 0.1651, 0.1677]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:14,  1.39it/s]

tensor([[0.1581, 0.1651, 0.1726,  ..., 0.1578, 0.1626, 0.1688],
        [0.1547, 0.1630, 0.1710,  ..., 0.1544, 0.1570, 0.1596],
        [0.1589, 0.1660, 0.1758,  ..., 0.1553, 0.1607, 0.1639],
        ...,
        [0.1608, 0.1686, 0.1772,  ..., 0.1571, 0.1669, 0.1692],
        [0.1585, 0.1634, 0.1711,  ..., 0.1574, 0.1627, 0.1686],
        [0.1570, 0.1682, 0.1721,  ..., 0.1590, 0.1639, 0.1682]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:27<00:13,  1.38it/s]

tensor([[0.1627, 0.1684, 0.1739,  ..., 0.1569, 0.1650, 0.1711],
        [0.1619, 0.1682, 0.1754,  ..., 0.1549, 0.1641, 0.1718],
        [0.1557, 0.1659, 0.1713,  ..., 0.1568, 0.1637, 0.1689],
        ...,
        [0.1598, 0.1683, 0.1751,  ..., 0.1553, 0.1624, 0.1676],
        [0.1606, 0.1653, 0.1749,  ..., 0.1558, 0.1636, 0.1693],
        [0.1585, 0.1642, 0.1712,  ..., 0.1553, 0.1612, 0.1688]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:12,  1.41it/s]

tensor([[0.1576, 0.1644, 0.1718,  ..., 0.1555, 0.1619, 0.1668],
        [0.1526, 0.1633, 0.1674,  ..., 0.1556, 0.1592, 0.1642],
        [0.1589, 0.1648, 0.1720,  ..., 0.1576, 0.1632, 0.1692],
        ...,
        [0.1610, 0.1686, 0.1743,  ..., 0.1563, 0.1651, 0.1668],
        [0.1549, 0.1641, 0.1700,  ..., 0.1545, 0.1616, 0.1612],
        [0.1605, 0.1648, 0.1715,  ..., 0.1563, 0.1626, 0.1687]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:12,  1.40it/s]

tensor([[0.1586, 0.1660, 0.1718,  ..., 0.1559, 0.1614, 0.1644],
        [0.1570, 0.1644, 0.1712,  ..., 0.1548, 0.1592, 0.1670],
        [0.1597, 0.1656, 0.1724,  ..., 0.1574, 0.1628, 0.1696],
        ...,
        [0.1588, 0.1682, 0.1735,  ..., 0.1574, 0.1647, 0.1691],
        [0.1596, 0.1660, 0.1728,  ..., 0.1579, 0.1631, 0.1673],
        [0.1567, 0.1599, 0.1700,  ..., 0.1539, 0.1565, 0.1630]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:29<00:11,  1.38it/s]

tensor([[0.1571, 0.1667, 0.1722,  ..., 0.1559, 0.1609, 0.1675],
        [0.1590, 0.1653, 0.1735,  ..., 0.1559, 0.1619, 0.1657],
        [0.1601, 0.1673, 0.1765,  ..., 0.1568, 0.1645, 0.1721],
        ...,
        [0.1569, 0.1660, 0.1691,  ..., 0.1550, 0.1594, 0.1627],
        [0.1583, 0.1657, 0.1723,  ..., 0.1577, 0.1641, 0.1671],
        [0.1594, 0.1664, 0.1750,  ..., 0.1561, 0.1643, 0.1692]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:30<00:10,  1.38it/s]

tensor([[0.1617, 0.1705, 0.1759,  ..., 0.1578, 0.1639, 0.1697],
        [0.1601, 0.1637, 0.1720,  ..., 0.1565, 0.1610, 0.1673],
        [0.1604, 0.1686, 0.1730,  ..., 0.1570, 0.1659, 0.1667],
        ...,
        [0.1594, 0.1709, 0.1736,  ..., 0.1598, 0.1642, 0.1705],
        [0.1620, 0.1675, 0.1715,  ..., 0.1578, 0.1627, 0.1683],
        [0.1598, 0.1662, 0.1718,  ..., 0.1560, 0.1625, 0.1655]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:10,  1.40it/s]

tensor([[0.1609, 0.1672, 0.1744,  ..., 0.1574, 0.1655, 0.1708],
        [0.1590, 0.1666, 0.1719,  ..., 0.1568, 0.1637, 0.1686],
        [0.1626, 0.1680, 0.1769,  ..., 0.1580, 0.1644, 0.1709],
        ...,
        [0.1604, 0.1660, 0.1746,  ..., 0.1563, 0.1631, 0.1679],
        [0.1591, 0.1636, 0.1732,  ..., 0.1555, 0.1599, 0.1668],
        [0.1601, 0.1692, 0.1756,  ..., 0.1555, 0.1653, 0.1660]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:31<00:09,  1.32it/s]

tensor([[0.1522, 0.1633, 0.1680,  ..., 0.1539, 0.1581, 0.1612],
        [0.1564, 0.1656, 0.1723,  ..., 0.1564, 0.1598, 0.1654],
        [0.1599, 0.1675, 0.1732,  ..., 0.1559, 0.1645, 0.1694],
        ...,
        [0.1594, 0.1671, 0.1742,  ..., 0.1576, 0.1639, 0.1682],
        [0.1594, 0.1657, 0.1712,  ..., 0.1574, 0.1652, 0.1673],
        [0.1572, 0.1647, 0.1675,  ..., 0.1547, 0.1620, 0.1613]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:32<00:09,  1.29it/s]

tensor([[0.1614, 0.1678, 0.1734,  ..., 0.1581, 0.1662, 0.1704],
        [0.1589, 0.1655, 0.1715,  ..., 0.1574, 0.1627, 0.1663],
        [0.1602, 0.1655, 0.1736,  ..., 0.1576, 0.1661, 0.1697],
        ...,
        [0.1593, 0.1641, 0.1716,  ..., 0.1567, 0.1623, 0.1682],
        [0.1569, 0.1657, 0.1747,  ..., 0.1571, 0.1617, 0.1678],
        [0.1584, 0.1652, 0.1703,  ..., 0.1556, 0.1613, 0.1641]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:33<00:08,  1.27it/s]

tensor([[0.1603, 0.1642, 0.1743,  ..., 0.1560, 0.1630, 0.1712],
        [0.1590, 0.1676, 0.1707,  ..., 0.1559, 0.1641, 0.1667],
        [0.1589, 0.1679, 0.1723,  ..., 0.1575, 0.1632, 0.1689],
        ...,
        [0.1574, 0.1660, 0.1711,  ..., 0.1560, 0.1648, 0.1689],
        [0.1609, 0.1661, 0.1740,  ..., 0.1577, 0.1652, 0.1698],
        [0.1577, 0.1640, 0.1718,  ..., 0.1542, 0.1622, 0.1667]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:34<00:07,  1.31it/s]

tensor([[0.1566, 0.1667, 0.1722,  ..., 0.1576, 0.1639, 0.1703],
        [0.1579, 0.1636, 0.1722,  ..., 0.1556, 0.1628, 0.1675],
        [0.1555, 0.1654, 0.1687,  ..., 0.1586, 0.1613, 0.1643],
        ...,
        [0.1569, 0.1658, 0.1723,  ..., 0.1565, 0.1609, 0.1675],
        [0.1598, 0.1659, 0.1709,  ..., 0.1564, 0.1610, 0.1664],
        [0.1584, 0.1656, 0.1720,  ..., 0.1567, 0.1616, 0.1684]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:34<00:06,  1.35it/s]

tensor([[0.1616, 0.1680, 0.1767,  ..., 0.1572, 0.1661, 0.1709],
        [0.1608, 0.1667, 0.1738,  ..., 0.1563, 0.1652, 0.1705],
        [0.1616, 0.1694, 0.1743,  ..., 0.1564, 0.1664, 0.1703],
        ...,
        [0.1583, 0.1667, 0.1751,  ..., 0.1569, 0.1633, 0.1671],
        [0.1590, 0.1667, 0.1712,  ..., 0.1554, 0.1631, 0.1635],
        [0.1588, 0.1666, 0.1705,  ..., 0.1580, 0.1620, 0.1685]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:35<00:05,  1.41it/s]

tensor([[0.1522, 0.1646, 0.1639,  ..., 0.1550, 0.1630, 0.1610],
        [0.1552, 0.1650, 0.1706,  ..., 0.1560, 0.1610, 0.1651],
        [0.1575, 0.1651, 0.1695,  ..., 0.1571, 0.1625, 0.1682],
        ...,
        [0.1533, 0.1626, 0.1660,  ..., 0.1531, 0.1586, 0.1596],
        [0.1595, 0.1672, 0.1758,  ..., 0.1580, 0.1662, 0.1698],
        [0.1598, 0.1653, 0.1744,  ..., 0.1583, 0.1643, 0.1689]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:35<00:04,  1.45it/s]

tensor([[0.1582, 0.1645, 0.1714,  ..., 0.1569, 0.1623, 0.1700],
        [0.1600, 0.1667, 0.1741,  ..., 0.1556, 0.1645, 0.1686],
        [0.1607, 0.1656, 0.1770,  ..., 0.1564, 0.1640, 0.1703],
        ...,
        [0.1612, 0.1668, 0.1716,  ..., 0.1594, 0.1636, 0.1702],
        [0.1597, 0.1650, 0.1722,  ..., 0.1559, 0.1619, 0.1667],
        [0.1587, 0.1646, 0.1722,  ..., 0.1569, 0.1614, 0.1686]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:36<00:04,  1.48it/s]

tensor([[0.1587, 0.1659, 0.1731,  ..., 0.1572, 0.1653, 0.1685],
        [0.1589, 0.1665, 0.1719,  ..., 0.1573, 0.1659, 0.1665],
        [0.1591, 0.1659, 0.1707,  ..., 0.1584, 0.1623, 0.1685],
        ...,
        [0.1617, 0.1662, 0.1752,  ..., 0.1568, 0.1638, 0.1673],
        [0.1599, 0.1666, 0.1714,  ..., 0.1574, 0.1622, 0.1686],
        [0.1579, 0.1638, 0.1697,  ..., 0.1566, 0.1622, 0.1674]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:37<00:03,  1.49it/s]

tensor([[0.1539, 0.1607, 0.1683,  ..., 0.1561, 0.1589, 0.1618],
        [0.1598, 0.1675, 0.1723,  ..., 0.1585, 0.1642, 0.1708],
        [0.1569, 0.1629, 0.1713,  ..., 0.1573, 0.1643, 0.1667],
        ...,
        [0.1587, 0.1632, 0.1728,  ..., 0.1577, 0.1635, 0.1696],
        [0.1579, 0.1637, 0.1692,  ..., 0.1583, 0.1661, 0.1652],
        [0.1583, 0.1649, 0.1728,  ..., 0.1582, 0.1639, 0.1690]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:37<00:02,  1.50it/s]

tensor([[0.1571, 0.1656, 0.1723,  ..., 0.1574, 0.1619, 0.1672],
        [0.1590, 0.1641, 0.1724,  ..., 0.1573, 0.1592, 0.1653],
        [0.1592, 0.1673, 0.1733,  ..., 0.1584, 0.1644, 0.1686],
        ...,
        [0.1584, 0.1647, 0.1716,  ..., 0.1607, 0.1663, 0.1686],
        [0.1604, 0.1658, 0.1732,  ..., 0.1581, 0.1618, 0.1683],
        [0.1585, 0.1665, 0.1728,  ..., 0.1586, 0.1624, 0.1691]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:38<00:01,  1.50it/s]

tensor([[0.1528, 0.1626, 0.1677,  ..., 0.1539, 0.1587, 0.1621],
        [0.1506, 0.1604, 0.1657,  ..., 0.1484, 0.1516, 0.1581],
        [0.1584, 0.1687, 0.1719,  ..., 0.1588, 0.1650, 0.1678],
        ...,
        [0.1594, 0.1661, 0.1712,  ..., 0.1570, 0.1634, 0.1707],
        [0.1519, 0.1606, 0.1665,  ..., 0.1490, 0.1557, 0.1595],
        [0.1594, 0.1668, 0.1738,  ..., 0.1548, 0.1606, 0.1686]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:39<00:01,  1.51it/s]

tensor([[0.1542, 0.1635, 0.1707,  ..., 0.1568, 0.1605, 0.1635],
        [0.1597, 0.1662, 0.1723,  ..., 0.1575, 0.1631, 0.1699],
        [0.1586, 0.1631, 0.1703,  ..., 0.1562, 0.1597, 0.1650],
        ...,
        [0.1604, 0.1701, 0.1761,  ..., 0.1582, 0.1616, 0.1691],
        [0.1598, 0.1684, 0.1737,  ..., 0.1585, 0.1664, 0.1694],
        [0.1588, 0.1673, 0.1743,  ..., 0.1564, 0.1678, 0.1705]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:39<00:00,  1.47it/s]

tensor([[0.1585, 0.1692, 0.1742,  ..., 0.1564, 0.1671, 0.1697],
        [0.1594, 0.1658, 0.1733,  ..., 0.1559, 0.1641, 0.1674],
        [0.1570, 0.1676, 0.1713,  ..., 0.1594, 0.1631, 0.1677],
        ...,
        [0.1602, 0.1676, 0.1743,  ..., 0.1549, 0.1640, 0.1718],
        [0.1590, 0.1645, 0.1730,  ..., 0.1563, 0.1604, 0.1662],
        [0.1595, 0.1639, 0.1701,  ..., 0.1566, 0.1609, 0.1671]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:40<00:00,  1.40it/s]
 12%|█▎        | 1/8 [00:00<00:01,  3.53it/s]

tensor([[0.1531, 0.1632, 0.1632,  ..., 0.1542, 0.1577, 0.1590],
        [0.1616, 0.1650, 0.1728,  ..., 0.1564, 0.1619, 0.1688],
        [0.1573, 0.1652, 0.1698,  ..., 0.1559, 0.1628, 0.1696],
        ...,
        [0.1600, 0.1696, 0.1746,  ..., 0.1568, 0.1664, 0.1695],
        [0.1600, 0.1633, 0.1713,  ..., 0.1573, 0.1612, 0.1660],
        [0.1550, 0.1656, 0.1708,  ..., 0.1574, 0.1590, 0.1650]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.46it/s]

tensor([[0.1560, 0.1677, 0.1745,  ..., 0.1583, 0.1640, 0.1652],
        [0.1595, 0.1651, 0.1728,  ..., 0.1577, 0.1631, 0.1694],
        [0.1580, 0.1689, 0.1762,  ..., 0.1554, 0.1645, 0.1688],
        ...,
        [0.1602, 0.1699, 0.1738,  ..., 0.1565, 0.1621, 0.1654],
        [0.1600, 0.1660, 0.1739,  ..., 0.1574, 0.1630, 0.1706],
        [0.1599, 0.1666, 0.1724,  ..., 0.1553, 0.1637, 0.1643]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.44it/s]

tensor([[0.1602, 0.1643, 0.1722,  ..., 0.1566, 0.1609, 0.1690],
        [0.1559, 0.1647, 0.1693,  ..., 0.1557, 0.1599, 0.1630],
        [0.1573, 0.1678, 0.1707,  ..., 0.1559, 0.1636, 0.1645],
        ...,
        [0.1557, 0.1633, 0.1716,  ..., 0.1548, 0.1598, 0.1621],
        [0.1571, 0.1652, 0.1717,  ..., 0.1547, 0.1594, 0.1670],
        [0.1586, 0.1644, 0.1719,  ..., 0.1562, 0.1594, 0.1693]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.50it/s]

tensor([[0.1551, 0.1615, 0.1664,  ..., 0.1552, 0.1589, 0.1622],
        [0.1592, 0.1683, 0.1735,  ..., 0.1566, 0.1632, 0.1680],
        [0.1593, 0.1681, 0.1707,  ..., 0.1553, 0.1638, 0.1663],
        ...,
        [0.1559, 0.1656, 0.1707,  ..., 0.1561, 0.1617, 0.1676],
        [0.1594, 0.1650, 0.1721,  ..., 0.1573, 0.1629, 0.1690],
        [0.1588, 0.1654, 0.1734,  ..., 0.1558, 0.1675, 0.1690]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.51it/s]

tensor([[0.1602, 0.1680, 0.1719,  ..., 0.1574, 0.1654, 0.1684],
        [0.1594, 0.1662, 0.1725,  ..., 0.1573, 0.1607, 0.1673],
        [0.1614, 0.1669, 0.1752,  ..., 0.1587, 0.1681, 0.1692],
        ...,
        [0.1592, 0.1664, 0.1711,  ..., 0.1555, 0.1591, 0.1655],
        [0.1571, 0.1673, 0.1690,  ..., 0.1557, 0.1648, 0.1623],
        [0.1591, 0.1640, 0.1750,  ..., 0.1552, 0.1625, 0.1674]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.51it/s]

tensor([[0.1602, 0.1662, 0.1698,  ..., 0.1542, 0.1630, 0.1684],
        [0.1593, 0.1663, 0.1750,  ..., 0.1587, 0.1647, 0.1654],
        [0.1576, 0.1667, 0.1709,  ..., 0.1567, 0.1626, 0.1668],
        ...,
        [0.1571, 0.1658, 0.1706,  ..., 0.1562, 0.1637, 0.1642],
        [0.1590, 0.1640, 0.1718,  ..., 0.1549, 0.1620, 0.1666],
        [0.1581, 0.1656, 0.1725,  ..., 0.1562, 0.1639, 0.1709]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.48it/s]

tensor([[0.1580, 0.1667, 0.1707,  ..., 0.1557, 0.1616, 0.1656],
        [0.1550, 0.1675, 0.1698,  ..., 0.1559, 0.1655, 0.1661],
        [0.1545, 0.1640, 0.1687,  ..., 0.1551, 0.1574, 0.1658],
        ...,
        [0.1586, 0.1700, 0.1687,  ..., 0.1573, 0.1638, 0.1666],
        [0.1590, 0.1673, 0.1742,  ..., 0.1583, 0.1644, 0.1676],
        [0.1576, 0.1657, 0.1718,  ..., 0.1575, 0.1643, 0.1689]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.50it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1600, 0.1656, 0.1703,  ..., 0.1573, 0.1611, 0.1652],
        [0.1578, 0.1664, 0.1725,  ..., 0.1561, 0.1636, 0.1651],
        [0.1612, 0.1691, 0.1728,  ..., 0.1550, 0.1630, 0.1689],
        ...,
        [0.1548, 0.1639, 0.1693,  ..., 0.1533, 0.1597, 0.1627],
        [0.1574, 0.1652, 0.1694,  ..., 0.1574, 0.1604, 0.1662],
        [0.1582, 0.1668, 0.1738,  ..., 0.1584, 0.1620, 0.1686]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 7, train_loss: -0.28568097949028015, valid_loss: -0.3172493577003479
Parameter containing:
tensor([[ 1.0006,  1.0006,  1.0006,  1.0006,  1.0005,  1.0005,  1.0007,  1.0005,
          1.0005,  1.0006, -0.4994, -0.4995, -0.4995, -0.4994, -0.4994, -0.4994,
         -0.4994, -0.4995, -0.4994, -0.4995],
     

  2%|▏         | 1/57 [00:00<00:37,  1.49it/s]

tensor([[0.1622, 0.1677, 0.1745,  ..., 0.1578, 0.1670, 0.1714],
        [0.1585, 0.1688, 0.1728,  ..., 0.1572, 0.1634, 0.1651],
        [0.1577, 0.1670, 0.1715,  ..., 0.1571, 0.1638, 0.1693],
        ...,
        [0.1569, 0.1676, 0.1694,  ..., 0.1560, 0.1620, 0.1654],
        [0.1537, 0.1630, 0.1673,  ..., 0.1567, 0.1594, 0.1628],
        [0.1565, 0.1631, 0.1707,  ..., 0.1570, 0.1608, 0.1656]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:36,  1.51it/s]

tensor([[0.1558, 0.1641, 0.1684,  ..., 0.1537, 0.1591, 0.1624],
        [0.1569, 0.1653, 0.1705,  ..., 0.1542, 0.1615, 0.1645],
        [0.1590, 0.1650, 0.1723,  ..., 0.1551, 0.1638, 0.1697],
        ...,
        [0.1578, 0.1640, 0.1728,  ..., 0.1565, 0.1632, 0.1670],
        [0.1585, 0.1678, 0.1729,  ..., 0.1560, 0.1654, 0.1679],
        [0.1617, 0.1656, 0.1734,  ..., 0.1602, 0.1632, 0.1703]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:36,  1.49it/s]

tensor([[0.1597, 0.1671, 0.1739,  ..., 0.1571, 0.1636, 0.1697],
        [0.1540, 0.1644, 0.1663,  ..., 0.1549, 0.1600, 0.1658],
        [0.1594, 0.1660, 0.1729,  ..., 0.1556, 0.1615, 0.1668],
        ...,
        [0.1583, 0.1651, 0.1734,  ..., 0.1569, 0.1609, 0.1692],
        [0.1601, 0.1678, 0.1745,  ..., 0.1551, 0.1645, 0.1712],
        [0.1543, 0.1630, 0.1682,  ..., 0.1556, 0.1592, 0.1636]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:36,  1.45it/s]

tensor([[0.1592, 0.1672, 0.1733,  ..., 0.1577, 0.1617, 0.1663],
        [0.1598, 0.1660, 0.1709,  ..., 0.1553, 0.1649, 0.1690],
        [0.1586, 0.1636, 0.1710,  ..., 0.1563, 0.1614, 0.1661],
        ...,
        [0.1641, 0.1670, 0.1737,  ..., 0.1584, 0.1662, 0.1706],
        [0.1606, 0.1649, 0.1726,  ..., 0.1582, 0.1641, 0.1694],
        [0.1570, 0.1656, 0.1721,  ..., 0.1564, 0.1608, 0.1672]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:36,  1.43it/s]

tensor([[0.1565, 0.1658, 0.1705,  ..., 0.1558, 0.1609, 0.1658],
        [0.1589, 0.1623, 0.1712,  ..., 0.1560, 0.1599, 0.1649],
        [0.1591, 0.1671, 0.1745,  ..., 0.1566, 0.1624, 0.1702],
        ...,
        [0.1579, 0.1685, 0.1731,  ..., 0.1574, 0.1646, 0.1679],
        [0.1607, 0.1665, 0.1748,  ..., 0.1556, 0.1638, 0.1693],
        [0.1590, 0.1650, 0.1717,  ..., 0.1564, 0.1636, 0.1690]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:35,  1.45it/s]

tensor([[0.1588, 0.1656, 0.1711,  ..., 0.1566, 0.1633, 0.1667],
        [0.1601, 0.1708, 0.1765,  ..., 0.1577, 0.1639, 0.1674],
        [0.1547, 0.1624, 0.1639,  ..., 0.1550, 0.1571, 0.1603],
        ...,
        [0.1530, 0.1617, 0.1648,  ..., 0.1546, 0.1556, 0.1599],
        [0.1582, 0.1640, 0.1734,  ..., 0.1583, 0.1643, 0.1676],
        [0.1590, 0.1672, 0.1740,  ..., 0.1584, 0.1631, 0.1688]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.48it/s]

tensor([[0.1597, 0.1635, 0.1717,  ..., 0.1559, 0.1625, 0.1697],
        [0.1598, 0.1642, 0.1724,  ..., 0.1564, 0.1622, 0.1680],
        [0.1595, 0.1680, 0.1733,  ..., 0.1574, 0.1653, 0.1703],
        ...,
        [0.1556, 0.1641, 0.1640,  ..., 0.1529, 0.1549, 0.1586],
        [0.1594, 0.1669, 0.1720,  ..., 0.1593, 0.1644, 0.1731],
        [0.1590, 0.1691, 0.1728,  ..., 0.1568, 0.1642, 0.1667]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.49it/s]

tensor([[0.1567, 0.1679, 0.1728,  ..., 0.1561, 0.1671, 0.1674],
        [0.1597, 0.1701, 0.1742,  ..., 0.1571, 0.1671, 0.1662],
        [0.1574, 0.1654, 0.1693,  ..., 0.1548, 0.1589, 0.1653],
        ...,
        [0.1613, 0.1675, 0.1754,  ..., 0.1558, 0.1638, 0.1690],
        [0.1563, 0.1626, 0.1682,  ..., 0.1541, 0.1585, 0.1642],
        [0.1604, 0.1662, 0.1719,  ..., 0.1568, 0.1654, 0.1694]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:31,  1.51it/s]

tensor([[0.1559, 0.1673, 0.1710,  ..., 0.1543, 0.1633, 0.1646],
        [0.1551, 0.1633, 0.1681,  ..., 0.1575, 0.1608, 0.1646],
        [0.1620, 0.1666, 0.1737,  ..., 0.1559, 0.1652, 0.1702],
        ...,
        [0.1579, 0.1676, 0.1711,  ..., 0.1586, 0.1640, 0.1688],
        [0.1612, 0.1695, 0.1757,  ..., 0.1569, 0.1678, 0.1704],
        [0.1595, 0.1665, 0.1743,  ..., 0.1545, 0.1614, 0.1660]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:30,  1.52it/s]

tensor([[0.1539, 0.1595, 0.1656,  ..., 0.1550, 0.1545, 0.1633],
        [0.1588, 0.1654, 0.1704,  ..., 0.1576, 0.1646, 0.1692],
        [0.1573, 0.1680, 0.1699,  ..., 0.1538, 0.1627, 0.1637],
        ...,
        [0.1618, 0.1686, 0.1748,  ..., 0.1562, 0.1633, 0.1669],
        [0.1569, 0.1686, 0.1743,  ..., 0.1589, 0.1646, 0.1669],
        [0.1552, 0.1617, 0.1677,  ..., 0.1543, 0.1581, 0.1611]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:29,  1.53it/s]

tensor([[0.1574, 0.1630, 0.1697,  ..., 0.1541, 0.1583, 0.1640],
        [0.1569, 0.1634, 0.1706,  ..., 0.1566, 0.1603, 0.1667],
        [0.1613, 0.1682, 0.1743,  ..., 0.1572, 0.1663, 0.1693],
        ...,
        [0.1545, 0.1674, 0.1675,  ..., 0.1585, 0.1635, 0.1618],
        [0.1556, 0.1610, 0.1667,  ..., 0.1522, 0.1596, 0.1627],
        [0.1565, 0.1679, 0.1719,  ..., 0.1582, 0.1634, 0.1696]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:29,  1.53it/s]

tensor([[0.1601, 0.1672, 0.1732,  ..., 0.1574, 0.1625, 0.1680],
        [0.1580, 0.1678, 0.1741,  ..., 0.1586, 0.1627, 0.1656],
        [0.1591, 0.1681, 0.1737,  ..., 0.1543, 0.1641, 0.1648],
        ...,
        [0.1528, 0.1607, 0.1639,  ..., 0.1530, 0.1533, 0.1581],
        [0.1550, 0.1618, 0.1690,  ..., 0.1567, 0.1593, 0.1639],
        [0.1572, 0.1698, 0.1719,  ..., 0.1569, 0.1665, 0.1721]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:28,  1.52it/s]

tensor([[0.1588, 0.1676, 0.1747,  ..., 0.1571, 0.1627, 0.1683],
        [0.1578, 0.1642, 0.1695,  ..., 0.1561, 0.1593, 0.1674],
        [0.1624, 0.1692, 0.1778,  ..., 0.1595, 0.1675, 0.1702],
        ...,
        [0.1573, 0.1659, 0.1707,  ..., 0.1565, 0.1611, 0.1652],
        [0.1597, 0.1672, 0.1745,  ..., 0.1570, 0.1664, 0.1719],
        [0.1600, 0.1658, 0.1739,  ..., 0.1562, 0.1616, 0.1684]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.51it/s]

tensor([[0.1585, 0.1682, 0.1749,  ..., 0.1573, 0.1662, 0.1690],
        [0.1600, 0.1678, 0.1739,  ..., 0.1574, 0.1663, 0.1674],
        [0.1588, 0.1633, 0.1731,  ..., 0.1582, 0.1629, 0.1660],
        ...,
        [0.1554, 0.1667, 0.1692,  ..., 0.1575, 0.1608, 0.1669],
        [0.1577, 0.1664, 0.1739,  ..., 0.1543, 0.1618, 0.1669],
        [0.1593, 0.1655, 0.1710,  ..., 0.1568, 0.1675, 0.1680]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:27,  1.50it/s]

tensor([[0.1575, 0.1685, 0.1721,  ..., 0.1587, 0.1649, 0.1678],
        [0.1525, 0.1632, 0.1632,  ..., 0.1518, 0.1585, 0.1594],
        [0.1603, 0.1675, 0.1729,  ..., 0.1577, 0.1645, 0.1674],
        ...,
        [0.1609, 0.1648, 0.1704,  ..., 0.1563, 0.1617, 0.1663],
        [0.1578, 0.1656, 0.1737,  ..., 0.1574, 0.1660, 0.1691],
        [0.1566, 0.1654, 0.1707,  ..., 0.1561, 0.1593, 0.1641]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:27,  1.48it/s]

tensor([[0.1575, 0.1635, 0.1706,  ..., 0.1567, 0.1613, 0.1706],
        [0.1580, 0.1667, 0.1709,  ..., 0.1566, 0.1603, 0.1650],
        [0.1586, 0.1654, 0.1711,  ..., 0.1591, 0.1635, 0.1660],
        ...,
        [0.1564, 0.1660, 0.1681,  ..., 0.1575, 0.1625, 0.1673],
        [0.1599, 0.1665, 0.1726,  ..., 0.1587, 0.1634, 0.1676],
        [0.1587, 0.1654, 0.1714,  ..., 0.1569, 0.1627, 0.1705]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:26,  1.49it/s]

tensor([[0.1583, 0.1653, 0.1730,  ..., 0.1552, 0.1616, 0.1711],
        [0.1573, 0.1637, 0.1716,  ..., 0.1564, 0.1601, 0.1655],
        [0.1597, 0.1689, 0.1745,  ..., 0.1560, 0.1637, 0.1717],
        ...,
        [0.1594, 0.1669, 0.1732,  ..., 0.1569, 0.1616, 0.1664],
        [0.1477, 0.1587, 0.1602,  ..., 0.1503, 0.1575, 0.1585],
        [0.1599, 0.1646, 0.1753,  ..., 0.1545, 0.1628, 0.1673]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:25,  1.50it/s]

tensor([[0.1598, 0.1660, 0.1727,  ..., 0.1579, 0.1643, 0.1667],
        [0.1594, 0.1655, 0.1740,  ..., 0.1552, 0.1644, 0.1696],
        [0.1604, 0.1663, 0.1757,  ..., 0.1571, 0.1636, 0.1704],
        ...,
        [0.1600, 0.1672, 0.1743,  ..., 0.1562, 0.1647, 0.1684],
        [0.1596, 0.1670, 0.1699,  ..., 0.1575, 0.1622, 0.1673],
        [0.1586, 0.1689, 0.1740,  ..., 0.1579, 0.1634, 0.1665]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:25,  1.51it/s]

tensor([[0.1591, 0.1652, 0.1714,  ..., 0.1557, 0.1637, 0.1674],
        [0.1580, 0.1655, 0.1712,  ..., 0.1568, 0.1627, 0.1691],
        [0.1599, 0.1676, 0.1733,  ..., 0.1597, 0.1655, 0.1691],
        ...,
        [0.1603, 0.1676, 0.1750,  ..., 0.1557, 0.1629, 0.1700],
        [0.1600, 0.1676, 0.1735,  ..., 0.1579, 0.1661, 0.1697],
        [0.1489, 0.1571, 0.1594,  ..., 0.1491, 0.1525, 0.1529]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:24,  1.51it/s]

tensor([[0.1593, 0.1660, 0.1718,  ..., 0.1580, 0.1623, 0.1671],
        [0.1585, 0.1641, 0.1711,  ..., 0.1585, 0.1623, 0.1684],
        [0.1583, 0.1643, 0.1704,  ..., 0.1566, 0.1641, 0.1658],
        ...,
        [0.1594, 0.1683, 0.1728,  ..., 0.1574, 0.1633, 0.1666],
        [0.1575, 0.1685, 0.1712,  ..., 0.1560, 0.1613, 0.1651],
        [0.1522, 0.1633, 0.1640,  ..., 0.1541, 0.1615, 0.1590]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:23,  1.51it/s]

tensor([[0.1592, 0.1672, 0.1690,  ..., 0.1545, 0.1608, 0.1654],
        [0.1580, 0.1649, 0.1718,  ..., 0.1551, 0.1612, 0.1607],
        [0.1578, 0.1647, 0.1721,  ..., 0.1579, 0.1636, 0.1690],
        ...,
        [0.1557, 0.1653, 0.1707,  ..., 0.1583, 0.1645, 0.1640],
        [0.1586, 0.1635, 0.1716,  ..., 0.1559, 0.1614, 0.1650],
        [0.1555, 0.1668, 0.1705,  ..., 0.1565, 0.1617, 0.1673]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:14<00:23,  1.50it/s]

tensor([[0.1586, 0.1682, 0.1709,  ..., 0.1563, 0.1649, 0.1661],
        [0.1579, 0.1673, 0.1679,  ..., 0.1581, 0.1626, 0.1657],
        [0.1578, 0.1653, 0.1704,  ..., 0.1560, 0.1623, 0.1659],
        ...,
        [0.1573, 0.1664, 0.1696,  ..., 0.1560, 0.1614, 0.1666],
        [0.1624, 0.1673, 0.1763,  ..., 0.1560, 0.1667, 0.1704],
        [0.1490, 0.1596, 0.1623,  ..., 0.1510, 0.1587, 0.1581]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.50it/s]

tensor([[0.1622, 0.1681, 0.1744,  ..., 0.1575, 0.1681, 0.1699],
        [0.1578, 0.1685, 0.1727,  ..., 0.1577, 0.1632, 0.1663],
        [0.1570, 0.1650, 0.1702,  ..., 0.1557, 0.1590, 0.1648],
        ...,
        [0.1591, 0.1663, 0.1729,  ..., 0.1569, 0.1627, 0.1686],
        [0.1585, 0.1676, 0.1734,  ..., 0.1582, 0.1656, 0.1680],
        [0.1602, 0.1698, 0.1762,  ..., 0.1558, 0.1675, 0.1703]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:15<00:21,  1.52it/s]

tensor([[0.1608, 0.1685, 0.1756,  ..., 0.1548, 0.1642, 0.1675],
        [0.1545, 0.1618, 0.1683,  ..., 0.1522, 0.1557, 0.1615],
        [0.1584, 0.1682, 0.1739,  ..., 0.1570, 0.1639, 0.1670],
        ...,
        [0.1581, 0.1636, 0.1715,  ..., 0.1567, 0.1599, 0.1669],
        [0.1537, 0.1631, 0.1677,  ..., 0.1546, 0.1610, 0.1622],
        [0.1582, 0.1645, 0.1700,  ..., 0.1568, 0.1632, 0.1663]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:16<00:22,  1.43it/s]

tensor([[0.1584, 0.1664, 0.1713,  ..., 0.1558, 0.1633, 0.1669],
        [0.1576, 0.1640, 0.1700,  ..., 0.1580, 0.1620, 0.1654],
        [0.1570, 0.1659, 0.1720,  ..., 0.1553, 0.1630, 0.1663],
        ...,
        [0.1598, 0.1668, 0.1721,  ..., 0.1572, 0.1642, 0.1670],
        [0.1578, 0.1671, 0.1737,  ..., 0.1544, 0.1630, 0.1651],
        [0.1562, 0.1659, 0.1694,  ..., 0.1568, 0.1609, 0.1658]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:22,  1.35it/s]

tensor([[0.1592, 0.1638, 0.1727,  ..., 0.1582, 0.1620, 0.1658],
        [0.1604, 0.1685, 0.1758,  ..., 0.1582, 0.1662, 0.1702],
        [0.1566, 0.1621, 0.1699,  ..., 0.1535, 0.1579, 0.1615],
        ...,
        [0.1587, 0.1652, 0.1739,  ..., 0.1569, 0.1641, 0.1656],
        [0.1595, 0.1672, 0.1723,  ..., 0.1568, 0.1647, 0.1674],
        [0.1620, 0.1700, 0.1768,  ..., 0.1598, 0.1669, 0.1723]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:22,  1.32it/s]

tensor([[0.1582, 0.1638, 0.1729,  ..., 0.1565, 0.1601, 0.1685],
        [0.1591, 0.1646, 0.1722,  ..., 0.1583, 0.1616, 0.1697],
        [0.1558, 0.1654, 0.1695,  ..., 0.1569, 0.1618, 0.1636],
        ...,
        [0.1581, 0.1666, 0.1691,  ..., 0.1540, 0.1604, 0.1653],
        [0.1600, 0.1698, 0.1762,  ..., 0.1572, 0.1664, 0.1702],
        [0.1579, 0.1661, 0.1713,  ..., 0.1555, 0.1651, 0.1673]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:22,  1.28it/s]

tensor([[0.1541, 0.1657, 0.1697,  ..., 0.1553, 0.1637, 0.1644],
        [0.1563, 0.1637, 0.1714,  ..., 0.1581, 0.1602, 0.1666],
        [0.1600, 0.1661, 0.1765,  ..., 0.1573, 0.1650, 0.1701],
        ...,
        [0.1578, 0.1638, 0.1712,  ..., 0.1572, 0.1615, 0.1680],
        [0.1585, 0.1692, 0.1741,  ..., 0.1565, 0.1668, 0.1694],
        [0.1554, 0.1644, 0.1712,  ..., 0.1555, 0.1603, 0.1662]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:22,  1.22it/s]

tensor([[0.1584, 0.1645, 0.1694,  ..., 0.1535, 0.1586, 0.1633],
        [0.1590, 0.1680, 0.1705,  ..., 0.1560, 0.1644, 0.1667],
        [0.1585, 0.1667, 0.1698,  ..., 0.1557, 0.1632, 0.1670],
        ...,
        [0.1589, 0.1670, 0.1684,  ..., 0.1563, 0.1581, 0.1639],
        [0.1561, 0.1628, 0.1699,  ..., 0.1555, 0.1606, 0.1648],
        [0.1552, 0.1637, 0.1708,  ..., 0.1567, 0.1619, 0.1659]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:22,  1.18it/s]

tensor([[0.1576, 0.1660, 0.1713,  ..., 0.1576, 0.1615, 0.1658],
        [0.1596, 0.1654, 0.1729,  ..., 0.1567, 0.1642, 0.1701],
        [0.1567, 0.1635, 0.1723,  ..., 0.1589, 0.1600, 0.1691],
        ...,
        [0.1599, 0.1669, 0.1723,  ..., 0.1559, 0.1646, 0.1656],
        [0.1615, 0.1647, 0.1745,  ..., 0.1570, 0.1661, 0.1669],
        [0.1577, 0.1666, 0.1694,  ..., 0.1566, 0.1638, 0.1654]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:22,  1.13it/s]

tensor([[0.1578, 0.1657, 0.1715,  ..., 0.1568, 0.1598, 0.1654],
        [0.1536, 0.1602, 0.1607,  ..., 0.1485, 0.1517, 0.1570],
        [0.1632, 0.1687, 0.1748,  ..., 0.1562, 0.1671, 0.1688],
        ...,
        [0.1578, 0.1649, 0.1696,  ..., 0.1564, 0.1596, 0.1656],
        [0.1592, 0.1655, 0.1702,  ..., 0.1569, 0.1612, 0.1656],
        [0.1577, 0.1645, 0.1742,  ..., 0.1583, 0.1629, 0.1689]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:22,  1.12it/s]

tensor([[0.1588, 0.1654, 0.1721,  ..., 0.1566, 0.1614, 0.1677],
        [0.1593, 0.1659, 0.1739,  ..., 0.1573, 0.1632, 0.1699],
        [0.1596, 0.1653, 0.1719,  ..., 0.1623, 0.1614, 0.1630],
        ...,
        [0.1527, 0.1609, 0.1645,  ..., 0.1522, 0.1561, 0.1590],
        [0.1617, 0.1696, 0.1717,  ..., 0.1576, 0.1600, 0.1634],
        [0.1577, 0.1658, 0.1709,  ..., 0.1587, 0.1648, 0.1672]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:20,  1.18it/s]

tensor([[0.1566, 0.1646, 0.1691,  ..., 0.1549, 0.1620, 0.1642],
        [0.1605, 0.1661, 0.1697,  ..., 0.1573, 0.1604, 0.1669],
        [0.1581, 0.1668, 0.1727,  ..., 0.1595, 0.1620, 0.1676],
        ...,
        [0.1601, 0.1687, 0.1770,  ..., 0.1599, 0.1666, 0.1713],
        [0.1575, 0.1673, 0.1716,  ..., 0.1565, 0.1627, 0.1646],
        [0.1560, 0.1620, 0.1701,  ..., 0.1581, 0.1604, 0.1649]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:18,  1.24it/s]

tensor([[0.1580, 0.1631, 0.1697,  ..., 0.1571, 0.1625, 0.1661],
        [0.1602, 0.1668, 0.1712,  ..., 0.1570, 0.1636, 0.1670],
        [0.1583, 0.1620, 0.1688,  ..., 0.1528, 0.1543, 0.1595],
        ...,
        [0.1563, 0.1663, 0.1684,  ..., 0.1546, 0.1615, 0.1620],
        [0.1557, 0.1641, 0.1670,  ..., 0.1549, 0.1583, 0.1623],
        [0.1579, 0.1641, 0.1711,  ..., 0.1555, 0.1613, 0.1636]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:25<00:16,  1.30it/s]

tensor([[0.1573, 0.1681, 0.1712,  ..., 0.1554, 0.1640, 0.1672],
        [0.1575, 0.1671, 0.1719,  ..., 0.1564, 0.1606, 0.1668],
        [0.1559, 0.1615, 0.1707,  ..., 0.1529, 0.1568, 0.1638],
        ...,
        [0.1583, 0.1653, 0.1725,  ..., 0.1573, 0.1626, 0.1681],
        [0.1586, 0.1662, 0.1716,  ..., 0.1560, 0.1616, 0.1665],
        [0.1576, 0.1664, 0.1724,  ..., 0.1557, 0.1637, 0.1670]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:15,  1.36it/s]

tensor([[0.1572, 0.1643, 0.1686,  ..., 0.1575, 0.1607, 0.1669],
        [0.1609, 0.1667, 0.1743,  ..., 0.1583, 0.1663, 0.1680],
        [0.1618, 0.1683, 0.1731,  ..., 0.1574, 0.1638, 0.1688],
        ...,
        [0.1580, 0.1657, 0.1733,  ..., 0.1570, 0.1641, 0.1678],
        [0.1574, 0.1646, 0.1712,  ..., 0.1559, 0.1620, 0.1662],
        [0.1608, 0.1656, 0.1761,  ..., 0.1570, 0.1628, 0.1697]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:14,  1.41it/s]

tensor([[0.1581, 0.1665, 0.1719,  ..., 0.1547, 0.1623, 0.1673],
        [0.1605, 0.1683, 0.1742,  ..., 0.1575, 0.1615, 0.1684],
        [0.1562, 0.1642, 0.1694,  ..., 0.1571, 0.1650, 0.1674],
        ...,
        [0.1582, 0.1644, 0.1744,  ..., 0.1585, 0.1624, 0.1688],
        [0.1598, 0.1636, 0.1717,  ..., 0.1584, 0.1652, 0.1675],
        [0.1581, 0.1667, 0.1728,  ..., 0.1588, 0.1646, 0.1669]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:27<00:13,  1.44it/s]

tensor([[0.1564, 0.1657, 0.1705,  ..., 0.1542, 0.1641, 0.1633],
        [0.1584, 0.1642, 0.1685,  ..., 0.1573, 0.1590, 0.1609],
        [0.1610, 0.1650, 0.1725,  ..., 0.1580, 0.1617, 0.1688],
        ...,
        [0.1565, 0.1672, 0.1720,  ..., 0.1579, 0.1644, 0.1677],
        [0.1539, 0.1698, 0.1691,  ..., 0.1592, 0.1634, 0.1609],
        [0.1625, 0.1679, 0.1743,  ..., 0.1567, 0.1656, 0.1709]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:12,  1.47it/s]

tensor([[0.1539, 0.1619, 0.1662,  ..., 0.1509, 0.1532, 0.1607],
        [0.1586, 0.1670, 0.1716,  ..., 0.1565, 0.1617, 0.1677],
        [0.1589, 0.1652, 0.1717,  ..., 0.1582, 0.1637, 0.1692],
        ...,
        [0.1591, 0.1639, 0.1714,  ..., 0.1572, 0.1621, 0.1659],
        [0.1614, 0.1661, 0.1743,  ..., 0.1562, 0.1646, 0.1721],
        [0.1592, 0.1701, 0.1726,  ..., 0.1574, 0.1630, 0.1677]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.51it/s]

tensor([[0.1512, 0.1628, 0.1634,  ..., 0.1522, 0.1542, 0.1588],
        [0.1587, 0.1650, 0.1738,  ..., 0.1573, 0.1629, 0.1663],
        [0.1587, 0.1685, 0.1707,  ..., 0.1562, 0.1613, 0.1652],
        ...,
        [0.1594, 0.1670, 0.1729,  ..., 0.1570, 0.1649, 0.1676],
        [0.1593, 0.1646, 0.1720,  ..., 0.1563, 0.1622, 0.1674],
        [0.1576, 0.1662, 0.1715,  ..., 0.1551, 0.1611, 0.1656]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:28<00:10,  1.51it/s]

tensor([[0.1569, 0.1654, 0.1712,  ..., 0.1554, 0.1610, 0.1644],
        [0.1560, 0.1650, 0.1702,  ..., 0.1562, 0.1600, 0.1648],
        [0.1601, 0.1689, 0.1737,  ..., 0.1603, 0.1657, 0.1663],
        ...,
        [0.1593, 0.1668, 0.1715,  ..., 0.1574, 0.1638, 0.1663],
        [0.1596, 0.1667, 0.1749,  ..., 0.1570, 0.1661, 0.1659],
        [0.1586, 0.1650, 0.1719,  ..., 0.1602, 0.1614, 0.1643]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:09,  1.52it/s]

tensor([[0.1614, 0.1677, 0.1746,  ..., 0.1565, 0.1643, 0.1678],
        [0.1613, 0.1692, 0.1741,  ..., 0.1593, 0.1660, 0.1691],
        [0.1542, 0.1639, 0.1676,  ..., 0.1575, 0.1601, 0.1651],
        ...,
        [0.1542, 0.1636, 0.1685,  ..., 0.1550, 0.1580, 0.1607],
        [0.1592, 0.1643, 0.1749,  ..., 0.1556, 0.1600, 0.1647],
        [0.1573, 0.1676, 0.1719,  ..., 0.1556, 0.1649, 0.1684]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:09,  1.53it/s]

tensor([[0.1584, 0.1645, 0.1726,  ..., 0.1575, 0.1616, 0.1683],
        [0.1580, 0.1636, 0.1699,  ..., 0.1541, 0.1614, 0.1658],
        [0.1544, 0.1686, 0.1649,  ..., 0.1570, 0.1630, 0.1609],
        ...,
        [0.1570, 0.1641, 0.1712,  ..., 0.1565, 0.1603, 0.1669],
        [0.1481, 0.1517, 0.1585,  ..., 0.1435, 0.1477, 0.1565],
        [0.1598, 0.1639, 0.1712,  ..., 0.1542, 0.1621, 0.1617]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.50it/s]

tensor([[0.1601, 0.1654, 0.1737,  ..., 0.1563, 0.1617, 0.1660],
        [0.1590, 0.1668, 0.1736,  ..., 0.1558, 0.1638, 0.1696],
        [0.1575, 0.1637, 0.1720,  ..., 0.1597, 0.1629, 0.1663],
        ...,
        [0.1547, 0.1630, 0.1655,  ..., 0.1534, 0.1571, 0.1601],
        [0.1611, 0.1672, 0.1757,  ..., 0.1584, 0.1647, 0.1683],
        [0.1619, 0.1662, 0.1726,  ..., 0.1595, 0.1648, 0.1670]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:08,  1.49it/s]

tensor([[0.1568, 0.1642, 0.1702,  ..., 0.1559, 0.1598, 0.1634],
        [0.1546, 0.1651, 0.1674,  ..., 0.1542, 0.1631, 0.1610],
        [0.1559, 0.1646, 0.1691,  ..., 0.1565, 0.1649, 0.1645],
        ...,
        [0.1595, 0.1674, 0.1727,  ..., 0.1564, 0.1642, 0.1686],
        [0.1592, 0.1691, 0.1689,  ..., 0.1562, 0.1632, 0.1627],
        [0.1585, 0.1644, 0.1734,  ..., 0.1564, 0.1577, 0.1664]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.50it/s]

tensor([[0.1589, 0.1677, 0.1722,  ..., 0.1595, 0.1643, 0.1658],
        [0.1579, 0.1672, 0.1700,  ..., 0.1571, 0.1619, 0.1666],
        [0.1611, 0.1660, 0.1736,  ..., 0.1561, 0.1626, 0.1714],
        ...,
        [0.1605, 0.1679, 0.1754,  ..., 0.1574, 0.1602, 0.1660],
        [0.1584, 0.1683, 0.1731,  ..., 0.1574, 0.1657, 0.1687],
        [0.1586, 0.1642, 0.1723,  ..., 0.1568, 0.1619, 0.1686]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.51it/s]

tensor([[0.1603, 0.1703, 0.1743,  ..., 0.1565, 0.1630, 0.1667],
        [0.1582, 0.1647, 0.1687,  ..., 0.1561, 0.1611, 0.1652],
        [0.1563, 0.1659, 0.1689,  ..., 0.1558, 0.1597, 0.1637],
        ...,
        [0.1596, 0.1690, 0.1762,  ..., 0.1564, 0.1660, 0.1692],
        [0.1585, 0.1692, 0.1744,  ..., 0.1548, 0.1650, 0.1669],
        [0.1570, 0.1648, 0.1699,  ..., 0.1592, 0.1634, 0.1675]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:33<00:05,  1.51it/s]

tensor([[0.1575, 0.1683, 0.1750,  ..., 0.1583, 0.1623, 0.1680],
        [0.1583, 0.1657, 0.1749,  ..., 0.1573, 0.1642, 0.1661],
        [0.1549, 0.1637, 0.1698,  ..., 0.1566, 0.1602, 0.1637],
        ...,
        [0.1574, 0.1645, 0.1678,  ..., 0.1567, 0.1616, 0.1652],
        [0.1583, 0.1647, 0.1679,  ..., 0.1531, 0.1576, 0.1646],
        [0.1602, 0.1684, 0.1726,  ..., 0.1590, 0.1678, 0.1728]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.53it/s]

tensor([[0.1531, 0.1624, 0.1669,  ..., 0.1526, 0.1575, 0.1585],
        [0.1579, 0.1648, 0.1720,  ..., 0.1562, 0.1640, 0.1673],
        [0.1599, 0.1682, 0.1723,  ..., 0.1525, 0.1627, 0.1663],
        ...,
        [0.1612, 0.1686, 0.1751,  ..., 0.1591, 0.1656, 0.1711],
        [0.1534, 0.1657, 0.1656,  ..., 0.1548, 0.1603, 0.1608],
        [0.1601, 0.1662, 0.1684,  ..., 0.1587, 0.1652, 0.1663]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.51it/s]

tensor([[0.1595, 0.1658, 0.1727,  ..., 0.1570, 0.1635, 0.1663],
        [0.1564, 0.1633, 0.1705,  ..., 0.1565, 0.1591, 0.1634],
        [0.1635, 0.1701, 0.1767,  ..., 0.1574, 0.1672, 0.1712],
        ...,
        [0.1609, 0.1662, 0.1736,  ..., 0.1577, 0.1635, 0.1687],
        [0.1579, 0.1681, 0.1717,  ..., 0.1569, 0.1623, 0.1664],
        [0.1597, 0.1658, 0.1749,  ..., 0.1570, 0.1636, 0.1694]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:03,  1.52it/s]

tensor([[0.1575, 0.1656, 0.1697,  ..., 0.1557, 0.1591, 0.1659],
        [0.1596, 0.1641, 0.1702,  ..., 0.1570, 0.1609, 0.1678],
        [0.1604, 0.1659, 0.1719,  ..., 0.1582, 0.1633, 0.1704],
        ...,
        [0.1587, 0.1648, 0.1730,  ..., 0.1593, 0.1661, 0.1673],
        [0.1585, 0.1712, 0.1713,  ..., 0.1593, 0.1644, 0.1665],
        [0.1607, 0.1661, 0.1728,  ..., 0.1576, 0.1655, 0.1671]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:36<00:03,  1.53it/s]

tensor([[0.1579, 0.1629, 0.1688,  ..., 0.1549, 0.1595, 0.1650],
        [0.1586, 0.1637, 0.1680,  ..., 0.1557, 0.1622, 0.1634],
        [0.1606, 0.1671, 0.1732,  ..., 0.1561, 0.1653, 0.1678],
        ...,
        [0.1570, 0.1657, 0.1674,  ..., 0.1583, 0.1633, 0.1653],
        [0.1575, 0.1637, 0.1694,  ..., 0.1564, 0.1593, 0.1651],
        [0.1555, 0.1650, 0.1703,  ..., 0.1556, 0.1588, 0.1602]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.54it/s]

tensor([[0.1604, 0.1657, 0.1750,  ..., 0.1567, 0.1645, 0.1699],
        [0.1564, 0.1687, 0.1707,  ..., 0.1573, 0.1647, 0.1672],
        [0.1599, 0.1745, 0.1738,  ..., 0.1589, 0.1702, 0.1672],
        ...,
        [0.1578, 0.1680, 0.1730,  ..., 0.1566, 0.1638, 0.1661],
        [0.1578, 0.1623, 0.1721,  ..., 0.1556, 0.1604, 0.1655],
        [0.1561, 0.1628, 0.1710,  ..., 0.1571, 0.1598, 0.1630]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:01,  1.52it/s]

tensor([[0.1604, 0.1680, 0.1746,  ..., 0.1552, 0.1651, 0.1700],
        [0.1589, 0.1673, 0.1749,  ..., 0.1575, 0.1633, 0.1697],
        [0.1600, 0.1681, 0.1736,  ..., 0.1562, 0.1664, 0.1669],
        ...,
        [0.1577, 0.1645, 0.1691,  ..., 0.1579, 0.1609, 0.1631],
        [0.1609, 0.1690, 0.1750,  ..., 0.1555, 0.1653, 0.1668],
        [0.1588, 0.1670, 0.1713,  ..., 0.1567, 0.1635, 0.1659]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.50it/s]

tensor([[0.1579, 0.1653, 0.1735,  ..., 0.1574, 0.1641, 0.1670],
        [0.1611, 0.1663, 0.1742,  ..., 0.1567, 0.1664, 0.1699],
        [0.1566, 0.1655, 0.1689,  ..., 0.1565, 0.1605, 0.1652],
        ...,
        [0.1562, 0.1655, 0.1709,  ..., 0.1552, 0.1622, 0.1669],
        [0.1604, 0.1698, 0.1758,  ..., 0.1568, 0.1702, 0.1698],
        [0.1575, 0.1638, 0.1696,  ..., 0.1576, 0.1587, 0.1660]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.50it/s]

tensor([[0.1563, 0.1639, 0.1672,  ..., 0.1558, 0.1625, 0.1622],
        [0.1586, 0.1656, 0.1720,  ..., 0.1569, 0.1598, 0.1672],
        [0.1587, 0.1656, 0.1703,  ..., 0.1589, 0.1602, 0.1652],
        ...,
        [0.1601, 0.1685, 0.1736,  ..., 0.1596, 0.1676, 0.1697],
        [0.1544, 0.1611, 0.1689,  ..., 0.1535, 0.1599, 0.1631],
        [0.1618, 0.1655, 0.1727,  ..., 0.1580, 0.1628, 0.1667]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.44it/s]
 12%|█▎        | 1/8 [00:00<00:01,  3.60it/s]

tensor([[0.1578, 0.1653, 0.1702,  ..., 0.1564, 0.1615, 0.1649],
        [0.1592, 0.1649, 0.1731,  ..., 0.1570, 0.1615, 0.1683],
        [0.1605, 0.1690, 0.1749,  ..., 0.1593, 0.1653, 0.1670],
        ...,
        [0.1575, 0.1665, 0.1717,  ..., 0.1599, 0.1664, 0.1693],
        [0.1553, 0.1658, 0.1684,  ..., 0.1551, 0.1619, 0.1653],
        [0.1593, 0.1721, 0.1742,  ..., 0.1586, 0.1628, 0.1678]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.56it/s]

tensor([[0.1550, 0.1692, 0.1702,  ..., 0.1546, 0.1627, 0.1615],
        [0.1581, 0.1684, 0.1721,  ..., 0.1562, 0.1613, 0.1671],
        [0.1605, 0.1697, 0.1756,  ..., 0.1584, 0.1659, 0.1702],
        ...,
        [0.1579, 0.1655, 0.1716,  ..., 0.1568, 0.1642, 0.1680],
        [0.1595, 0.1652, 0.1761,  ..., 0.1586, 0.1634, 0.1666],
        [0.1537, 0.1626, 0.1669,  ..., 0.1553, 0.1595, 0.1613]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.57it/s]

tensor([[0.1575, 0.1628, 0.1704,  ..., 0.1570, 0.1627, 0.1656],
        [0.1591, 0.1674, 0.1739,  ..., 0.1579, 0.1648, 0.1689],
        [0.1599, 0.1659, 0.1717,  ..., 0.1555, 0.1630, 0.1690],
        ...,
        [0.1602, 0.1713, 0.1738,  ..., 0.1577, 0.1621, 0.1641],
        [0.1565, 0.1666, 0.1722,  ..., 0.1557, 0.1645, 0.1654],
        [0.1582, 0.1628, 0.1695,  ..., 0.1545, 0.1592, 0.1642]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.56it/s]

tensor([[0.1587, 0.1657, 0.1749,  ..., 0.1566, 0.1622, 0.1704],
        [0.1591, 0.1651, 0.1719,  ..., 0.1548, 0.1591, 0.1697],
        [0.1581, 0.1655, 0.1724,  ..., 0.1580, 0.1646, 0.1707],
        ...,
        [0.1590, 0.1684, 0.1727,  ..., 0.1575, 0.1593, 0.1621],
        [0.1551, 0.1614, 0.1678,  ..., 0.1564, 0.1615, 0.1624],
        [0.1586, 0.1694, 0.1725,  ..., 0.1578, 0.1645, 0.1690]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.54it/s]

tensor([[0.1557, 0.1651, 0.1693,  ..., 0.1585, 0.1633, 0.1693],
        [0.1601, 0.1669, 0.1766,  ..., 0.1553, 0.1652, 0.1683],
        [0.1601, 0.1658, 0.1718,  ..., 0.1552, 0.1637, 0.1668],
        ...,
        [0.1602, 0.1673, 0.1729,  ..., 0.1582, 0.1630, 0.1690],
        [0.1593, 0.1686, 0.1728,  ..., 0.1579, 0.1648, 0.1701],
        [0.1562, 0.1644, 0.1693,  ..., 0.1571, 0.1584, 0.1641]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.58it/s]

tensor([[0.1593, 0.1687, 0.1754,  ..., 0.1557, 0.1668, 0.1687],
        [0.1559, 0.1648, 0.1666,  ..., 0.1573, 0.1644, 0.1629],
        [0.1554, 0.1634, 0.1703,  ..., 0.1581, 0.1613, 0.1646],
        ...,
        [0.1616, 0.1652, 0.1711,  ..., 0.1555, 0.1597, 0.1650],
        [0.1585, 0.1671, 0.1735,  ..., 0.1572, 0.1689, 0.1679],
        [0.1590, 0.1657, 0.1714,  ..., 0.1568, 0.1637, 0.1671]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:01<00:00,  3.52it/s]

tensor([[0.1570, 0.1661, 0.1739,  ..., 0.1586, 0.1608, 0.1652],
        [0.1575, 0.1671, 0.1708,  ..., 0.1575, 0.1637, 0.1653],
        [0.1525, 0.1596, 0.1633,  ..., 0.1520, 0.1585, 0.1623],
        ...,
        [0.1563, 0.1615, 0.1688,  ..., 0.1539, 0.1571, 0.1617],
        [0.1534, 0.1556, 0.1651,  ..., 0.1498, 0.1506, 0.1564],
        [0.1579, 0.1635, 0.1690,  ..., 0.1555, 0.1593, 0.1636]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.53it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1570, 0.1669, 0.1685,  ..., 0.1591, 0.1599, 0.1648],
        [0.1562, 0.1641, 0.1700,  ..., 0.1566, 0.1607, 0.1661],
        [0.1570, 0.1648, 0.1720,  ..., 0.1562, 0.1616, 0.1666],
        ...,
        [0.1570, 0.1643, 0.1709,  ..., 0.1577, 0.1632, 0.1664],
        [0.1515, 0.1602, 0.1634,  ..., 0.1530, 0.1577, 0.1620],
        [0.1558, 0.1674, 0.1725,  ..., 0.1540, 0.1617, 0.1649]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 8, train_loss: -0.38564473390579224, valid_loss: -0.4099506139755249
Parameter containing:
tensor([[ 1.0005,  1.0006,  1.0005,  1.0005,  1.0005,  1.0005,  1.0006,  1.0005,
          1.0005,  1.0006, -0.4994, -0.4995, -0.4996, -0.4995, -0.4995, -0.4995,
         -0.4995, -0.4995, -0.4995, -0.4995],
     

  2%|▏         | 1/57 [00:00<00:36,  1.55it/s]

tensor([[0.1563, 0.1672, 0.1667,  ..., 0.1558, 0.1616, 0.1641],
        [0.1603, 0.1667, 0.1722,  ..., 0.1583, 0.1608, 0.1666],
        [0.1525, 0.1615, 0.1647,  ..., 0.1536, 0.1601, 0.1634],
        ...,
        [0.1512, 0.1633, 0.1623,  ..., 0.1563, 0.1587, 0.1581],
        [0.1591, 0.1646, 0.1745,  ..., 0.1569, 0.1636, 0.1705],
        [0.1592, 0.1675, 0.1735,  ..., 0.1574, 0.1638, 0.1703]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:35,  1.56it/s]

tensor([[0.1604, 0.1658, 0.1747,  ..., 0.1603, 0.1671, 0.1684],
        [0.1523, 0.1612, 0.1641,  ..., 0.1547, 0.1581, 0.1602],
        [0.1594, 0.1663, 0.1756,  ..., 0.1596, 0.1640, 0.1684],
        ...,
        [0.1598, 0.1669, 0.1722,  ..., 0.1575, 0.1641, 0.1673],
        [0.1569, 0.1665, 0.1729,  ..., 0.1531, 0.1617, 0.1645],
        [0.1605, 0.1653, 0.1731,  ..., 0.1562, 0.1633, 0.1669]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:34,  1.56it/s]

tensor([[0.1540, 0.1623, 0.1686,  ..., 0.1561, 0.1580, 0.1623],
        [0.1509, 0.1613, 0.1621,  ..., 0.1559, 0.1559, 0.1568],
        [0.1603, 0.1661, 0.1730,  ..., 0.1560, 0.1635, 0.1711],
        ...,
        [0.1593, 0.1681, 0.1734,  ..., 0.1566, 0.1620, 0.1702],
        [0.1587, 0.1660, 0.1758,  ..., 0.1574, 0.1640, 0.1688],
        [0.1556, 0.1643, 0.1731,  ..., 0.1566, 0.1626, 0.1654]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:34,  1.54it/s]

tensor([[0.1556, 0.1628, 0.1669,  ..., 0.1551, 0.1603, 0.1646],
        [0.1596, 0.1658, 0.1724,  ..., 0.1578, 0.1623, 0.1688],
        [0.1595, 0.1641, 0.1713,  ..., 0.1547, 0.1592, 0.1638],
        ...,
        [0.1564, 0.1609, 0.1687,  ..., 0.1534, 0.1547, 0.1616],
        [0.1548, 0.1634, 0.1682,  ..., 0.1547, 0.1588, 0.1580],
        [0.1576, 0.1637, 0.1676,  ..., 0.1545, 0.1595, 0.1617]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.49it/s]

tensor([[0.1542, 0.1646, 0.1715,  ..., 0.1555, 0.1646, 0.1623],
        [0.1576, 0.1668, 0.1712,  ..., 0.1569, 0.1620, 0.1689],
        [0.1580, 0.1611, 0.1689,  ..., 0.1553, 0.1588, 0.1632],
        ...,
        [0.1597, 0.1664, 0.1750,  ..., 0.1591, 0.1635, 0.1687],
        [0.1592, 0.1646, 0.1714,  ..., 0.1596, 0.1626, 0.1649],
        [0.1555, 0.1632, 0.1686,  ..., 0.1560, 0.1628, 0.1650]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:35,  1.45it/s]

tensor([[0.1596, 0.1701, 0.1733,  ..., 0.1593, 0.1669, 0.1665],
        [0.1564, 0.1633, 0.1703,  ..., 0.1580, 0.1603, 0.1660],
        [0.1501, 0.1602, 0.1632,  ..., 0.1550, 0.1606, 0.1621],
        ...,
        [0.1553, 0.1634, 0.1697,  ..., 0.1539, 0.1579, 0.1610],
        [0.1502, 0.1628, 0.1644,  ..., 0.1528, 0.1590, 0.1606],
        [0.1578, 0.1651, 0.1713,  ..., 0.1591, 0.1616, 0.1686]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:35,  1.41it/s]

tensor([[0.1576, 0.1644, 0.1724,  ..., 0.1579, 0.1612, 0.1670],
        [0.1560, 0.1624, 0.1680,  ..., 0.1583, 0.1618, 0.1624],
        [0.1589, 0.1661, 0.1713,  ..., 0.1546, 0.1624, 0.1697],
        ...,
        [0.1568, 0.1667, 0.1692,  ..., 0.1556, 0.1653, 0.1672],
        [0.1555, 0.1674, 0.1686,  ..., 0.1542, 0.1644, 0.1627],
        [0.1592, 0.1648, 0.1700,  ..., 0.1571, 0.1634, 0.1651]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:35,  1.40it/s]

tensor([[0.1553, 0.1614, 0.1683,  ..., 0.1575, 0.1616, 0.1643],
        [0.1578, 0.1629, 0.1709,  ..., 0.1562, 0.1592, 0.1658],
        [0.1595, 0.1676, 0.1762,  ..., 0.1578, 0.1657, 0.1705],
        ...,
        [0.1583, 0.1648, 0.1697,  ..., 0.1557, 0.1607, 0.1630],
        [0.1594, 0.1643, 0.1723,  ..., 0.1587, 0.1620, 0.1671],
        [0.1606, 0.1675, 0.1729,  ..., 0.1585, 0.1644, 0.1672]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:34,  1.40it/s]

tensor([[0.1587, 0.1637, 0.1720,  ..., 0.1562, 0.1626, 0.1674],
        [0.1570, 0.1650, 0.1698,  ..., 0.1613, 0.1637, 0.1624],
        [0.1540, 0.1612, 0.1660,  ..., 0.1562, 0.1619, 0.1633],
        ...,
        [0.1554, 0.1639, 0.1665,  ..., 0.1540, 0.1568, 0.1592],
        [0.1543, 0.1637, 0.1682,  ..., 0.1523, 0.1580, 0.1588],
        [0.1624, 0.1669, 0.1766,  ..., 0.1569, 0.1673, 0.1704]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:35,  1.34it/s]

tensor([[0.1596, 0.1654, 0.1711,  ..., 0.1561, 0.1637, 0.1657],
        [0.1576, 0.1650, 0.1709,  ..., 0.1560, 0.1642, 0.1665],
        [0.1566, 0.1630, 0.1691,  ..., 0.1562, 0.1622, 0.1664],
        ...,
        [0.1587, 0.1657, 0.1720,  ..., 0.1575, 0.1606, 0.1680],
        [0.1575, 0.1655, 0.1732,  ..., 0.1562, 0.1618, 0.1636],
        [0.1563, 0.1644, 0.1703,  ..., 0.1539, 0.1605, 0.1632]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:34,  1.33it/s]

tensor([[0.1589, 0.1687, 0.1728,  ..., 0.1582, 0.1634, 0.1707],
        [0.1595, 0.1639, 0.1731,  ..., 0.1572, 0.1617, 0.1683],
        [0.1564, 0.1635, 0.1663,  ..., 0.1535, 0.1564, 0.1603],
        ...,
        [0.1598, 0.1659, 0.1733,  ..., 0.1568, 0.1633, 0.1702],
        [0.1568, 0.1657, 0.1697,  ..., 0.1572, 0.1653, 0.1639],
        [0.1547, 0.1646, 0.1686,  ..., 0.1570, 0.1608, 0.1652]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:33,  1.36it/s]

tensor([[0.1584, 0.1659, 0.1703,  ..., 0.1570, 0.1625, 0.1676],
        [0.1563, 0.1664, 0.1732,  ..., 0.1547, 0.1640, 0.1669],
        [0.1609, 0.1691, 0.1763,  ..., 0.1570, 0.1673, 0.1686],
        ...,
        [0.1586, 0.1678, 0.1736,  ..., 0.1571, 0.1653, 0.1700],
        [0.1561, 0.1591, 0.1652,  ..., 0.1532, 0.1568, 0.1597],
        [0.1589, 0.1665, 0.1725,  ..., 0.1580, 0.1658, 0.1688]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:33,  1.33it/s]

tensor([[0.1552, 0.1626, 0.1666,  ..., 0.1523, 0.1590, 0.1582],
        [0.1499, 0.1563, 0.1621,  ..., 0.1523, 0.1521, 0.1536],
        [0.1616, 0.1694, 0.1742,  ..., 0.1571, 0.1652, 0.1689],
        ...,
        [0.1600, 0.1641, 0.1732,  ..., 0.1552, 0.1625, 0.1671],
        [0.1582, 0.1624, 0.1713,  ..., 0.1560, 0.1595, 0.1663],
        [0.1568, 0.1644, 0.1699,  ..., 0.1577, 0.1627, 0.1655]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:32,  1.33it/s]

tensor([[0.1552, 0.1636, 0.1677,  ..., 0.1580, 0.1582, 0.1628],
        [0.1578, 0.1660, 0.1709,  ..., 0.1562, 0.1619, 0.1646],
        [0.1555, 0.1625, 0.1682,  ..., 0.1516, 0.1579, 0.1612],
        ...,
        [0.1587, 0.1674, 0.1722,  ..., 0.1576, 0.1628, 0.1643],
        [0.1594, 0.1666, 0.1732,  ..., 0.1573, 0.1643, 0.1685],
        [0.1606, 0.1655, 0.1704,  ..., 0.1544, 0.1635, 0.1667]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:30,  1.39it/s]

tensor([[0.1571, 0.1680, 0.1717,  ..., 0.1594, 0.1675, 0.1643],
        [0.1623, 0.1679, 0.1780,  ..., 0.1581, 0.1642, 0.1691],
        [0.1616, 0.1662, 0.1742,  ..., 0.1570, 0.1621, 0.1673],
        ...,
        [0.1578, 0.1646, 0.1703,  ..., 0.1559, 0.1608, 0.1636],
        [0.1603, 0.1647, 0.1720,  ..., 0.1572, 0.1630, 0.1676],
        [0.1616, 0.1685, 0.1766,  ..., 0.1585, 0.1633, 0.1681]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:30,  1.34it/s]

tensor([[0.1602, 0.1666, 0.1700,  ..., 0.1560, 0.1573, 0.1645],
        [0.1603, 0.1688, 0.1759,  ..., 0.1569, 0.1610, 0.1704],
        [0.1563, 0.1636, 0.1736,  ..., 0.1565, 0.1605, 0.1631],
        ...,
        [0.1496, 0.1590, 0.1621,  ..., 0.1516, 0.1563, 0.1611],
        [0.1578, 0.1640, 0.1713,  ..., 0.1534, 0.1573, 0.1633],
        [0.1543, 0.1646, 0.1710,  ..., 0.1520, 0.1628, 0.1623]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:29,  1.34it/s]

tensor([[0.1548, 0.1679, 0.1674,  ..., 0.1549, 0.1636, 0.1646],
        [0.1560, 0.1648, 0.1698,  ..., 0.1565, 0.1593, 0.1649],
        [0.1561, 0.1622, 0.1700,  ..., 0.1560, 0.1570, 0.1659],
        ...,
        [0.1613, 0.1674, 0.1746,  ..., 0.1579, 0.1661, 0.1690],
        [0.1570, 0.1644, 0.1718,  ..., 0.1584, 0.1619, 0.1683],
        [0.1570, 0.1656, 0.1716,  ..., 0.1586, 0.1624, 0.1649]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:29,  1.33it/s]

tensor([[0.1601, 0.1647, 0.1701,  ..., 0.1576, 0.1619, 0.1657],
        [0.1585, 0.1676, 0.1726,  ..., 0.1574, 0.1659, 0.1692],
        [0.1614, 0.1672, 0.1721,  ..., 0.1579, 0.1653, 0.1677],
        ...,
        [0.1577, 0.1638, 0.1713,  ..., 0.1572, 0.1586, 0.1659],
        [0.1591, 0.1627, 0.1680,  ..., 0.1565, 0.1570, 0.1636],
        [0.1588, 0.1655, 0.1718,  ..., 0.1554, 0.1621, 0.1655]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:29,  1.31it/s]

tensor([[0.1560, 0.1669, 0.1705,  ..., 0.1569, 0.1624, 0.1635],
        [0.1541, 0.1640, 0.1671,  ..., 0.1556, 0.1606, 0.1606],
        [0.1585, 0.1658, 0.1766,  ..., 0.1565, 0.1619, 0.1647],
        ...,
        [0.1585, 0.1633, 0.1704,  ..., 0.1577, 0.1620, 0.1678],
        [0.1612, 0.1687, 0.1751,  ..., 0.1589, 0.1652, 0.1675],
        [0.1619, 0.1667, 0.1754,  ..., 0.1570, 0.1660, 0.1674]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:27,  1.33it/s]

tensor([[0.1593, 0.1673, 0.1733,  ..., 0.1595, 0.1681, 0.1676],
        [0.1570, 0.1671, 0.1776,  ..., 0.1571, 0.1672, 0.1671],
        [0.1586, 0.1673, 0.1726,  ..., 0.1567, 0.1642, 0.1661],
        ...,
        [0.1583, 0.1675, 0.1724,  ..., 0.1575, 0.1669, 0.1672],
        [0.1581, 0.1658, 0.1740,  ..., 0.1545, 0.1623, 0.1654],
        [0.1577, 0.1722, 0.1725,  ..., 0.1584, 0.1621, 0.1673]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:26,  1.35it/s]

tensor([[0.1587, 0.1651, 0.1731,  ..., 0.1601, 0.1636, 0.1702],
        [0.1598, 0.1686, 0.1745,  ..., 0.1558, 0.1657, 0.1689],
        [0.1574, 0.1665, 0.1703,  ..., 0.1542, 0.1633, 0.1627],
        ...,
        [0.1594, 0.1645, 0.1725,  ..., 0.1546, 0.1624, 0.1651],
        [0.1635, 0.1700, 0.1781,  ..., 0.1591, 0.1652, 0.1717],
        [0.1583, 0.1629, 0.1693,  ..., 0.1555, 0.1608, 0.1664]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:16<00:26,  1.34it/s]

tensor([[0.1557, 0.1640, 0.1690,  ..., 0.1556, 0.1601, 0.1634],
        [0.1575, 0.1644, 0.1729,  ..., 0.1568, 0.1615, 0.1683],
        [0.1569, 0.1644, 0.1704,  ..., 0.1533, 0.1611, 0.1630],
        ...,
        [0.1576, 0.1623, 0.1701,  ..., 0.1539, 0.1599, 0.1645],
        [0.1562, 0.1629, 0.1704,  ..., 0.1571, 0.1614, 0.1657],
        [0.1532, 0.1606, 0.1656,  ..., 0.1536, 0.1565, 0.1567]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:25,  1.32it/s]

tensor([[0.1576, 0.1650, 0.1745,  ..., 0.1593, 0.1610, 0.1649],
        [0.1525, 0.1632, 0.1657,  ..., 0.1538, 0.1586, 0.1606],
        [0.1590, 0.1668, 0.1708,  ..., 0.1573, 0.1623, 0.1691],
        ...,
        [0.1560, 0.1665, 0.1701,  ..., 0.1567, 0.1566, 0.1648],
        [0.1580, 0.1673, 0.1710,  ..., 0.1576, 0.1621, 0.1685],
        [0.1556, 0.1686, 0.1710,  ..., 0.1573, 0.1637, 0.1655]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:24,  1.33it/s]

tensor([[0.1562, 0.1674, 0.1682,  ..., 0.1582, 0.1639, 0.1623],
        [0.1594, 0.1660, 0.1714,  ..., 0.1563, 0.1658, 0.1687],
        [0.1581, 0.1670, 0.1726,  ..., 0.1565, 0.1633, 0.1662],
        ...,
        [0.1574, 0.1616, 0.1700,  ..., 0.1572, 0.1592, 0.1660],
        [0.1566, 0.1657, 0.1729,  ..., 0.1569, 0.1606, 0.1652],
        [0.1565, 0.1606, 0.1680,  ..., 0.1562, 0.1600, 0.1626]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:24,  1.32it/s]

tensor([[0.1567, 0.1645, 0.1666,  ..., 0.1563, 0.1587, 0.1643],
        [0.1556, 0.1665, 0.1710,  ..., 0.1561, 0.1642, 0.1640],
        [0.1575, 0.1677, 0.1737,  ..., 0.1590, 0.1632, 0.1650],
        ...,
        [0.1561, 0.1630, 0.1683,  ..., 0.1578, 0.1617, 0.1644],
        [0.1601, 0.1665, 0.1727,  ..., 0.1593, 0.1623, 0.1695],
        [0.1600, 0.1642, 0.1752,  ..., 0.1559, 0.1643, 0.1703]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:19<00:23,  1.29it/s]

tensor([[0.1606, 0.1656, 0.1737,  ..., 0.1568, 0.1646, 0.1705],
        [0.1557, 0.1649, 0.1688,  ..., 0.1551, 0.1603, 0.1653],
        [0.1563, 0.1635, 0.1725,  ..., 0.1558, 0.1626, 0.1656],
        ...,
        [0.1606, 0.1659, 0.1743,  ..., 0.1588, 0.1628, 0.1678],
        [0.1552, 0.1629, 0.1697,  ..., 0.1557, 0.1615, 0.1636],
        [0.1583, 0.1670, 0.1717,  ..., 0.1561, 0.1652, 0.1672]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:23,  1.30it/s]

tensor([[0.1552, 0.1672, 0.1717,  ..., 0.1581, 0.1615, 0.1639],
        [0.1600, 0.1653, 0.1732,  ..., 0.1563, 0.1620, 0.1695],
        [0.1562, 0.1636, 0.1712,  ..., 0.1564, 0.1629, 0.1661],
        ...,
        [0.1566, 0.1631, 0.1716,  ..., 0.1587, 0.1638, 0.1653],
        [0.1576, 0.1646, 0.1720,  ..., 0.1588, 0.1646, 0.1643],
        [0.1568, 0.1686, 0.1710,  ..., 0.1584, 0.1643, 0.1653]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:23,  1.25it/s]

tensor([[0.1607, 0.1698, 0.1758,  ..., 0.1590, 0.1634, 0.1695],
        [0.1579, 0.1680, 0.1734,  ..., 0.1566, 0.1638, 0.1666],
        [0.1554, 0.1606, 0.1668,  ..., 0.1574, 0.1622, 0.1631],
        ...,
        [0.1600, 0.1688, 0.1734,  ..., 0.1590, 0.1641, 0.1716],
        [0.1601, 0.1661, 0.1736,  ..., 0.1580, 0.1673, 0.1709],
        [0.1489, 0.1594, 0.1599,  ..., 0.1492, 0.1556, 0.1614]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:21<00:22,  1.24it/s]

tensor([[0.1590, 0.1646, 0.1748,  ..., 0.1583, 0.1636, 0.1674],
        [0.1599, 0.1659, 0.1749,  ..., 0.1573, 0.1640, 0.1711],
        [0.1597, 0.1683, 0.1746,  ..., 0.1585, 0.1667, 0.1679],
        ...,
        [0.1614, 0.1671, 0.1728,  ..., 0.1553, 0.1613, 0.1661],
        [0.1557, 0.1655, 0.1688,  ..., 0.1559, 0.1620, 0.1649],
        [0.1562, 0.1642, 0.1691,  ..., 0.1548, 0.1629, 0.1638]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:22<00:21,  1.25it/s]

tensor([[0.1570, 0.1658, 0.1700,  ..., 0.1578, 0.1610, 0.1661],
        [0.1569, 0.1624, 0.1682,  ..., 0.1573, 0.1578, 0.1629],
        [0.1593, 0.1675, 0.1720,  ..., 0.1560, 0.1649, 0.1678],
        ...,
        [0.1607, 0.1694, 0.1749,  ..., 0.1555, 0.1638, 0.1667],
        [0.1574, 0.1645, 0.1746,  ..., 0.1553, 0.1606, 0.1701],
        [0.1615, 0.1672, 0.1747,  ..., 0.1571, 0.1671, 0.1692]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:23<00:20,  1.27it/s]

tensor([[0.1621, 0.1682, 0.1756,  ..., 0.1578, 0.1660, 0.1687],
        [0.1573, 0.1634, 0.1712,  ..., 0.1565, 0.1594, 0.1638],
        [0.1617, 0.1663, 0.1752,  ..., 0.1566, 0.1622, 0.1698],
        ...,
        [0.1610, 0.1665, 0.1727,  ..., 0.1571, 0.1633, 0.1682],
        [0.1584, 0.1650, 0.1724,  ..., 0.1567, 0.1664, 0.1674],
        [0.1562, 0.1653, 0.1687,  ..., 0.1561, 0.1599, 0.1642]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:23<00:19,  1.30it/s]

tensor([[0.1583, 0.1635, 0.1699,  ..., 0.1567, 0.1602, 0.1653],
        [0.1608, 0.1685, 0.1748,  ..., 0.1592, 0.1652, 0.1703],
        [0.1585, 0.1655, 0.1712,  ..., 0.1542, 0.1631, 0.1650],
        ...,
        [0.1578, 0.1653, 0.1717,  ..., 0.1561, 0.1608, 0.1674],
        [0.1612, 0.1687, 0.1745,  ..., 0.1577, 0.1657, 0.1697],
        [0.1585, 0.1648, 0.1692,  ..., 0.1560, 0.1594, 0.1648]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:24<00:18,  1.32it/s]

tensor([[0.1559, 0.1661, 0.1700,  ..., 0.1550, 0.1624, 0.1623],
        [0.1612, 0.1653, 0.1727,  ..., 0.1581, 0.1637, 0.1696],
        [0.1565, 0.1630, 0.1676,  ..., 0.1532, 0.1612, 0.1613],
        ...,
        [0.1578, 0.1650, 0.1704,  ..., 0.1562, 0.1639, 0.1689],
        [0.1557, 0.1655, 0.1696,  ..., 0.1545, 0.1601, 0.1605],
        [0.1550, 0.1674, 0.1660,  ..., 0.1538, 0.1614, 0.1648]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:25<00:17,  1.31it/s]

tensor([[0.1594, 0.1692, 0.1723,  ..., 0.1588, 0.1661, 0.1684],
        [0.1589, 0.1633, 0.1692,  ..., 0.1561, 0.1575, 0.1660],
        [0.1571, 0.1661, 0.1694,  ..., 0.1568, 0.1650, 0.1661],
        ...,
        [0.1594, 0.1671, 0.1723,  ..., 0.1572, 0.1643, 0.1685],
        [0.1583, 0.1648, 0.1721,  ..., 0.1564, 0.1628, 0.1679],
        [0.1602, 0.1665, 0.1757,  ..., 0.1561, 0.1663, 0.1704]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:26<00:16,  1.35it/s]

tensor([[0.1432, 0.1552, 0.1549,  ..., 0.1461, 0.1548, 0.1547],
        [0.1415, 0.1522, 0.1525,  ..., 0.1427, 0.1517, 0.1511],
        [0.1543, 0.1624, 0.1665,  ..., 0.1507, 0.1565, 0.1579],
        ...,
        [0.1607, 0.1635, 0.1690,  ..., 0.1550, 0.1617, 0.1647],
        [0.1578, 0.1654, 0.1725,  ..., 0.1558, 0.1627, 0.1651],
        [0.1592, 0.1656, 0.1709,  ..., 0.1577, 0.1608, 0.1643]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:26<00:15,  1.34it/s]

tensor([[0.1553, 0.1645, 0.1704,  ..., 0.1573, 0.1569, 0.1639],
        [0.1565, 0.1643, 0.1714,  ..., 0.1577, 0.1631, 0.1654],
        [0.1597, 0.1650, 0.1706,  ..., 0.1566, 0.1619, 0.1653],
        ...,
        [0.1554, 0.1661, 0.1720,  ..., 0.1579, 0.1641, 0.1704],
        [0.1559, 0.1621, 0.1682,  ..., 0.1585, 0.1596, 0.1626],
        [0.1601, 0.1652, 0.1740,  ..., 0.1572, 0.1633, 0.1679]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:27<00:14,  1.34it/s]

tensor([[0.1589, 0.1659, 0.1745,  ..., 0.1555, 0.1629, 0.1651],
        [0.1576, 0.1655, 0.1692,  ..., 0.1564, 0.1621, 0.1672],
        [0.1585, 0.1661, 0.1694,  ..., 0.1572, 0.1626, 0.1637],
        ...,
        [0.1566, 0.1629, 0.1699,  ..., 0.1550, 0.1563, 0.1626],
        [0.1536, 0.1538, 0.1633,  ..., 0.1493, 0.1508, 0.1539],
        [0.1600, 0.1652, 0.1707,  ..., 0.1579, 0.1624, 0.1677]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:28<00:14,  1.35it/s]

tensor([[0.1599, 0.1671, 0.1729,  ..., 0.1582, 0.1654, 0.1660],
        [0.1597, 0.1642, 0.1716,  ..., 0.1551, 0.1599, 0.1649],
        [0.1571, 0.1637, 0.1693,  ..., 0.1565, 0.1585, 0.1643],
        ...,
        [0.1570, 0.1642, 0.1690,  ..., 0.1572, 0.1606, 0.1625],
        [0.1571, 0.1670, 0.1720,  ..., 0.1566, 0.1630, 0.1662],
        [0.1591, 0.1664, 0.1739,  ..., 0.1565, 0.1664, 0.1695]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:28<00:13,  1.37it/s]

tensor([[0.1563, 0.1638, 0.1672,  ..., 0.1573, 0.1606, 0.1642],
        [0.1587, 0.1688, 0.1719,  ..., 0.1569, 0.1630, 0.1660],
        [0.1589, 0.1685, 0.1732,  ..., 0.1589, 0.1660, 0.1684],
        ...,
        [0.1584, 0.1666, 0.1696,  ..., 0.1592, 0.1667, 0.1638],
        [0.1587, 0.1671, 0.1706,  ..., 0.1567, 0.1631, 0.1708],
        [0.1577, 0.1677, 0.1709,  ..., 0.1599, 0.1688, 0.1673]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:29<00:12,  1.39it/s]

tensor([[0.1589, 0.1619, 0.1717,  ..., 0.1568, 0.1604, 0.1671],
        [0.1558, 0.1658, 0.1693,  ..., 0.1571, 0.1611, 0.1650],
        [0.1569, 0.1655, 0.1703,  ..., 0.1593, 0.1638, 0.1648],
        ...,
        [0.1568, 0.1652, 0.1700,  ..., 0.1574, 0.1607, 0.1659],
        [0.1546, 0.1626, 0.1664,  ..., 0.1544, 0.1600, 0.1623],
        [0.1552, 0.1624, 0.1708,  ..., 0.1574, 0.1597, 0.1657]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:30<00:11,  1.43it/s]

tensor([[0.1602, 0.1660, 0.1748,  ..., 0.1555, 0.1642, 0.1680],
        [0.1594, 0.1670, 0.1725,  ..., 0.1555, 0.1643, 0.1673],
        [0.1575, 0.1665, 0.1719,  ..., 0.1570, 0.1630, 0.1624],
        ...,
        [0.1556, 0.1646, 0.1691,  ..., 0.1544, 0.1590, 0.1631],
        [0.1528, 0.1648, 0.1625,  ..., 0.1502, 0.1525, 0.1620],
        [0.1584, 0.1650, 0.1723,  ..., 0.1606, 0.1616, 0.1686]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:31<00:10,  1.41it/s]

tensor([[0.1549, 0.1638, 0.1667,  ..., 0.1536, 0.1581, 0.1617],
        [0.1585, 0.1673, 0.1731,  ..., 0.1564, 0.1652, 0.1676],
        [0.1566, 0.1664, 0.1701,  ..., 0.1569, 0.1639, 0.1666],
        ...,
        [0.1563, 0.1652, 0.1718,  ..., 0.1560, 0.1590, 0.1662],
        [0.1578, 0.1656, 0.1700,  ..., 0.1553, 0.1586, 0.1663],
        [0.1581, 0.1656, 0.1706,  ..., 0.1577, 0.1607, 0.1654]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:31<00:10,  1.37it/s]

tensor([[0.1578, 0.1672, 0.1714,  ..., 0.1557, 0.1599, 0.1665],
        [0.1585, 0.1689, 0.1727,  ..., 0.1584, 0.1653, 0.1680],
        [0.1549, 0.1637, 0.1686,  ..., 0.1561, 0.1584, 0.1641],
        ...,
        [0.1552, 0.1615, 0.1674,  ..., 0.1582, 0.1599, 0.1620],
        [0.1570, 0.1648, 0.1710,  ..., 0.1560, 0.1598, 0.1650],
        [0.1603, 0.1677, 0.1727,  ..., 0.1581, 0.1603, 0.1642]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:32<00:09,  1.36it/s]

tensor([[0.1454, 0.1542, 0.1590,  ..., 0.1506, 0.1498, 0.1520],
        [0.1609, 0.1688, 0.1756,  ..., 0.1596, 0.1690, 0.1693],
        [0.1514, 0.1614, 0.1638,  ..., 0.1543, 0.1568, 0.1578],
        ...,
        [0.1532, 0.1626, 0.1656,  ..., 0.1593, 0.1609, 0.1624],
        [0.1544, 0.1620, 0.1669,  ..., 0.1539, 0.1584, 0.1608],
        [0.1601, 0.1657, 0.1729,  ..., 0.1571, 0.1645, 0.1686]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:33<00:08,  1.36it/s]

tensor([[0.1579, 0.1646, 0.1724,  ..., 0.1592, 0.1627, 0.1678],
        [0.1597, 0.1648, 0.1689,  ..., 0.1596, 0.1578, 0.1611],
        [0.1519, 0.1612, 0.1636,  ..., 0.1551, 0.1572, 0.1627],
        ...,
        [0.1547, 0.1643, 0.1681,  ..., 0.1553, 0.1578, 0.1628],
        [0.1606, 0.1666, 0.1707,  ..., 0.1565, 0.1624, 0.1698],
        [0.1585, 0.1652, 0.1700,  ..., 0.1589, 0.1622, 0.1639]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:34<00:08,  1.36it/s]

tensor([[0.1597, 0.1669, 0.1719,  ..., 0.1561, 0.1628, 0.1646],
        [0.1608, 0.1661, 0.1728,  ..., 0.1579, 0.1619, 0.1658],
        [0.1542, 0.1634, 0.1681,  ..., 0.1538, 0.1606, 0.1620],
        ...,
        [0.1578, 0.1687, 0.1715,  ..., 0.1583, 0.1631, 0.1669],
        [0.1601, 0.1626, 0.1696,  ..., 0.1549, 0.1587, 0.1645],
        [0.1571, 0.1633, 0.1694,  ..., 0.1566, 0.1613, 0.1639]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:34<00:07,  1.37it/s]

tensor([[0.1575, 0.1636, 0.1696,  ..., 0.1584, 0.1628, 0.1665],
        [0.1556, 0.1616, 0.1650,  ..., 0.1533, 0.1587, 0.1634],
        [0.1591, 0.1659, 0.1713,  ..., 0.1559, 0.1622, 0.1681],
        ...,
        [0.1412, 0.1522, 0.1524,  ..., 0.1427, 0.1517, 0.1509],
        [0.1610, 0.1675, 0.1763,  ..., 0.1563, 0.1650, 0.1682],
        [0.1545, 0.1646, 0.1686,  ..., 0.1554, 0.1605, 0.1635]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:35<00:06,  1.39it/s]

tensor([[0.1525, 0.1600, 0.1643,  ..., 0.1542, 0.1585, 0.1631],
        [0.1593, 0.1659, 0.1700,  ..., 0.1598, 0.1626, 0.1685],
        [0.1587, 0.1651, 0.1738,  ..., 0.1568, 0.1625, 0.1699],
        ...,
        [0.1604, 0.1648, 0.1732,  ..., 0.1582, 0.1637, 0.1681],
        [0.1586, 0.1658, 0.1730,  ..., 0.1583, 0.1644, 0.1646],
        [0.1567, 0.1628, 0.1688,  ..., 0.1549, 0.1584, 0.1609]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:36<00:05,  1.38it/s]

tensor([[0.1601, 0.1661, 0.1719,  ..., 0.1566, 0.1617, 0.1671],
        [0.1605, 0.1656, 0.1757,  ..., 0.1578, 0.1657, 0.1680],
        [0.1564, 0.1640, 0.1659,  ..., 0.1531, 0.1588, 0.1622],
        ...,
        [0.1571, 0.1654, 0.1706,  ..., 0.1550, 0.1595, 0.1646],
        [0.1603, 0.1652, 0.1710,  ..., 0.1565, 0.1629, 0.1659],
        [0.1571, 0.1648, 0.1679,  ..., 0.1557, 0.1616, 0.1605]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:36<00:05,  1.38it/s]

tensor([[0.1560, 0.1650, 0.1723,  ..., 0.1559, 0.1618, 0.1682],
        [0.1591, 0.1672, 0.1704,  ..., 0.1578, 0.1628, 0.1670],
        [0.1571, 0.1634, 0.1704,  ..., 0.1566, 0.1598, 0.1650],
        ...,
        [0.1555, 0.1641, 0.1693,  ..., 0.1521, 0.1598, 0.1615],
        [0.1551, 0.1641, 0.1664,  ..., 0.1558, 0.1599, 0.1623],
        [0.1575, 0.1631, 0.1702,  ..., 0.1568, 0.1617, 0.1616]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:37<00:04,  1.37it/s]

tensor([[0.1560, 0.1627, 0.1701,  ..., 0.1574, 0.1606, 0.1642],
        [0.1514, 0.1620, 0.1628,  ..., 0.1533, 0.1571, 0.1576],
        [0.1567, 0.1673, 0.1684,  ..., 0.1551, 0.1625, 0.1625],
        ...,
        [0.1560, 0.1664, 0.1685,  ..., 0.1562, 0.1609, 0.1593],
        [0.1577, 0.1659, 0.1716,  ..., 0.1592, 0.1647, 0.1675],
        [0.1602, 0.1659, 0.1751,  ..., 0.1575, 0.1639, 0.1698]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:38<00:03,  1.36it/s]

tensor([[0.1526, 0.1624, 0.1664,  ..., 0.1537, 0.1569, 0.1613],
        [0.1571, 0.1664, 0.1718,  ..., 0.1571, 0.1631, 0.1667],
        [0.1591, 0.1667, 0.1734,  ..., 0.1565, 0.1621, 0.1682],
        ...,
        [0.1582, 0.1659, 0.1674,  ..., 0.1566, 0.1575, 0.1622],
        [0.1590, 0.1641, 0.1728,  ..., 0.1577, 0.1623, 0.1678],
        [0.1569, 0.1627, 0.1676,  ..., 0.1546, 0.1584, 0.1625]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:39<00:02,  1.34it/s]

tensor([[0.1568, 0.1631, 0.1699,  ..., 0.1536, 0.1605, 0.1616],
        [0.1600, 0.1673, 0.1761,  ..., 0.1589, 0.1631, 0.1696],
        [0.1577, 0.1608, 0.1690,  ..., 0.1560, 0.1595, 0.1632],
        ...,
        [0.1590, 0.1668, 0.1739,  ..., 0.1584, 0.1640, 0.1671],
        [0.1595, 0.1652, 0.1715,  ..., 0.1549, 0.1605, 0.1629],
        [0.1549, 0.1630, 0.1667,  ..., 0.1555, 0.1590, 0.1634]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:39<00:02,  1.35it/s]

tensor([[0.1575, 0.1705, 0.1684,  ..., 0.1564, 0.1646, 0.1621],
        [0.1565, 0.1627, 0.1705,  ..., 0.1564, 0.1613, 0.1663],
        [0.1551, 0.1632, 0.1704,  ..., 0.1574, 0.1598, 0.1654],
        ...,
        [0.1556, 0.1627, 0.1673,  ..., 0.1563, 0.1589, 0.1641],
        [0.1587, 0.1661, 0.1735,  ..., 0.1567, 0.1624, 0.1676],
        [0.1590, 0.1648, 0.1712,  ..., 0.1584, 0.1654, 0.1669]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:40<00:01,  1.35it/s]

tensor([[0.1568, 0.1675, 0.1726,  ..., 0.1565, 0.1594, 0.1673],
        [0.1582, 0.1647, 0.1714,  ..., 0.1576, 0.1620, 0.1668],
        [0.1554, 0.1646, 0.1708,  ..., 0.1559, 0.1635, 0.1667],
        ...,
        [0.1577, 0.1662, 0.1709,  ..., 0.1568, 0.1625, 0.1686],
        [0.1546, 0.1617, 0.1685,  ..., 0.1560, 0.1590, 0.1628],
        [0.1548, 0.1668, 0.1693,  ..., 0.1563, 0.1631, 0.1620]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:41<00:00,  1.35it/s]

tensor([[0.1567, 0.1628, 0.1690,  ..., 0.1561, 0.1614, 0.1651],
        [0.1562, 0.1665, 0.1708,  ..., 0.1540, 0.1609, 0.1632],
        [0.1571, 0.1640, 0.1705,  ..., 0.1561, 0.1590, 0.1608],
        ...,
        [0.1570, 0.1663, 0.1725,  ..., 0.1591, 0.1659, 0.1669],
        [0.1579, 0.1662, 0.1724,  ..., 0.1575, 0.1630, 0.1695],
        [0.1586, 0.1645, 0.1726,  ..., 0.1553, 0.1585, 0.1629]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:42<00:00,  1.35it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.03it/s]

tensor([[0.1537, 0.1636, 0.1667,  ..., 0.1502, 0.1586, 0.1581],
        [0.1586, 0.1668, 0.1741,  ..., 0.1559, 0.1631, 0.1669],
        [0.1580, 0.1640, 0.1698,  ..., 0.1567, 0.1605, 0.1643],
        ...,
        [0.1581, 0.1671, 0.1712,  ..., 0.1562, 0.1637, 0.1671],
        [0.1577, 0.1653, 0.1715,  ..., 0.1570, 0.1608, 0.1652],
        [0.1558, 0.1645, 0.1695,  ..., 0.1563, 0.1609, 0.1653]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.04it/s]

tensor([[0.1573, 0.1647, 0.1708,  ..., 0.1543, 0.1587, 0.1684],
        [0.1553, 0.1646, 0.1662,  ..., 0.1552, 0.1609, 0.1615],
        [0.1581, 0.1642, 0.1689,  ..., 0.1560, 0.1591, 0.1636],
        ...,
        [0.1590, 0.1656, 0.1711,  ..., 0.1586, 0.1637, 0.1680],
        [0.1592, 0.1675, 0.1741,  ..., 0.1556, 0.1636, 0.1655],
        [0.1575, 0.1657, 0.1708,  ..., 0.1567, 0.1608, 0.1654]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.00it/s]

tensor([[0.1592, 0.1666, 0.1710,  ..., 0.1582, 0.1637, 0.1670],
        [0.1548, 0.1660, 0.1697,  ..., 0.1599, 0.1649, 0.1621],
        [0.1581, 0.1654, 0.1707,  ..., 0.1566, 0.1635, 0.1670],
        ...,
        [0.1561, 0.1684, 0.1737,  ..., 0.1560, 0.1598, 0.1638],
        [0.1548, 0.1632, 0.1707,  ..., 0.1564, 0.1579, 0.1620],
        [0.1535, 0.1627, 0.1673,  ..., 0.1550, 0.1566, 0.1614]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  2.97it/s]

tensor([[0.1522, 0.1603, 0.1692,  ..., 0.1581, 0.1591, 0.1638],
        [0.1547, 0.1641, 0.1681,  ..., 0.1567, 0.1579, 0.1620],
        [0.1560, 0.1654, 0.1657,  ..., 0.1530, 0.1617, 0.1617],
        ...,
        [0.1560, 0.1646, 0.1680,  ..., 0.1565, 0.1618, 0.1645],
        [0.1582, 0.1632, 0.1705,  ..., 0.1582, 0.1618, 0.1637],
        [0.1588, 0.1672, 0.1716,  ..., 0.1582, 0.1615, 0.1640]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:01,  2.94it/s]

tensor([[0.1570, 0.1651, 0.1703,  ..., 0.1547, 0.1615, 0.1646],
        [0.1601, 0.1694, 0.1722,  ..., 0.1574, 0.1616, 0.1651],
        [0.1578, 0.1656, 0.1709,  ..., 0.1556, 0.1631, 0.1639],
        ...,
        [0.1594, 0.1638, 0.1699,  ..., 0.1562, 0.1633, 0.1648],
        [0.1568, 0.1627, 0.1697,  ..., 0.1589, 0.1599, 0.1641],
        [0.1576, 0.1638, 0.1722,  ..., 0.1567, 0.1609, 0.1640]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:02<00:00,  2.91it/s]

tensor([[0.1592, 0.1682, 0.1761,  ..., 0.1595, 0.1682, 0.1645],
        [0.1574, 0.1645, 0.1705,  ..., 0.1584, 0.1626, 0.1667],
        [0.1457, 0.1567, 0.1573,  ..., 0.1485, 0.1562, 0.1559],
        ...,
        [0.1553, 0.1639, 0.1692,  ..., 0.1574, 0.1632, 0.1649],
        [0.1530, 0.1630, 0.1620,  ..., 0.1510, 0.1562, 0.1590],
        [0.1584, 0.1629, 0.1697,  ..., 0.1574, 0.1587, 0.1647]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  2.96it/s]

tensor([[0.1590, 0.1668, 0.1715,  ..., 0.1583, 0.1647, 0.1662],
        [0.1526, 0.1611, 0.1653,  ..., 0.1551, 0.1564, 0.1608],
        [0.1586, 0.1649, 0.1726,  ..., 0.1586, 0.1634, 0.1720],
        ...,
        [0.1610, 0.1663, 0.1743,  ..., 0.1574, 0.1659, 0.1711],
        [0.1528, 0.1586, 0.1608,  ..., 0.1478, 0.1511, 0.1529],
        [0.1581, 0.1668, 0.1692,  ..., 0.1572, 0.1637, 0.1657]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.94it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1626, 0.1666, 0.1718,  ..., 0.1608, 0.1642, 0.1682],
        [0.1551, 0.1663, 0.1665,  ..., 0.1552, 0.1617, 0.1612],
        [0.1581, 0.1648, 0.1705,  ..., 0.1579, 0.1605, 0.1658],
        ...,
        [0.1589, 0.1673, 0.1732,  ..., 0.1566, 0.1655, 0.1686],
        [0.1598, 0.1664, 0.1761,  ..., 0.1552, 0.1646, 0.1675],
        [0.1531, 0.1617, 0.1691,  ..., 0.1549, 0.1573, 0.1602]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 9, train_loss: -0.46685704588890076, valid_loss: -0.4743436276912689
Parameter containing:
tensor([[ 1.0005,  1.0006,  1.0005,  1.0005,  1.0005,  1.0005,  1.0006,  1.0005,
          1.0005,  1.0006, -0.4995, -0.4995, -0.4996, -0.4995, -0.4995, -0.4995,
         -0.4995, -0.4996, -0.4995, -0.4996],
     

  2%|▏         | 1/57 [00:00<00:43,  1.27it/s]

tensor([[0.1588, 0.1724, 0.1722,  ..., 0.1604, 0.1646, 0.1648],
        [0.1522, 0.1630, 0.1626,  ..., 0.1552, 0.1599, 0.1589],
        [0.1565, 0.1607, 0.1699,  ..., 0.1563, 0.1589, 0.1650],
        ...,
        [0.1569, 0.1630, 0.1710,  ..., 0.1540, 0.1597, 0.1634],
        [0.1597, 0.1616, 0.1724,  ..., 0.1580, 0.1597, 0.1665],
        [0.1534, 0.1624, 0.1678,  ..., 0.1551, 0.1582, 0.1588]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:43,  1.27it/s]

tensor([[0.1466, 0.1546, 0.1601,  ..., 0.1495, 0.1522, 0.1558],
        [0.1577, 0.1654, 0.1727,  ..., 0.1561, 0.1625, 0.1695],
        [0.1539, 0.1625, 0.1675,  ..., 0.1518, 0.1586, 0.1601],
        ...,
        [0.1598, 0.1662, 0.1733,  ..., 0.1578, 0.1640, 0.1702],
        [0.1596, 0.1675, 0.1713,  ..., 0.1561, 0.1634, 0.1652],
        [0.1576, 0.1647, 0.1709,  ..., 0.1557, 0.1602, 0.1646]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:42,  1.27it/s]

tensor([[0.1586, 0.1658, 0.1726,  ..., 0.1586, 0.1637, 0.1682],
        [0.1601, 0.1642, 0.1722,  ..., 0.1574, 0.1628, 0.1673],
        [0.1592, 0.1656, 0.1713,  ..., 0.1575, 0.1611, 0.1679],
        ...,
        [0.1570, 0.1669, 0.1727,  ..., 0.1575, 0.1611, 0.1656],
        [0.1552, 0.1640, 0.1703,  ..., 0.1537, 0.1597, 0.1606],
        [0.1595, 0.1645, 0.1740,  ..., 0.1559, 0.1634, 0.1701]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:03<00:40,  1.31it/s]

tensor([[0.1544, 0.1611, 0.1662,  ..., 0.1492, 0.1565, 0.1597],
        [0.1591, 0.1667, 0.1732,  ..., 0.1574, 0.1664, 0.1689],
        [0.1594, 0.1681, 0.1746,  ..., 0.1589, 0.1651, 0.1670],
        ...,
        [0.1576, 0.1653, 0.1679,  ..., 0.1565, 0.1623, 0.1645],
        [0.1600, 0.1686, 0.1773,  ..., 0.1611, 0.1683, 0.1723],
        [0.1572, 0.1643, 0.1700,  ..., 0.1598, 0.1619, 0.1654]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:38,  1.35it/s]

tensor([[0.1574, 0.1650, 0.1705,  ..., 0.1548, 0.1590, 0.1663],
        [0.1598, 0.1673, 0.1723,  ..., 0.1573, 0.1639, 0.1664],
        [0.1619, 0.1707, 0.1758,  ..., 0.1584, 0.1660, 0.1669],
        ...,
        [0.1575, 0.1643, 0.1715,  ..., 0.1578, 0.1620, 0.1644],
        [0.1553, 0.1595, 0.1673,  ..., 0.1580, 0.1604, 0.1618],
        [0.1560, 0.1646, 0.1666,  ..., 0.1556, 0.1591, 0.1604]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:37,  1.36it/s]

tensor([[0.1549, 0.1606, 0.1646,  ..., 0.1531, 0.1564, 0.1628],
        [0.1560, 0.1611, 0.1708,  ..., 0.1557, 0.1607, 0.1654],
        [0.1546, 0.1623, 0.1671,  ..., 0.1573, 0.1571, 0.1634],
        ...,
        [0.1597, 0.1636, 0.1719,  ..., 0.1575, 0.1630, 0.1676],
        [0.1568, 0.1638, 0.1714,  ..., 0.1557, 0.1603, 0.1656],
        [0.1591, 0.1690, 0.1741,  ..., 0.1597, 0.1647, 0.1668]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:35,  1.40it/s]

tensor([[0.1534, 0.1629, 0.1684,  ..., 0.1570, 0.1585, 0.1647],
        [0.1604, 0.1667, 0.1734,  ..., 0.1551, 0.1629, 0.1645],
        [0.1578, 0.1643, 0.1704,  ..., 0.1553, 0.1616, 0.1662],
        ...,
        [0.1575, 0.1641, 0.1724,  ..., 0.1575, 0.1624, 0.1667],
        [0.1602, 0.1661, 0.1745,  ..., 0.1570, 0.1620, 0.1676],
        [0.1560, 0.1643, 0.1727,  ..., 0.1568, 0.1633, 0.1654]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.44it/s]

tensor([[0.1589, 0.1648, 0.1728,  ..., 0.1554, 0.1604, 0.1660],
        [0.1589, 0.1669, 0.1732,  ..., 0.1559, 0.1617, 0.1671],
        [0.1568, 0.1649, 0.1704,  ..., 0.1582, 0.1634, 0.1629],
        ...,
        [0.1590, 0.1677, 0.1740,  ..., 0.1561, 0.1615, 0.1656],
        [0.1589, 0.1652, 0.1739,  ..., 0.1618, 0.1645, 0.1674],
        [0.1551, 0.1624, 0.1690,  ..., 0.1584, 0.1594, 0.1634]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:33,  1.45it/s]

tensor([[0.1600, 0.1650, 0.1730,  ..., 0.1591, 0.1640, 0.1672],
        [0.1576, 0.1633, 0.1740,  ..., 0.1561, 0.1632, 0.1647],
        [0.1592, 0.1678, 0.1742,  ..., 0.1566, 0.1600, 0.1665],
        ...,
        [0.1594, 0.1671, 0.1724,  ..., 0.1585, 0.1625, 0.1683],
        [0.1593, 0.1635, 0.1717,  ..., 0.1571, 0.1596, 0.1645],
        [0.1529, 0.1614, 0.1673,  ..., 0.1513, 0.1550, 0.1581]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:31,  1.47it/s]

tensor([[0.1587, 0.1682, 0.1736,  ..., 0.1587, 0.1634, 0.1689],
        [0.1544, 0.1610, 0.1676,  ..., 0.1557, 0.1593, 0.1609],
        [0.1566, 0.1631, 0.1688,  ..., 0.1547, 0.1596, 0.1621],
        ...,
        [0.1547, 0.1622, 0.1686,  ..., 0.1551, 0.1576, 0.1610],
        [0.1462, 0.1576, 0.1580,  ..., 0.1485, 0.1559, 0.1560],
        [0.1595, 0.1674, 0.1738,  ..., 0.1573, 0.1667, 0.1697]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:31,  1.46it/s]

tensor([[0.1580, 0.1669, 0.1716,  ..., 0.1570, 0.1634, 0.1679],
        [0.1576, 0.1644, 0.1720,  ..., 0.1583, 0.1619, 0.1644],
        [0.1596, 0.1691, 0.1737,  ..., 0.1569, 0.1678, 0.1699],
        ...,
        [0.1479, 0.1552, 0.1605,  ..., 0.1525, 0.1514, 0.1581],
        [0.1608, 0.1667, 0.1714,  ..., 0.1582, 0.1653, 0.1674],
        [0.1582, 0.1627, 0.1680,  ..., 0.1542, 0.1553, 0.1595]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:31,  1.44it/s]

tensor([[0.1585, 0.1672, 0.1736,  ..., 0.1569, 0.1650, 0.1687],
        [0.1559, 0.1625, 0.1687,  ..., 0.1539, 0.1594, 0.1619],
        [0.1592, 0.1647, 0.1705,  ..., 0.1570, 0.1635, 0.1630],
        ...,
        [0.1599, 0.1680, 0.1724,  ..., 0.1580, 0.1641, 0.1646],
        [0.1591, 0.1670, 0.1725,  ..., 0.1574, 0.1665, 0.1677],
        [0.1566, 0.1652, 0.1689,  ..., 0.1574, 0.1629, 0.1625]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:29,  1.48it/s]

tensor([[0.1592, 0.1616, 0.1705,  ..., 0.1560, 0.1621, 0.1683],
        [0.1524, 0.1634, 0.1646,  ..., 0.1575, 0.1609, 0.1626],
        [0.1581, 0.1656, 0.1732,  ..., 0.1560, 0.1607, 0.1649],
        ...,
        [0.1572, 0.1674, 0.1712,  ..., 0.1563, 0.1620, 0.1690],
        [0.1490, 0.1592, 0.1609,  ..., 0.1512, 0.1508, 0.1515],
        [0.1563, 0.1619, 0.1694,  ..., 0.1537, 0.1593, 0.1623]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.51it/s]

tensor([[0.1592, 0.1658, 0.1734,  ..., 0.1602, 0.1643, 0.1682],
        [0.1504, 0.1603, 0.1621,  ..., 0.1544, 0.1601, 0.1615],
        [0.1577, 0.1627, 0.1694,  ..., 0.1572, 0.1604, 0.1653],
        ...,
        [0.1587, 0.1641, 0.1735,  ..., 0.1583, 0.1630, 0.1679],
        [0.1542, 0.1622, 0.1667,  ..., 0.1575, 0.1601, 0.1609],
        [0.1566, 0.1664, 0.1665,  ..., 0.1578, 0.1627, 0.1631]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:27,  1.51it/s]

tensor([[0.1573, 0.1641, 0.1710,  ..., 0.1537, 0.1609, 0.1622],
        [0.1593, 0.1658, 0.1722,  ..., 0.1565, 0.1640, 0.1664],
        [0.1575, 0.1656, 0.1681,  ..., 0.1561, 0.1641, 0.1651],
        ...,
        [0.1552, 0.1677, 0.1707,  ..., 0.1584, 0.1624, 0.1663],
        [0.1594, 0.1664, 0.1713,  ..., 0.1578, 0.1618, 0.1654],
        [0.1551, 0.1637, 0.1721,  ..., 0.1595, 0.1604, 0.1683]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:26,  1.53it/s]

tensor([[0.1585, 0.1663, 0.1719,  ..., 0.1572, 0.1637, 0.1665],
        [0.1573, 0.1630, 0.1703,  ..., 0.1569, 0.1611, 0.1632],
        [0.1580, 0.1692, 0.1716,  ..., 0.1578, 0.1659, 0.1684],
        ...,
        [0.1561, 0.1647, 0.1682,  ..., 0.1567, 0.1616, 0.1605],
        [0.1551, 0.1629, 0.1689,  ..., 0.1575, 0.1613, 0.1641],
        [0.1561, 0.1639, 0.1677,  ..., 0.1569, 0.1604, 0.1624]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:25,  1.55it/s]

tensor([[0.1599, 0.1667, 0.1736,  ..., 0.1581, 0.1667, 0.1697],
        [0.1591, 0.1640, 0.1707,  ..., 0.1563, 0.1634, 0.1655],
        [0.1568, 0.1651, 0.1684,  ..., 0.1568, 0.1619, 0.1633],
        ...,
        [0.1580, 0.1650, 0.1710,  ..., 0.1567, 0.1656, 0.1640],
        [0.1524, 0.1641, 0.1670,  ..., 0.1533, 0.1575, 0.1600],
        [0.1529, 0.1638, 0.1697,  ..., 0.1607, 0.1637, 0.1638]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:25,  1.54it/s]

tensor([[0.1590, 0.1703, 0.1720,  ..., 0.1571, 0.1659, 0.1680],
        [0.1565, 0.1665, 0.1704,  ..., 0.1574, 0.1615, 0.1663],
        [0.1566, 0.1644, 0.1715,  ..., 0.1579, 0.1622, 0.1663],
        ...,
        [0.1560, 0.1639, 0.1696,  ..., 0.1548, 0.1607, 0.1630],
        [0.1545, 0.1583, 0.1678,  ..., 0.1510, 0.1557, 0.1602],
        [0.1566, 0.1626, 0.1690,  ..., 0.1583, 0.1611, 0.1646]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:24,  1.55it/s]

tensor([[0.1598, 0.1649, 0.1722,  ..., 0.1556, 0.1628, 0.1675],
        [0.1562, 0.1663, 0.1713,  ..., 0.1579, 0.1623, 0.1651],
        [0.1549, 0.1624, 0.1686,  ..., 0.1569, 0.1581, 0.1649],
        ...,
        [0.1597, 0.1670, 0.1739,  ..., 0.1563, 0.1669, 0.1684],
        [0.1635, 0.1673, 0.1723,  ..., 0.1586, 0.1658, 0.1699],
        [0.1570, 0.1656, 0.1695,  ..., 0.1572, 0.1602, 0.1648]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:23,  1.55it/s]

tensor([[0.1595, 0.1651, 0.1748,  ..., 0.1577, 0.1640, 0.1692],
        [0.1559, 0.1652, 0.1683,  ..., 0.1556, 0.1618, 0.1644],
        [0.1522, 0.1612, 0.1664,  ..., 0.1530, 0.1579, 0.1555],
        ...,
        [0.1584, 0.1680, 0.1741,  ..., 0.1559, 0.1665, 0.1683],
        [0.1590, 0.1618, 0.1695,  ..., 0.1569, 0.1603, 0.1653],
        [0.1606, 0.1659, 0.1734,  ..., 0.1561, 0.1648, 0.1689]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:23,  1.51it/s]

tensor([[0.1565, 0.1638, 0.1685,  ..., 0.1553, 0.1631, 0.1623],
        [0.1539, 0.1596, 0.1651,  ..., 0.1555, 0.1582, 0.1611],
        [0.1605, 0.1672, 0.1740,  ..., 0.1567, 0.1636, 0.1687],
        ...,
        [0.1570, 0.1666, 0.1731,  ..., 0.1580, 0.1643, 0.1652],
        [0.1562, 0.1654, 0.1704,  ..., 0.1557, 0.1608, 0.1631],
        [0.1594, 0.1671, 0.1715,  ..., 0.1568, 0.1612, 0.1655]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:23,  1.49it/s]

tensor([[0.1598, 0.1656, 0.1709,  ..., 0.1590, 0.1614, 0.1662],
        [0.1565, 0.1635, 0.1677,  ..., 0.1566, 0.1605, 0.1604],
        [0.1574, 0.1690, 0.1697,  ..., 0.1574, 0.1642, 0.1620],
        ...,
        [0.1569, 0.1648, 0.1703,  ..., 0.1574, 0.1612, 0.1642],
        [0.1587, 0.1636, 0.1703,  ..., 0.1580, 0.1620, 0.1670],
        [0.1558, 0.1632, 0.1666,  ..., 0.1584, 0.1591, 0.1645]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.50it/s]

tensor([[0.1601, 0.1653, 0.1719,  ..., 0.1571, 0.1616, 0.1692],
        [0.1535, 0.1578, 0.1628,  ..., 0.1545, 0.1538, 0.1587],
        [0.1583, 0.1633, 0.1698,  ..., 0.1559, 0.1632, 0.1659],
        ...,
        [0.1582, 0.1637, 0.1693,  ..., 0.1570, 0.1618, 0.1636],
        [0.1534, 0.1643, 0.1667,  ..., 0.1537, 0.1588, 0.1599],
        [0.1507, 0.1628, 0.1634,  ..., 0.1575, 0.1600, 0.1580]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:21,  1.50it/s]

tensor([[0.1504, 0.1623, 0.1650,  ..., 0.1521, 0.1582, 0.1605],
        [0.1561, 0.1660, 0.1702,  ..., 0.1557, 0.1652, 0.1668],
        [0.1598, 0.1666, 0.1742,  ..., 0.1567, 0.1627, 0.1701],
        ...,
        [0.1549, 0.1657, 0.1685,  ..., 0.1563, 0.1613, 0.1616],
        [0.1612, 0.1647, 0.1690,  ..., 0.1572, 0.1624, 0.1651],
        [0.1538, 0.1635, 0.1697,  ..., 0.1545, 0.1595, 0.1612]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:20,  1.53it/s]

tensor([[0.1555, 0.1631, 0.1721,  ..., 0.1563, 0.1585, 0.1639],
        [0.1580, 0.1641, 0.1719,  ..., 0.1550, 0.1620, 0.1650],
        [0.1566, 0.1649, 0.1705,  ..., 0.1560, 0.1618, 0.1652],
        ...,
        [0.1551, 0.1634, 0.1687,  ..., 0.1531, 0.1583, 0.1587],
        [0.1552, 0.1591, 0.1667,  ..., 0.1537, 0.1573, 0.1605],
        [0.1535, 0.1607, 0.1641,  ..., 0.1536, 0.1610, 0.1613]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.52it/s]

tensor([[0.1558, 0.1608, 0.1656,  ..., 0.1538, 0.1589, 0.1627],
        [0.1585, 0.1643, 0.1715,  ..., 0.1584, 0.1627, 0.1665],
        [0.1580, 0.1650, 0.1697,  ..., 0.1576, 0.1590, 0.1654],
        ...,
        [0.1553, 0.1625, 0.1661,  ..., 0.1567, 0.1588, 0.1608],
        [0.1592, 0.1675, 0.1734,  ..., 0.1592, 0.1644, 0.1639],
        [0.1588, 0.1644, 0.1711,  ..., 0.1569, 0.1626, 0.1644]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:19,  1.52it/s]

tensor([[0.1549, 0.1625, 0.1668,  ..., 0.1545, 0.1555, 0.1621],
        [0.1578, 0.1654, 0.1698,  ..., 0.1570, 0.1596, 0.1656],
        [0.1508, 0.1606, 0.1653,  ..., 0.1524, 0.1560, 0.1598],
        ...,
        [0.1586, 0.1639, 0.1732,  ..., 0.1556, 0.1623, 0.1667],
        [0.1581, 0.1627, 0.1696,  ..., 0.1577, 0.1603, 0.1636],
        [0.1593, 0.1655, 0.1733,  ..., 0.1580, 0.1657, 0.1682]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:18<00:18,  1.53it/s]

tensor([[0.1570, 0.1663, 0.1701,  ..., 0.1567, 0.1659, 0.1643],
        [0.1585, 0.1680, 0.1704,  ..., 0.1571, 0.1612, 0.1631],
        [0.1544, 0.1624, 0.1684,  ..., 0.1551, 0.1601, 0.1628],
        ...,
        [0.1509, 0.1546, 0.1622,  ..., 0.1531, 0.1523, 0.1557],
        [0.1573, 0.1646, 0.1737,  ..., 0.1559, 0.1632, 0.1666],
        [0.1590, 0.1671, 0.1737,  ..., 0.1576, 0.1617, 0.1670]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:18,  1.51it/s]

tensor([[0.1568, 0.1647, 0.1697,  ..., 0.1565, 0.1606, 0.1632],
        [0.1537, 0.1632, 0.1708,  ..., 0.1564, 0.1616, 0.1625],
        [0.1578, 0.1665, 0.1722,  ..., 0.1578, 0.1644, 0.1658],
        ...,
        [0.1580, 0.1687, 0.1703,  ..., 0.1565, 0.1663, 0.1685],
        [0.1594, 0.1658, 0.1719,  ..., 0.1567, 0.1617, 0.1673],
        [0.1541, 0.1672, 0.1684,  ..., 0.1565, 0.1598, 0.1655]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:17,  1.51it/s]

tensor([[0.1568, 0.1640, 0.1721,  ..., 0.1559, 0.1605, 0.1643],
        [0.1564, 0.1641, 0.1713,  ..., 0.1560, 0.1656, 0.1659],
        [0.1580, 0.1662, 0.1703,  ..., 0.1575, 0.1644, 0.1651],
        ...,
        [0.1582, 0.1678, 0.1727,  ..., 0.1568, 0.1649, 0.1683],
        [0.1542, 0.1606, 0.1624,  ..., 0.1553, 0.1536, 0.1560],
        [0.1562, 0.1626, 0.1694,  ..., 0.1557, 0.1582, 0.1627]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:20<00:17,  1.52it/s]

tensor([[0.1470, 0.1528, 0.1565,  ..., 0.1457, 0.1513, 0.1501],
        [0.1577, 0.1646, 0.1709,  ..., 0.1538, 0.1616, 0.1633],
        [0.1529, 0.1636, 0.1668,  ..., 0.1533, 0.1580, 0.1616],
        ...,
        [0.1557, 0.1641, 0.1693,  ..., 0.1578, 0.1613, 0.1632],
        [0.1580, 0.1665, 0.1703,  ..., 0.1569, 0.1626, 0.1659],
        [0.1520, 0.1618, 0.1629,  ..., 0.1526, 0.1601, 0.1600]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:17,  1.47it/s]

tensor([[0.1564, 0.1658, 0.1692,  ..., 0.1538, 0.1579, 0.1575],
        [0.1568, 0.1631, 0.1668,  ..., 0.1542, 0.1578, 0.1639],
        [0.1572, 0.1624, 0.1692,  ..., 0.1543, 0.1603, 0.1634],
        ...,
        [0.1527, 0.1603, 0.1643,  ..., 0.1519, 0.1564, 0.1581],
        [0.1527, 0.1625, 0.1657,  ..., 0.1564, 0.1593, 0.1603],
        [0.1564, 0.1640, 0.1689,  ..., 0.1531, 0.1611, 0.1600]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:16,  1.48it/s]

tensor([[0.1587, 0.1646, 0.1737,  ..., 0.1569, 0.1646, 0.1688],
        [0.1555, 0.1685, 0.1715,  ..., 0.1577, 0.1614, 0.1665],
        [0.1569, 0.1658, 0.1704,  ..., 0.1569, 0.1609, 0.1662],
        ...,
        [0.1548, 0.1634, 0.1668,  ..., 0.1559, 0.1576, 0.1597],
        [0.1608, 0.1672, 0.1756,  ..., 0.1575, 0.1668, 0.1709],
        [0.1515, 0.1608, 0.1624,  ..., 0.1551, 0.1563, 0.1588]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.49it/s]

tensor([[0.1530, 0.1592, 0.1644,  ..., 0.1525, 0.1555, 0.1603],
        [0.1570, 0.1652, 0.1690,  ..., 0.1556, 0.1616, 0.1639],
        [0.1599, 0.1655, 0.1709,  ..., 0.1596, 0.1638, 0.1651],
        ...,
        [0.1573, 0.1646, 0.1695,  ..., 0.1556, 0.1592, 0.1634],
        [0.1562, 0.1641, 0.1712,  ..., 0.1577, 0.1600, 0.1632],
        [0.1556, 0.1646, 0.1707,  ..., 0.1549, 0.1590, 0.1639]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.50it/s]

tensor([[0.1567, 0.1654, 0.1699,  ..., 0.1542, 0.1614, 0.1641],
        [0.1603, 0.1673, 0.1751,  ..., 0.1570, 0.1645, 0.1695],
        [0.1543, 0.1620, 0.1648,  ..., 0.1548, 0.1608, 0.1593],
        ...,
        [0.1596, 0.1651, 0.1733,  ..., 0.1573, 0.1649, 0.1671],
        [0.1546, 0.1629, 0.1681,  ..., 0.1553, 0.1591, 0.1675],
        [0.1537, 0.1633, 0.1663,  ..., 0.1515, 0.1570, 0.1568]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:13,  1.50it/s]

tensor([[0.1576, 0.1662, 0.1694,  ..., 0.1559, 0.1633, 0.1688],
        [0.1558, 0.1654, 0.1665,  ..., 0.1575, 0.1615, 0.1624],
        [0.1574, 0.1645, 0.1709,  ..., 0.1570, 0.1619, 0.1665],
        ...,
        [0.1615, 0.1662, 0.1737,  ..., 0.1563, 0.1632, 0.1659],
        [0.1575, 0.1681, 0.1712,  ..., 0.1572, 0.1650, 0.1673],
        [0.1565, 0.1639, 0.1691,  ..., 0.1569, 0.1610, 0.1635]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:24<00:13,  1.52it/s]

tensor([[0.1566, 0.1659, 0.1703,  ..., 0.1558, 0.1618, 0.1642],
        [0.1582, 0.1674, 0.1704,  ..., 0.1582, 0.1637, 0.1655],
        [0.1614, 0.1672, 0.1734,  ..., 0.1593, 0.1626, 0.1659],
        ...,
        [0.1554, 0.1629, 0.1684,  ..., 0.1562, 0.1630, 0.1610],
        [0.1590, 0.1637, 0.1717,  ..., 0.1567, 0.1639, 0.1652],
        [0.1555, 0.1655, 0.1676,  ..., 0.1556, 0.1646, 0.1632]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:12,  1.53it/s]

tensor([[0.1569, 0.1652, 0.1730,  ..., 0.1570, 0.1612, 0.1665],
        [0.1497, 0.1602, 0.1597,  ..., 0.1534, 0.1506, 0.1538],
        [0.1596, 0.1649, 0.1747,  ..., 0.1573, 0.1644, 0.1689],
        ...,
        [0.1581, 0.1672, 0.1738,  ..., 0.1553, 0.1629, 0.1672],
        [0.1583, 0.1666, 0.1727,  ..., 0.1562, 0.1627, 0.1659],
        [0.1608, 0.1674, 0.1731,  ..., 0.1585, 0.1653, 0.1666]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:11,  1.53it/s]

tensor([[0.1604, 0.1652, 0.1745,  ..., 0.1556, 0.1617, 0.1681],
        [0.1534, 0.1624, 0.1685,  ..., 0.1579, 0.1619, 0.1627],
        [0.1554, 0.1630, 0.1723,  ..., 0.1561, 0.1597, 0.1638],
        ...,
        [0.1607, 0.1667, 0.1733,  ..., 0.1585, 0.1634, 0.1663],
        [0.1524, 0.1550, 0.1625,  ..., 0.1521, 0.1526, 0.1569],
        [0.1583, 0.1650, 0.1727,  ..., 0.1581, 0.1599, 0.1665]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:26<00:10,  1.55it/s]

tensor([[0.1581, 0.1643, 0.1707,  ..., 0.1576, 0.1590, 0.1658],
        [0.1582, 0.1644, 0.1674,  ..., 0.1556, 0.1613, 0.1646],
        [0.1604, 0.1668, 0.1744,  ..., 0.1572, 0.1643, 0.1698],
        ...,
        [0.1561, 0.1642, 0.1674,  ..., 0.1556, 0.1597, 0.1633],
        [0.1508, 0.1567, 0.1583,  ..., 0.1472, 0.1489, 0.1541],
        [0.1523, 0.1582, 0.1631,  ..., 0.1500, 0.1545, 0.1604]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:10,  1.54it/s]

tensor([[0.1548, 0.1600, 0.1676,  ..., 0.1583, 0.1601, 0.1609],
        [0.1610, 0.1680, 0.1750,  ..., 0.1571, 0.1685, 0.1687],
        [0.1549, 0.1615, 0.1672,  ..., 0.1545, 0.1562, 0.1597],
        ...,
        [0.1593, 0.1668, 0.1751,  ..., 0.1598, 0.1650, 0.1668],
        [0.1588, 0.1666, 0.1748,  ..., 0.1576, 0.1634, 0.1665],
        [0.1557, 0.1652, 0.1714,  ..., 0.1557, 0.1630, 0.1639]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:10,  1.49it/s]

tensor([[0.1588, 0.1634, 0.1724,  ..., 0.1593, 0.1612, 0.1677],
        [0.1568, 0.1674, 0.1697,  ..., 0.1590, 0.1611, 0.1630],
        [0.1566, 0.1641, 0.1674,  ..., 0.1554, 0.1576, 0.1584],
        ...,
        [0.1548, 0.1640, 0.1657,  ..., 0.1524, 0.1609, 0.1595],
        [0.1605, 0.1655, 0.1727,  ..., 0.1569, 0.1617, 0.1701],
        [0.1524, 0.1607, 0.1647,  ..., 0.1538, 0.1537, 0.1567]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:28<00:09,  1.48it/s]

tensor([[0.1549, 0.1631, 0.1690,  ..., 0.1569, 0.1617, 0.1657],
        [0.1540, 0.1601, 0.1652,  ..., 0.1564, 0.1554, 0.1598],
        [0.1566, 0.1652, 0.1694,  ..., 0.1565, 0.1606, 0.1632],
        ...,
        [0.1580, 0.1662, 0.1753,  ..., 0.1574, 0.1621, 0.1678],
        [0.1520, 0.1610, 0.1696,  ..., 0.1567, 0.1552, 0.1617],
        [0.1553, 0.1599, 0.1673,  ..., 0.1541, 0.1577, 0.1615]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:08,  1.48it/s]

tensor([[0.1602, 0.1646, 0.1763,  ..., 0.1566, 0.1609, 0.1672],
        [0.1554, 0.1665, 0.1687,  ..., 0.1576, 0.1591, 0.1642],
        [0.1566, 0.1651, 0.1643,  ..., 0.1567, 0.1609, 0.1630],
        ...,
        [0.1572, 0.1647, 0.1675,  ..., 0.1567, 0.1612, 0.1621],
        [0.1579, 0.1632, 0.1697,  ..., 0.1564, 0.1593, 0.1649],
        [0.1583, 0.1658, 0.1736,  ..., 0.1594, 0.1628, 0.1655]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:08,  1.44it/s]

tensor([[0.1555, 0.1621, 0.1691,  ..., 0.1583, 0.1601, 0.1637],
        [0.1539, 0.1624, 0.1662,  ..., 0.1567, 0.1586, 0.1582],
        [0.1576, 0.1648, 0.1701,  ..., 0.1560, 0.1631, 0.1648],
        ...,
        [0.1524, 0.1572, 0.1648,  ..., 0.1537, 0.1506, 0.1570],
        [0.1586, 0.1631, 0.1719,  ..., 0.1560, 0.1611, 0.1658],
        [0.1579, 0.1629, 0.1708,  ..., 0.1588, 0.1600, 0.1642]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:31<00:07,  1.44it/s]

tensor([[0.1587, 0.1632, 0.1717,  ..., 0.1569, 0.1604, 0.1649],
        [0.1561, 0.1648, 0.1717,  ..., 0.1593, 0.1632, 0.1647],
        [0.1596, 0.1659, 0.1721,  ..., 0.1593, 0.1652, 0.1680],
        ...,
        [0.1584, 0.1658, 0.1730,  ..., 0.1571, 0.1644, 0.1683],
        [0.1555, 0.1648, 0.1673,  ..., 0.1587, 0.1604, 0.1655],
        [0.1527, 0.1604, 0.1647,  ..., 0.1541, 0.1568, 0.1597]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:06,  1.46it/s]

tensor([[0.1578, 0.1666, 0.1714,  ..., 0.1573, 0.1650, 0.1672],
        [0.1572, 0.1632, 0.1691,  ..., 0.1582, 0.1608, 0.1637],
        [0.1539, 0.1586, 0.1649,  ..., 0.1572, 0.1589, 0.1617],
        ...,
        [0.1571, 0.1622, 0.1685,  ..., 0.1572, 0.1579, 0.1624],
        [0.1567, 0.1632, 0.1710,  ..., 0.1556, 0.1597, 0.1650],
        [0.1563, 0.1626, 0.1675,  ..., 0.1565, 0.1589, 0.1634]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:06,  1.44it/s]

tensor([[0.1552, 0.1624, 0.1658,  ..., 0.1562, 0.1603, 0.1612],
        [0.1574, 0.1668, 0.1708,  ..., 0.1560, 0.1616, 0.1660],
        [0.1586, 0.1618, 0.1687,  ..., 0.1575, 0.1607, 0.1613],
        ...,
        [0.1550, 0.1629, 0.1713,  ..., 0.1560, 0.1606, 0.1619],
        [0.1586, 0.1620, 0.1690,  ..., 0.1567, 0.1596, 0.1621],
        [0.1495, 0.1545, 0.1605,  ..., 0.1478, 0.1536, 0.1581]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:33<00:05,  1.43it/s]

tensor([[0.1554, 0.1648, 0.1700,  ..., 0.1568, 0.1655, 0.1658],
        [0.1567, 0.1643, 0.1672,  ..., 0.1600, 0.1624, 0.1648],
        [0.1592, 0.1630, 0.1701,  ..., 0.1580, 0.1626, 0.1641],
        ...,
        [0.1579, 0.1671, 0.1718,  ..., 0.1583, 0.1662, 0.1664],
        [0.1572, 0.1658, 0.1691,  ..., 0.1585, 0.1670, 0.1645],
        [0.1588, 0.1626, 0.1698,  ..., 0.1591, 0.1630, 0.1656]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:33<00:04,  1.43it/s]

tensor([[0.1569, 0.1639, 0.1705,  ..., 0.1579, 0.1605, 0.1625],
        [0.1557, 0.1630, 0.1678,  ..., 0.1569, 0.1572, 0.1609],
        [0.1592, 0.1646, 0.1723,  ..., 0.1572, 0.1619, 0.1698],
        ...,
        [0.1566, 0.1637, 0.1704,  ..., 0.1557, 0.1598, 0.1663],
        [0.1559, 0.1671, 0.1692,  ..., 0.1548, 0.1635, 0.1633],
        [0.1580, 0.1657, 0.1698,  ..., 0.1556, 0.1610, 0.1629]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:34<00:04,  1.45it/s]

tensor([[0.1558, 0.1628, 0.1681,  ..., 0.1536, 0.1591, 0.1653],
        [0.1578, 0.1645, 0.1699,  ..., 0.1566, 0.1601, 0.1629],
        [0.1541, 0.1601, 0.1676,  ..., 0.1569, 0.1608, 0.1607],
        ...,
        [0.1549, 0.1621, 0.1688,  ..., 0.1569, 0.1613, 0.1615],
        [0.1563, 0.1630, 0.1708,  ..., 0.1586, 0.1607, 0.1659],
        [0.1555, 0.1636, 0.1693,  ..., 0.1565, 0.1593, 0.1620]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.48it/s]

tensor([[0.1523, 0.1629, 0.1640,  ..., 0.1542, 0.1597, 0.1577],
        [0.1578, 0.1627, 0.1711,  ..., 0.1567, 0.1609, 0.1682],
        [0.1574, 0.1649, 0.1704,  ..., 0.1580, 0.1608, 0.1644],
        ...,
        [0.1573, 0.1661, 0.1711,  ..., 0.1610, 0.1677, 0.1656],
        [0.1539, 0.1621, 0.1699,  ..., 0.1530, 0.1603, 0.1629],
        [0.1550, 0.1603, 0.1676,  ..., 0.1549, 0.1571, 0.1648]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:35<00:02,  1.46it/s]

tensor([[0.1591, 0.1658, 0.1746,  ..., 0.1591, 0.1632, 0.1690],
        [0.1586, 0.1649, 0.1695,  ..., 0.1583, 0.1626, 0.1662],
        [0.1605, 0.1661, 0.1735,  ..., 0.1576, 0.1615, 0.1670],
        ...,
        [0.1526, 0.1638, 0.1669,  ..., 0.1523, 0.1616, 0.1630],
        [0.1577, 0.1645, 0.1696,  ..., 0.1559, 0.1602, 0.1632],
        [0.1560, 0.1641, 0.1705,  ..., 0.1566, 0.1577, 0.1627]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:36<00:02,  1.48it/s]

tensor([[0.1560, 0.1643, 0.1682,  ..., 0.1564, 0.1596, 0.1629],
        [0.1521, 0.1642, 0.1656,  ..., 0.1564, 0.1631, 0.1603],
        [0.1466, 0.1565, 0.1583,  ..., 0.1514, 0.1535, 0.1535],
        ...,
        [0.1562, 0.1635, 0.1698,  ..., 0.1554, 0.1593, 0.1662],
        [0.1611, 0.1684, 0.1754,  ..., 0.1579, 0.1660, 0.1681],
        [0.1583, 0.1675, 0.1720,  ..., 0.1572, 0.1644, 0.1682]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:37<00:01,  1.51it/s]

tensor([[0.1545, 0.1664, 0.1689,  ..., 0.1533, 0.1615, 0.1605],
        [0.1579, 0.1682, 0.1722,  ..., 0.1588, 0.1640, 0.1665],
        [0.1491, 0.1585, 0.1596,  ..., 0.1544, 0.1523, 0.1571],
        ...,
        [0.1589, 0.1662, 0.1715,  ..., 0.1563, 0.1604, 0.1651],
        [0.1519, 0.1627, 0.1656,  ..., 0.1548, 0.1551, 0.1588],
        [0.1566, 0.1640, 0.1695,  ..., 0.1574, 0.1628, 0.1686]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:37<00:00,  1.51it/s]

tensor([[0.1577, 0.1650, 0.1706,  ..., 0.1568, 0.1587, 0.1656],
        [0.1580, 0.1632, 0.1698,  ..., 0.1573, 0.1587, 0.1663],
        [0.1549, 0.1633, 0.1683,  ..., 0.1574, 0.1590, 0.1671],
        ...,
        [0.1551, 0.1617, 0.1672,  ..., 0.1550, 0.1602, 0.1643],
        [0.1564, 0.1658, 0.1701,  ..., 0.1579, 0.1624, 0.1636],
        [0.1576, 0.1637, 0.1718,  ..., 0.1562, 0.1623, 0.1656]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:38<00:00,  1.48it/s]
 12%|█▎        | 1/8 [00:00<00:01,  3.68it/s]

tensor([[0.1548, 0.1657, 0.1725,  ..., 0.1577, 0.1628, 0.1672],
        [0.1547, 0.1652, 0.1671,  ..., 0.1590, 0.1638, 0.1647],
        [0.1544, 0.1632, 0.1670,  ..., 0.1549, 0.1586, 0.1580],
        ...,
        [0.1556, 0.1625, 0.1687,  ..., 0.1575, 0.1581, 0.1640],
        [0.1525, 0.1631, 0.1662,  ..., 0.1528, 0.1571, 0.1641],
        [0.1590, 0.1651, 0.1725,  ..., 0.1578, 0.1593, 0.1672]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.65it/s]

tensor([[0.1561, 0.1609, 0.1686,  ..., 0.1575, 0.1600, 0.1633],
        [0.1498, 0.1584, 0.1636,  ..., 0.1549, 0.1588, 0.1609],
        [0.1569, 0.1648, 0.1744,  ..., 0.1563, 0.1604, 0.1615],
        ...,
        [0.1556, 0.1602, 0.1630,  ..., 0.1507, 0.1561, 0.1559],
        [0.1542, 0.1640, 0.1692,  ..., 0.1557, 0.1603, 0.1636],
        [0.1513, 0.1613, 0.1634,  ..., 0.1522, 0.1593, 0.1602]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.60it/s]

tensor([[0.1542, 0.1604, 0.1654,  ..., 0.1536, 0.1570, 0.1657],
        [0.1574, 0.1653, 0.1711,  ..., 0.1590, 0.1644, 0.1660],
        [0.1593, 0.1642, 0.1682,  ..., 0.1559, 0.1597, 0.1581],
        ...,
        [0.1538, 0.1601, 0.1689,  ..., 0.1553, 0.1567, 0.1588],
        [0.1565, 0.1655, 0.1684,  ..., 0.1569, 0.1627, 0.1656],
        [0.1526, 0.1620, 0.1652,  ..., 0.1547, 0.1572, 0.1589]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.58it/s]

tensor([[0.1537, 0.1607, 0.1640,  ..., 0.1555, 0.1569, 0.1590],
        [0.1542, 0.1660, 0.1710,  ..., 0.1540, 0.1613, 0.1625],
        [0.1516, 0.1612, 0.1674,  ..., 0.1573, 0.1637, 0.1618],
        ...,
        [0.1586, 0.1663, 0.1730,  ..., 0.1570, 0.1634, 0.1669],
        [0.1584, 0.1647, 0.1715,  ..., 0.1595, 0.1617, 0.1648],
        [0.1566, 0.1624, 0.1695,  ..., 0.1546, 0.1588, 0.1628]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.58it/s]

tensor([[0.1564, 0.1647, 0.1660,  ..., 0.1553, 0.1618, 0.1628],
        [0.1522, 0.1637, 0.1640,  ..., 0.1537, 0.1594, 0.1631],
        [0.1573, 0.1664, 0.1741,  ..., 0.1577, 0.1598, 0.1642],
        ...,
        [0.1570, 0.1628, 0.1687,  ..., 0.1548, 0.1595, 0.1625],
        [0.1596, 0.1664, 0.1718,  ..., 0.1589, 0.1618, 0.1698],
        [0.1581, 0.1657, 0.1691,  ..., 0.1566, 0.1642, 0.1650]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.45it/s]

tensor([[0.1576, 0.1656, 0.1725,  ..., 0.1575, 0.1641, 0.1661],
        [0.1605, 0.1670, 0.1722,  ..., 0.1576, 0.1644, 0.1655],
        [0.1542, 0.1606, 0.1669,  ..., 0.1533, 0.1565, 0.1575],
        ...,
        [0.1520, 0.1602, 0.1646,  ..., 0.1553, 0.1580, 0.1651],
        [0.1579, 0.1672, 0.1715,  ..., 0.1568, 0.1635, 0.1690],
        [0.1596, 0.1652, 0.1715,  ..., 0.1596, 0.1640, 0.1654]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

tensor([[0.1580, 0.1633, 0.1701,  ..., 0.1561, 0.1580, 0.1657],
        [0.1553, 0.1609, 0.1670,  ..., 0.1540, 0.1563, 0.1618],
        [0.1512, 0.1628, 0.1649,  ..., 0.1589, 0.1618, 0.1606],
        ...,
        [0.1570, 0.1644, 0.1701,  ..., 0.1578, 0.1600, 0.1643],
        [0.1572, 0.1618, 0.1685,  ..., 0.1551, 0.1581, 0.1636],
        [0.1585, 0.1683, 0.1731,  ..., 0.1585, 0.1650, 0.1677]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.42it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1538, 0.1642, 0.1669,  ..., 0.1533, 0.1588, 0.1604],
        [0.1611, 0.1712, 0.1734,  ..., 0.1584, 0.1685, 0.1669],
        [0.1565, 0.1681, 0.1686,  ..., 0.1561, 0.1623, 0.1639],
        ...,
        [0.1568, 0.1650, 0.1707,  ..., 0.1599, 0.1649, 0.1676],
        [0.1572, 0.1637, 0.1717,  ..., 0.1558, 0.1593, 0.1647],
        [0.1561, 0.1609, 0.1679,  ..., 0.1569, 0.1576, 0.1627]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 10, train_loss: -0.5335490107536316, valid_loss: -0.5347757339477539
Parameter containing:
tensor([[ 1.0004,  1.0005,  1.0004,  1.0004,  1.0004,  1.0004,  1.0005,  1.0003,
          1.0003,  1.0006, -0.4996, -0.4996, -0.4997, -0.4996, -0.4996, -0.4996,
         -0.4996, -0.4997, -0.4996, -0.4997],
     

  2%|▏         | 1/57 [00:00<00:37,  1.48it/s]

tensor([[0.1543, 0.1620, 0.1690,  ..., 0.1547, 0.1604, 0.1611],
        [0.1549, 0.1653, 0.1673,  ..., 0.1592, 0.1599, 0.1639],
        [0.1552, 0.1619, 0.1668,  ..., 0.1552, 0.1584, 0.1626],
        ...,
        [0.1576, 0.1665, 0.1711,  ..., 0.1568, 0.1634, 0.1651],
        [0.1575, 0.1643, 0.1700,  ..., 0.1574, 0.1605, 0.1647],
        [0.1502, 0.1602, 0.1604,  ..., 0.1511, 0.1555, 0.1566]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:38,  1.41it/s]

tensor([[0.1607, 0.1684, 0.1762,  ..., 0.1568, 0.1664, 0.1677],
        [0.1570, 0.1639, 0.1702,  ..., 0.1560, 0.1606, 0.1662],
        [0.1597, 0.1659, 0.1749,  ..., 0.1571, 0.1650, 0.1662],
        ...,
        [0.1570, 0.1646, 0.1691,  ..., 0.1579, 0.1579, 0.1623],
        [0.1610, 0.1680, 0.1759,  ..., 0.1572, 0.1628, 0.1691],
        [0.1571, 0.1650, 0.1722,  ..., 0.1585, 0.1626, 0.1640]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:38,  1.40it/s]

tensor([[0.1514, 0.1639, 0.1659,  ..., 0.1578, 0.1590, 0.1606],
        [0.1574, 0.1631, 0.1708,  ..., 0.1565, 0.1597, 0.1675],
        [0.1549, 0.1607, 0.1668,  ..., 0.1512, 0.1546, 0.1574],
        ...,
        [0.1511, 0.1628, 0.1656,  ..., 0.1544, 0.1594, 0.1624],
        [0.1586, 0.1666, 0.1719,  ..., 0.1565, 0.1633, 0.1657],
        [0.1461, 0.1525, 0.1560,  ..., 0.1446, 0.1475, 0.1524]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:37,  1.42it/s]

tensor([[0.1539, 0.1616, 0.1662,  ..., 0.1570, 0.1563, 0.1623],
        [0.1549, 0.1631, 0.1678,  ..., 0.1558, 0.1604, 0.1622],
        [0.1591, 0.1664, 0.1740,  ..., 0.1582, 0.1634, 0.1687],
        ...,
        [0.1556, 0.1653, 0.1692,  ..., 0.1578, 0.1635, 0.1629],
        [0.1572, 0.1655, 0.1677,  ..., 0.1555, 0.1611, 0.1621],
        [0.1575, 0.1640, 0.1705,  ..., 0.1583, 0.1578, 0.1643]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:35,  1.46it/s]

tensor([[0.1574, 0.1624, 0.1669,  ..., 0.1569, 0.1616, 0.1610],
        [0.1563, 0.1616, 0.1670,  ..., 0.1561, 0.1582, 0.1638],
        [0.1563, 0.1648, 0.1696,  ..., 0.1583, 0.1648, 0.1604],
        ...,
        [0.1513, 0.1606, 0.1668,  ..., 0.1571, 0.1564, 0.1577],
        [0.1599, 0.1707, 0.1741,  ..., 0.1567, 0.1650, 0.1647],
        [0.1541, 0.1620, 0.1635,  ..., 0.1543, 0.1559, 0.1571]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:34,  1.47it/s]

tensor([[0.1567, 0.1627, 0.1692,  ..., 0.1567, 0.1581, 0.1638],
        [0.1595, 0.1689, 0.1752,  ..., 0.1572, 0.1651, 0.1691],
        [0.1565, 0.1620, 0.1700,  ..., 0.1561, 0.1601, 0.1655],
        ...,
        [0.1575, 0.1676, 0.1721,  ..., 0.1575, 0.1626, 0.1657],
        [0.1559, 0.1639, 0.1691,  ..., 0.1569, 0.1600, 0.1656],
        [0.1553, 0.1645, 0.1695,  ..., 0.1559, 0.1621, 0.1626]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.49it/s]

tensor([[0.1582, 0.1669, 0.1731,  ..., 0.1591, 0.1643, 0.1686],
        [0.1537, 0.1623, 0.1663,  ..., 0.1556, 0.1574, 0.1598],
        [0.1586, 0.1639, 0.1689,  ..., 0.1591, 0.1618, 0.1637],
        ...,
        [0.1541, 0.1649, 0.1678,  ..., 0.1556, 0.1596, 0.1645],
        [0.1564, 0.1643, 0.1704,  ..., 0.1527, 0.1605, 0.1634],
        [0.1530, 0.1646, 0.1676,  ..., 0.1533, 0.1593, 0.1614]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:33,  1.48it/s]

tensor([[0.1596, 0.1662, 0.1711,  ..., 0.1582, 0.1629, 0.1614],
        [0.1585, 0.1646, 0.1719,  ..., 0.1577, 0.1613, 0.1681],
        [0.1576, 0.1637, 0.1706,  ..., 0.1570, 0.1610, 0.1654],
        ...,
        [0.1604, 0.1678, 0.1739,  ..., 0.1608, 0.1676, 0.1682],
        [0.1574, 0.1637, 0.1700,  ..., 0.1557, 0.1611, 0.1640],
        [0.1570, 0.1651, 0.1711,  ..., 0.1580, 0.1618, 0.1678]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:33,  1.42it/s]

tensor([[0.1538, 0.1620, 0.1669,  ..., 0.1562, 0.1591, 0.1608],
        [0.1528, 0.1591, 0.1650,  ..., 0.1524, 0.1556, 0.1599],
        [0.1573, 0.1652, 0.1703,  ..., 0.1555, 0.1617, 0.1645],
        ...,
        [0.1522, 0.1605, 0.1646,  ..., 0.1520, 0.1550, 0.1572],
        [0.1432, 0.1541, 0.1543,  ..., 0.1437, 0.1532, 0.1521],
        [0.1562, 0.1657, 0.1696,  ..., 0.1575, 0.1617, 0.1657]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:33,  1.40it/s]

tensor([[0.1566, 0.1623, 0.1702,  ..., 0.1588, 0.1608, 0.1629],
        [0.1535, 0.1621, 0.1648,  ..., 0.1564, 0.1615, 0.1609],
        [0.1572, 0.1643, 0.1712,  ..., 0.1550, 0.1621, 0.1666],
        ...,
        [0.1552, 0.1645, 0.1701,  ..., 0.1545, 0.1590, 0.1654],
        [0.1590, 0.1676, 0.1745,  ..., 0.1568, 0.1635, 0.1651],
        [0.1518, 0.1671, 0.1646,  ..., 0.1564, 0.1590, 0.1589]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:32,  1.43it/s]

tensor([[0.1538, 0.1602, 0.1663,  ..., 0.1541, 0.1557, 0.1595],
        [0.1584, 0.1658, 0.1699,  ..., 0.1579, 0.1653, 0.1675],
        [0.1592, 0.1692, 0.1718,  ..., 0.1585, 0.1665, 0.1674],
        ...,
        [0.1558, 0.1638, 0.1702,  ..., 0.1565, 0.1574, 0.1624],
        [0.1592, 0.1696, 0.1715,  ..., 0.1580, 0.1669, 0.1666],
        [0.1603, 0.1668, 0.1746,  ..., 0.1596, 0.1667, 0.1685]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:30,  1.46it/s]

tensor([[0.1573, 0.1636, 0.1700,  ..., 0.1591, 0.1601, 0.1638],
        [0.1516, 0.1599, 0.1659,  ..., 0.1543, 0.1546, 0.1585],
        [0.1570, 0.1657, 0.1681,  ..., 0.1604, 0.1615, 0.1640],
        ...,
        [0.1542, 0.1594, 0.1657,  ..., 0.1517, 0.1538, 0.1602],
        [0.1563, 0.1677, 0.1682,  ..., 0.1560, 0.1631, 0.1644],
        [0.1556, 0.1648, 0.1696,  ..., 0.1582, 0.1607, 0.1631]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:29,  1.48it/s]

tensor([[0.1446, 0.1568, 0.1567,  ..., 0.1473, 0.1554, 0.1561],
        [0.1544, 0.1632, 0.1676,  ..., 0.1546, 0.1568, 0.1622],
        [0.1568, 0.1657, 0.1676,  ..., 0.1544, 0.1613, 0.1614],
        ...,
        [0.1535, 0.1591, 0.1646,  ..., 0.1512, 0.1560, 0.1626],
        [0.1577, 0.1650, 0.1717,  ..., 0.1547, 0.1619, 0.1627],
        [0.1590, 0.1651, 0.1730,  ..., 0.1583, 0.1653, 0.1679]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:29,  1.46it/s]

tensor([[0.1557, 0.1633, 0.1706,  ..., 0.1555, 0.1585, 0.1620],
        [0.1565, 0.1671, 0.1693,  ..., 0.1592, 0.1608, 0.1626],
        [0.1575, 0.1626, 0.1694,  ..., 0.1596, 0.1605, 0.1646],
        ...,
        [0.1506, 0.1562, 0.1606,  ..., 0.1506, 0.1525, 0.1572],
        [0.1524, 0.1662, 0.1655,  ..., 0.1533, 0.1610, 0.1585],
        [0.1557, 0.1637, 0.1700,  ..., 0.1588, 0.1631, 0.1647]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:29,  1.43it/s]

tensor([[0.1576, 0.1636, 0.1703,  ..., 0.1541, 0.1609, 0.1668],
        [0.1547, 0.1635, 0.1679,  ..., 0.1567, 0.1600, 0.1654],
        [0.1559, 0.1623, 0.1705,  ..., 0.1580, 0.1591, 0.1634],
        ...,
        [0.1585, 0.1616, 0.1683,  ..., 0.1566, 0.1587, 0.1645],
        [0.1555, 0.1667, 0.1710,  ..., 0.1579, 0.1614, 0.1677],
        [0.1540, 0.1629, 0.1671,  ..., 0.1553, 0.1622, 0.1598]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:28,  1.46it/s]

tensor([[0.1587, 0.1667, 0.1731,  ..., 0.1575, 0.1639, 0.1672],
        [0.1585, 0.1650, 0.1726,  ..., 0.1568, 0.1606, 0.1672],
        [0.1545, 0.1617, 0.1675,  ..., 0.1561, 0.1565, 0.1571],
        ...,
        [0.1549, 0.1627, 0.1684,  ..., 0.1553, 0.1616, 0.1624],
        [0.1587, 0.1650, 0.1680,  ..., 0.1561, 0.1609, 0.1632],
        [0.1555, 0.1665, 0.1665,  ..., 0.1550, 0.1633, 0.1642]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:27,  1.45it/s]

tensor([[0.1558, 0.1637, 0.1712,  ..., 0.1576, 0.1615, 0.1633],
        [0.1543, 0.1642, 0.1657,  ..., 0.1518, 0.1618, 0.1621],
        [0.1586, 0.1656, 0.1672,  ..., 0.1566, 0.1595, 0.1614],
        ...,
        [0.1603, 0.1677, 0.1726,  ..., 0.1559, 0.1654, 0.1645],
        [0.1533, 0.1614, 0.1682,  ..., 0.1535, 0.1610, 0.1606],
        [0.1575, 0.1650, 0.1717,  ..., 0.1594, 0.1636, 0.1682]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:26,  1.48it/s]

tensor([[0.1572, 0.1633, 0.1697,  ..., 0.1562, 0.1634, 0.1660],
        [0.1565, 0.1651, 0.1708,  ..., 0.1561, 0.1609, 0.1629],
        [0.1566, 0.1641, 0.1679,  ..., 0.1540, 0.1585, 0.1612],
        ...,
        [0.1527, 0.1610, 0.1652,  ..., 0.1565, 0.1549, 0.1606],
        [0.1562, 0.1648, 0.1692,  ..., 0.1586, 0.1609, 0.1639],
        [0.1527, 0.1584, 0.1659,  ..., 0.1530, 0.1563, 0.1617]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:25,  1.48it/s]

tensor([[0.1510, 0.1569, 0.1612,  ..., 0.1513, 0.1534, 0.1554],
        [0.1627, 0.1676, 0.1763,  ..., 0.1597, 0.1654, 0.1697],
        [0.1527, 0.1567, 0.1575,  ..., 0.1445, 0.1510, 0.1544],
        ...,
        [0.1566, 0.1667, 0.1696,  ..., 0.1552, 0.1615, 0.1664],
        [0.1557, 0.1628, 0.1682,  ..., 0.1584, 0.1617, 0.1645],
        [0.1590, 0.1671, 0.1715,  ..., 0.1561, 0.1642, 0.1671]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:25,  1.45it/s]

tensor([[0.1562, 0.1631, 0.1699,  ..., 0.1556, 0.1594, 0.1659],
        [0.1572, 0.1650, 0.1732,  ..., 0.1549, 0.1630, 0.1662],
        [0.1588, 0.1639, 0.1705,  ..., 0.1568, 0.1596, 0.1648],
        ...,
        [0.1562, 0.1648, 0.1695,  ..., 0.1574, 0.1596, 0.1621],
        [0.1597, 0.1640, 0.1699,  ..., 0.1576, 0.1594, 0.1648],
        [0.1434, 0.1551, 0.1554,  ..., 0.1469, 0.1553, 0.1543]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:24,  1.47it/s]

tensor([[0.1498, 0.1581, 0.1596,  ..., 0.1504, 0.1512, 0.1540],
        [0.1566, 0.1643, 0.1703,  ..., 0.1577, 0.1613, 0.1657],
        [0.1547, 0.1612, 0.1673,  ..., 0.1554, 0.1598, 0.1649],
        ...,
        [0.1580, 0.1685, 0.1730,  ..., 0.1569, 0.1656, 0.1665],
        [0.1572, 0.1655, 0.1688,  ..., 0.1561, 0.1580, 0.1632],
        [0.1532, 0.1625, 0.1654,  ..., 0.1513, 0.1563, 0.1560]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:23,  1.50it/s]

tensor([[0.1556, 0.1602, 0.1677,  ..., 0.1536, 0.1582, 0.1613],
        [0.1574, 0.1677, 0.1703,  ..., 0.1541, 0.1573, 0.1606],
        [0.1568, 0.1639, 0.1706,  ..., 0.1585, 0.1611, 0.1633],
        ...,
        [0.1573, 0.1672, 0.1756,  ..., 0.1566, 0.1637, 0.1667],
        [0.1488, 0.1538, 0.1630,  ..., 0.1489, 0.1515, 0.1543],
        [0.1552, 0.1626, 0.1692,  ..., 0.1557, 0.1602, 0.1640]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.52it/s]

tensor([[0.1570, 0.1651, 0.1678,  ..., 0.1581, 0.1624, 0.1630],
        [0.1577, 0.1643, 0.1727,  ..., 0.1562, 0.1627, 0.1669],
        [0.1591, 0.1666, 0.1748,  ..., 0.1560, 0.1652, 0.1686],
        ...,
        [0.1599, 0.1638, 0.1736,  ..., 0.1562, 0.1648, 0.1686],
        [0.1557, 0.1655, 0.1701,  ..., 0.1571, 0.1642, 0.1662],
        [0.1546, 0.1588, 0.1654,  ..., 0.1550, 0.1562, 0.1594]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:21,  1.51it/s]

tensor([[0.1572, 0.1607, 0.1709,  ..., 0.1558, 0.1601, 0.1627],
        [0.1535, 0.1612, 0.1639,  ..., 0.1537, 0.1540, 0.1540],
        [0.1523, 0.1593, 0.1646,  ..., 0.1547, 0.1560, 0.1597],
        ...,
        [0.1556, 0.1659, 0.1679,  ..., 0.1598, 0.1645, 0.1619],
        [0.1557, 0.1643, 0.1696,  ..., 0.1561, 0.1621, 0.1664],
        [0.1536, 0.1645, 0.1683,  ..., 0.1558, 0.1592, 0.1626]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:21,  1.50it/s]

tensor([[0.1567, 0.1646, 0.1673,  ..., 0.1572, 0.1640, 0.1640],
        [0.1562, 0.1610, 0.1675,  ..., 0.1534, 0.1582, 0.1608],
        [0.1621, 0.1703, 0.1767,  ..., 0.1595, 0.1657, 0.1710],
        ...,
        [0.1556, 0.1633, 0.1682,  ..., 0.1535, 0.1581, 0.1623],
        [0.1535, 0.1602, 0.1642,  ..., 0.1567, 0.1578, 0.1635],
        [0.1603, 0.1686, 0.1724,  ..., 0.1585, 0.1659, 0.1681]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.51it/s]

tensor([[0.1561, 0.1630, 0.1705,  ..., 0.1550, 0.1591, 0.1633],
        [0.1585, 0.1646, 0.1750,  ..., 0.1561, 0.1634, 0.1665],
        [0.1557, 0.1665, 0.1672,  ..., 0.1540, 0.1617, 0.1636],
        ...,
        [0.1525, 0.1606, 0.1641,  ..., 0.1544, 0.1548, 0.1552],
        [0.1599, 0.1646, 0.1723,  ..., 0.1564, 0.1612, 0.1665],
        [0.1583, 0.1666, 0.1692,  ..., 0.1582, 0.1640, 0.1687]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:19,  1.51it/s]

tensor([[0.1563, 0.1616, 0.1688,  ..., 0.1564, 0.1594, 0.1644],
        [0.1618, 0.1661, 0.1727,  ..., 0.1564, 0.1650, 0.1674],
        [0.1567, 0.1640, 0.1694,  ..., 0.1573, 0.1637, 0.1668],
        ...,
        [0.1560, 0.1649, 0.1702,  ..., 0.1554, 0.1619, 0.1626],
        [0.1516, 0.1574, 0.1638,  ..., 0.1541, 0.1558, 0.1624],
        [0.1586, 0.1675, 0.1728,  ..., 0.1564, 0.1667, 0.1675]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:19,  1.51it/s]

tensor([[0.1592, 0.1628, 0.1707,  ..., 0.1559, 0.1599, 0.1652],
        [0.1590, 0.1638, 0.1709,  ..., 0.1576, 0.1657, 0.1648],
        [0.1579, 0.1647, 0.1672,  ..., 0.1565, 0.1632, 0.1629],
        ...,
        [0.1573, 0.1635, 0.1701,  ..., 0.1554, 0.1594, 0.1639],
        [0.1594, 0.1649, 0.1715,  ..., 0.1558, 0.1624, 0.1662],
        [0.1586, 0.1660, 0.1754,  ..., 0.1594, 0.1639, 0.1675]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:18,  1.51it/s]

tensor([[0.1543, 0.1645, 0.1680,  ..., 0.1553, 0.1597, 0.1603],
        [0.1552, 0.1626, 0.1696,  ..., 0.1554, 0.1632, 0.1657],
        [0.1559, 0.1654, 0.1676,  ..., 0.1573, 0.1598, 0.1628],
        ...,
        [0.1467, 0.1499, 0.1553,  ..., 0.1477, 0.1490, 0.1546],
        [0.1551, 0.1601, 0.1679,  ..., 0.1556, 0.1596, 0.1624],
        [0.1617, 0.1697, 0.1726,  ..., 0.1585, 0.1659, 0.1657]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:18,  1.49it/s]

tensor([[0.1572, 0.1595, 0.1658,  ..., 0.1539, 0.1532, 0.1632],
        [0.1558, 0.1642, 0.1699,  ..., 0.1564, 0.1597, 0.1625],
        [0.1553, 0.1668, 0.1702,  ..., 0.1559, 0.1628, 0.1637],
        ...,
        [0.1575, 0.1669, 0.1727,  ..., 0.1555, 0.1627, 0.1644],
        [0.1579, 0.1642, 0.1690,  ..., 0.1579, 0.1613, 0.1624],
        [0.1563, 0.1619, 0.1684,  ..., 0.1558, 0.1591, 0.1621]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:17,  1.49it/s]

tensor([[0.1579, 0.1643, 0.1729,  ..., 0.1571, 0.1607, 0.1655],
        [0.1580, 0.1659, 0.1715,  ..., 0.1559, 0.1628, 0.1669],
        [0.1446, 0.1519, 0.1585,  ..., 0.1502, 0.1520, 0.1516],
        ...,
        [0.1562, 0.1626, 0.1674,  ..., 0.1570, 0.1617, 0.1612],
        [0.1571, 0.1642, 0.1696,  ..., 0.1549, 0.1598, 0.1636],
        [0.1568, 0.1646, 0.1719,  ..., 0.1589, 0.1612, 0.1635]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.50it/s]

tensor([[0.1575, 0.1641, 0.1699,  ..., 0.1573, 0.1614, 0.1642],
        [0.1565, 0.1662, 0.1704,  ..., 0.1564, 0.1635, 0.1675],
        [0.1557, 0.1637, 0.1678,  ..., 0.1544, 0.1589, 0.1646],
        ...,
        [0.1540, 0.1615, 0.1656,  ..., 0.1526, 0.1572, 0.1609],
        [0.1601, 0.1661, 0.1742,  ..., 0.1567, 0.1630, 0.1645],
        [0.1569, 0.1663, 0.1684,  ..., 0.1565, 0.1596, 0.1639]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:15,  1.51it/s]

tensor([[0.1550, 0.1657, 0.1698,  ..., 0.1589, 0.1619, 0.1631],
        [0.1516, 0.1622, 0.1631,  ..., 0.1540, 0.1590, 0.1569],
        [0.1532, 0.1655, 0.1669,  ..., 0.1552, 0.1563, 0.1603],
        ...,
        [0.1578, 0.1635, 0.1696,  ..., 0.1566, 0.1592, 0.1651],
        [0.1560, 0.1604, 0.1673,  ..., 0.1587, 0.1590, 0.1620],
        [0.1558, 0.1662, 0.1688,  ..., 0.1554, 0.1618, 0.1639]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.52it/s]

tensor([[0.1517, 0.1655, 0.1618,  ..., 0.1564, 0.1601, 0.1578],
        [0.1558, 0.1622, 0.1686,  ..., 0.1566, 0.1555, 0.1617],
        [0.1579, 0.1659, 0.1733,  ..., 0.1593, 0.1631, 0.1640],
        ...,
        [0.1542, 0.1649, 0.1691,  ..., 0.1556, 0.1626, 0.1630],
        [0.1484, 0.1527, 0.1566,  ..., 0.1503, 0.1492, 0.1505],
        [0.1565, 0.1661, 0.1702,  ..., 0.1594, 0.1635, 0.1650]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.52it/s]

tensor([[0.1569, 0.1650, 0.1701,  ..., 0.1557, 0.1604, 0.1646],
        [0.1547, 0.1646, 0.1690,  ..., 0.1560, 0.1575, 0.1615],
        [0.1569, 0.1662, 0.1687,  ..., 0.1565, 0.1598, 0.1636],
        ...,
        [0.1541, 0.1598, 0.1660,  ..., 0.1539, 0.1548, 0.1585],
        [0.1574, 0.1649, 0.1693,  ..., 0.1579, 0.1632, 0.1659],
        [0.1560, 0.1667, 0.1680,  ..., 0.1584, 0.1604, 0.1641]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:13,  1.53it/s]

tensor([[0.1537, 0.1609, 0.1658,  ..., 0.1529, 0.1545, 0.1599],
        [0.1539, 0.1622, 0.1651,  ..., 0.1536, 0.1575, 0.1619],
        [0.1593, 0.1640, 0.1721,  ..., 0.1567, 0.1623, 0.1675],
        ...,
        [0.1582, 0.1637, 0.1706,  ..., 0.1572, 0.1626, 0.1660],
        [0.1515, 0.1613, 0.1648,  ..., 0.1545, 0.1569, 0.1621],
        [0.1544, 0.1631, 0.1656,  ..., 0.1557, 0.1570, 0.1599]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:13,  1.52it/s]

tensor([[0.1538, 0.1641, 0.1674,  ..., 0.1537, 0.1598, 0.1610],
        [0.1560, 0.1655, 0.1707,  ..., 0.1558, 0.1604, 0.1640],
        [0.1538, 0.1638, 0.1676,  ..., 0.1551, 0.1572, 0.1605],
        ...,
        [0.1549, 0.1694, 0.1721,  ..., 0.1562, 0.1641, 0.1675],
        [0.1575, 0.1647, 0.1692,  ..., 0.1562, 0.1653, 0.1686],
        [0.1581, 0.1633, 0.1697,  ..., 0.1569, 0.1592, 0.1632]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:12,  1.52it/s]

tensor([[0.1568, 0.1676, 0.1684,  ..., 0.1572, 0.1609, 0.1618],
        [0.1593, 0.1634, 0.1658,  ..., 0.1518, 0.1547, 0.1638],
        [0.1555, 0.1622, 0.1692,  ..., 0.1564, 0.1570, 0.1657],
        ...,
        [0.1523, 0.1628, 0.1632,  ..., 0.1558, 0.1597, 0.1588],
        [0.1587, 0.1682, 0.1731,  ..., 0.1578, 0.1632, 0.1697],
        [0.1474, 0.1614, 0.1610,  ..., 0.1528, 0.1585, 0.1584]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:11,  1.53it/s]

tensor([[0.1516, 0.1641, 0.1644,  ..., 0.1535, 0.1622, 0.1629],
        [0.1572, 0.1649, 0.1713,  ..., 0.1587, 0.1631, 0.1662],
        [0.1579, 0.1621, 0.1691,  ..., 0.1558, 0.1614, 0.1630],
        ...,
        [0.1548, 0.1660, 0.1674,  ..., 0.1560, 0.1610, 0.1609],
        [0.1574, 0.1652, 0.1705,  ..., 0.1558, 0.1632, 0.1648],
        [0.1588, 0.1656, 0.1724,  ..., 0.1586, 0.1617, 0.1696]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:27<00:11,  1.49it/s]

tensor([[0.1559, 0.1633, 0.1692,  ..., 0.1557, 0.1590, 0.1672],
        [0.1564, 0.1661, 0.1694,  ..., 0.1557, 0.1643, 0.1641],
        [0.1566, 0.1646, 0.1688,  ..., 0.1553, 0.1595, 0.1643],
        ...,
        [0.1562, 0.1635, 0.1679,  ..., 0.1585, 0.1602, 0.1632],
        [0.1577, 0.1678, 0.1703,  ..., 0.1576, 0.1672, 0.1648],
        [0.1548, 0.1608, 0.1654,  ..., 0.1586, 0.1617, 0.1607]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:10,  1.47it/s]

tensor([[0.1559, 0.1644, 0.1702,  ..., 0.1551, 0.1616, 0.1626],
        [0.1558, 0.1662, 0.1661,  ..., 0.1565, 0.1602, 0.1621],
        [0.1566, 0.1659, 0.1685,  ..., 0.1563, 0.1627, 0.1660],
        ...,
        [0.1560, 0.1619, 0.1696,  ..., 0.1602, 0.1636, 0.1641],
        [0.1567, 0.1647, 0.1703,  ..., 0.1565, 0.1588, 0.1679],
        [0.1605, 0.1665, 0.1722,  ..., 0.1570, 0.1631, 0.1685]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:10,  1.49it/s]

tensor([[0.1544, 0.1607, 0.1678,  ..., 0.1587, 0.1616, 0.1647],
        [0.1544, 0.1596, 0.1641,  ..., 0.1527, 0.1580, 0.1606],
        [0.1565, 0.1648, 0.1672,  ..., 0.1552, 0.1590, 0.1606],
        ...,
        [0.1447, 0.1533, 0.1547,  ..., 0.1480, 0.1504, 0.1544],
        [0.1565, 0.1614, 0.1691,  ..., 0.1580, 0.1630, 0.1662],
        [0.1583, 0.1665, 0.1695,  ..., 0.1586, 0.1647, 0.1667]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:29<00:09,  1.50it/s]

tensor([[0.1535, 0.1643, 0.1671,  ..., 0.1540, 0.1577, 0.1619],
        [0.1552, 0.1644, 0.1671,  ..., 0.1558, 0.1642, 0.1622],
        [0.1535, 0.1586, 0.1640,  ..., 0.1548, 0.1561, 0.1598],
        ...,
        [0.1576, 0.1614, 0.1680,  ..., 0.1539, 0.1563, 0.1624],
        [0.1514, 0.1639, 0.1608,  ..., 0.1532, 0.1586, 0.1568],
        [0.1565, 0.1655, 0.1677,  ..., 0.1563, 0.1606, 0.1624]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:08,  1.50it/s]

tensor([[0.1543, 0.1638, 0.1653,  ..., 0.1569, 0.1642, 0.1617],
        [0.1581, 0.1653, 0.1713,  ..., 0.1560, 0.1622, 0.1676],
        [0.1511, 0.1639, 0.1604,  ..., 0.1529, 0.1572, 0.1547],
        ...,
        [0.1546, 0.1621, 0.1650,  ..., 0.1537, 0.1601, 0.1613],
        [0.1524, 0.1607, 0.1672,  ..., 0.1563, 0.1579, 0.1590],
        [0.1547, 0.1628, 0.1672,  ..., 0.1558, 0.1614, 0.1612]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:30<00:07,  1.52it/s]

tensor([[0.1535, 0.1602, 0.1666,  ..., 0.1577, 0.1571, 0.1598],
        [0.1550, 0.1625, 0.1650,  ..., 0.1548, 0.1549, 0.1607],
        [0.1560, 0.1624, 0.1680,  ..., 0.1536, 0.1554, 0.1645],
        ...,
        [0.1588, 0.1645, 0.1718,  ..., 0.1590, 0.1626, 0.1655],
        [0.1564, 0.1648, 0.1708,  ..., 0.1580, 0.1631, 0.1637],
        [0.1527, 0.1599, 0.1663,  ..., 0.1555, 0.1555, 0.1616]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:31<00:07,  1.50it/s]

tensor([[0.1566, 0.1620, 0.1695,  ..., 0.1584, 0.1599, 0.1657],
        [0.1555, 0.1614, 0.1679,  ..., 0.1580, 0.1593, 0.1618],
        [0.1565, 0.1637, 0.1674,  ..., 0.1566, 0.1606, 0.1610],
        ...,
        [0.1539, 0.1597, 0.1656,  ..., 0.1526, 0.1554, 0.1583],
        [0.1492, 0.1581, 0.1582,  ..., 0.1515, 0.1525, 0.1553],
        [0.1543, 0.1590, 0.1657,  ..., 0.1551, 0.1550, 0.1613]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:06,  1.50it/s]

tensor([[0.1575, 0.1611, 0.1671,  ..., 0.1555, 0.1568, 0.1613],
        [0.1582, 0.1658, 0.1700,  ..., 0.1578, 0.1584, 0.1618],
        [0.1564, 0.1636, 0.1714,  ..., 0.1570, 0.1631, 0.1650],
        ...,
        [0.1563, 0.1633, 0.1675,  ..., 0.1553, 0.1596, 0.1667],
        [0.1504, 0.1628, 0.1621,  ..., 0.1559, 0.1598, 0.1594],
        [0.1510, 0.1570, 0.1624,  ..., 0.1532, 0.1558, 0.1597]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:32<00:05,  1.52it/s]

tensor([[0.1601, 0.1650, 0.1690,  ..., 0.1568, 0.1612, 0.1665],
        [0.1590, 0.1657, 0.1697,  ..., 0.1567, 0.1614, 0.1624],
        [0.1492, 0.1614, 0.1661,  ..., 0.1524, 0.1554, 0.1566],
        ...,
        [0.1566, 0.1649, 0.1684,  ..., 0.1567, 0.1611, 0.1640],
        [0.1589, 0.1665, 0.1700,  ..., 0.1557, 0.1608, 0.1637],
        [0.1560, 0.1642, 0.1685,  ..., 0.1567, 0.1609, 0.1647]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:32<00:05,  1.52it/s]

tensor([[0.1553, 0.1615, 0.1678,  ..., 0.1555, 0.1583, 0.1606],
        [0.1517, 0.1632, 0.1659,  ..., 0.1529, 0.1563, 0.1587],
        [0.1588, 0.1658, 0.1715,  ..., 0.1570, 0.1584, 0.1656],
        ...,
        [0.1566, 0.1621, 0.1698,  ..., 0.1548, 0.1569, 0.1645],
        [0.1549, 0.1636, 0.1686,  ..., 0.1579, 0.1627, 0.1651],
        [0.1508, 0.1640, 0.1629,  ..., 0.1573, 0.1603, 0.1623]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:33<00:04,  1.50it/s]

tensor([[0.1563, 0.1633, 0.1668,  ..., 0.1530, 0.1570, 0.1595],
        [0.1564, 0.1662, 0.1691,  ..., 0.1579, 0.1603, 0.1620],
        [0.1567, 0.1628, 0.1676,  ..., 0.1565, 0.1589, 0.1635],
        ...,
        [0.1537, 0.1659, 0.1671,  ..., 0.1581, 0.1601, 0.1627],
        [0.1604, 0.1660, 0.1752,  ..., 0.1581, 0.1648, 0.1663],
        [0.1567, 0.1658, 0.1690,  ..., 0.1554, 0.1615, 0.1630]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:34<00:04,  1.48it/s]

tensor([[0.1424, 0.1520, 0.1574,  ..., 0.1482, 0.1494, 0.1527],
        [0.1581, 0.1633, 0.1710,  ..., 0.1576, 0.1601, 0.1636],
        [0.1523, 0.1603, 0.1652,  ..., 0.1540, 0.1580, 0.1581],
        ...,
        [0.1511, 0.1590, 0.1653,  ..., 0.1523, 0.1544, 0.1570],
        [0.1625, 0.1713, 0.1748,  ..., 0.1591, 0.1645, 0.1636],
        [0.1564, 0.1655, 0.1680,  ..., 0.1582, 0.1603, 0.1623]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:35<00:03,  1.48it/s]

tensor([[0.1563, 0.1673, 0.1699,  ..., 0.1585, 0.1619, 0.1631],
        [0.1594, 0.1666, 0.1741,  ..., 0.1567, 0.1679, 0.1697],
        [0.1570, 0.1640, 0.1697,  ..., 0.1574, 0.1601, 0.1634],
        ...,
        [0.1583, 0.1661, 0.1714,  ..., 0.1574, 0.1615, 0.1660],
        [0.1577, 0.1664, 0.1717,  ..., 0.1549, 0.1619, 0.1632],
        [0.1582, 0.1627, 0.1713,  ..., 0.1575, 0.1621, 0.1653]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:35<00:02,  1.49it/s]

tensor([[0.1566, 0.1664, 0.1703,  ..., 0.1557, 0.1661, 0.1658],
        [0.1557, 0.1661, 0.1697,  ..., 0.1547, 0.1630, 0.1622],
        [0.1568, 0.1633, 0.1715,  ..., 0.1551, 0.1590, 0.1626],
        ...,
        [0.1543, 0.1603, 0.1657,  ..., 0.1545, 0.1582, 0.1629],
        [0.1547, 0.1636, 0.1686,  ..., 0.1582, 0.1608, 0.1629],
        [0.1593, 0.1647, 0.1725,  ..., 0.1585, 0.1626, 0.1697]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:36<00:01,  1.52it/s]

tensor([[0.1499, 0.1622, 0.1665,  ..., 0.1531, 0.1591, 0.1583],
        [0.1574, 0.1667, 0.1696,  ..., 0.1568, 0.1633, 0.1638],
        [0.1586, 0.1660, 0.1725,  ..., 0.1575, 0.1617, 0.1662],
        ...,
        [0.1553, 0.1622, 0.1691,  ..., 0.1584, 0.1592, 0.1630],
        [0.1579, 0.1640, 0.1711,  ..., 0.1572, 0.1581, 0.1666],
        [0.1599, 0.1634, 0.1724,  ..., 0.1563, 0.1603, 0.1656]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:37<00:01,  1.51it/s]

tensor([[0.1571, 0.1613, 0.1711,  ..., 0.1559, 0.1570, 0.1637],
        [0.1574, 0.1638, 0.1682,  ..., 0.1557, 0.1620, 0.1615],
        [0.1542, 0.1615, 0.1671,  ..., 0.1528, 0.1562, 0.1608],
        ...,
        [0.1519, 0.1598, 0.1658,  ..., 0.1518, 0.1568, 0.1611],
        [0.1594, 0.1596, 0.1682,  ..., 0.1535, 0.1596, 0.1649],
        [0.1545, 0.1642, 0.1671,  ..., 0.1561, 0.1569, 0.1637]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:37<00:00,  1.50it/s]

tensor([[0.1555, 0.1661, 0.1651,  ..., 0.1555, 0.1617, 0.1630],
        [0.1534, 0.1593, 0.1648,  ..., 0.1565, 0.1574, 0.1605],
        [0.1567, 0.1632, 0.1708,  ..., 0.1519, 0.1580, 0.1665],
        ...,
        [0.1595, 0.1662, 0.1733,  ..., 0.1568, 0.1636, 0.1652],
        [0.1547, 0.1606, 0.1678,  ..., 0.1553, 0.1594, 0.1607],
        [0.1574, 0.1622, 0.1709,  ..., 0.1583, 0.1590, 0.1649]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:38<00:00,  1.49it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.34it/s]

tensor([[0.1532, 0.1620, 0.1654,  ..., 0.1540, 0.1573, 0.1566],
        [0.1543, 0.1649, 0.1659,  ..., 0.1558, 0.1620, 0.1640],
        [0.1549, 0.1633, 0.1699,  ..., 0.1558, 0.1650, 0.1655],
        ...,
        [0.1615, 0.1654, 0.1740,  ..., 0.1574, 0.1627, 0.1652],
        [0.1549, 0.1632, 0.1686,  ..., 0.1562, 0.1618, 0.1660],
        [0.1502, 0.1584, 0.1601,  ..., 0.1509, 0.1513, 0.1527]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.35it/s]

tensor([[0.1574, 0.1637, 0.1686,  ..., 0.1558, 0.1624, 0.1617],
        [0.1625, 0.1661, 0.1757,  ..., 0.1568, 0.1628, 0.1689],
        [0.1495, 0.1608, 0.1575,  ..., 0.1495, 0.1558, 0.1529],
        ...,
        [0.1576, 0.1621, 0.1717,  ..., 0.1574, 0.1597, 0.1636],
        [0.1535, 0.1630, 0.1650,  ..., 0.1563, 0.1611, 0.1592],
        [0.1553, 0.1629, 0.1668,  ..., 0.1551, 0.1591, 0.1641]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.35it/s]

tensor([[0.1515, 0.1621, 0.1662,  ..., 0.1538, 0.1607, 0.1653],
        [0.1572, 0.1649, 0.1679,  ..., 0.1549, 0.1603, 0.1601],
        [0.1564, 0.1621, 0.1695,  ..., 0.1566, 0.1589, 0.1607],
        ...,
        [0.1547, 0.1629, 0.1666,  ..., 0.1566, 0.1588, 0.1613],
        [0.1521, 0.1612, 0.1694,  ..., 0.1583, 0.1596, 0.1619],
        [0.1552, 0.1641, 0.1682,  ..., 0.1550, 0.1629, 0.1619]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.31it/s]

tensor([[0.1557, 0.1633, 0.1666,  ..., 0.1540, 0.1571, 0.1603],
        [0.1550, 0.1598, 0.1667,  ..., 0.1538, 0.1572, 0.1625],
        [0.1542, 0.1648, 0.1657,  ..., 0.1523, 0.1606, 0.1607],
        ...,
        [0.1578, 0.1677, 0.1688,  ..., 0.1579, 0.1634, 0.1640],
        [0.1556, 0.1596, 0.1665,  ..., 0.1536, 0.1588, 0.1601],
        [0.1547, 0.1607, 0.1687,  ..., 0.1533, 0.1579, 0.1656]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.24it/s]

tensor([[0.1553, 0.1665, 0.1690,  ..., 0.1565, 0.1629, 0.1644],
        [0.1597, 0.1646, 0.1747,  ..., 0.1588, 0.1633, 0.1691],
        [0.1512, 0.1615, 0.1667,  ..., 0.1510, 0.1552, 0.1565],
        ...,
        [0.1514, 0.1597, 0.1654,  ..., 0.1515, 0.1553, 0.1549],
        [0.1570, 0.1622, 0.1678,  ..., 0.1556, 0.1583, 0.1636],
        [0.1521, 0.1607, 0.1652,  ..., 0.1546, 0.1557, 0.1613]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.27it/s]

tensor([[0.1536, 0.1603, 0.1670,  ..., 0.1539, 0.1569, 0.1599],
        [0.1553, 0.1625, 0.1690,  ..., 0.1566, 0.1591, 0.1631],
        [0.1561, 0.1644, 0.1686,  ..., 0.1551, 0.1616, 0.1652],
        ...,
        [0.1588, 0.1656, 0.1730,  ..., 0.1590, 0.1640, 0.1679],
        [0.1586, 0.1638, 0.1672,  ..., 0.1558, 0.1589, 0.1577],
        [0.1493, 0.1594, 0.1605,  ..., 0.1516, 0.1592, 0.1570]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.18it/s]

tensor([[0.1583, 0.1627, 0.1740,  ..., 0.1584, 0.1619, 0.1646],
        [0.1592, 0.1657, 0.1724,  ..., 0.1596, 0.1658, 0.1686],
        [0.1485, 0.1624, 0.1627,  ..., 0.1557, 0.1581, 0.1579],
        ...,
        [0.1558, 0.1606, 0.1663,  ..., 0.1563, 0.1577, 0.1620],
        [0.1552, 0.1639, 0.1707,  ..., 0.1557, 0.1600, 0.1649],
        [0.1510, 0.1600, 0.1635,  ..., 0.1550, 0.1569, 0.1646]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.19it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1585, 0.1662, 0.1730,  ..., 0.1585, 0.1667, 0.1691],
        [0.1573, 0.1653, 0.1676,  ..., 0.1575, 0.1588, 0.1615],
        [0.1576, 0.1614, 0.1686,  ..., 0.1574, 0.1576, 0.1646],
        ...,
        [0.1480, 0.1551, 0.1592,  ..., 0.1485, 0.1495, 0.1502],
        [0.1567, 0.1640, 0.1713,  ..., 0.1569, 0.1620, 0.1662],
        [0.1546, 0.1645, 0.1669,  ..., 0.1564, 0.1613, 0.1622]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 11, train_loss: -0.5845367908477783, valid_loss: -0.57459956407547
Parameter containing:
tensor([[ 1.0004,  1.0004,  1.0004,  1.0004,  1.0004,  1.0004,  1.0006,  1.0003,
          1.0003,  1.0006, -0.4996, -0.4996, -0.4997, -0.4996, -0.4996, -0.4996,
         -0.4996, -0.4997, -0.4996, -0.4997],
       

  2%|▏         | 1/57 [00:00<00:38,  1.46it/s]

tensor([[0.1528, 0.1601, 0.1651,  ..., 0.1520, 0.1564, 0.1594],
        [0.1573, 0.1649, 0.1731,  ..., 0.1547, 0.1622, 0.1666],
        [0.1544, 0.1616, 0.1663,  ..., 0.1546, 0.1575, 0.1611],
        ...,
        [0.1592, 0.1659, 0.1751,  ..., 0.1571, 0.1638, 0.1698],
        [0.1536, 0.1623, 0.1683,  ..., 0.1517, 0.1557, 0.1593],
        [0.1575, 0.1680, 0.1712,  ..., 0.1588, 0.1647, 0.1654]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:37,  1.47it/s]

tensor([[0.1549, 0.1631, 0.1687,  ..., 0.1578, 0.1617, 0.1624],
        [0.1575, 0.1654, 0.1695,  ..., 0.1558, 0.1594, 0.1640],
        [0.1526, 0.1554, 0.1604,  ..., 0.1466, 0.1544, 0.1583],
        ...,
        [0.1567, 0.1643, 0.1666,  ..., 0.1557, 0.1592, 0.1616],
        [0.1486, 0.1623, 0.1605,  ..., 0.1510, 0.1596, 0.1564],
        [0.1538, 0.1653, 0.1655,  ..., 0.1569, 0.1615, 0.1619]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:36,  1.47it/s]

tensor([[0.1583, 0.1672, 0.1705,  ..., 0.1569, 0.1613, 0.1620],
        [0.1584, 0.1629, 0.1695,  ..., 0.1599, 0.1616, 0.1631],
        [0.1561, 0.1640, 0.1693,  ..., 0.1561, 0.1607, 0.1640],
        ...,
        [0.1552, 0.1645, 0.1701,  ..., 0.1587, 0.1598, 0.1624],
        [0.1543, 0.1603, 0.1663,  ..., 0.1545, 0.1561, 0.1597],
        [0.1560, 0.1650, 0.1708,  ..., 0.1576, 0.1627, 0.1641]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:35,  1.50it/s]

tensor([[0.1559, 0.1655, 0.1688,  ..., 0.1549, 0.1613, 0.1664],
        [0.1462, 0.1487, 0.1581,  ..., 0.1488, 0.1502, 0.1538],
        [0.1560, 0.1656, 0.1666,  ..., 0.1571, 0.1627, 0.1645],
        ...,
        [0.1524, 0.1620, 0.1659,  ..., 0.1532, 0.1542, 0.1559],
        [0.1562, 0.1614, 0.1679,  ..., 0.1561, 0.1599, 0.1619],
        [0.1559, 0.1617, 0.1710,  ..., 0.1543, 0.1568, 0.1652]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.51it/s]

tensor([[0.1567, 0.1605, 0.1710,  ..., 0.1563, 0.1580, 0.1665],
        [0.1562, 0.1644, 0.1694,  ..., 0.1569, 0.1609, 0.1624],
        [0.1577, 0.1635, 0.1671,  ..., 0.1563, 0.1611, 0.1631],
        ...,
        [0.1557, 0.1640, 0.1692,  ..., 0.1572, 0.1592, 0.1639],
        [0.1583, 0.1692, 0.1708,  ..., 0.1570, 0.1647, 0.1668],
        [0.1550, 0.1625, 0.1683,  ..., 0.1522, 0.1577, 0.1591]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.52it/s]

tensor([[0.1553, 0.1658, 0.1680,  ..., 0.1549, 0.1590, 0.1613],
        [0.1591, 0.1640, 0.1737,  ..., 0.1563, 0.1618, 0.1686],
        [0.1550, 0.1632, 0.1676,  ..., 0.1579, 0.1598, 0.1674],
        ...,
        [0.1527, 0.1607, 0.1653,  ..., 0.1525, 0.1595, 0.1604],
        [0.1461, 0.1555, 0.1601,  ..., 0.1502, 0.1518, 0.1571],
        [0.1553, 0.1635, 0.1667,  ..., 0.1539, 0.1602, 0.1615]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:32,  1.52it/s]

tensor([[0.1490, 0.1590, 0.1615,  ..., 0.1520, 0.1572, 0.1578],
        [0.1565, 0.1636, 0.1664,  ..., 0.1555, 0.1588, 0.1597],
        [0.1557, 0.1666, 0.1702,  ..., 0.1556, 0.1590, 0.1611],
        ...,
        [0.1564, 0.1607, 0.1701,  ..., 0.1574, 0.1590, 0.1631],
        [0.1577, 0.1666, 0.1719,  ..., 0.1567, 0.1621, 0.1628],
        [0.1570, 0.1637, 0.1714,  ..., 0.1567, 0.1611, 0.1674]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:31,  1.55it/s]

tensor([[0.1560, 0.1626, 0.1675,  ..., 0.1560, 0.1598, 0.1626],
        [0.1525, 0.1631, 0.1656,  ..., 0.1564, 0.1585, 0.1605],
        [0.1537, 0.1634, 0.1641,  ..., 0.1582, 0.1580, 0.1601],
        ...,
        [0.1593, 0.1663, 0.1743,  ..., 0.1561, 0.1635, 0.1671],
        [0.1532, 0.1623, 0.1673,  ..., 0.1543, 0.1573, 0.1589],
        [0.1585, 0.1648, 0.1722,  ..., 0.1562, 0.1631, 0.1675]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:31,  1.53it/s]

tensor([[0.1580, 0.1647, 0.1723,  ..., 0.1567, 0.1642, 0.1641],
        [0.1522, 0.1582, 0.1647,  ..., 0.1550, 0.1550, 0.1554],
        [0.1578, 0.1652, 0.1702,  ..., 0.1559, 0.1592, 0.1628],
        ...,
        [0.1550, 0.1620, 0.1651,  ..., 0.1563, 0.1621, 0.1602],
        [0.1531, 0.1641, 0.1655,  ..., 0.1568, 0.1588, 0.1584],
        [0.1548, 0.1609, 0.1660,  ..., 0.1532, 0.1545, 0.1607]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.49it/s]

tensor([[0.1573, 0.1636, 0.1696,  ..., 0.1561, 0.1611, 0.1660],
        [0.1453, 0.1476, 0.1552,  ..., 0.1430, 0.1456, 0.1508],
        [0.1528, 0.1576, 0.1643,  ..., 0.1560, 0.1578, 0.1638],
        ...,
        [0.1542, 0.1625, 0.1709,  ..., 0.1554, 0.1591, 0.1620],
        [0.1549, 0.1610, 0.1690,  ..., 0.1561, 0.1586, 0.1608],
        [0.1552, 0.1633, 0.1662,  ..., 0.1561, 0.1591, 0.1590]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:31,  1.47it/s]

tensor([[0.1558, 0.1636, 0.1701,  ..., 0.1571, 0.1650, 0.1668],
        [0.1524, 0.1639, 0.1668,  ..., 0.1551, 0.1557, 0.1594],
        [0.1584, 0.1650, 0.1728,  ..., 0.1582, 0.1645, 0.1679],
        ...,
        [0.1555, 0.1611, 0.1667,  ..., 0.1560, 0.1589, 0.1592],
        [0.1552, 0.1641, 0.1692,  ..., 0.1561, 0.1628, 0.1644],
        [0.1494, 0.1618, 0.1632,  ..., 0.1579, 0.1599, 0.1582]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:32,  1.39it/s]

tensor([[0.1588, 0.1609, 0.1734,  ..., 0.1566, 0.1583, 0.1646],
        [0.1503, 0.1534, 0.1608,  ..., 0.1480, 0.1499, 0.1550],
        [0.1581, 0.1620, 0.1682,  ..., 0.1555, 0.1572, 0.1638],
        ...,
        [0.1577, 0.1658, 0.1692,  ..., 0.1574, 0.1633, 0.1643],
        [0.1551, 0.1647, 0.1692,  ..., 0.1549, 0.1598, 0.1619],
        [0.1578, 0.1652, 0.1739,  ..., 0.1579, 0.1624, 0.1667]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:32,  1.35it/s]

tensor([[0.1538, 0.1580, 0.1652,  ..., 0.1543, 0.1566, 0.1585],
        [0.1554, 0.1648, 0.1675,  ..., 0.1536, 0.1566, 0.1559],
        [0.1583, 0.1652, 0.1712,  ..., 0.1557, 0.1608, 0.1635],
        ...,
        [0.1524, 0.1588, 0.1643,  ..., 0.1539, 0.1561, 0.1602],
        [0.1589, 0.1647, 0.1715,  ..., 0.1575, 0.1638, 0.1675],
        [0.1566, 0.1612, 0.1697,  ..., 0.1590, 0.1586, 0.1619]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:31,  1.38it/s]

tensor([[0.1453, 0.1517, 0.1560,  ..., 0.1449, 0.1506, 0.1498],
        [0.1561, 0.1633, 0.1690,  ..., 0.1559, 0.1578, 0.1639],
        [0.1570, 0.1600, 0.1689,  ..., 0.1558, 0.1543, 0.1606],
        ...,
        [0.1558, 0.1643, 0.1679,  ..., 0.1556, 0.1621, 0.1626],
        [0.1575, 0.1627, 0.1678,  ..., 0.1577, 0.1579, 0.1606],
        [0.1534, 0.1648, 0.1699,  ..., 0.1565, 0.1606, 0.1605]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:30,  1.39it/s]

tensor([[0.1529, 0.1620, 0.1656,  ..., 0.1523, 0.1617, 0.1623],
        [0.1585, 0.1620, 0.1689,  ..., 0.1518, 0.1564, 0.1638],
        [0.1548, 0.1636, 0.1676,  ..., 0.1554, 0.1617, 0.1646],
        ...,
        [0.1556, 0.1612, 0.1678,  ..., 0.1597, 0.1608, 0.1636],
        [0.1584, 0.1646, 0.1669,  ..., 0.1576, 0.1600, 0.1596],
        [0.1582, 0.1712, 0.1711,  ..., 0.1607, 0.1642, 0.1639]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:30,  1.34it/s]

tensor([[0.1539, 0.1601, 0.1649,  ..., 0.1570, 0.1576, 0.1606],
        [0.1514, 0.1615, 0.1644,  ..., 0.1525, 0.1549, 0.1583],
        [0.1548, 0.1619, 0.1671,  ..., 0.1565, 0.1588, 0.1642],
        ...,
        [0.1546, 0.1611, 0.1658,  ..., 0.1541, 0.1573, 0.1625],
        [0.1570, 0.1643, 0.1706,  ..., 0.1566, 0.1604, 0.1651],
        [0.1528, 0.1619, 0.1684,  ..., 0.1534, 0.1562, 0.1610]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:31,  1.28it/s]

tensor([[0.1595, 0.1710, 0.1706,  ..., 0.1589, 0.1643, 0.1647],
        [0.1501, 0.1567, 0.1607,  ..., 0.1510, 0.1567, 0.1589],
        [0.1574, 0.1636, 0.1690,  ..., 0.1576, 0.1614, 0.1653],
        ...,
        [0.1467, 0.1537, 0.1588,  ..., 0.1502, 0.1487, 0.1506],
        [0.1580, 0.1657, 0.1686,  ..., 0.1559, 0.1617, 0.1636],
        [0.1542, 0.1616, 0.1672,  ..., 0.1574, 0.1588, 0.1615]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:30,  1.27it/s]

tensor([[0.1561, 0.1600, 0.1686,  ..., 0.1596, 0.1586, 0.1601],
        [0.1572, 0.1660, 0.1726,  ..., 0.1576, 0.1628, 0.1657],
        [0.1558, 0.1638, 0.1677,  ..., 0.1584, 0.1598, 0.1637],
        ...,
        [0.1547, 0.1623, 0.1687,  ..., 0.1559, 0.1575, 0.1617],
        [0.1574, 0.1642, 0.1719,  ..., 0.1571, 0.1625, 0.1658],
        [0.1570, 0.1641, 0.1709,  ..., 0.1559, 0.1606, 0.1649]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:30,  1.23it/s]

tensor([[0.1557, 0.1602, 0.1682,  ..., 0.1562, 0.1577, 0.1629],
        [0.1571, 0.1622, 0.1687,  ..., 0.1540, 0.1603, 0.1656],
        [0.1561, 0.1653, 0.1682,  ..., 0.1558, 0.1603, 0.1646],
        ...,
        [0.1580, 0.1609, 0.1677,  ..., 0.1559, 0.1575, 0.1627],
        [0.1588, 0.1646, 0.1714,  ..., 0.1548, 0.1581, 0.1649],
        [0.1598, 0.1668, 0.1754,  ..., 0.1592, 0.1660, 0.1680]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:29,  1.24it/s]

tensor([[0.1463, 0.1538, 0.1599,  ..., 0.1495, 0.1500, 0.1529],
        [0.1548, 0.1603, 0.1672,  ..., 0.1570, 0.1611, 0.1604],
        [0.1554, 0.1598, 0.1658,  ..., 0.1532, 0.1560, 0.1583],
        ...,
        [0.1522, 0.1637, 0.1662,  ..., 0.1565, 0.1591, 0.1590],
        [0.1482, 0.1539, 0.1609,  ..., 0.1508, 0.1520, 0.1525],
        [0.1581, 0.1649, 0.1712,  ..., 0.1577, 0.1633, 0.1665]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:27,  1.30it/s]

tensor([[0.1464, 0.1509, 0.1575,  ..., 0.1509, 0.1485, 0.1506],
        [0.1484, 0.1582, 0.1609,  ..., 0.1539, 0.1536, 0.1542],
        [0.1539, 0.1580, 0.1635,  ..., 0.1525, 0.1550, 0.1610],
        ...,
        [0.1555, 0.1656, 0.1707,  ..., 0.1578, 0.1622, 0.1697],
        [0.1550, 0.1634, 0.1665,  ..., 0.1571, 0.1584, 0.1624],
        [0.1554, 0.1605, 0.1686,  ..., 0.1570, 0.1588, 0.1621]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:25,  1.35it/s]

tensor([[0.1552, 0.1629, 0.1689,  ..., 0.1551, 0.1583, 0.1610],
        [0.1574, 0.1653, 0.1700,  ..., 0.1561, 0.1607, 0.1624],
        [0.1574, 0.1649, 0.1697,  ..., 0.1581, 0.1622, 0.1643],
        ...,
        [0.1514, 0.1619, 0.1638,  ..., 0.1533, 0.1603, 0.1601],
        [0.1604, 0.1665, 0.1754,  ..., 0.1577, 0.1664, 0.1692],
        [0.1495, 0.1604, 0.1610,  ..., 0.1529, 0.1586, 0.1597]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:24,  1.39it/s]

tensor([[0.1534, 0.1642, 0.1688,  ..., 0.1585, 0.1595, 0.1616],
        [0.1584, 0.1643, 0.1723,  ..., 0.1569, 0.1633, 0.1674],
        [0.1509, 0.1585, 0.1621,  ..., 0.1549, 0.1563, 0.1563],
        ...,
        [0.1575, 0.1650, 0.1711,  ..., 0.1570, 0.1615, 0.1652],
        [0.1501, 0.1589, 0.1629,  ..., 0.1530, 0.1522, 0.1567],
        [0.1555, 0.1641, 0.1702,  ..., 0.1570, 0.1599, 0.1610]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:23,  1.42it/s]

tensor([[0.1551, 0.1621, 0.1665,  ..., 0.1535, 0.1577, 0.1607],
        [0.1587, 0.1649, 0.1698,  ..., 0.1586, 0.1655, 0.1655],
        [0.1529, 0.1606, 0.1648,  ..., 0.1534, 0.1550, 0.1592],
        ...,
        [0.1578, 0.1639, 0.1708,  ..., 0.1585, 0.1593, 0.1652],
        [0.1540, 0.1637, 0.1681,  ..., 0.1554, 0.1606, 0.1659],
        [0.1582, 0.1648, 0.1738,  ..., 0.1578, 0.1614, 0.1673]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:22,  1.45it/s]

tensor([[0.1471, 0.1482, 0.1581,  ..., 0.1445, 0.1457, 0.1532],
        [0.1522, 0.1604, 0.1594,  ..., 0.1506, 0.1530, 0.1535],
        [0.1564, 0.1656, 0.1704,  ..., 0.1572, 0.1627, 0.1671],
        ...,
        [0.1596, 0.1663, 0.1706,  ..., 0.1575, 0.1620, 0.1646],
        [0.1578, 0.1637, 0.1711,  ..., 0.1572, 0.1609, 0.1614],
        [0.1566, 0.1665, 0.1729,  ..., 0.1557, 0.1582, 0.1662]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:20,  1.49it/s]

tensor([[0.1557, 0.1637, 0.1693,  ..., 0.1546, 0.1583, 0.1610],
        [0.1566, 0.1610, 0.1671,  ..., 0.1581, 0.1596, 0.1623],
        [0.1518, 0.1641, 0.1638,  ..., 0.1551, 0.1569, 0.1603],
        ...,
        [0.1585, 0.1659, 0.1724,  ..., 0.1564, 0.1613, 0.1672],
        [0.1523, 0.1614, 0.1643,  ..., 0.1545, 0.1571, 0.1585],
        [0.1556, 0.1621, 0.1684,  ..., 0.1553, 0.1587, 0.1633]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:20,  1.49it/s]

tensor([[0.1558, 0.1656, 0.1713,  ..., 0.1581, 0.1656, 0.1665],
        [0.1558, 0.1660, 0.1706,  ..., 0.1594, 0.1624, 0.1671],
        [0.1611, 0.1653, 0.1740,  ..., 0.1598, 0.1610, 0.1663],
        ...,
        [0.1546, 0.1652, 0.1674,  ..., 0.1589, 0.1643, 0.1611],
        [0.1545, 0.1668, 0.1700,  ..., 0.1599, 0.1628, 0.1667],
        [0.1556, 0.1669, 0.1660,  ..., 0.1567, 0.1616, 0.1649]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:19,  1.49it/s]

tensor([[0.1560, 0.1620, 0.1690,  ..., 0.1552, 0.1573, 0.1634],
        [0.1570, 0.1620, 0.1679,  ..., 0.1565, 0.1573, 0.1634],
        [0.1577, 0.1656, 0.1702,  ..., 0.1551, 0.1619, 0.1630],
        ...,
        [0.1576, 0.1626, 0.1683,  ..., 0.1551, 0.1586, 0.1631],
        [0.1551, 0.1622, 0.1693,  ..., 0.1569, 0.1588, 0.1619],
        [0.1531, 0.1587, 0.1646,  ..., 0.1549, 0.1561, 0.1581]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:19,  1.47it/s]

tensor([[0.1565, 0.1613, 0.1695,  ..., 0.1554, 0.1579, 0.1638],
        [0.1523, 0.1628, 0.1660,  ..., 0.1582, 0.1613, 0.1609],
        [0.1565, 0.1633, 0.1680,  ..., 0.1572, 0.1598, 0.1618],
        ...,
        [0.1554, 0.1633, 0.1688,  ..., 0.1585, 0.1582, 0.1632],
        [0.1572, 0.1651, 0.1697,  ..., 0.1574, 0.1625, 0.1638],
        [0.1532, 0.1586, 0.1661,  ..., 0.1523, 0.1531, 0.1608]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:19,  1.42it/s]

tensor([[0.1587, 0.1627, 0.1669,  ..., 0.1558, 0.1596, 0.1618],
        [0.1528, 0.1584, 0.1641,  ..., 0.1519, 0.1534, 0.1561],
        [0.1602, 0.1642, 0.1736,  ..., 0.1562, 0.1609, 0.1674],
        ...,
        [0.1547, 0.1640, 0.1692,  ..., 0.1541, 0.1585, 0.1643],
        [0.1506, 0.1509, 0.1594,  ..., 0.1473, 0.1472, 0.1503],
        [0.1561, 0.1628, 0.1696,  ..., 0.1560, 0.1594, 0.1657]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:18,  1.44it/s]

tensor([[0.1546, 0.1646, 0.1671,  ..., 0.1562, 0.1612, 0.1645],
        [0.1531, 0.1585, 0.1681,  ..., 0.1559, 0.1565, 0.1605],
        [0.1583, 0.1668, 0.1725,  ..., 0.1588, 0.1632, 0.1654],
        ...,
        [0.1563, 0.1653, 0.1702,  ..., 0.1556, 0.1628, 0.1656],
        [0.1592, 0.1646, 0.1704,  ..., 0.1587, 0.1613, 0.1656],
        [0.1535, 0.1624, 0.1656,  ..., 0.1552, 0.1585, 0.1596]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:17,  1.45it/s]

tensor([[0.1567, 0.1636, 0.1689,  ..., 0.1572, 0.1605, 0.1633],
        [0.1565, 0.1646, 0.1703,  ..., 0.1581, 0.1610, 0.1663],
        [0.1543, 0.1631, 0.1665,  ..., 0.1569, 0.1588, 0.1611],
        ...,
        [0.1580, 0.1635, 0.1725,  ..., 0.1562, 0.1593, 0.1645],
        [0.1603, 0.1681, 0.1750,  ..., 0.1582, 0.1647, 0.1688],
        [0.1579, 0.1658, 0.1702,  ..., 0.1554, 0.1627, 0.1658]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:16,  1.47it/s]

tensor([[0.1553, 0.1650, 0.1703,  ..., 0.1569, 0.1609, 0.1649],
        [0.1563, 0.1648, 0.1714,  ..., 0.1562, 0.1617, 0.1659],
        [0.1560, 0.1633, 0.1650,  ..., 0.1552, 0.1599, 0.1628],
        ...,
        [0.1579, 0.1640, 0.1661,  ..., 0.1542, 0.1607, 0.1618],
        [0.1529, 0.1609, 0.1645,  ..., 0.1540, 0.1565, 0.1588],
        [0.1579, 0.1634, 0.1697,  ..., 0.1568, 0.1600, 0.1642]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:15,  1.49it/s]

tensor([[0.1572, 0.1669, 0.1680,  ..., 0.1597, 0.1649, 0.1644],
        [0.1524, 0.1567, 0.1629,  ..., 0.1504, 0.1553, 0.1570],
        [0.1533, 0.1601, 0.1678,  ..., 0.1587, 0.1563, 0.1634],
        ...,
        [0.1517, 0.1600, 0.1633,  ..., 0.1517, 0.1549, 0.1565],
        [0.1543, 0.1635, 0.1711,  ..., 0.1536, 0.1589, 0.1634],
        [0.1548, 0.1649, 0.1661,  ..., 0.1547, 0.1597, 0.1620]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.49it/s]

tensor([[0.1629, 0.1681, 0.1768,  ..., 0.1592, 0.1656, 0.1708],
        [0.1612, 0.1695, 0.1738,  ..., 0.1580, 0.1649, 0.1674],
        [0.1533, 0.1589, 0.1628,  ..., 0.1524, 0.1538, 0.1582],
        ...,
        [0.1596, 0.1658, 0.1736,  ..., 0.1568, 0.1630, 0.1696],
        [0.1566, 0.1620, 0.1690,  ..., 0.1544, 0.1590, 0.1629],
        [0.1575, 0.1668, 0.1705,  ..., 0.1587, 0.1643, 0.1664]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:14,  1.50it/s]

tensor([[0.1567, 0.1608, 0.1679,  ..., 0.1555, 0.1545, 0.1642],
        [0.1527, 0.1596, 0.1646,  ..., 0.1522, 0.1556, 0.1590],
        [0.1550, 0.1636, 0.1694,  ..., 0.1589, 0.1615, 0.1635],
        ...,
        [0.1570, 0.1634, 0.1712,  ..., 0.1565, 0.1599, 0.1662],
        [0.1578, 0.1613, 0.1722,  ..., 0.1581, 0.1639, 0.1637],
        [0.1522, 0.1559, 0.1624,  ..., 0.1516, 0.1555, 0.1568]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:13,  1.49it/s]

tensor([[0.1584, 0.1660, 0.1690,  ..., 0.1565, 0.1608, 0.1637],
        [0.1560, 0.1610, 0.1680,  ..., 0.1577, 0.1599, 0.1639],
        [0.1483, 0.1585, 0.1585,  ..., 0.1521, 0.1567, 0.1554],
        ...,
        [0.1561, 0.1624, 0.1668,  ..., 0.1561, 0.1573, 0.1622],
        [0.1558, 0.1651, 0.1686,  ..., 0.1558, 0.1629, 0.1642],
        [0.1603, 0.1694, 0.1719,  ..., 0.1601, 0.1645, 0.1676]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:13,  1.45it/s]

tensor([[0.1553, 0.1643, 0.1646,  ..., 0.1552, 0.1567, 0.1576],
        [0.1504, 0.1539, 0.1602,  ..., 0.1528, 0.1541, 0.1527],
        [0.1519, 0.1600, 0.1624,  ..., 0.1507, 0.1540, 0.1554],
        ...,
        [0.1523, 0.1596, 0.1650,  ..., 0.1550, 0.1537, 0.1583],
        [0.1590, 0.1647, 0.1688,  ..., 0.1589, 0.1625, 0.1624],
        [0.1524, 0.1611, 0.1647,  ..., 0.1531, 0.1543, 0.1566]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:12,  1.42it/s]

tensor([[0.1568, 0.1634, 0.1712,  ..., 0.1563, 0.1591, 0.1641],
        [0.1554, 0.1614, 0.1677,  ..., 0.1571, 0.1595, 0.1635],
        [0.1544, 0.1620, 0.1681,  ..., 0.1552, 0.1557, 0.1609],
        ...,
        [0.1552, 0.1657, 0.1661,  ..., 0.1552, 0.1603, 0.1625],
        [0.1562, 0.1616, 0.1698,  ..., 0.1543, 0.1602, 0.1648],
        [0.1530, 0.1660, 0.1680,  ..., 0.1553, 0.1623, 0.1628]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.42it/s]

tensor([[0.1544, 0.1654, 0.1684,  ..., 0.1578, 0.1610, 0.1613],
        [0.1545, 0.1660, 0.1674,  ..., 0.1559, 0.1608, 0.1651],
        [0.1557, 0.1657, 0.1684,  ..., 0.1558, 0.1611, 0.1624],
        ...,
        [0.1574, 0.1645, 0.1687,  ..., 0.1554, 0.1615, 0.1639],
        [0.1496, 0.1602, 0.1584,  ..., 0.1573, 0.1609, 0.1575],
        [0.1553, 0.1606, 0.1642,  ..., 0.1538, 0.1575, 0.1603]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:28<00:11,  1.44it/s]

tensor([[0.1568, 0.1637, 0.1699,  ..., 0.1564, 0.1644, 0.1659],
        [0.1568, 0.1640, 0.1710,  ..., 0.1554, 0.1618, 0.1652],
        [0.1584, 0.1659, 0.1715,  ..., 0.1587, 0.1625, 0.1652],
        ...,
        [0.1599, 0.1657, 0.1718,  ..., 0.1584, 0.1599, 0.1637],
        [0.1583, 0.1651, 0.1712,  ..., 0.1580, 0.1634, 0.1689],
        [0.1568, 0.1676, 0.1710,  ..., 0.1590, 0.1645, 0.1648]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:10,  1.46it/s]

tensor([[0.1547, 0.1627, 0.1664,  ..., 0.1539, 0.1611, 0.1636],
        [0.1552, 0.1626, 0.1679,  ..., 0.1579, 0.1599, 0.1614],
        [0.1554, 0.1641, 0.1663,  ..., 0.1566, 0.1603, 0.1614],
        ...,
        [0.1580, 0.1632, 0.1705,  ..., 0.1553, 0.1600, 0.1686],
        [0.1475, 0.1547, 0.1593,  ..., 0.1523, 0.1513, 0.1546],
        [0.1498, 0.1611, 0.1602,  ..., 0.1500, 0.1587, 0.1551]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:09,  1.48it/s]

tensor([[0.1525, 0.1584, 0.1630,  ..., 0.1537, 0.1517, 0.1569],
        [0.1581, 0.1639, 0.1728,  ..., 0.1557, 0.1607, 0.1663],
        [0.1535, 0.1607, 0.1608,  ..., 0.1511, 0.1544, 0.1560],
        ...,
        [0.1564, 0.1635, 0.1687,  ..., 0.1556, 0.1588, 0.1641],
        [0.1522, 0.1587, 0.1659,  ..., 0.1558, 0.1562, 0.1591],
        [0.1583, 0.1665, 0.1729,  ..., 0.1567, 0.1635, 0.1687]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.49it/s]

tensor([[0.1551, 0.1628, 0.1661,  ..., 0.1545, 0.1589, 0.1608],
        [0.1580, 0.1633, 0.1693,  ..., 0.1565, 0.1581, 0.1634],
        [0.1518, 0.1581, 0.1626,  ..., 0.1572, 0.1554, 0.1542],
        ...,
        [0.1566, 0.1674, 0.1693,  ..., 0.1528, 0.1579, 0.1618],
        [0.1558, 0.1607, 0.1693,  ..., 0.1555, 0.1580, 0.1622],
        [0.1574, 0.1656, 0.1708,  ..., 0.1571, 0.1612, 0.1644]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:08,  1.45it/s]

tensor([[0.1548, 0.1605, 0.1665,  ..., 0.1564, 0.1584, 0.1607],
        [0.1594, 0.1681, 0.1746,  ..., 0.1566, 0.1638, 0.1694],
        [0.1572, 0.1643, 0.1675,  ..., 0.1551, 0.1609, 0.1631],
        ...,
        [0.1578, 0.1651, 0.1712,  ..., 0.1600, 0.1629, 0.1654],
        [0.1566, 0.1626, 0.1686,  ..., 0.1603, 0.1611, 0.1646],
        [0.1576, 0.1661, 0.1687,  ..., 0.1589, 0.1642, 0.1661]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.46it/s]

tensor([[0.1511, 0.1630, 0.1647,  ..., 0.1589, 0.1600, 0.1628],
        [0.1503, 0.1582, 0.1612,  ..., 0.1550, 0.1553, 0.1576],
        [0.1549, 0.1655, 0.1665,  ..., 0.1569, 0.1667, 0.1626],
        ...,
        [0.1593, 0.1619, 0.1703,  ..., 0.1563, 0.1595, 0.1625],
        [0.1523, 0.1594, 0.1641,  ..., 0.1509, 0.1540, 0.1630],
        [0.1594, 0.1672, 0.1732,  ..., 0.1584, 0.1648, 0.1690]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.48it/s]

tensor([[0.1585, 0.1645, 0.1717,  ..., 0.1550, 0.1628, 0.1632],
        [0.1453, 0.1551, 0.1567,  ..., 0.1509, 0.1518, 0.1521],
        [0.1590, 0.1689, 0.1668,  ..., 0.1579, 0.1640, 0.1599],
        ...,
        [0.1565, 0.1649, 0.1696,  ..., 0.1562, 0.1608, 0.1634],
        [0.1575, 0.1645, 0.1699,  ..., 0.1586, 0.1599, 0.1639],
        [0.1572, 0.1652, 0.1706,  ..., 0.1562, 0.1642, 0.1644]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:33<00:06,  1.50it/s]

tensor([[0.1591, 0.1642, 0.1739,  ..., 0.1569, 0.1621, 0.1640],
        [0.1563, 0.1628, 0.1704,  ..., 0.1554, 0.1581, 0.1640],
        [0.1554, 0.1611, 0.1669,  ..., 0.1558, 0.1560, 0.1638],
        ...,
        [0.1590, 0.1644, 0.1728,  ..., 0.1584, 0.1643, 0.1659],
        [0.1546, 0.1642, 0.1670,  ..., 0.1543, 0.1586, 0.1604],
        [0.1596, 0.1717, 0.1735,  ..., 0.1605, 0.1656, 0.1674]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.47it/s]

tensor([[0.1558, 0.1661, 0.1682,  ..., 0.1585, 0.1621, 0.1650],
        [0.1592, 0.1648, 0.1679,  ..., 0.1579, 0.1644, 0.1645],
        [0.1592, 0.1650, 0.1732,  ..., 0.1550, 0.1624, 0.1644],
        ...,
        [0.1583, 0.1667, 0.1694,  ..., 0.1573, 0.1610, 0.1655],
        [0.1551, 0.1613, 0.1678,  ..., 0.1554, 0.1601, 0.1601],
        [0.1550, 0.1677, 0.1684,  ..., 0.1554, 0.1626, 0.1692]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.45it/s]

tensor([[0.1556, 0.1634, 0.1678,  ..., 0.1547, 0.1617, 0.1634],
        [0.1526, 0.1598, 0.1661,  ..., 0.1548, 0.1560, 0.1587],
        [0.1547, 0.1656, 0.1679,  ..., 0.1583, 0.1608, 0.1657],
        ...,
        [0.1562, 0.1634, 0.1677,  ..., 0.1545, 0.1610, 0.1630],
        [0.1525, 0.1583, 0.1654,  ..., 0.1538, 0.1573, 0.1585],
        [0.1545, 0.1638, 0.1669,  ..., 0.1560, 0.1577, 0.1598]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:04,  1.44it/s]

tensor([[0.1504, 0.1595, 0.1623,  ..., 0.1551, 0.1533, 0.1540],
        [0.1542, 0.1622, 0.1664,  ..., 0.1540, 0.1589, 0.1631],
        [0.1524, 0.1652, 0.1646,  ..., 0.1574, 0.1592, 0.1615],
        ...,
        [0.1586, 0.1659, 0.1715,  ..., 0.1583, 0.1673, 0.1662],
        [0.1512, 0.1603, 0.1640,  ..., 0.1558, 0.1578, 0.1609],
        [0.1574, 0.1636, 0.1685,  ..., 0.1577, 0.1633, 0.1649]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:36<00:03,  1.46it/s]

tensor([[0.1577, 0.1640, 0.1680,  ..., 0.1568, 0.1618, 0.1638],
        [0.1496, 0.1564, 0.1608,  ..., 0.1533, 0.1549, 0.1542],
        [0.1553, 0.1589, 0.1681,  ..., 0.1573, 0.1581, 0.1620],
        ...,
        [0.1601, 0.1668, 0.1712,  ..., 0.1609, 0.1628, 0.1634],
        [0.1544, 0.1649, 0.1679,  ..., 0.1553, 0.1637, 0.1607],
        [0.1538, 0.1616, 0.1667,  ..., 0.1573, 0.1602, 0.1616]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.46it/s]

tensor([[0.1530, 0.1580, 0.1648,  ..., 0.1529, 0.1522, 0.1580],
        [0.1539, 0.1623, 0.1662,  ..., 0.1572, 0.1590, 0.1632],
        [0.1589, 0.1645, 0.1720,  ..., 0.1570, 0.1626, 0.1691],
        ...,
        [0.1569, 0.1619, 0.1666,  ..., 0.1563, 0.1580, 0.1615],
        [0.1506, 0.1613, 0.1619,  ..., 0.1556, 0.1585, 0.1578],
        [0.1537, 0.1602, 0.1667,  ..., 0.1537, 0.1568, 0.1596]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:02,  1.47it/s]

tensor([[0.1538, 0.1597, 0.1652,  ..., 0.1581, 0.1581, 0.1617],
        [0.1596, 0.1657, 0.1711,  ..., 0.1573, 0.1652, 0.1672],
        [0.1564, 0.1657, 0.1701,  ..., 0.1580, 0.1650, 0.1651],
        ...,
        [0.1535, 0.1622, 0.1648,  ..., 0.1553, 0.1619, 0.1606],
        [0.1580, 0.1667, 0.1709,  ..., 0.1581, 0.1596, 0.1648],
        [0.1573, 0.1592, 0.1684,  ..., 0.1565, 0.1574, 0.1610]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.48it/s]

tensor([[0.1554, 0.1653, 0.1674,  ..., 0.1581, 0.1590, 0.1613],
        [0.1557, 0.1638, 0.1672,  ..., 0.1583, 0.1619, 0.1612],
        [0.1564, 0.1659, 0.1695,  ..., 0.1583, 0.1606, 0.1644],
        ...,
        [0.1538, 0.1640, 0.1684,  ..., 0.1575, 0.1597, 0.1617],
        [0.1594, 0.1663, 0.1707,  ..., 0.1583, 0.1668, 0.1640],
        [0.1543, 0.1610, 0.1672,  ..., 0.1546, 0.1584, 0.1620]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:38<00:00,  1.51it/s]

tensor([[0.1499, 0.1574, 0.1622,  ..., 0.1544, 0.1521, 0.1574],
        [0.1591, 0.1652, 0.1722,  ..., 0.1573, 0.1640, 0.1677],
        [0.1535, 0.1607, 0.1646,  ..., 0.1529, 0.1554, 0.1584],
        ...,
        [0.1580, 0.1662, 0.1735,  ..., 0.1584, 0.1636, 0.1671],
        [0.1535, 0.1609, 0.1656,  ..., 0.1555, 0.1587, 0.1618],
        [0.1543, 0.1649, 0.1683,  ..., 0.1547, 0.1612, 0.1616]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:39<00:00,  1.44it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.39it/s]

tensor([[0.1519, 0.1643, 0.1665,  ..., 0.1548, 0.1628, 0.1635],
        [0.1536, 0.1586, 0.1643,  ..., 0.1548, 0.1545, 0.1603],
        [0.1554, 0.1606, 0.1684,  ..., 0.1545, 0.1596, 0.1640],
        ...,
        [0.1541, 0.1641, 0.1658,  ..., 0.1554, 0.1593, 0.1600],
        [0.1571, 0.1656, 0.1719,  ..., 0.1593, 0.1609, 0.1646],
        [0.1571, 0.1651, 0.1715,  ..., 0.1573, 0.1630, 0.1687]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.38it/s]

tensor([[0.1547, 0.1617, 0.1650,  ..., 0.1556, 0.1556, 0.1600],
        [0.1443, 0.1527, 0.1548,  ..., 0.1484, 0.1493, 0.1503],
        [0.1543, 0.1621, 0.1650,  ..., 0.1549, 0.1568, 0.1609],
        ...,
        [0.1555, 0.1639, 0.1667,  ..., 0.1534, 0.1595, 0.1611],
        [0.1565, 0.1633, 0.1704,  ..., 0.1594, 0.1628, 0.1663],
        [0.1558, 0.1646, 0.1685,  ..., 0.1572, 0.1629, 0.1650]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

tensor([[0.1576, 0.1676, 0.1684,  ..., 0.1578, 0.1633, 0.1638],
        [0.1542, 0.1658, 0.1683,  ..., 0.1549, 0.1572, 0.1626],
        [0.1476, 0.1556, 0.1605,  ..., 0.1531, 0.1556, 0.1581],
        ...,
        [0.1538, 0.1599, 0.1664,  ..., 0.1560, 0.1588, 0.1603],
        [0.1565, 0.1652, 0.1715,  ..., 0.1575, 0.1601, 0.1637],
        [0.1557, 0.1640, 0.1684,  ..., 0.1561, 0.1583, 0.1653]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.29it/s]

tensor([[0.1484, 0.1537, 0.1587,  ..., 0.1459, 0.1516, 0.1591],
        [0.1551, 0.1616, 0.1670,  ..., 0.1534, 0.1558, 0.1627],
        [0.1552, 0.1605, 0.1660,  ..., 0.1569, 0.1604, 0.1620],
        ...,
        [0.1500, 0.1585, 0.1607,  ..., 0.1502, 0.1559, 0.1540],
        [0.1554, 0.1628, 0.1676,  ..., 0.1575, 0.1607, 0.1641],
        [0.1569, 0.1642, 0.1714,  ..., 0.1588, 0.1609, 0.1653]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.23it/s]

tensor([[0.1504, 0.1630, 0.1623,  ..., 0.1565, 0.1632, 0.1604],
        [0.1481, 0.1545, 0.1627,  ..., 0.1517, 0.1507, 0.1529],
        [0.1566, 0.1644, 0.1692,  ..., 0.1589, 0.1601, 0.1640],
        ...,
        [0.1559, 0.1590, 0.1675,  ..., 0.1538, 0.1565, 0.1591],
        [0.1586, 0.1643, 0.1726,  ..., 0.1574, 0.1611, 0.1665],
        [0.1563, 0.1620, 0.1661,  ..., 0.1547, 0.1568, 0.1615]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.13it/s]

tensor([[0.1570, 0.1647, 0.1706,  ..., 0.1564, 0.1608, 0.1643],
        [0.1585, 0.1668, 0.1727,  ..., 0.1581, 0.1611, 0.1664],
        [0.1557, 0.1650, 0.1681,  ..., 0.1585, 0.1607, 0.1626],
        ...,
        [0.1564, 0.1655, 0.1675,  ..., 0.1550, 0.1610, 0.1630],
        [0.1542, 0.1628, 0.1693,  ..., 0.1558, 0.1642, 0.1648],
        [0.1590, 0.1669, 0.1721,  ..., 0.1591, 0.1624, 0.1651]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.21it/s]

tensor([[0.1569, 0.1618, 0.1691,  ..., 0.1555, 0.1576, 0.1642],
        [0.1576, 0.1627, 0.1709,  ..., 0.1567, 0.1608, 0.1669],
        [0.1538, 0.1608, 0.1669,  ..., 0.1587, 0.1584, 0.1611],
        ...,
        [0.1578, 0.1650, 0.1688,  ..., 0.1562, 0.1637, 0.1667],
        [0.1568, 0.1666, 0.1683,  ..., 0.1585, 0.1650, 0.1639],
        [0.1520, 0.1610, 0.1629,  ..., 0.1551, 0.1549, 0.1594]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.24it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1543, 0.1630, 0.1663,  ..., 0.1570, 0.1585, 0.1624],
        [0.1555, 0.1641, 0.1666,  ..., 0.1579, 0.1624, 0.1630],
        [0.1536, 0.1641, 0.1683,  ..., 0.1600, 0.1597, 0.1619],
        ...,
        [0.1591, 0.1652, 0.1693,  ..., 0.1570, 0.1607, 0.1661],
        [0.1512, 0.1608, 0.1634,  ..., 0.1527, 0.1583, 0.1621],
        [0.1564, 0.1641, 0.1678,  ..., 0.1550, 0.1608, 0.1614]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 12, train_loss: -0.6393032670021057, valid_loss: -0.6385416388511658
Parameter containing:
tensor([[ 1.0003,  1.0004,  1.0003,  1.0003,  1.0003,  1.0003,  1.0005,  1.0003,
          1.0003,  1.0005, -0.4996, -0.4997, -0.4998, -0.4997, -0.4997, -0.4997,
         -0.4997, -0.4998, -0.4997, -0.4998],
     

  2%|▏         | 1/57 [00:00<00:37,  1.48it/s]

tensor([[0.1504, 0.1608, 0.1648,  ..., 0.1505, 0.1573, 0.1576],
        [0.1498, 0.1587, 0.1626,  ..., 0.1489, 0.1525, 0.1543],
        [0.1523, 0.1621, 0.1635,  ..., 0.1555, 0.1604, 0.1623],
        ...,
        [0.1562, 0.1645, 0.1708,  ..., 0.1614, 0.1631, 0.1652],
        [0.1525, 0.1632, 0.1647,  ..., 0.1556, 0.1602, 0.1609],
        [0.1555, 0.1624, 0.1677,  ..., 0.1563, 0.1581, 0.1621]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:36,  1.49it/s]

tensor([[0.1575, 0.1655, 0.1728,  ..., 0.1551, 0.1621, 0.1646],
        [0.1526, 0.1606, 0.1635,  ..., 0.1523, 0.1564, 0.1639],
        [0.1533, 0.1568, 0.1650,  ..., 0.1574, 0.1561, 0.1568],
        ...,
        [0.1585, 0.1665, 0.1704,  ..., 0.1590, 0.1663, 0.1640],
        [0.1539, 0.1583, 0.1647,  ..., 0.1546, 0.1542, 0.1601],
        [0.1547, 0.1644, 0.1674,  ..., 0.1555, 0.1572, 0.1618]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:36,  1.50it/s]

tensor([[0.1513, 0.1597, 0.1618,  ..., 0.1515, 0.1538, 0.1570],
        [0.1550, 0.1608, 0.1695,  ..., 0.1548, 0.1586, 0.1640],
        [0.1510, 0.1607, 0.1647,  ..., 0.1530, 0.1543, 0.1560],
        ...,
        [0.1560, 0.1642, 0.1680,  ..., 0.1588, 0.1598, 0.1628],
        [0.1465, 0.1539, 0.1542,  ..., 0.1467, 0.1463, 0.1495],
        [0.1598, 0.1672, 0.1723,  ..., 0.1573, 0.1637, 0.1671]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:35,  1.50it/s]

tensor([[0.1582, 0.1654, 0.1727,  ..., 0.1571, 0.1630, 0.1681],
        [0.1551, 0.1628, 0.1655,  ..., 0.1524, 0.1577, 0.1613],
        [0.1540, 0.1591, 0.1666,  ..., 0.1523, 0.1554, 0.1581],
        ...,
        [0.1523, 0.1602, 0.1649,  ..., 0.1559, 0.1575, 0.1600],
        [0.1536, 0.1665, 0.1673,  ..., 0.1563, 0.1628, 0.1618],
        [0.1529, 0.1591, 0.1652,  ..., 0.1546, 0.1570, 0.1591]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.52it/s]

tensor([[0.1570, 0.1646, 0.1722,  ..., 0.1546, 0.1584, 0.1641],
        [0.1490, 0.1573, 0.1618,  ..., 0.1502, 0.1512, 0.1511],
        [0.1519, 0.1627, 0.1673,  ..., 0.1530, 0.1572, 0.1583],
        ...,
        [0.1532, 0.1559, 0.1637,  ..., 0.1530, 0.1539, 0.1558],
        [0.1549, 0.1647, 0.1706,  ..., 0.1545, 0.1591, 0.1648],
        [0.1568, 0.1663, 0.1689,  ..., 0.1560, 0.1621, 0.1646]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.52it/s]

tensor([[0.1544, 0.1637, 0.1714,  ..., 0.1539, 0.1596, 0.1619],
        [0.1583, 0.1660, 0.1711,  ..., 0.1608, 0.1634, 0.1671],
        [0.1534, 0.1619, 0.1684,  ..., 0.1543, 0.1557, 0.1619],
        ...,
        [0.1538, 0.1644, 0.1654,  ..., 0.1542, 0.1602, 0.1595],
        [0.1543, 0.1641, 0.1657,  ..., 0.1538, 0.1599, 0.1603],
        [0.1579, 0.1608, 0.1706,  ..., 0.1559, 0.1570, 0.1633]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.51it/s]

tensor([[0.1555, 0.1616, 0.1690,  ..., 0.1548, 0.1584, 0.1630],
        [0.1595, 0.1669, 0.1730,  ..., 0.1573, 0.1645, 0.1679],
        [0.1573, 0.1646, 0.1700,  ..., 0.1552, 0.1619, 0.1638],
        ...,
        [0.1536, 0.1593, 0.1622,  ..., 0.1521, 0.1534, 0.1547],
        [0.1491, 0.1593, 0.1604,  ..., 0.1517, 0.1573, 0.1582],
        [0.1563, 0.1660, 0.1680,  ..., 0.1551, 0.1631, 0.1639]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.42it/s]

tensor([[0.1570, 0.1670, 0.1693,  ..., 0.1574, 0.1666, 0.1641],
        [0.1532, 0.1599, 0.1637,  ..., 0.1569, 0.1586, 0.1601],
        [0.1521, 0.1600, 0.1654,  ..., 0.1514, 0.1561, 0.1586],
        ...,
        [0.1518, 0.1622, 0.1628,  ..., 0.1547, 0.1583, 0.1598],
        [0.1556, 0.1634, 0.1687,  ..., 0.1582, 0.1598, 0.1607],
        [0.1621, 0.1671, 0.1751,  ..., 0.1609, 0.1633, 0.1659]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:36,  1.33it/s]

tensor([[0.1528, 0.1612, 0.1659,  ..., 0.1521, 0.1548, 0.1622],
        [0.1550, 0.1602, 0.1717,  ..., 0.1575, 0.1599, 0.1608],
        [0.1564, 0.1624, 0.1685,  ..., 0.1544, 0.1576, 0.1653],
        ...,
        [0.1597, 0.1657, 0.1738,  ..., 0.1563, 0.1610, 0.1659],
        [0.1553, 0.1658, 0.1685,  ..., 0.1575, 0.1615, 0.1618],
        [0.1532, 0.1593, 0.1663,  ..., 0.1562, 0.1550, 0.1586]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:35,  1.32it/s]

tensor([[0.1553, 0.1646, 0.1671,  ..., 0.1546, 0.1607, 0.1638],
        [0.1550, 0.1667, 0.1693,  ..., 0.1536, 0.1607, 0.1627],
        [0.1533, 0.1613, 0.1654,  ..., 0.1564, 0.1587, 0.1601],
        ...,
        [0.1547, 0.1624, 0.1703,  ..., 0.1558, 0.1590, 0.1616],
        [0.1511, 0.1618, 0.1637,  ..., 0.1552, 0.1558, 0.1614],
        [0.1530, 0.1627, 0.1657,  ..., 0.1526, 0.1577, 0.1577]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:35,  1.31it/s]

tensor([[0.1512, 0.1586, 0.1598,  ..., 0.1508, 0.1534, 0.1544],
        [0.1565, 0.1600, 0.1669,  ..., 0.1552, 0.1559, 0.1635],
        [0.1588, 0.1668, 0.1744,  ..., 0.1597, 0.1622, 0.1642],
        ...,
        [0.1551, 0.1641, 0.1671,  ..., 0.1574, 0.1613, 0.1596],
        [0.1589, 0.1639, 0.1708,  ..., 0.1571, 0.1612, 0.1671],
        [0.1512, 0.1642, 0.1634,  ..., 0.1555, 0.1593, 0.1637]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:33,  1.34it/s]

tensor([[0.1615, 0.1660, 0.1712,  ..., 0.1593, 0.1647, 0.1682],
        [0.1563, 0.1615, 0.1671,  ..., 0.1559, 0.1596, 0.1609],
        [0.1581, 0.1658, 0.1686,  ..., 0.1576, 0.1631, 0.1638],
        ...,
        [0.1555, 0.1609, 0.1659,  ..., 0.1562, 0.1569, 0.1594],
        [0.1550, 0.1608, 0.1680,  ..., 0.1565, 0.1568, 0.1616],
        [0.1595, 0.1654, 0.1741,  ..., 0.1567, 0.1630, 0.1686]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:32,  1.37it/s]

tensor([[0.1580, 0.1665, 0.1717,  ..., 0.1554, 0.1626, 0.1664],
        [0.1525, 0.1685, 0.1670,  ..., 0.1589, 0.1619, 0.1597],
        [0.1484, 0.1546, 0.1596,  ..., 0.1509, 0.1515, 0.1525],
        ...,
        [0.1590, 0.1648, 0.1720,  ..., 0.1572, 0.1603, 0.1619],
        [0.1525, 0.1571, 0.1598,  ..., 0.1513, 0.1538, 0.1555],
        [0.1489, 0.1536, 0.1602,  ..., 0.1512, 0.1491, 0.1577]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:30,  1.41it/s]

tensor([[0.1552, 0.1635, 0.1678,  ..., 0.1570, 0.1593, 0.1630],
        [0.1526, 0.1597, 0.1646,  ..., 0.1547, 0.1560, 0.1571],
        [0.1564, 0.1630, 0.1688,  ..., 0.1550, 0.1592, 0.1651],
        ...,
        [0.1569, 0.1606, 0.1675,  ..., 0.1575, 0.1566, 0.1604],
        [0.1533, 0.1644, 0.1652,  ..., 0.1560, 0.1606, 0.1609],
        [0.1592, 0.1703, 0.1707,  ..., 0.1582, 0.1651, 0.1666]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:30,  1.39it/s]

tensor([[0.1543, 0.1613, 0.1666,  ..., 0.1557, 0.1581, 0.1607],
        [0.1575, 0.1679, 0.1702,  ..., 0.1595, 0.1634, 0.1656],
        [0.1584, 0.1665, 0.1694,  ..., 0.1567, 0.1609, 0.1615],
        ...,
        [0.1583, 0.1663, 0.1721,  ..., 0.1593, 0.1651, 0.1671],
        [0.1506, 0.1616, 0.1624,  ..., 0.1524, 0.1585, 0.1579],
        [0.1536, 0.1625, 0.1668,  ..., 0.1540, 0.1591, 0.1601]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:29,  1.38it/s]

tensor([[0.1536, 0.1634, 0.1680,  ..., 0.1574, 0.1618, 0.1647],
        [0.1553, 0.1618, 0.1655,  ..., 0.1560, 0.1600, 0.1618],
        [0.1470, 0.1590, 0.1585,  ..., 0.1526, 0.1569, 0.1562],
        ...,
        [0.1531, 0.1624, 0.1650,  ..., 0.1561, 0.1560, 0.1625],
        [0.1582, 0.1648, 0.1714,  ..., 0.1563, 0.1607, 0.1678],
        [0.1539, 0.1578, 0.1640,  ..., 0.1530, 0.1554, 0.1563]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:28,  1.41it/s]

tensor([[0.1531, 0.1608, 0.1653,  ..., 0.1539, 0.1558, 0.1596],
        [0.1551, 0.1653, 0.1691,  ..., 0.1543, 0.1616, 0.1638],
        [0.1553, 0.1615, 0.1694,  ..., 0.1557, 0.1570, 0.1618],
        ...,
        [0.1566, 0.1635, 0.1693,  ..., 0.1574, 0.1600, 0.1621],
        [0.1573, 0.1661, 0.1701,  ..., 0.1569, 0.1627, 0.1646],
        [0.1562, 0.1637, 0.1702,  ..., 0.1566, 0.1617, 0.1633]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:28,  1.38it/s]

tensor([[0.1580, 0.1648, 0.1716,  ..., 0.1563, 0.1618, 0.1659],
        [0.1542, 0.1636, 0.1637,  ..., 0.1537, 0.1571, 0.1591],
        [0.1547, 0.1660, 0.1684,  ..., 0.1566, 0.1623, 0.1622],
        ...,
        [0.1615, 0.1654, 0.1719,  ..., 0.1575, 0.1638, 0.1677],
        [0.1539, 0.1645, 0.1679,  ..., 0.1581, 0.1593, 0.1608],
        [0.1518, 0.1601, 0.1614,  ..., 0.1524, 0.1526, 0.1534]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:28,  1.35it/s]

tensor([[0.1536, 0.1619, 0.1659,  ..., 0.1574, 0.1555, 0.1579],
        [0.1542, 0.1623, 0.1664,  ..., 0.1561, 0.1578, 0.1642],
        [0.1550, 0.1626, 0.1652,  ..., 0.1565, 0.1590, 0.1606],
        ...,
        [0.1524, 0.1630, 0.1670,  ..., 0.1552, 0.1593, 0.1626],
        [0.1548, 0.1607, 0.1668,  ..., 0.1600, 0.1617, 0.1626],
        [0.1567, 0.1610, 0.1668,  ..., 0.1594, 0.1584, 0.1665]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:27,  1.32it/s]

tensor([[0.1562, 0.1655, 0.1690,  ..., 0.1569, 0.1645, 0.1668],
        [0.1553, 0.1626, 0.1673,  ..., 0.1583, 0.1598, 0.1619],
        [0.1530, 0.1626, 0.1671,  ..., 0.1559, 0.1573, 0.1626],
        ...,
        [0.1597, 0.1634, 0.1693,  ..., 0.1560, 0.1583, 0.1645],
        [0.1549, 0.1626, 0.1674,  ..., 0.1555, 0.1591, 0.1635],
        [0.1508, 0.1577, 0.1618,  ..., 0.1547, 0.1535, 0.1592]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:27,  1.33it/s]

tensor([[0.1551, 0.1638, 0.1669,  ..., 0.1572, 0.1589, 0.1593],
        [0.1585, 0.1632, 0.1728,  ..., 0.1576, 0.1612, 0.1675],
        [0.1552, 0.1642, 0.1699,  ..., 0.1550, 0.1610, 0.1626],
        ...,
        [0.1555, 0.1655, 0.1689,  ..., 0.1563, 0.1606, 0.1643],
        [0.1538, 0.1666, 0.1676,  ..., 0.1585, 0.1601, 0.1639],
        [0.1563, 0.1648, 0.1689,  ..., 0.1559, 0.1614, 0.1629]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:26,  1.31it/s]

tensor([[0.1564, 0.1622, 0.1678,  ..., 0.1545, 0.1595, 0.1642],
        [0.1563, 0.1673, 0.1705,  ..., 0.1569, 0.1638, 0.1659],
        [0.1514, 0.1602, 0.1631,  ..., 0.1527, 0.1557, 0.1644],
        ...,
        [0.1544, 0.1618, 0.1645,  ..., 0.1556, 0.1567, 0.1614],
        [0.1550, 0.1593, 0.1670,  ..., 0.1556, 0.1537, 0.1580],
        [0.1420, 0.1497, 0.1552,  ..., 0.1469, 0.1478, 0.1506]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:25,  1.31it/s]

tensor([[0.1550, 0.1624, 0.1685,  ..., 0.1588, 0.1619, 0.1636],
        [0.1545, 0.1638, 0.1674,  ..., 0.1563, 0.1600, 0.1622],
        [0.1552, 0.1630, 0.1676,  ..., 0.1554, 0.1584, 0.1610],
        ...,
        [0.1528, 0.1611, 0.1612,  ..., 0.1564, 0.1563, 0.1584],
        [0.1545, 0.1618, 0.1694,  ..., 0.1516, 0.1603, 0.1584],
        [0.1539, 0.1657, 0.1693,  ..., 0.1541, 0.1612, 0.1628]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:25,  1.30it/s]

tensor([[0.1583, 0.1625, 0.1691,  ..., 0.1555, 0.1595, 0.1637],
        [0.1550, 0.1594, 0.1657,  ..., 0.1561, 0.1570, 0.1591],
        [0.1541, 0.1626, 0.1681,  ..., 0.1544, 0.1558, 0.1625],
        ...,
        [0.1519, 0.1620, 0.1644,  ..., 0.1543, 0.1556, 0.1577],
        [0.1540, 0.1664, 0.1684,  ..., 0.1574, 0.1615, 0.1616],
        [0.1565, 0.1636, 0.1654,  ..., 0.1566, 0.1672, 0.1618]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:24,  1.29it/s]

tensor([[0.1550, 0.1648, 0.1687,  ..., 0.1563, 0.1648, 0.1660],
        [0.1544, 0.1629, 0.1658,  ..., 0.1576, 0.1586, 0.1615],
        [0.1519, 0.1622, 0.1643,  ..., 0.1543, 0.1558, 0.1581],
        ...,
        [0.1544, 0.1647, 0.1694,  ..., 0.1588, 0.1613, 0.1627],
        [0.1569, 0.1643, 0.1711,  ..., 0.1570, 0.1596, 0.1650],
        [0.1536, 0.1605, 0.1634,  ..., 0.1539, 0.1564, 0.1640]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:19<00:24,  1.29it/s]

tensor([[0.1520, 0.1607, 0.1615,  ..., 0.1493, 0.1550, 0.1590],
        [0.1563, 0.1604, 0.1673,  ..., 0.1572, 0.1585, 0.1611],
        [0.1559, 0.1592, 0.1689,  ..., 0.1591, 0.1569, 0.1619],
        ...,
        [0.1593, 0.1680, 0.1713,  ..., 0.1591, 0.1652, 0.1669],
        [0.1561, 0.1638, 0.1679,  ..., 0.1572, 0.1621, 0.1607],
        [0.1545, 0.1651, 0.1667,  ..., 0.1543, 0.1582, 0.1605]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:23,  1.27it/s]

tensor([[0.1556, 0.1620, 0.1660,  ..., 0.1563, 0.1584, 0.1617],
        [0.1551, 0.1622, 0.1664,  ..., 0.1533, 0.1610, 0.1620],
        [0.1526, 0.1628, 0.1663,  ..., 0.1566, 0.1547, 0.1599],
        ...,
        [0.1564, 0.1643, 0.1734,  ..., 0.1560, 0.1617, 0.1630],
        [0.1564, 0.1620, 0.1679,  ..., 0.1571, 0.1586, 0.1635],
        [0.1511, 0.1609, 0.1624,  ..., 0.1535, 0.1572, 0.1561]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:23,  1.24it/s]

tensor([[0.1581, 0.1654, 0.1707,  ..., 0.1570, 0.1623, 0.1641],
        [0.1529, 0.1651, 0.1672,  ..., 0.1551, 0.1624, 0.1640],
        [0.1540, 0.1618, 0.1657,  ..., 0.1568, 0.1606, 0.1606],
        ...,
        [0.1544, 0.1640, 0.1676,  ..., 0.1544, 0.1604, 0.1616],
        [0.1534, 0.1624, 0.1667,  ..., 0.1571, 0.1555, 0.1619],
        [0.1531, 0.1581, 0.1641,  ..., 0.1552, 0.1572, 0.1604]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:21<00:22,  1.26it/s]

tensor([[0.1585, 0.1646, 0.1703,  ..., 0.1551, 0.1599, 0.1637],
        [0.1558, 0.1641, 0.1690,  ..., 0.1562, 0.1578, 0.1649],
        [0.1511, 0.1579, 0.1631,  ..., 0.1494, 0.1518, 0.1545],
        ...,
        [0.1557, 0.1614, 0.1678,  ..., 0.1590, 0.1607, 0.1627],
        [0.1532, 0.1646, 0.1666,  ..., 0.1585, 0.1614, 0.1590],
        [0.1557, 0.1595, 0.1668,  ..., 0.1538, 0.1558, 0.1602]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:22<00:21,  1.28it/s]

tensor([[0.1587, 0.1686, 0.1725,  ..., 0.1591, 0.1655, 0.1675],
        [0.1530, 0.1602, 0.1646,  ..., 0.1528, 0.1561, 0.1624],
        [0.1541, 0.1646, 0.1689,  ..., 0.1590, 0.1612, 0.1649],
        ...,
        [0.1552, 0.1612, 0.1663,  ..., 0.1589, 0.1603, 0.1612],
        [0.1530, 0.1628, 0.1628,  ..., 0.1524, 0.1589, 0.1580],
        [0.1581, 0.1645, 0.1726,  ..., 0.1564, 0.1625, 0.1666]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:19,  1.31it/s]

tensor([[0.1536, 0.1608, 0.1642,  ..., 0.1537, 0.1549, 0.1593],
        [0.1546, 0.1627, 0.1647,  ..., 0.1569, 0.1580, 0.1604],
        [0.1529, 0.1580, 0.1638,  ..., 0.1504, 0.1580, 0.1618],
        ...,
        [0.1488, 0.1577, 0.1594,  ..., 0.1522, 0.1569, 0.1553],
        [0.1524, 0.1589, 0.1648,  ..., 0.1574, 0.1576, 0.1608],
        [0.1527, 0.1658, 0.1649,  ..., 0.1579, 0.1620, 0.1603]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:23<00:18,  1.35it/s]

tensor([[0.1547, 0.1620, 0.1663,  ..., 0.1540, 0.1571, 0.1611],
        [0.1577, 0.1654, 0.1722,  ..., 0.1577, 0.1622, 0.1658],
        [0.1541, 0.1574, 0.1662,  ..., 0.1544, 0.1550, 0.1594],
        ...,
        [0.1523, 0.1621, 0.1642,  ..., 0.1568, 0.1572, 0.1614],
        [0.1548, 0.1622, 0.1668,  ..., 0.1573, 0.1600, 0.1604],
        [0.1484, 0.1550, 0.1545,  ..., 0.1491, 0.1540, 0.1525]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:24<00:17,  1.38it/s]

tensor([[0.1488, 0.1519, 0.1588,  ..., 0.1471, 0.1480, 0.1529],
        [0.1565, 0.1631, 0.1695,  ..., 0.1576, 0.1604, 0.1624],
        [0.1562, 0.1636, 0.1709,  ..., 0.1577, 0.1602, 0.1649],
        ...,
        [0.1565, 0.1615, 0.1680,  ..., 0.1569, 0.1575, 0.1605],
        [0.1561, 0.1620, 0.1691,  ..., 0.1544, 0.1619, 0.1626],
        [0.1539, 0.1659, 0.1651,  ..., 0.1566, 0.1592, 0.1599]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:16,  1.42it/s]

tensor([[0.1525, 0.1617, 0.1642,  ..., 0.1550, 0.1611, 0.1578],
        [0.1530, 0.1582, 0.1632,  ..., 0.1530, 0.1546, 0.1568],
        [0.1567, 0.1615, 0.1679,  ..., 0.1537, 0.1596, 0.1644],
        ...,
        [0.1570, 0.1694, 0.1711,  ..., 0.1598, 0.1641, 0.1669],
        [0.1544, 0.1608, 0.1670,  ..., 0.1549, 0.1552, 0.1616],
        [0.1539, 0.1596, 0.1639,  ..., 0.1537, 0.1577, 0.1606]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:25<00:15,  1.45it/s]

tensor([[0.1554, 0.1640, 0.1665,  ..., 0.1545, 0.1623, 0.1603],
        [0.1468, 0.1561, 0.1550,  ..., 0.1478, 0.1520, 0.1504],
        [0.1574, 0.1671, 0.1718,  ..., 0.1572, 0.1634, 0.1668],
        ...,
        [0.1534, 0.1606, 0.1653,  ..., 0.1563, 0.1581, 0.1569],
        [0.1498, 0.1576, 0.1613,  ..., 0.1480, 0.1536, 0.1536],
        [0.1605, 0.1655, 0.1726,  ..., 0.1574, 0.1628, 0.1682]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:26<00:14,  1.47it/s]

tensor([[0.1519, 0.1596, 0.1641,  ..., 0.1549, 0.1557, 0.1597],
        [0.1524, 0.1652, 0.1645,  ..., 0.1558, 0.1603, 0.1616],
        [0.1543, 0.1690, 0.1701,  ..., 0.1590, 0.1636, 0.1684],
        ...,
        [0.1554, 0.1645, 0.1691,  ..., 0.1568, 0.1630, 0.1630],
        [0.1558, 0.1625, 0.1674,  ..., 0.1565, 0.1583, 0.1637],
        [0.1539, 0.1614, 0.1661,  ..., 0.1534, 0.1567, 0.1588]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:13,  1.46it/s]

tensor([[0.1585, 0.1664, 0.1698,  ..., 0.1566, 0.1606, 0.1655],
        [0.1554, 0.1621, 0.1692,  ..., 0.1564, 0.1599, 0.1632],
        [0.1570, 0.1633, 0.1710,  ..., 0.1567, 0.1599, 0.1658],
        ...,
        [0.1531, 0.1661, 0.1638,  ..., 0.1585, 0.1614, 0.1639],
        [0.1543, 0.1648, 0.1703,  ..., 0.1544, 0.1577, 0.1615],
        [0.1548, 0.1639, 0.1681,  ..., 0.1583, 0.1610, 0.1617]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:27<00:13,  1.46it/s]

tensor([[0.1522, 0.1590, 0.1630,  ..., 0.1532, 0.1552, 0.1601],
        [0.1571, 0.1628, 0.1697,  ..., 0.1556, 0.1589, 0.1641],
        [0.1581, 0.1634, 0.1702,  ..., 0.1566, 0.1592, 0.1659],
        ...,
        [0.1543, 0.1630, 0.1665,  ..., 0.1543, 0.1613, 0.1604],
        [0.1562, 0.1624, 0.1659,  ..., 0.1557, 0.1588, 0.1623],
        [0.1542, 0.1637, 0.1684,  ..., 0.1555, 0.1620, 0.1633]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:28<00:12,  1.48it/s]

tensor([[0.1549, 0.1655, 0.1704,  ..., 0.1592, 0.1641, 0.1669],
        [0.1541, 0.1600, 0.1650,  ..., 0.1524, 0.1560, 0.1589],
        [0.1543, 0.1602, 0.1652,  ..., 0.1555, 0.1563, 0.1606],
        ...,
        [0.1577, 0.1655, 0.1709,  ..., 0.1563, 0.1611, 0.1669],
        [0.1568, 0.1623, 0.1677,  ..., 0.1564, 0.1570, 0.1618],
        [0.1540, 0.1634, 0.1633,  ..., 0.1554, 0.1602, 0.1615]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.48it/s]

tensor([[0.1527, 0.1592, 0.1674,  ..., 0.1554, 0.1550, 0.1600],
        [0.1541, 0.1625, 0.1662,  ..., 0.1550, 0.1548, 0.1596],
        [0.1585, 0.1669, 0.1736,  ..., 0.1564, 0.1645, 0.1682],
        ...,
        [0.1546, 0.1591, 0.1663,  ..., 0.1525, 0.1535, 0.1633],
        [0.1546, 0.1633, 0.1687,  ..., 0.1571, 0.1607, 0.1623],
        [0.1509, 0.1622, 0.1628,  ..., 0.1522, 0.1582, 0.1604]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:29<00:10,  1.49it/s]

tensor([[0.1589, 0.1657, 0.1717,  ..., 0.1603, 0.1627, 0.1658],
        [0.1565, 0.1628, 0.1713,  ..., 0.1549, 0.1604, 0.1589],
        [0.1585, 0.1651, 0.1728,  ..., 0.1555, 0.1618, 0.1676],
        ...,
        [0.1530, 0.1592, 0.1644,  ..., 0.1541, 0.1542, 0.1635],
        [0.1551, 0.1627, 0.1660,  ..., 0.1567, 0.1578, 0.1604],
        [0.1575, 0.1650, 0.1719,  ..., 0.1569, 0.1649, 0.1670]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:30<00:10,  1.50it/s]

tensor([[0.1558, 0.1621, 0.1651,  ..., 0.1554, 0.1574, 0.1620],
        [0.1556, 0.1619, 0.1686,  ..., 0.1568, 0.1575, 0.1625],
        [0.1598, 0.1683, 0.1733,  ..., 0.1621, 0.1652, 0.1682],
        ...,
        [0.1560, 0.1627, 0.1707,  ..., 0.1551, 0.1577, 0.1629],
        [0.1527, 0.1625, 0.1658,  ..., 0.1523, 0.1573, 0.1564],
        [0.1532, 0.1603, 0.1638,  ..., 0.1538, 0.1550, 0.1597]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:31<00:09,  1.48it/s]

tensor([[0.1567, 0.1638, 0.1690,  ..., 0.1573, 0.1597, 0.1630],
        [0.1555, 0.1632, 0.1659,  ..., 0.1559, 0.1589, 0.1622],
        [0.1537, 0.1633, 0.1629,  ..., 0.1578, 0.1578, 0.1595],
        ...,
        [0.1535, 0.1657, 0.1630,  ..., 0.1577, 0.1633, 0.1600],
        [0.1546, 0.1617, 0.1665,  ..., 0.1547, 0.1554, 0.1591],
        [0.1554, 0.1615, 0.1672,  ..., 0.1565, 0.1572, 0.1620]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:31<00:08,  1.48it/s]

tensor([[0.1556, 0.1643, 0.1675,  ..., 0.1598, 0.1607, 0.1620],
        [0.1555, 0.1643, 0.1692,  ..., 0.1570, 0.1605, 0.1634],
        [0.1561, 0.1629, 0.1683,  ..., 0.1554, 0.1573, 0.1626],
        ...,
        [0.1540, 0.1622, 0.1634,  ..., 0.1597, 0.1584, 0.1593],
        [0.1497, 0.1588, 0.1632,  ..., 0.1510, 0.1543, 0.1574],
        [0.1542, 0.1643, 0.1682,  ..., 0.1552, 0.1647, 0.1623]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:32<00:08,  1.48it/s]

tensor([[0.1584, 0.1651, 0.1722,  ..., 0.1575, 0.1647, 0.1670],
        [0.1527, 0.1600, 0.1656,  ..., 0.1528, 0.1565, 0.1581],
        [0.1538, 0.1627, 0.1646,  ..., 0.1547, 0.1573, 0.1588],
        ...,
        [0.1564, 0.1659, 0.1713,  ..., 0.1533, 0.1584, 0.1650],
        [0.1538, 0.1646, 0.1687,  ..., 0.1570, 0.1627, 0.1647],
        [0.1545, 0.1630, 0.1656,  ..., 0.1567, 0.1601, 0.1646]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:33<00:07,  1.49it/s]

tensor([[0.1553, 0.1603, 0.1646,  ..., 0.1558, 0.1539, 0.1601],
        [0.1574, 0.1619, 0.1709,  ..., 0.1567, 0.1585, 0.1629],
        [0.1536, 0.1611, 0.1648,  ..., 0.1543, 0.1549, 0.1571],
        ...,
        [0.1519, 0.1593, 0.1625,  ..., 0.1547, 0.1539, 0.1550],
        [0.1551, 0.1617, 0.1661,  ..., 0.1545, 0.1594, 0.1613],
        [0.1565, 0.1638, 0.1716,  ..., 0.1560, 0.1630, 0.1653]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:33<00:06,  1.47it/s]

tensor([[0.1524, 0.1649, 0.1675,  ..., 0.1562, 0.1589, 0.1621],
        [0.1474, 0.1549, 0.1568,  ..., 0.1494, 0.1503, 0.1538],
        [0.1487, 0.1568, 0.1586,  ..., 0.1520, 0.1515, 0.1558],
        ...,
        [0.1577, 0.1609, 0.1674,  ..., 0.1549, 0.1557, 0.1561],
        [0.1554, 0.1659, 0.1688,  ..., 0.1582, 0.1618, 0.1643],
        [0.1554, 0.1650, 0.1697,  ..., 0.1553, 0.1627, 0.1663]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:34<00:06,  1.45it/s]

tensor([[0.1479, 0.1603, 0.1605,  ..., 0.1531, 0.1554, 0.1565],
        [0.1459, 0.1569, 0.1571,  ..., 0.1509, 0.1554, 0.1565],
        [0.1512, 0.1579, 0.1619,  ..., 0.1576, 0.1554, 0.1540],
        ...,
        [0.1489, 0.1550, 0.1573,  ..., 0.1489, 0.1513, 0.1571],
        [0.1548, 0.1641, 0.1693,  ..., 0.1577, 0.1618, 0.1613],
        [0.1549, 0.1588, 0.1638,  ..., 0.1536, 0.1554, 0.1578]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:35<00:05,  1.45it/s]

tensor([[0.1535, 0.1645, 0.1655,  ..., 0.1555, 0.1593, 0.1609],
        [0.1493, 0.1630, 0.1621,  ..., 0.1563, 0.1562, 0.1568],
        [0.1581, 0.1649, 0.1727,  ..., 0.1563, 0.1623, 0.1651],
        ...,
        [0.1514, 0.1562, 0.1609,  ..., 0.1549, 0.1524, 0.1532],
        [0.1488, 0.1598, 0.1632,  ..., 0.1529, 0.1565, 0.1577],
        [0.1549, 0.1605, 0.1659,  ..., 0.1541, 0.1593, 0.1628]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:35<00:04,  1.47it/s]

tensor([[0.1550, 0.1651, 0.1676,  ..., 0.1559, 0.1618, 0.1628],
        [0.1577, 0.1668, 0.1719,  ..., 0.1556, 0.1642, 0.1653],
        [0.1577, 0.1643, 0.1714,  ..., 0.1569, 0.1606, 0.1679],
        ...,
        [0.1549, 0.1624, 0.1670,  ..., 0.1560, 0.1603, 0.1620],
        [0.1542, 0.1625, 0.1665,  ..., 0.1552, 0.1565, 0.1594],
        [0.1431, 0.1549, 0.1548,  ..., 0.1458, 0.1545, 0.1535]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:36<00:04,  1.48it/s]

tensor([[0.1561, 0.1658, 0.1670,  ..., 0.1576, 0.1633, 0.1619],
        [0.1559, 0.1612, 0.1676,  ..., 0.1567, 0.1565, 0.1636],
        [0.1568, 0.1674, 0.1717,  ..., 0.1560, 0.1625, 0.1644],
        ...,
        [0.1582, 0.1643, 0.1724,  ..., 0.1572, 0.1627, 0.1674],
        [0.1532, 0.1620, 0.1654,  ..., 0.1573, 0.1582, 0.1627],
        [0.1542, 0.1601, 0.1659,  ..., 0.1538, 0.1550, 0.1601]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:37<00:03,  1.49it/s]

tensor([[0.1544, 0.1647, 0.1704,  ..., 0.1534, 0.1611, 0.1638],
        [0.1511, 0.1602, 0.1646,  ..., 0.1524, 0.1560, 0.1538],
        [0.1542, 0.1601, 0.1667,  ..., 0.1540, 0.1562, 0.1632],
        ...,
        [0.1549, 0.1609, 0.1656,  ..., 0.1563, 0.1588, 0.1633],
        [0.1556, 0.1633, 0.1684,  ..., 0.1578, 0.1628, 0.1655],
        [0.1571, 0.1647, 0.1704,  ..., 0.1566, 0.1594, 0.1654]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:37<00:02,  1.47it/s]

tensor([[0.1533, 0.1629, 0.1647,  ..., 0.1590, 0.1585, 0.1610],
        [0.1562, 0.1643, 0.1687,  ..., 0.1573, 0.1627, 0.1626],
        [0.1508, 0.1576, 0.1591,  ..., 0.1502, 0.1558, 0.1578],
        ...,
        [0.1577, 0.1628, 0.1700,  ..., 0.1588, 0.1629, 0.1637],
        [0.1509, 0.1591, 0.1639,  ..., 0.1572, 0.1588, 0.1609],
        [0.1515, 0.1611, 0.1643,  ..., 0.1516, 0.1591, 0.1606]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:38<00:02,  1.43it/s]

tensor([[0.1583, 0.1665, 0.1736,  ..., 0.1563, 0.1640, 0.1661],
        [0.1577, 0.1642, 0.1723,  ..., 0.1569, 0.1601, 0.1668],
        [0.1512, 0.1601, 0.1614,  ..., 0.1512, 0.1569, 0.1590],
        ...,
        [0.1583, 0.1647, 0.1685,  ..., 0.1561, 0.1611, 0.1661],
        [0.1561, 0.1652, 0.1691,  ..., 0.1555, 0.1624, 0.1624],
        [0.1561, 0.1658, 0.1666,  ..., 0.1571, 0.1613, 0.1636]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:39<00:01,  1.43it/s]

tensor([[0.1567, 0.1655, 0.1698,  ..., 0.1571, 0.1619, 0.1627],
        [0.1499, 0.1593, 0.1618,  ..., 0.1520, 0.1583, 0.1594],
        [0.1568, 0.1643, 0.1713,  ..., 0.1564, 0.1609, 0.1615],
        ...,
        [0.1523, 0.1603, 0.1628,  ..., 0.1560, 0.1532, 0.1547],
        [0.1577, 0.1608, 0.1691,  ..., 0.1576, 0.1586, 0.1623],
        [0.1608, 0.1678, 0.1741,  ..., 0.1576, 0.1658, 0.1652]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:39<00:00,  1.45it/s]

tensor([[0.1513, 0.1626, 0.1626,  ..., 0.1544, 0.1592, 0.1593],
        [0.1488, 0.1558, 0.1592,  ..., 0.1495, 0.1554, 0.1573],
        [0.1588, 0.1661, 0.1699,  ..., 0.1611, 0.1622, 0.1650],
        ...,
        [0.1568, 0.1638, 0.1705,  ..., 0.1560, 0.1621, 0.1661],
        [0.1581, 0.1664, 0.1690,  ..., 0.1612, 0.1656, 0.1663],
        [0.1474, 0.1614, 0.1594,  ..., 0.1569, 0.1574, 0.1562]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:40<00:00,  1.40it/s]
 12%|█▎        | 1/8 [00:00<00:02,  2.94it/s]

tensor([[0.1588, 0.1644, 0.1726,  ..., 0.1596, 0.1632, 0.1673],
        [0.1573, 0.1623, 0.1690,  ..., 0.1567, 0.1638, 0.1609],
        [0.1547, 0.1623, 0.1673,  ..., 0.1565, 0.1587, 0.1617],
        ...,
        [0.1558, 0.1632, 0.1693,  ..., 0.1582, 0.1625, 0.1664],
        [0.1556, 0.1612, 0.1661,  ..., 0.1544, 0.1560, 0.1607],
        [0.1536, 0.1603, 0.1645,  ..., 0.1552, 0.1552, 0.1600]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:02,  2.98it/s]

tensor([[0.1561, 0.1633, 0.1688,  ..., 0.1569, 0.1586, 0.1626],
        [0.1581, 0.1645, 0.1727,  ..., 0.1551, 0.1622, 0.1651],
        [0.1543, 0.1618, 0.1648,  ..., 0.1539, 0.1564, 0.1612],
        ...,
        [0.1565, 0.1638, 0.1670,  ..., 0.1564, 0.1593, 0.1622],
        [0.1541, 0.1607, 0.1644,  ..., 0.1582, 0.1581, 0.1604],
        [0.1577, 0.1643, 0.1706,  ..., 0.1574, 0.1597, 0.1657]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.02it/s]

tensor([[0.1578, 0.1641, 0.1719,  ..., 0.1593, 0.1643, 0.1663],
        [0.1541, 0.1613, 0.1687,  ..., 0.1573, 0.1603, 0.1612],
        [0.1577, 0.1642, 0.1720,  ..., 0.1561, 0.1616, 0.1660],
        ...,
        [0.1543, 0.1623, 0.1627,  ..., 0.1563, 0.1577, 0.1599],
        [0.1530, 0.1637, 0.1677,  ..., 0.1596, 0.1594, 0.1613],
        [0.1538, 0.1640, 0.1662,  ..., 0.1567, 0.1577, 0.1622]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  2.84it/s]

tensor([[0.1525, 0.1621, 0.1653,  ..., 0.1572, 0.1589, 0.1612],
        [0.1516, 0.1568, 0.1625,  ..., 0.1548, 0.1555, 0.1582],
        [0.1552, 0.1591, 0.1655,  ..., 0.1542, 0.1559, 0.1603],
        ...,
        [0.1547, 0.1609, 0.1678,  ..., 0.1552, 0.1569, 0.1615],
        [0.1552, 0.1647, 0.1661,  ..., 0.1583, 0.1621, 0.1608],
        [0.1601, 0.1649, 0.1745,  ..., 0.1584, 0.1641, 0.1641]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:01,  2.81it/s]

tensor([[0.1539, 0.1585, 0.1659,  ..., 0.1552, 0.1549, 0.1591],
        [0.1569, 0.1657, 0.1704,  ..., 0.1608, 0.1605, 0.1633],
        [0.1576, 0.1628, 0.1708,  ..., 0.1559, 0.1597, 0.1655],
        ...,
        [0.1543, 0.1636, 0.1668,  ..., 0.1583, 0.1595, 0.1620],
        [0.1540, 0.1632, 0.1651,  ..., 0.1553, 0.1619, 0.1618],
        [0.1545, 0.1645, 0.1675,  ..., 0.1554, 0.1577, 0.1617]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:02<00:00,  2.84it/s]

tensor([[0.1546, 0.1616, 0.1662,  ..., 0.1558, 0.1585, 0.1626],
        [0.1520, 0.1613, 0.1630,  ..., 0.1563, 0.1564, 0.1583],
        [0.1580, 0.1628, 0.1692,  ..., 0.1554, 0.1589, 0.1620],
        ...,
        [0.1471, 0.1573, 0.1610,  ..., 0.1523, 0.1544, 0.1547],
        [0.1575, 0.1647, 0.1731,  ..., 0.1568, 0.1596, 0.1652],
        [0.1548, 0.1606, 0.1666,  ..., 0.1557, 0.1573, 0.1607]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  2.94it/s]

tensor([[0.1559, 0.1647, 0.1710,  ..., 0.1572, 0.1626, 0.1642],
        [0.1500, 0.1656, 0.1620,  ..., 0.1549, 0.1616, 0.1583],
        [0.1547, 0.1670, 0.1709,  ..., 0.1560, 0.1584, 0.1588],
        ...,
        [0.1561, 0.1629, 0.1697,  ..., 0.1536, 0.1582, 0.1620],
        [0.1577, 0.1666, 0.1725,  ..., 0.1557, 0.1645, 0.1666],
        [0.1531, 0.1638, 0.1645,  ..., 0.1563, 0.1587, 0.1579]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.95it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1523, 0.1621, 0.1651,  ..., 0.1548, 0.1561, 0.1607],
        [0.1561, 0.1641, 0.1720,  ..., 0.1579, 0.1601, 0.1638],
        [0.1572, 0.1633, 0.1702,  ..., 0.1543, 0.1612, 0.1613],
        ...,
        [0.1576, 0.1617, 0.1688,  ..., 0.1591, 0.1607, 0.1640],
        [0.1507, 0.1596, 0.1632,  ..., 0.1523, 0.1581, 0.1591],
        [0.1539, 0.1649, 0.1657,  ..., 0.1562, 0.1631, 0.1626]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 13, train_loss: -0.6969858407974243, valid_loss: -0.687627375125885
Parameter containing:
tensor([[ 1.0003,  1.0003,  1.0003,  1.0003,  1.0003,  1.0002,  1.0004,  1.0002,
          1.0002,  1.0005, -0.4997, -0.4998, -0.4998, -0.4997, -0.4997, -0.4997,
         -0.4997, -0.4998, -0.4997, -0.4998],
      

  2%|▏         | 1/57 [00:00<00:38,  1.46it/s]

tensor([[0.1530, 0.1587, 0.1652,  ..., 0.1565, 0.1583, 0.1586],
        [0.1556, 0.1614, 0.1667,  ..., 0.1553, 0.1576, 0.1612],
        [0.1516, 0.1611, 0.1653,  ..., 0.1554, 0.1540, 0.1592],
        ...,
        [0.1548, 0.1641, 0.1670,  ..., 0.1576, 0.1606, 0.1629],
        [0.1527, 0.1613, 0.1636,  ..., 0.1548, 0.1575, 0.1599],
        [0.1533, 0.1608, 0.1695,  ..., 0.1554, 0.1583, 0.1627]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:37,  1.48it/s]

tensor([[0.1572, 0.1627, 0.1704,  ..., 0.1570, 0.1636, 0.1659],
        [0.1520, 0.1576, 0.1642,  ..., 0.1524, 0.1540, 0.1583],
        [0.1521, 0.1643, 0.1688,  ..., 0.1525, 0.1578, 0.1610],
        ...,
        [0.1557, 0.1627, 0.1669,  ..., 0.1572, 0.1594, 0.1610],
        [0.1542, 0.1636, 0.1666,  ..., 0.1573, 0.1621, 0.1618],
        [0.1584, 0.1672, 0.1713,  ..., 0.1568, 0.1618, 0.1648]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:01<00:35,  1.50it/s]

tensor([[0.1533, 0.1643, 0.1658,  ..., 0.1556, 0.1624, 0.1640],
        [0.1527, 0.1638, 0.1628,  ..., 0.1576, 0.1600, 0.1577],
        [0.1560, 0.1647, 0.1672,  ..., 0.1577, 0.1614, 0.1626],
        ...,
        [0.1499, 0.1574, 0.1619,  ..., 0.1519, 0.1537, 0.1575],
        [0.1564, 0.1633, 0.1706,  ..., 0.1561, 0.1609, 0.1669],
        [0.1575, 0.1655, 0.1700,  ..., 0.1560, 0.1641, 0.1655]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:35,  1.51it/s]

tensor([[0.1538, 0.1606, 0.1669,  ..., 0.1550, 0.1566, 0.1611],
        [0.1596, 0.1593, 0.1678,  ..., 0.1514, 0.1557, 0.1565],
        [0.1517, 0.1575, 0.1644,  ..., 0.1506, 0.1534, 0.1557],
        ...,
        [0.1567, 0.1664, 0.1686,  ..., 0.1606, 0.1601, 0.1624],
        [0.1562, 0.1638, 0.1680,  ..., 0.1570, 0.1621, 0.1607],
        [0.1524, 0.1593, 0.1636,  ..., 0.1540, 0.1562, 0.1582]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.51it/s]

tensor([[0.1534, 0.1619, 0.1633,  ..., 0.1547, 0.1573, 0.1564],
        [0.1560, 0.1648, 0.1680,  ..., 0.1579, 0.1596, 0.1607],
        [0.1586, 0.1649, 0.1725,  ..., 0.1574, 0.1630, 0.1671],
        ...,
        [0.1596, 0.1663, 0.1708,  ..., 0.1578, 0.1671, 0.1637],
        [0.1566, 0.1618, 0.1675,  ..., 0.1555, 0.1576, 0.1622],
        [0.1505, 0.1578, 0.1578,  ..., 0.1525, 0.1522, 0.1537]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.52it/s]

tensor([[0.1542, 0.1614, 0.1656,  ..., 0.1533, 0.1554, 0.1597],
        [0.1517, 0.1619, 0.1613,  ..., 0.1532, 0.1596, 0.1558],
        [0.1596, 0.1637, 0.1717,  ..., 0.1571, 0.1629, 0.1674],
        ...,
        [0.1586, 0.1663, 0.1711,  ..., 0.1570, 0.1634, 0.1647],
        [0.1586, 0.1648, 0.1712,  ..., 0.1552, 0.1620, 0.1660],
        [0.1512, 0.1619, 0.1637,  ..., 0.1534, 0.1549, 0.1598]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:33,  1.50it/s]

tensor([[0.1530, 0.1614, 0.1681,  ..., 0.1581, 0.1608, 0.1639],
        [0.1588, 0.1649, 0.1712,  ..., 0.1581, 0.1640, 0.1658],
        [0.1570, 0.1623, 0.1707,  ..., 0.1550, 0.1600, 0.1645],
        ...,
        [0.1558, 0.1657, 0.1685,  ..., 0.1549, 0.1596, 0.1651],
        [0.1531, 0.1619, 0.1655,  ..., 0.1512, 0.1560, 0.1561],
        [0.1541, 0.1631, 0.1654,  ..., 0.1528, 0.1597, 0.1609]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:33,  1.47it/s]

tensor([[0.1569, 0.1646, 0.1698,  ..., 0.1568, 0.1641, 0.1660],
        [0.1532, 0.1595, 0.1648,  ..., 0.1537, 0.1559, 0.1588],
        [0.1534, 0.1596, 0.1679,  ..., 0.1554, 0.1578, 0.1625],
        ...,
        [0.1580, 0.1657, 0.1699,  ..., 0.1596, 0.1643, 0.1652],
        [0.1572, 0.1630, 0.1706,  ..., 0.1564, 0.1614, 0.1625],
        [0.1548, 0.1615, 0.1669,  ..., 0.1546, 0.1563, 0.1650]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:32,  1.46it/s]

tensor([[0.1546, 0.1621, 0.1675,  ..., 0.1557, 0.1567, 0.1610],
        [0.1488, 0.1621, 0.1588,  ..., 0.1538, 0.1563, 0.1586],
        [0.1572, 0.1622, 0.1680,  ..., 0.1559, 0.1599, 0.1609],
        ...,
        [0.1567, 0.1610, 0.1626,  ..., 0.1535, 0.1533, 0.1586],
        [0.1581, 0.1684, 0.1699,  ..., 0.1580, 0.1626, 0.1651],
        [0.1553, 0.1646, 0.1687,  ..., 0.1546, 0.1598, 0.1633]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:32,  1.43it/s]

tensor([[0.1458, 0.1571, 0.1581,  ..., 0.1493, 0.1571, 0.1570],
        [0.1494, 0.1556, 0.1630,  ..., 0.1553, 0.1536, 0.1563],
        [0.1522, 0.1604, 0.1649,  ..., 0.1541, 0.1560, 0.1623],
        ...,
        [0.1529, 0.1641, 0.1675,  ..., 0.1539, 0.1610, 0.1658],
        [0.1572, 0.1631, 0.1675,  ..., 0.1565, 0.1603, 0.1648],
        [0.1547, 0.1644, 0.1732,  ..., 0.1571, 0.1622, 0.1651]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:31,  1.46it/s]

tensor([[0.1580, 0.1633, 0.1691,  ..., 0.1579, 0.1582, 0.1642],
        [0.1556, 0.1643, 0.1701,  ..., 0.1616, 0.1631, 0.1645],
        [0.1564, 0.1654, 0.1666,  ..., 0.1572, 0.1602, 0.1630],
        ...,
        [0.1513, 0.1597, 0.1606,  ..., 0.1522, 0.1524, 0.1547],
        [0.1528, 0.1597, 0.1644,  ..., 0.1527, 0.1542, 0.1573],
        [0.1614, 0.1673, 0.1743,  ..., 0.1592, 0.1666, 0.1679]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:30,  1.46it/s]

tensor([[0.1506, 0.1599, 0.1609,  ..., 0.1520, 0.1583, 0.1585],
        [0.1567, 0.1636, 0.1678,  ..., 0.1548, 0.1610, 0.1621],
        [0.1522, 0.1606, 0.1633,  ..., 0.1524, 0.1586, 0.1599],
        ...,
        [0.1551, 0.1608, 0.1653,  ..., 0.1559, 0.1565, 0.1586],
        [0.1546, 0.1616, 0.1700,  ..., 0.1568, 0.1579, 0.1647],
        [0.1572, 0.1664, 0.1713,  ..., 0.1584, 0.1621, 0.1652]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:29,  1.48it/s]

tensor([[0.1480, 0.1555, 0.1595,  ..., 0.1502, 0.1529, 0.1606],
        [0.1465, 0.1553, 0.1551,  ..., 0.1486, 0.1528, 0.1487],
        [0.1533, 0.1625, 0.1654,  ..., 0.1540, 0.1569, 0.1597],
        ...,
        [0.1559, 0.1650, 0.1694,  ..., 0.1565, 0.1613, 0.1644],
        [0.1530, 0.1630, 0.1673,  ..., 0.1572, 0.1605, 0.1647],
        [0.1513, 0.1605, 0.1630,  ..., 0.1544, 0.1564, 0.1618]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.51it/s]

tensor([[0.1568, 0.1651, 0.1701,  ..., 0.1585, 0.1602, 0.1625],
        [0.1520, 0.1595, 0.1649,  ..., 0.1543, 0.1564, 0.1604],
        [0.1562, 0.1589, 0.1667,  ..., 0.1545, 0.1547, 0.1601],
        ...,
        [0.1576, 0.1654, 0.1739,  ..., 0.1577, 0.1634, 0.1671],
        [0.1571, 0.1661, 0.1719,  ..., 0.1568, 0.1629, 0.1679],
        [0.1581, 0.1642, 0.1698,  ..., 0.1579, 0.1636, 0.1664]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:27,  1.51it/s]

tensor([[0.1538, 0.1645, 0.1662,  ..., 0.1566, 0.1641, 0.1614],
        [0.1507, 0.1589, 0.1638,  ..., 0.1571, 0.1587, 0.1607],
        [0.1578, 0.1633, 0.1698,  ..., 0.1583, 0.1626, 0.1648],
        ...,
        [0.1546, 0.1623, 0.1652,  ..., 0.1582, 0.1588, 0.1604],
        [0.1520, 0.1573, 0.1622,  ..., 0.1511, 0.1516, 0.1555],
        [0.1526, 0.1626, 0.1677,  ..., 0.1528, 0.1547, 0.1591]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:27,  1.51it/s]

tensor([[0.1597, 0.1648, 0.1719,  ..., 0.1565, 0.1631, 0.1667],
        [0.1528, 0.1617, 0.1666,  ..., 0.1510, 0.1592, 0.1618],
        [0.1544, 0.1647, 0.1656,  ..., 0.1555, 0.1615, 0.1640],
        ...,
        [0.1561, 0.1618, 0.1695,  ..., 0.1537, 0.1592, 0.1632],
        [0.1552, 0.1650, 0.1647,  ..., 0.1585, 0.1633, 0.1609],
        [0.1543, 0.1583, 0.1639,  ..., 0.1535, 0.1534, 0.1584]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:27,  1.48it/s]

tensor([[0.1499, 0.1586, 0.1611,  ..., 0.1522, 0.1527, 0.1594],
        [0.1531, 0.1617, 0.1640,  ..., 0.1564, 0.1577, 0.1599],
        [0.1541, 0.1604, 0.1660,  ..., 0.1552, 0.1566, 0.1588],
        ...,
        [0.1530, 0.1630, 0.1645,  ..., 0.1544, 0.1574, 0.1608],
        [0.1545, 0.1610, 0.1676,  ..., 0.1547, 0.1575, 0.1601],
        [0.1525, 0.1631, 0.1650,  ..., 0.1517, 0.1565, 0.1558]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:26,  1.46it/s]

tensor([[0.1565, 0.1613, 0.1706,  ..., 0.1557, 0.1571, 0.1617],
        [0.1540, 0.1597, 0.1654,  ..., 0.1549, 0.1561, 0.1603],
        [0.1586, 0.1664, 0.1725,  ..., 0.1566, 0.1620, 0.1667],
        ...,
        [0.1554, 0.1658, 0.1689,  ..., 0.1582, 0.1619, 0.1641],
        [0.1587, 0.1642, 0.1722,  ..., 0.1540, 0.1620, 0.1636],
        [0.1476, 0.1524, 0.1575,  ..., 0.1527, 0.1477, 0.1534]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:25,  1.47it/s]

tensor([[0.1535, 0.1640, 0.1653,  ..., 0.1561, 0.1587, 0.1595],
        [0.1597, 0.1635, 0.1704,  ..., 0.1583, 0.1650, 0.1658],
        [0.1554, 0.1638, 0.1699,  ..., 0.1564, 0.1595, 0.1677],
        ...,
        [0.1535, 0.1652, 0.1682,  ..., 0.1544, 0.1625, 0.1614],
        [0.1542, 0.1628, 0.1667,  ..., 0.1564, 0.1557, 0.1640],
        [0.1544, 0.1574, 0.1629,  ..., 0.1515, 0.1531, 0.1570]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:24,  1.50it/s]

tensor([[0.1528, 0.1609, 0.1659,  ..., 0.1512, 0.1571, 0.1618],
        [0.1495, 0.1591, 0.1629,  ..., 0.1547, 0.1576, 0.1604],
        [0.1525, 0.1602, 0.1660,  ..., 0.1556, 0.1575, 0.1639],
        ...,
        [0.1578, 0.1648, 0.1711,  ..., 0.1575, 0.1606, 0.1649],
        [0.1586, 0.1649, 0.1722,  ..., 0.1568, 0.1635, 0.1668],
        [0.1531, 0.1614, 0.1662,  ..., 0.1558, 0.1572, 0.1614]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:14<00:24,  1.48it/s]

tensor([[0.1504, 0.1630, 0.1661,  ..., 0.1521, 0.1595, 0.1611],
        [0.1553, 0.1624, 0.1665,  ..., 0.1544, 0.1596, 0.1614],
        [0.1534, 0.1616, 0.1670,  ..., 0.1554, 0.1598, 0.1629],
        ...,
        [0.1512, 0.1603, 0.1634,  ..., 0.1495, 0.1550, 0.1595],
        [0.1560, 0.1633, 0.1676,  ..., 0.1558, 0.1607, 0.1618],
        [0.1567, 0.1621, 0.1655,  ..., 0.1560, 0.1603, 0.1621]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:14<00:23,  1.50it/s]

tensor([[0.1522, 0.1647, 0.1643,  ..., 0.1557, 0.1578, 0.1582],
        [0.1576, 0.1637, 0.1718,  ..., 0.1572, 0.1622, 0.1663],
        [0.1543, 0.1614, 0.1654,  ..., 0.1549, 0.1565, 0.1603],
        ...,
        [0.1534, 0.1627, 0.1658,  ..., 0.1515, 0.1550, 0.1589],
        [0.1562, 0.1647, 0.1692,  ..., 0.1559, 0.1618, 0.1665],
        [0.1558, 0.1596, 0.1668,  ..., 0.1521, 0.1579, 0.1619]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:22,  1.51it/s]

tensor([[0.1566, 0.1606, 0.1665,  ..., 0.1567, 0.1584, 0.1615],
        [0.1600, 0.1670, 0.1715,  ..., 0.1591, 0.1645, 0.1668],
        [0.1564, 0.1691, 0.1677,  ..., 0.1589, 0.1661, 0.1628],
        ...,
        [0.1509, 0.1575, 0.1620,  ..., 0.1547, 0.1539, 0.1553],
        [0.1538, 0.1602, 0.1662,  ..., 0.1552, 0.1545, 0.1585],
        [0.1566, 0.1649, 0.1701,  ..., 0.1578, 0.1637, 0.1654]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:21,  1.52it/s]

tensor([[0.1519, 0.1607, 0.1636,  ..., 0.1558, 0.1571, 0.1590],
        [0.1550, 0.1614, 0.1694,  ..., 0.1568, 0.1590, 0.1651],
        [0.1479, 0.1573, 0.1617,  ..., 0.1532, 0.1537, 0.1576],
        ...,
        [0.1484, 0.1577, 0.1586,  ..., 0.1503, 0.1532, 0.1561],
        [0.1583, 0.1637, 0.1679,  ..., 0.1586, 0.1618, 0.1615],
        [0.1517, 0.1622, 0.1631,  ..., 0.1560, 0.1551, 0.1576]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:16<00:21,  1.51it/s]

tensor([[0.1551, 0.1605, 0.1666,  ..., 0.1550, 0.1562, 0.1597],
        [0.1500, 0.1593, 0.1625,  ..., 0.1523, 0.1573, 0.1582],
        [0.1527, 0.1607, 0.1647,  ..., 0.1529, 0.1585, 0.1608],
        ...,
        [0.1558, 0.1621, 0.1679,  ..., 0.1567, 0.1589, 0.1634],
        [0.1565, 0.1660, 0.1676,  ..., 0.1573, 0.1633, 0.1637],
        [0.1564, 0.1652, 0.1702,  ..., 0.1586, 0.1613, 0.1659]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:20,  1.52it/s]

tensor([[0.1581, 0.1641, 0.1723,  ..., 0.1571, 0.1626, 0.1671],
        [0.1526, 0.1635, 0.1661,  ..., 0.1550, 0.1575, 0.1598],
        [0.1525, 0.1611, 0.1673,  ..., 0.1541, 0.1576, 0.1611],
        ...,
        [0.1528, 0.1601, 0.1648,  ..., 0.1555, 0.1577, 0.1559],
        [0.1551, 0.1638, 0.1694,  ..., 0.1587, 0.1641, 0.1664],
        [0.1530, 0.1616, 0.1672,  ..., 0.1531, 0.1596, 0.1579]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:19,  1.51it/s]

tensor([[0.1553, 0.1650, 0.1702,  ..., 0.1549, 0.1599, 0.1628],
        [0.1512, 0.1593, 0.1620,  ..., 0.1505, 0.1546, 0.1552],
        [0.1516, 0.1617, 0.1637,  ..., 0.1550, 0.1557, 0.1608],
        ...,
        [0.1577, 0.1652, 0.1699,  ..., 0.1557, 0.1591, 0.1648],
        [0.1519, 0.1649, 0.1634,  ..., 0.1553, 0.1624, 0.1598],
        [0.1529, 0.1624, 0.1652,  ..., 0.1521, 0.1546, 0.1579]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:18<00:19,  1.47it/s]

tensor([[0.1515, 0.1640, 0.1652,  ..., 0.1575, 0.1630, 0.1598],
        [0.1554, 0.1603, 0.1676,  ..., 0.1537, 0.1569, 0.1594],
        [0.1587, 0.1657, 0.1728,  ..., 0.1581, 0.1642, 0.1673],
        ...,
        [0.1532, 0.1601, 0.1632,  ..., 0.1532, 0.1552, 0.1568],
        [0.1536, 0.1626, 0.1665,  ..., 0.1582, 0.1624, 0.1631],
        [0.1516, 0.1605, 0.1616,  ..., 0.1527, 0.1544, 0.1560]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:19<00:19,  1.47it/s]

tensor([[0.1535, 0.1568, 0.1641,  ..., 0.1519, 0.1527, 0.1564],
        [0.1502, 0.1609, 0.1610,  ..., 0.1548, 0.1551, 0.1556],
        [0.1582, 0.1654, 0.1746,  ..., 0.1583, 0.1619, 0.1672],
        ...,
        [0.1565, 0.1673, 0.1706,  ..., 0.1588, 0.1622, 0.1692],
        [0.1568, 0.1641, 0.1651,  ..., 0.1538, 0.1579, 0.1610],
        [0.1553, 0.1644, 0.1694,  ..., 0.1587, 0.1599, 0.1626]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:18,  1.47it/s]

tensor([[0.1500, 0.1577, 0.1608,  ..., 0.1561, 0.1564, 0.1555],
        [0.1495, 0.1547, 0.1600,  ..., 0.1531, 0.1498, 0.1543],
        [0.1568, 0.1629, 0.1697,  ..., 0.1563, 0.1597, 0.1624],
        ...,
        [0.1539, 0.1637, 0.1686,  ..., 0.1578, 0.1596, 0.1623],
        [0.1571, 0.1692, 0.1694,  ..., 0.1605, 0.1649, 0.1654],
        [0.1465, 0.1549, 0.1597,  ..., 0.1512, 0.1526, 0.1534]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:20<00:17,  1.49it/s]

tensor([[0.1558, 0.1640, 0.1672,  ..., 0.1568, 0.1618, 0.1637],
        [0.1536, 0.1626, 0.1646,  ..., 0.1564, 0.1597, 0.1590],
        [0.1552, 0.1630, 0.1664,  ..., 0.1577, 0.1587, 0.1623],
        ...,
        [0.1523, 0.1584, 0.1656,  ..., 0.1520, 0.1556, 0.1563],
        [0.1618, 0.1647, 0.1709,  ..., 0.1589, 0.1670, 0.1643],
        [0.1527, 0.1627, 0.1631,  ..., 0.1547, 0.1576, 0.1576]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:21<00:16,  1.49it/s]

tensor([[0.1561, 0.1644, 0.1691,  ..., 0.1582, 0.1624, 0.1636],
        [0.1559, 0.1645, 0.1675,  ..., 0.1566, 0.1612, 0.1633],
        [0.1521, 0.1627, 0.1673,  ..., 0.1552, 0.1594, 0.1605],
        ...,
        [0.1565, 0.1638, 0.1692,  ..., 0.1563, 0.1624, 0.1611],
        [0.1534, 0.1597, 0.1637,  ..., 0.1526, 0.1544, 0.1564],
        [0.1528, 0.1608, 0.1651,  ..., 0.1518, 0.1558, 0.1576]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:22<00:16,  1.49it/s]

tensor([[0.1469, 0.1531, 0.1560,  ..., 0.1479, 0.1457, 0.1461],
        [0.1582, 0.1648, 0.1700,  ..., 0.1560, 0.1604, 0.1643],
        [0.1576, 0.1632, 0.1700,  ..., 0.1565, 0.1588, 0.1649],
        ...,
        [0.1512, 0.1587, 0.1618,  ..., 0.1555, 0.1576, 0.1579],
        [0.1530, 0.1612, 0.1636,  ..., 0.1538, 0.1550, 0.1577],
        [0.1554, 0.1635, 0.1657,  ..., 0.1576, 0.1619, 0.1601]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:22<00:15,  1.51it/s]

tensor([[0.1466, 0.1562, 0.1559,  ..., 0.1486, 0.1508, 0.1528],
        [0.1574, 0.1634, 0.1692,  ..., 0.1592, 0.1626, 0.1636],
        [0.1556, 0.1639, 0.1665,  ..., 0.1581, 0.1596, 0.1627],
        ...,
        [0.1553, 0.1637, 0.1635,  ..., 0.1571, 0.1616, 0.1585],
        [0.1552, 0.1629, 0.1670,  ..., 0.1556, 0.1593, 0.1609],
        [0.1509, 0.1590, 0.1649,  ..., 0.1545, 0.1532, 0.1576]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:23<00:14,  1.51it/s]

tensor([[0.1515, 0.1655, 0.1673,  ..., 0.1565, 0.1586, 0.1623],
        [0.1522, 0.1572, 0.1629,  ..., 0.1519, 0.1536, 0.1557],
        [0.1559, 0.1670, 0.1724,  ..., 0.1561, 0.1640, 0.1675],
        ...,
        [0.1568, 0.1637, 0.1708,  ..., 0.1554, 0.1607, 0.1645],
        [0.1546, 0.1615, 0.1668,  ..., 0.1546, 0.1589, 0.1596],
        [0.1496, 0.1621, 0.1635,  ..., 0.1572, 0.1577, 0.1627]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:24<00:13,  1.52it/s]

tensor([[0.1573, 0.1635, 0.1712,  ..., 0.1569, 0.1632, 0.1643],
        [0.1547, 0.1610, 0.1654,  ..., 0.1533, 0.1579, 0.1603],
        [0.1582, 0.1638, 0.1721,  ..., 0.1564, 0.1611, 0.1656],
        ...,
        [0.1529, 0.1629, 0.1677,  ..., 0.1576, 0.1616, 0.1638],
        [0.1548, 0.1627, 0.1656,  ..., 0.1540, 0.1578, 0.1593],
        [0.1543, 0.1608, 0.1655,  ..., 0.1557, 0.1582, 0.1610]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:24<00:13,  1.52it/s]

tensor([[0.1560, 0.1635, 0.1677,  ..., 0.1544, 0.1581, 0.1605],
        [0.1575, 0.1639, 0.1690,  ..., 0.1556, 0.1612, 0.1615],
        [0.1539, 0.1590, 0.1654,  ..., 0.1528, 0.1573, 0.1594],
        ...,
        [0.1577, 0.1664, 0.1692,  ..., 0.1598, 0.1622, 0.1617],
        [0.1571, 0.1622, 0.1675,  ..., 0.1556, 0.1566, 0.1628],
        [0.1547, 0.1622, 0.1665,  ..., 0.1576, 0.1591, 0.1595]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:25<00:12,  1.49it/s]

tensor([[0.1577, 0.1652, 0.1699,  ..., 0.1561, 0.1617, 0.1654],
        [0.1556, 0.1633, 0.1687,  ..., 0.1570, 0.1589, 0.1639],
        [0.1543, 0.1620, 0.1655,  ..., 0.1554, 0.1575, 0.1595],
        ...,
        [0.1457, 0.1520, 0.1548,  ..., 0.1505, 0.1491, 0.1489],
        [0.1509, 0.1623, 0.1624,  ..., 0.1546, 0.1584, 0.1569],
        [0.1546, 0.1606, 0.1689,  ..., 0.1565, 0.1572, 0.1625]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:26<00:11,  1.50it/s]

tensor([[0.1510, 0.1619, 0.1648,  ..., 0.1554, 0.1574, 0.1583],
        [0.1594, 0.1679, 0.1737,  ..., 0.1594, 0.1639, 0.1675],
        [0.1565, 0.1640, 0.1680,  ..., 0.1579, 0.1602, 0.1646],
        ...,
        [0.1517, 0.1591, 0.1655,  ..., 0.1562, 0.1559, 0.1606],
        [0.1511, 0.1623, 0.1668,  ..., 0.1577, 0.1579, 0.1601],
        [0.1580, 0.1658, 0.1728,  ..., 0.1569, 0.1632, 0.1642]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:26<00:11,  1.51it/s]

tensor([[0.1563, 0.1618, 0.1666,  ..., 0.1567, 0.1578, 0.1624],
        [0.1556, 0.1640, 0.1698,  ..., 0.1548, 0.1625, 0.1651],
        [0.1561, 0.1645, 0.1688,  ..., 0.1551, 0.1597, 0.1597],
        ...,
        [0.1514, 0.1596, 0.1610,  ..., 0.1552, 0.1563, 0.1596],
        [0.1559, 0.1609, 0.1663,  ..., 0.1558, 0.1574, 0.1601],
        [0.1462, 0.1558, 0.1583,  ..., 0.1503, 0.1543, 0.1507]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:27<00:10,  1.52it/s]

tensor([[0.1565, 0.1639, 0.1701,  ..., 0.1555, 0.1631, 0.1640],
        [0.1522, 0.1586, 0.1621,  ..., 0.1528, 0.1585, 0.1594],
        [0.1431, 0.1492, 0.1505,  ..., 0.1481, 0.1449, 0.1462],
        ...,
        [0.1607, 0.1680, 0.1734,  ..., 0.1589, 0.1640, 0.1670],
        [0.1554, 0.1613, 0.1662,  ..., 0.1574, 0.1576, 0.1595],
        [0.1522, 0.1605, 0.1646,  ..., 0.1533, 0.1532, 0.1558]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:28<00:09,  1.54it/s]

tensor([[0.1529, 0.1617, 0.1646,  ..., 0.1557, 0.1575, 0.1598],
        [0.1570, 0.1660, 0.1706,  ..., 0.1575, 0.1613, 0.1644],
        [0.1524, 0.1628, 0.1666,  ..., 0.1553, 0.1623, 0.1628],
        ...,
        [0.1558, 0.1634, 0.1673,  ..., 0.1546, 0.1584, 0.1599],
        [0.1589, 0.1647, 0.1714,  ..., 0.1559, 0.1606, 0.1634],
        [0.1491, 0.1553, 0.1602,  ..., 0.1527, 0.1559, 0.1529]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:28<00:09,  1.53it/s]

tensor([[0.1586, 0.1599, 0.1695,  ..., 0.1573, 0.1584, 0.1631],
        [0.1498, 0.1588, 0.1615,  ..., 0.1514, 0.1549, 0.1553],
        [0.1549, 0.1602, 0.1656,  ..., 0.1549, 0.1570, 0.1616],
        ...,
        [0.1576, 0.1721, 0.1717,  ..., 0.1606, 0.1674, 0.1654],
        [0.1551, 0.1591, 0.1675,  ..., 0.1552, 0.1574, 0.1616],
        [0.1562, 0.1655, 0.1666,  ..., 0.1548, 0.1580, 0.1606]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:29<00:08,  1.54it/s]

tensor([[0.1543, 0.1638, 0.1656,  ..., 0.1560, 0.1601, 0.1613],
        [0.1538, 0.1626, 0.1688,  ..., 0.1519, 0.1609, 0.1613],
        [0.1587, 0.1637, 0.1717,  ..., 0.1569, 0.1574, 0.1635],
        ...,
        [0.1532, 0.1646, 0.1657,  ..., 0.1604, 0.1640, 0.1608],
        [0.1470, 0.1564, 0.1593,  ..., 0.1532, 0.1536, 0.1586],
        [0.1535, 0.1668, 0.1665,  ..., 0.1598, 0.1621, 0.1625]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:29<00:07,  1.54it/s]

tensor([[0.1533, 0.1601, 0.1657,  ..., 0.1550, 0.1550, 0.1563],
        [0.1565, 0.1605, 0.1703,  ..., 0.1563, 0.1577, 0.1624],
        [0.1567, 0.1623, 0.1681,  ..., 0.1575, 0.1634, 0.1639],
        ...,
        [0.1567, 0.1665, 0.1689,  ..., 0.1582, 0.1586, 0.1624],
        [0.1511, 0.1616, 0.1631,  ..., 0.1544, 0.1563, 0.1594],
        [0.1510, 0.1561, 0.1603,  ..., 0.1509, 0.1523, 0.1553]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:30<00:07,  1.55it/s]

tensor([[0.1509, 0.1603, 0.1635,  ..., 0.1539, 0.1587, 0.1609],
        [0.1525, 0.1599, 0.1632,  ..., 0.1545, 0.1553, 0.1569],
        [0.1423, 0.1470, 0.1506,  ..., 0.1441, 0.1455, 0.1443],
        ...,
        [0.1518, 0.1626, 0.1628,  ..., 0.1544, 0.1574, 0.1582],
        [0.1520, 0.1582, 0.1631,  ..., 0.1520, 0.1522, 0.1561],
        [0.1511, 0.1586, 0.1638,  ..., 0.1522, 0.1549, 0.1606]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:31<00:06,  1.54it/s]

tensor([[0.1511, 0.1593, 0.1638,  ..., 0.1552, 0.1574, 0.1602],
        [0.1568, 0.1640, 0.1705,  ..., 0.1571, 0.1614, 0.1680],
        [0.1509, 0.1660, 0.1613,  ..., 0.1543, 0.1631, 0.1584],
        ...,
        [0.1556, 0.1649, 0.1708,  ..., 0.1576, 0.1654, 0.1661],
        [0.1543, 0.1672, 0.1667,  ..., 0.1547, 0.1584, 0.1604],
        [0.1595, 0.1651, 0.1738,  ..., 0.1598, 0.1645, 0.1681]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:31<00:05,  1.51it/s]

tensor([[0.1571, 0.1624, 0.1715,  ..., 0.1573, 0.1599, 0.1638],
        [0.1520, 0.1632, 0.1657,  ..., 0.1558, 0.1595, 0.1630],
        [0.1564, 0.1655, 0.1673,  ..., 0.1570, 0.1589, 0.1626],
        ...,
        [0.1541, 0.1616, 0.1652,  ..., 0.1557, 0.1572, 0.1597],
        [0.1566, 0.1666, 0.1681,  ..., 0.1605, 0.1634, 0.1628],
        [0.1513, 0.1587, 0.1612,  ..., 0.1525, 0.1528, 0.1565]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:32<00:05,  1.48it/s]

tensor([[0.1552, 0.1658, 0.1649,  ..., 0.1540, 0.1588, 0.1581],
        [0.1505, 0.1598, 0.1620,  ..., 0.1532, 0.1554, 0.1553],
        [0.1550, 0.1595, 0.1651,  ..., 0.1551, 0.1579, 0.1600],
        ...,
        [0.1519, 0.1569, 0.1629,  ..., 0.1519, 0.1549, 0.1574],
        [0.1591, 0.1639, 0.1717,  ..., 0.1568, 0.1625, 0.1675],
        [0.1534, 0.1646, 0.1677,  ..., 0.1559, 0.1624, 0.1641]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:33<00:04,  1.51it/s]

tensor([[0.1540, 0.1624, 0.1667,  ..., 0.1572, 0.1562, 0.1606],
        [0.1497, 0.1611, 0.1618,  ..., 0.1518, 0.1578, 0.1571],
        [0.1526, 0.1625, 0.1647,  ..., 0.1535, 0.1569, 0.1581],
        ...,
        [0.1542, 0.1580, 0.1650,  ..., 0.1552, 0.1557, 0.1591],
        [0.1524, 0.1589, 0.1627,  ..., 0.1524, 0.1543, 0.1604],
        [0.1548, 0.1631, 0.1661,  ..., 0.1553, 0.1608, 0.1644]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:33<00:03,  1.50it/s]

tensor([[0.1543, 0.1671, 0.1653,  ..., 0.1572, 0.1603, 0.1636],
        [0.1531, 0.1611, 0.1655,  ..., 0.1544, 0.1564, 0.1610],
        [0.1489, 0.1584, 0.1591,  ..., 0.1519, 0.1511, 0.1566],
        ...,
        [0.1545, 0.1569, 0.1665,  ..., 0.1558, 0.1548, 0.1604],
        [0.1548, 0.1622, 0.1685,  ..., 0.1532, 0.1612, 0.1575],
        [0.1514, 0.1600, 0.1633,  ..., 0.1527, 0.1579, 0.1612]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:34<00:03,  1.51it/s]

tensor([[0.1560, 0.1657, 0.1693,  ..., 0.1582, 0.1646, 0.1640],
        [0.1538, 0.1615, 0.1653,  ..., 0.1568, 0.1567, 0.1602],
        [0.1535, 0.1602, 0.1646,  ..., 0.1530, 0.1579, 0.1606],
        ...,
        [0.1573, 0.1618, 0.1687,  ..., 0.1539, 0.1574, 0.1656],
        [0.1567, 0.1638, 0.1669,  ..., 0.1571, 0.1582, 0.1594],
        [0.1557, 0.1638, 0.1681,  ..., 0.1569, 0.1624, 0.1619]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:35<00:02,  1.52it/s]

tensor([[0.1528, 0.1619, 0.1650,  ..., 0.1544, 0.1607, 0.1599],
        [0.1550, 0.1665, 0.1679,  ..., 0.1552, 0.1597, 0.1629],
        [0.1514, 0.1605, 0.1635,  ..., 0.1541, 0.1564, 0.1552],
        ...,
        [0.1548, 0.1622, 0.1671,  ..., 0.1564, 0.1592, 0.1623],
        [0.1518, 0.1590, 0.1639,  ..., 0.1535, 0.1567, 0.1600],
        [0.1503, 0.1606, 0.1633,  ..., 0.1568, 0.1569, 0.1580]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:35<00:01,  1.50it/s]

tensor([[0.1555, 0.1598, 0.1669,  ..., 0.1555, 0.1568, 0.1598],
        [0.1540, 0.1624, 0.1661,  ..., 0.1549, 0.1569, 0.1579],
        [0.1553, 0.1628, 0.1665,  ..., 0.1568, 0.1597, 0.1610],
        ...,
        [0.1527, 0.1586, 0.1630,  ..., 0.1554, 0.1529, 0.1571],
        [0.1565, 0.1602, 0.1658,  ..., 0.1549, 0.1560, 0.1614],
        [0.1486, 0.1607, 0.1617,  ..., 0.1508, 0.1540, 0.1568]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:36<00:01,  1.51it/s]

tensor([[0.1561, 0.1630, 0.1694,  ..., 0.1539, 0.1598, 0.1635],
        [0.1562, 0.1621, 0.1675,  ..., 0.1581, 0.1575, 0.1613],
        [0.1575, 0.1685, 0.1703,  ..., 0.1608, 0.1624, 0.1637],
        ...,
        [0.1583, 0.1634, 0.1689,  ..., 0.1544, 0.1591, 0.1634],
        [0.1561, 0.1642, 0.1705,  ..., 0.1568, 0.1593, 0.1616],
        [0.1518, 0.1639, 0.1647,  ..., 0.1550, 0.1588, 0.1580]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:37<00:00,  1.50it/s]

tensor([[0.1549, 0.1656, 0.1700,  ..., 0.1566, 0.1620, 0.1693],
        [0.1548, 0.1631, 0.1674,  ..., 0.1555, 0.1584, 0.1606],
        [0.1531, 0.1640, 0.1656,  ..., 0.1553, 0.1644, 0.1642],
        ...,
        [0.1506, 0.1585, 0.1590,  ..., 0.1521, 0.1540, 0.1542],
        [0.1513, 0.1608, 0.1607,  ..., 0.1524, 0.1559, 0.1574],
        [0.1507, 0.1604, 0.1628,  ..., 0.1553, 0.1556, 0.1601]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:37<00:00,  1.50it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.43it/s]

tensor([[0.1573, 0.1618, 0.1666,  ..., 0.1540, 0.1558, 0.1595],
        [0.1484, 0.1579, 0.1618,  ..., 0.1568, 0.1565, 0.1595],
        [0.1546, 0.1604, 0.1635,  ..., 0.1576, 0.1566, 0.1599],
        ...,
        [0.1522, 0.1625, 0.1635,  ..., 0.1547, 0.1590, 0.1613],
        [0.1574, 0.1644, 0.1720,  ..., 0.1579, 0.1619, 0.1661],
        [0.1478, 0.1561, 0.1591,  ..., 0.1541, 0.1549, 0.1579]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.39it/s]

tensor([[0.1518, 0.1612, 0.1646,  ..., 0.1546, 0.1553, 0.1600],
        [0.1504, 0.1569, 0.1621,  ..., 0.1533, 0.1552, 0.1586],
        [0.1560, 0.1644, 0.1681,  ..., 0.1540, 0.1589, 0.1618],
        ...,
        [0.1541, 0.1622, 0.1651,  ..., 0.1537, 0.1566, 0.1582],
        [0.1562, 0.1622, 0.1688,  ..., 0.1598, 0.1612, 0.1653],
        [0.1526, 0.1629, 0.1656,  ..., 0.1570, 0.1602, 0.1627]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.31it/s]

tensor([[0.1577, 0.1645, 0.1704,  ..., 0.1565, 0.1646, 0.1657],
        [0.1519, 0.1619, 0.1651,  ..., 0.1538, 0.1605, 0.1639],
        [0.1549, 0.1573, 0.1663,  ..., 0.1555, 0.1563, 0.1581],
        ...,
        [0.1562, 0.1655, 0.1690,  ..., 0.1572, 0.1605, 0.1631],
        [0.1553, 0.1628, 0.1675,  ..., 0.1566, 0.1622, 0.1611],
        [0.1473, 0.1558, 0.1602,  ..., 0.1531, 0.1533, 0.1560]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.27it/s]

tensor([[0.1532, 0.1624, 0.1650,  ..., 0.1515, 0.1565, 0.1558],
        [0.1549, 0.1626, 0.1681,  ..., 0.1561, 0.1575, 0.1603],
        [0.1522, 0.1614, 0.1646,  ..., 0.1507, 0.1607, 0.1598],
        ...,
        [0.1459, 0.1585, 0.1580,  ..., 0.1486, 0.1564, 0.1562],
        [0.1547, 0.1610, 0.1711,  ..., 0.1553, 0.1601, 0.1595],
        [0.1551, 0.1605, 0.1659,  ..., 0.1567, 0.1580, 0.1602]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

tensor([[0.1526, 0.1616, 0.1641,  ..., 0.1530, 0.1591, 0.1608],
        [0.1535, 0.1612, 0.1645,  ..., 0.1542, 0.1567, 0.1596],
        [0.1517, 0.1536, 0.1609,  ..., 0.1504, 0.1514, 0.1550],
        ...,
        [0.1496, 0.1554, 0.1611,  ..., 0.1537, 0.1536, 0.1556],
        [0.1530, 0.1593, 0.1655,  ..., 0.1538, 0.1549, 0.1585],
        [0.1507, 0.1629, 0.1595,  ..., 0.1543, 0.1570, 0.1576]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.31it/s]

tensor([[0.1597, 0.1647, 0.1734,  ..., 0.1584, 0.1640, 0.1631],
        [0.1556, 0.1615, 0.1677,  ..., 0.1553, 0.1611, 0.1635],
        [0.1572, 0.1668, 0.1721,  ..., 0.1589, 0.1632, 0.1634],
        ...,
        [0.1571, 0.1657, 0.1717,  ..., 0.1553, 0.1639, 0.1660],
        [0.1537, 0.1596, 0.1645,  ..., 0.1543, 0.1564, 0.1594],
        [0.1500, 0.1624, 0.1634,  ..., 0.1588, 0.1607, 0.1600]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.34it/s]

tensor([[0.1552, 0.1592, 0.1679,  ..., 0.1543, 0.1587, 0.1606],
        [0.1545, 0.1623, 0.1661,  ..., 0.1574, 0.1576, 0.1596],
        [0.1586, 0.1673, 0.1700,  ..., 0.1581, 0.1648, 0.1646],
        ...,
        [0.1563, 0.1632, 0.1668,  ..., 0.1560, 0.1615, 0.1631],
        [0.1567, 0.1619, 0.1687,  ..., 0.1569, 0.1598, 0.1653],
        [0.1583, 0.1653, 0.1724,  ..., 0.1566, 0.1630, 0.1681]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.32it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1552, 0.1644, 0.1649,  ..., 0.1560, 0.1590, 0.1594],
        [0.1586, 0.1642, 0.1722,  ..., 0.1567, 0.1637, 0.1675],
        [0.1555, 0.1632, 0.1681,  ..., 0.1563, 0.1605, 0.1634],
        ...,
        [0.1555, 0.1609, 0.1660,  ..., 0.1552, 0.1591, 0.1616],
        [0.1559, 0.1607, 0.1678,  ..., 0.1549, 0.1579, 0.1623],
        [0.1534, 0.1623, 0.1679,  ..., 0.1533, 0.1593, 0.1604]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 14, train_loss: -0.7444634437561035, valid_loss: -0.7276705503463745
Parameter containing:
tensor([[ 1.0002,  1.0002,  1.0002,  1.0002,  1.0002,  1.0002,  1.0004,  1.0001,
          1.0001,  1.0005, -0.4998, -0.4998, -0.4999, -0.4998, -0.4998, -0.4998,
         -0.4998, -0.4999, -0.4998, -0.4999],
     

  2%|▏         | 1/57 [00:00<00:37,  1.48it/s]

tensor([[0.1450, 0.1573, 0.1572,  ..., 0.1483, 0.1560, 0.1571],
        [0.1584, 0.1655, 0.1735,  ..., 0.1566, 0.1628, 0.1668],
        [0.1498, 0.1567, 0.1579,  ..., 0.1498, 0.1522, 0.1530],
        ...,
        [0.1525, 0.1605, 0.1657,  ..., 0.1551, 0.1580, 0.1636],
        [0.1544, 0.1651, 0.1657,  ..., 0.1543, 0.1603, 0.1590],
        [0.1517, 0.1593, 0.1638,  ..., 0.1519, 0.1549, 0.1614]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:37,  1.48it/s]

tensor([[0.1517, 0.1625, 0.1646,  ..., 0.1512, 0.1569, 0.1578],
        [0.1584, 0.1657, 0.1706,  ..., 0.1598, 0.1636, 0.1644],
        [0.1546, 0.1596, 0.1660,  ..., 0.1580, 0.1591, 0.1605],
        ...,
        [0.1519, 0.1623, 0.1624,  ..., 0.1545, 0.1585, 0.1612],
        [0.1525, 0.1645, 0.1660,  ..., 0.1570, 0.1608, 0.1596],
        [0.1545, 0.1632, 0.1673,  ..., 0.1556, 0.1598, 0.1610]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:36,  1.49it/s]

tensor([[0.1576, 0.1641, 0.1713,  ..., 0.1557, 0.1595, 0.1661],
        [0.1529, 0.1632, 0.1640,  ..., 0.1539, 0.1569, 0.1577],
        [0.1592, 0.1660, 0.1735,  ..., 0.1589, 0.1656, 0.1676],
        ...,
        [0.1558, 0.1615, 0.1670,  ..., 0.1585, 0.1604, 0.1598],
        [0.1580, 0.1675, 0.1730,  ..., 0.1576, 0.1636, 0.1669],
        [0.1567, 0.1614, 0.1710,  ..., 0.1541, 0.1602, 0.1638]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:35,  1.51it/s]

tensor([[0.1549, 0.1623, 0.1676,  ..., 0.1559, 0.1598, 0.1671],
        [0.1579, 0.1642, 0.1695,  ..., 0.1571, 0.1646, 0.1648],
        [0.1562, 0.1608, 0.1689,  ..., 0.1561, 0.1579, 0.1629],
        ...,
        [0.1544, 0.1639, 0.1664,  ..., 0.1547, 0.1595, 0.1594],
        [0.1435, 0.1541, 0.1531,  ..., 0.1456, 0.1501, 0.1541],
        [0.1515, 0.1598, 0.1648,  ..., 0.1528, 0.1551, 0.1594]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:34,  1.52it/s]

tensor([[0.1496, 0.1559, 0.1594,  ..., 0.1496, 0.1534, 0.1548],
        [0.1416, 0.1525, 0.1527,  ..., 0.1431, 0.1519, 0.1513],
        [0.1475, 0.1560, 0.1576,  ..., 0.1528, 0.1512, 0.1527],
        ...,
        [0.1488, 0.1606, 0.1642,  ..., 0.1549, 0.1583, 0.1610],
        [0.1511, 0.1612, 0.1619,  ..., 0.1546, 0.1586, 0.1580],
        [0.1517, 0.1611, 0.1640,  ..., 0.1558, 0.1589, 0.1573]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:03<00:33,  1.53it/s]

tensor([[0.1560, 0.1597, 0.1659,  ..., 0.1522, 0.1529, 0.1591],
        [0.1528, 0.1596, 0.1631,  ..., 0.1559, 0.1572, 0.1587],
        [0.1438, 0.1515, 0.1527,  ..., 0.1447, 0.1470, 0.1461],
        ...,
        [0.1451, 0.1487, 0.1548,  ..., 0.1495, 0.1468, 0.1490],
        [0.1543, 0.1612, 0.1651,  ..., 0.1546, 0.1580, 0.1618],
        [0.1567, 0.1659, 0.1714,  ..., 0.1576, 0.1649, 0.1672]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:32,  1.53it/s]

tensor([[0.1572, 0.1638, 0.1716,  ..., 0.1557, 0.1633, 0.1656],
        [0.1442, 0.1457, 0.1544,  ..., 0.1429, 0.1430, 0.1501],
        [0.1584, 0.1646, 0.1707,  ..., 0.1582, 0.1598, 0.1653],
        ...,
        [0.1518, 0.1612, 0.1653,  ..., 0.1534, 0.1552, 0.1616],
        [0.1595, 0.1670, 0.1736,  ..., 0.1595, 0.1667, 0.1641],
        [0.1571, 0.1683, 0.1664,  ..., 0.1569, 0.1638, 0.1600]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:32,  1.50it/s]

tensor([[0.1441, 0.1476, 0.1527,  ..., 0.1454, 0.1430, 0.1457],
        [0.1541, 0.1621, 0.1647,  ..., 0.1548, 0.1594, 0.1610],
        [0.1551, 0.1594, 0.1669,  ..., 0.1552, 0.1552, 0.1616],
        ...,
        [0.1564, 0.1637, 0.1709,  ..., 0.1572, 0.1637, 0.1627],
        [0.1528, 0.1623, 0.1662,  ..., 0.1556, 0.1584, 0.1625],
        [0.1580, 0.1646, 0.1692,  ..., 0.1566, 0.1605, 0.1606]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:05<00:32,  1.50it/s]

tensor([[0.1532, 0.1579, 0.1649,  ..., 0.1532, 0.1557, 0.1586],
        [0.1535, 0.1596, 0.1657,  ..., 0.1548, 0.1561, 0.1612],
        [0.1561, 0.1614, 0.1664,  ..., 0.1566, 0.1575, 0.1620],
        ...,
        [0.1441, 0.1561, 0.1562,  ..., 0.1472, 0.1549, 0.1551],
        [0.1490, 0.1558, 0.1593,  ..., 0.1502, 0.1540, 0.1570],
        [0.1544, 0.1584, 0.1664,  ..., 0.1544, 0.1553, 0.1575]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.51it/s]

tensor([[0.1521, 0.1579, 0.1642,  ..., 0.1531, 0.1560, 0.1585],
        [0.1375, 0.1454, 0.1477,  ..., 0.1425, 0.1424, 0.1429],
        [0.1573, 0.1663, 0.1676,  ..., 0.1562, 0.1601, 0.1634],
        ...,
        [0.1588, 0.1676, 0.1715,  ..., 0.1587, 0.1663, 0.1646],
        [0.1496, 0.1568, 0.1616,  ..., 0.1505, 0.1533, 0.1527],
        [0.1561, 0.1622, 0.1669,  ..., 0.1550, 0.1589, 0.1659]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:30,  1.51it/s]

tensor([[0.1536, 0.1604, 0.1645,  ..., 0.1541, 0.1571, 0.1602],
        [0.1554, 0.1637, 0.1663,  ..., 0.1562, 0.1621, 0.1613],
        [0.1570, 0.1626, 0.1707,  ..., 0.1570, 0.1618, 0.1637],
        ...,
        [0.1478, 0.1589, 0.1565,  ..., 0.1466, 0.1467, 0.1550],
        [0.1490, 0.1586, 0.1598,  ..., 0.1531, 0.1546, 0.1540],
        [0.1530, 0.1569, 0.1641,  ..., 0.1533, 0.1543, 0.1561]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:07<00:29,  1.51it/s]

tensor([[0.1496, 0.1620, 0.1644,  ..., 0.1581, 0.1607, 0.1592],
        [0.1416, 0.1533, 0.1528,  ..., 0.1443, 0.1530, 0.1517],
        [0.1576, 0.1688, 0.1752,  ..., 0.1586, 0.1660, 0.1675],
        ...,
        [0.1468, 0.1583, 0.1583,  ..., 0.1501, 0.1568, 0.1567],
        [0.1575, 0.1676, 0.1695,  ..., 0.1580, 0.1626, 0.1668],
        [0.1578, 0.1660, 0.1690,  ..., 0.1611, 0.1657, 0.1659]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:08<00:29,  1.51it/s]

tensor([[0.1562, 0.1656, 0.1695,  ..., 0.1584, 0.1617, 0.1652],
        [0.1540, 0.1585, 0.1641,  ..., 0.1527, 0.1545, 0.1562],
        [0.1546, 0.1587, 0.1665,  ..., 0.1548, 0.1573, 0.1579],
        ...,
        [0.1567, 0.1705, 0.1700,  ..., 0.1575, 0.1652, 0.1606],
        [0.1552, 0.1617, 0.1669,  ..., 0.1527, 0.1565, 0.1644],
        [0.1509, 0.1609, 0.1603,  ..., 0.1509, 0.1557, 0.1576]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:28,  1.51it/s]

tensor([[0.1559, 0.1626, 0.1670,  ..., 0.1559, 0.1601, 0.1630],
        [0.1553, 0.1631, 0.1666,  ..., 0.1527, 0.1574, 0.1586],
        [0.1550, 0.1616, 0.1651,  ..., 0.1525, 0.1578, 0.1596],
        ...,
        [0.1356, 0.1423, 0.1466,  ..., 0.1446, 0.1389, 0.1432],
        [0.1549, 0.1623, 0.1669,  ..., 0.1533, 0.1584, 0.1614],
        [0.1555, 0.1636, 0.1693,  ..., 0.1539, 0.1603, 0.1621]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:09<00:27,  1.52it/s]

tensor([[0.1553, 0.1621, 0.1682,  ..., 0.1551, 0.1592, 0.1645],
        [0.1466, 0.1588, 0.1594,  ..., 0.1501, 0.1530, 0.1550],
        [0.1518, 0.1594, 0.1652,  ..., 0.1530, 0.1558, 0.1592],
        ...,
        [0.1544, 0.1623, 0.1649,  ..., 0.1524, 0.1570, 0.1608],
        [0.1568, 0.1632, 0.1697,  ..., 0.1557, 0.1618, 0.1639],
        [0.1567, 0.1613, 0.1677,  ..., 0.1561, 0.1583, 0.1629]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:10<00:27,  1.51it/s]

tensor([[0.1542, 0.1596, 0.1656,  ..., 0.1563, 0.1568, 0.1592],
        [0.1524, 0.1591, 0.1681,  ..., 0.1566, 0.1555, 0.1598],
        [0.1522, 0.1581, 0.1628,  ..., 0.1521, 0.1569, 0.1575],
        ...,
        [0.1533, 0.1609, 0.1652,  ..., 0.1526, 0.1556, 0.1603],
        [0.1542, 0.1624, 0.1653,  ..., 0.1555, 0.1642, 0.1640],
        [0.1513, 0.1575, 0.1641,  ..., 0.1536, 0.1555, 0.1575]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:11<00:26,  1.53it/s]

tensor([[0.1515, 0.1608, 0.1637,  ..., 0.1548, 0.1602, 0.1600],
        [0.1586, 0.1652, 0.1734,  ..., 0.1578, 0.1640, 0.1671],
        [0.1495, 0.1544, 0.1593,  ..., 0.1480, 0.1530, 0.1538],
        ...,
        [0.1541, 0.1599, 0.1664,  ..., 0.1582, 0.1601, 0.1611],
        [0.1490, 0.1579, 0.1603,  ..., 0.1514, 0.1557, 0.1575],
        [0.1533, 0.1589, 0.1635,  ..., 0.1526, 0.1551, 0.1575]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:11<00:26,  1.49it/s]

tensor([[0.1424, 0.1484, 0.1498,  ..., 0.1473, 0.1440, 0.1454],
        [0.1571, 0.1666, 0.1715,  ..., 0.1581, 0.1676, 0.1639],
        [0.1521, 0.1581, 0.1646,  ..., 0.1536, 0.1552, 0.1572],
        ...,
        [0.1529, 0.1580, 0.1619,  ..., 0.1544, 0.1551, 0.1601],
        [0.1523, 0.1622, 0.1664,  ..., 0.1564, 0.1537, 0.1563],
        [0.1538, 0.1607, 0.1650,  ..., 0.1552, 0.1551, 0.1596]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:12<00:25,  1.48it/s]

tensor([[0.1517, 0.1597, 0.1633,  ..., 0.1548, 0.1580, 0.1598],
        [0.1519, 0.1604, 0.1624,  ..., 0.1561, 0.1537, 0.1580],
        [0.1552, 0.1638, 0.1664,  ..., 0.1575, 0.1629, 0.1622],
        ...,
        [0.1530, 0.1582, 0.1634,  ..., 0.1531, 0.1537, 0.1582],
        [0.1556, 0.1583, 0.1674,  ..., 0.1552, 0.1573, 0.1602],
        [0.1561, 0.1616, 0.1692,  ..., 0.1587, 0.1612, 0.1639]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:13<00:24,  1.49it/s]

tensor([[0.1527, 0.1603, 0.1655,  ..., 0.1555, 0.1558, 0.1595],
        [0.1578, 0.1631, 0.1696,  ..., 0.1562, 0.1605, 0.1657],
        [0.1556, 0.1616, 0.1666,  ..., 0.1551, 0.1575, 0.1603],
        ...,
        [0.1568, 0.1624, 0.1695,  ..., 0.1547, 0.1581, 0.1658],
        [0.1572, 0.1629, 0.1703,  ..., 0.1584, 0.1607, 0.1654],
        [0.1498, 0.1641, 0.1645,  ..., 0.1555, 0.1596, 0.1598]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:13<00:24,  1.50it/s]

tensor([[0.1549, 0.1626, 0.1658,  ..., 0.1542, 0.1582, 0.1606],
        [0.1541, 0.1621, 0.1681,  ..., 0.1550, 0.1573, 0.1608],
        [0.1569, 0.1651, 0.1700,  ..., 0.1596, 0.1626, 0.1619],
        ...,
        [0.1533, 0.1596, 0.1679,  ..., 0.1556, 0.1565, 0.1616],
        [0.1522, 0.1623, 0.1624,  ..., 0.1541, 0.1571, 0.1570],
        [0.1578, 0.1639, 0.1707,  ..., 0.1565, 0.1634, 0.1667]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:14<00:23,  1.49it/s]

tensor([[0.1563, 0.1624, 0.1669,  ..., 0.1579, 0.1597, 0.1629],
        [0.1516, 0.1595, 0.1631,  ..., 0.1540, 0.1593, 0.1581],
        [0.1520, 0.1622, 0.1612,  ..., 0.1547, 0.1587, 0.1591],
        ...,
        [0.1476, 0.1547, 0.1578,  ..., 0.1483, 0.1497, 0.1494],
        [0.1478, 0.1586, 0.1601,  ..., 0.1535, 0.1541, 0.1562],
        [0.1578, 0.1632, 0.1676,  ..., 0.1553, 0.1587, 0.1615]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:15<00:23,  1.46it/s]

tensor([[0.1599, 0.1650, 0.1720,  ..., 0.1579, 0.1669, 0.1650],
        [0.1483, 0.1571, 0.1609,  ..., 0.1506, 0.1539, 0.1536],
        [0.1574, 0.1657, 0.1726,  ..., 0.1586, 0.1629, 0.1659],
        ...,
        [0.1569, 0.1626, 0.1678,  ..., 0.1547, 0.1580, 0.1645],
        [0.1549, 0.1631, 0.1675,  ..., 0.1554, 0.1616, 0.1619],
        [0.1491, 0.1566, 0.1581,  ..., 0.1521, 0.1527, 0.1548]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:16<00:24,  1.33it/s]

tensor([[0.1472, 0.1531, 0.1571,  ..., 0.1487, 0.1488, 0.1571],
        [0.1485, 0.1549, 0.1583,  ..., 0.1517, 0.1507, 0.1533],
        [0.1607, 0.1645, 0.1710,  ..., 0.1570, 0.1629, 0.1643],
        ...,
        [0.1493, 0.1549, 0.1574,  ..., 0.1482, 0.1510, 0.1520],
        [0.1544, 0.1626, 0.1697,  ..., 0.1537, 0.1586, 0.1593],
        [0.1523, 0.1606, 0.1651,  ..., 0.1545, 0.1549, 0.1567]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:17<00:24,  1.30it/s]

tensor([[0.1542, 0.1624, 0.1649,  ..., 0.1557, 0.1579, 0.1587],
        [0.1575, 0.1648, 0.1706,  ..., 0.1582, 0.1649, 0.1652],
        [0.1504, 0.1594, 0.1629,  ..., 0.1535, 0.1552, 0.1603],
        ...,
        [0.1517, 0.1576, 0.1655,  ..., 0.1551, 0.1556, 0.1572],
        [0.1534, 0.1611, 0.1636,  ..., 0.1549, 0.1573, 0.1591],
        [0.1505, 0.1601, 0.1623,  ..., 0.1535, 0.1593, 0.1558]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:17<00:24,  1.27it/s]

tensor([[0.1515, 0.1616, 0.1645,  ..., 0.1555, 0.1581, 0.1573],
        [0.1457, 0.1579, 0.1568,  ..., 0.1484, 0.1563, 0.1557],
        [0.1545, 0.1624, 0.1662,  ..., 0.1541, 0.1586, 0.1597],
        ...,
        [0.1538, 0.1612, 0.1658,  ..., 0.1583, 0.1560, 0.1596],
        [0.1560, 0.1646, 0.1694,  ..., 0.1583, 0.1637, 0.1645],
        [0.1536, 0.1661, 0.1632,  ..., 0.1537, 0.1614, 0.1594]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:18<00:23,  1.26it/s]

tensor([[0.1504, 0.1569, 0.1602,  ..., 0.1506, 0.1501, 0.1541],
        [0.1562, 0.1612, 0.1697,  ..., 0.1544, 0.1594, 0.1632],
        [0.1587, 0.1638, 0.1713,  ..., 0.1564, 0.1643, 0.1661],
        ...,
        [0.1549, 0.1624, 0.1640,  ..., 0.1556, 0.1586, 0.1577],
        [0.1557, 0.1650, 0.1671,  ..., 0.1568, 0.1586, 0.1622],
        [0.1513, 0.1612, 0.1635,  ..., 0.1528, 0.1566, 0.1562]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:19<00:23,  1.26it/s]

tensor([[0.1556, 0.1618, 0.1678,  ..., 0.1537, 0.1586, 0.1617],
        [0.1518, 0.1565, 0.1665,  ..., 0.1563, 0.1567, 0.1568],
        [0.1455, 0.1557, 0.1618,  ..., 0.1516, 0.1488, 0.1550],
        ...,
        [0.1562, 0.1628, 0.1681,  ..., 0.1553, 0.1603, 0.1634],
        [0.1546, 0.1609, 0.1668,  ..., 0.1536, 0.1565, 0.1627],
        [0.1541, 0.1625, 0.1658,  ..., 0.1586, 0.1617, 0.1609]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:22,  1.27it/s]

tensor([[0.1541, 0.1594, 0.1674,  ..., 0.1538, 0.1564, 0.1587],
        [0.1537, 0.1626, 0.1646,  ..., 0.1562, 0.1585, 0.1586],
        [0.1580, 0.1634, 0.1709,  ..., 0.1552, 0.1622, 0.1664],
        ...,
        [0.1568, 0.1664, 0.1713,  ..., 0.1590, 0.1650, 0.1671],
        [0.1553, 0.1601, 0.1659,  ..., 0.1572, 0.1586, 0.1599],
        [0.1558, 0.1658, 0.1688,  ..., 0.1592, 0.1647, 0.1649]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:20<00:20,  1.31it/s]

tensor([[0.1533, 0.1573, 0.1661,  ..., 0.1543, 0.1547, 0.1576],
        [0.1599, 0.1670, 0.1727,  ..., 0.1564, 0.1638, 0.1677],
        [0.1528, 0.1623, 0.1668,  ..., 0.1534, 0.1585, 0.1595],
        ...,
        [0.1569, 0.1618, 0.1695,  ..., 0.1556, 0.1589, 0.1619],
        [0.1524, 0.1606, 0.1628,  ..., 0.1529, 0.1541, 0.1555],
        [0.1549, 0.1592, 0.1665,  ..., 0.1549, 0.1562, 0.1592]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:21<00:19,  1.35it/s]

tensor([[0.1520, 0.1591, 0.1610,  ..., 0.1521, 0.1548, 0.1533],
        [0.1526, 0.1629, 0.1665,  ..., 0.1561, 0.1584, 0.1598],
        [0.1534, 0.1624, 0.1697,  ..., 0.1533, 0.1586, 0.1603],
        ...,
        [0.1475, 0.1597, 0.1596,  ..., 0.1521, 0.1567, 0.1588],
        [0.1538, 0.1650, 0.1661,  ..., 0.1553, 0.1624, 0.1630],
        [0.1550, 0.1639, 0.1692,  ..., 0.1615, 0.1638, 0.1623]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:18,  1.37it/s]

tensor([[0.1516, 0.1576, 0.1645,  ..., 0.1540, 0.1571, 0.1616],
        [0.1517, 0.1601, 0.1655,  ..., 0.1561, 0.1562, 0.1597],
        [0.1540, 0.1605, 0.1658,  ..., 0.1541, 0.1579, 0.1575],
        ...,
        [0.1527, 0.1631, 0.1642,  ..., 0.1566, 0.1609, 0.1610],
        [0.1455, 0.1565, 0.1608,  ..., 0.1525, 0.1518, 0.1557],
        [0.1527, 0.1618, 0.1645,  ..., 0.1558, 0.1587, 0.1596]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:17,  1.37it/s]

tensor([[0.1523, 0.1558, 0.1617,  ..., 0.1541, 0.1536, 0.1556],
        [0.1515, 0.1572, 0.1623,  ..., 0.1527, 0.1524, 0.1542],
        [0.1520, 0.1622, 0.1627,  ..., 0.1543, 0.1573, 0.1607],
        ...,
        [0.1464, 0.1543, 0.1555,  ..., 0.1485, 0.1488, 0.1512],
        [0.1573, 0.1658, 0.1730,  ..., 0.1576, 0.1636, 0.1661],
        [0.1558, 0.1644, 0.1691,  ..., 0.1558, 0.1599, 0.1651]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:23<00:16,  1.36it/s]

tensor([[0.1547, 0.1597, 0.1680,  ..., 0.1547, 0.1567, 0.1643],
        [0.1520, 0.1638, 0.1639,  ..., 0.1549, 0.1594, 0.1627],
        [0.1565, 0.1638, 0.1707,  ..., 0.1574, 0.1609, 0.1654],
        ...,
        [0.1527, 0.1604, 0.1641,  ..., 0.1541, 0.1567, 0.1582],
        [0.1523, 0.1604, 0.1645,  ..., 0.1535, 0.1549, 0.1585],
        [0.1454, 0.1516, 0.1567,  ..., 0.1474, 0.1467, 0.1479]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:16,  1.37it/s]

tensor([[0.1537, 0.1624, 0.1628,  ..., 0.1535, 0.1586, 0.1588],
        [0.1581, 0.1665, 0.1722,  ..., 0.1566, 0.1634, 0.1667],
        [0.1618, 0.1682, 0.1737,  ..., 0.1582, 0.1666, 0.1664],
        ...,
        [0.1411, 0.1457, 0.1494,  ..., 0.1436, 0.1443, 0.1431],
        [0.1550, 0.1635, 0.1665,  ..., 0.1566, 0.1629, 0.1607],
        [0.1585, 0.1648, 0.1730,  ..., 0.1582, 0.1625, 0.1673]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:15,  1.40it/s]

tensor([[0.1569, 0.1642, 0.1698,  ..., 0.1546, 0.1600, 0.1655],
        [0.1553, 0.1634, 0.1676,  ..., 0.1574, 0.1606, 0.1626],
        [0.1415, 0.1526, 0.1527,  ..., 0.1431, 0.1519, 0.1512],
        ...,
        [0.1544, 0.1657, 0.1667,  ..., 0.1555, 0.1624, 0.1606],
        [0.1563, 0.1618, 0.1703,  ..., 0.1564, 0.1584, 0.1633],
        [0.1592, 0.1623, 0.1720,  ..., 0.1569, 0.1596, 0.1630]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:25<00:14,  1.38it/s]

tensor([[0.1537, 0.1600, 0.1643,  ..., 0.1539, 0.1609, 0.1607],
        [0.1535, 0.1609, 0.1636,  ..., 0.1574, 0.1579, 0.1591],
        [0.1554, 0.1643, 0.1695,  ..., 0.1568, 0.1628, 0.1635],
        ...,
        [0.1523, 0.1606, 0.1632,  ..., 0.1517, 0.1575, 0.1596],
        [0.1568, 0.1641, 0.1684,  ..., 0.1609, 0.1602, 0.1649],
        [0.1561, 0.1611, 0.1681,  ..., 0.1535, 0.1583, 0.1624]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:13,  1.36it/s]

tensor([[0.1525, 0.1596, 0.1648,  ..., 0.1524, 0.1552, 0.1596],
        [0.1545, 0.1575, 0.1676,  ..., 0.1581, 0.1557, 0.1603],
        [0.1518, 0.1653, 0.1642,  ..., 0.1577, 0.1615, 0.1597],
        ...,
        [0.1536, 0.1625, 0.1661,  ..., 0.1549, 0.1595, 0.1616],
        [0.1517, 0.1606, 0.1659,  ..., 0.1533, 0.1601, 0.1613],
        [0.1566, 0.1655, 0.1674,  ..., 0.1560, 0.1584, 0.1625]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:13,  1.30it/s]

tensor([[0.1542, 0.1599, 0.1645,  ..., 0.1566, 0.1572, 0.1594],
        [0.1562, 0.1623, 0.1678,  ..., 0.1561, 0.1601, 0.1639],
        [0.1480, 0.1576, 0.1603,  ..., 0.1517, 0.1550, 0.1568],
        ...,
        [0.1516, 0.1635, 0.1662,  ..., 0.1596, 0.1634, 0.1642],
        [0.1527, 0.1611, 0.1655,  ..., 0.1556, 0.1597, 0.1592],
        [0.1487, 0.1579, 0.1610,  ..., 0.1520, 0.1536, 0.1534]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:13,  1.26it/s]

tensor([[0.1563, 0.1646, 0.1706,  ..., 0.1565, 0.1612, 0.1648],
        [0.1540, 0.1604, 0.1645,  ..., 0.1529, 0.1556, 0.1597],
        [0.1500, 0.1603, 0.1615,  ..., 0.1535, 0.1583, 0.1574],
        ...,
        [0.1528, 0.1588, 0.1664,  ..., 0.1542, 0.1572, 0.1602],
        [0.1527, 0.1603, 0.1631,  ..., 0.1540, 0.1543, 0.1591],
        [0.1531, 0.1619, 0.1682,  ..., 0.1577, 0.1611, 0.1645]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:29<00:12,  1.32it/s]

tensor([[0.1518, 0.1591, 0.1632,  ..., 0.1532, 0.1550, 0.1543],
        [0.1458, 0.1545, 0.1553,  ..., 0.1468, 0.1517, 0.1521],
        [0.1541, 0.1609, 0.1677,  ..., 0.1543, 0.1554, 0.1596],
        ...,
        [0.1583, 0.1648, 0.1710,  ..., 0.1563, 0.1613, 0.1653],
        [0.1479, 0.1509, 0.1578,  ..., 0.1473, 0.1450, 0.1493],
        [0.1482, 0.1556, 0.1571,  ..., 0.1495, 0.1506, 0.1507]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:11,  1.32it/s]

tensor([[0.1546, 0.1611, 0.1668,  ..., 0.1550, 0.1578, 0.1620],
        [0.1503, 0.1617, 0.1637,  ..., 0.1515, 0.1547, 0.1566],
        [0.1405, 0.1454, 0.1511,  ..., 0.1429, 0.1429, 0.1466],
        ...,
        [0.1580, 0.1640, 0.1708,  ..., 0.1556, 0.1608, 0.1649],
        [0.1518, 0.1579, 0.1639,  ..., 0.1528, 0.1526, 0.1554],
        [0.1446, 0.1533, 0.1566,  ..., 0.1496, 0.1515, 0.1517]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:10,  1.36it/s]

tensor([[0.1540, 0.1625, 0.1668,  ..., 0.1577, 0.1596, 0.1627],
        [0.1528, 0.1612, 0.1649,  ..., 0.1557, 0.1565, 0.1585],
        [0.1545, 0.1622, 0.1671,  ..., 0.1560, 0.1587, 0.1649],
        ...,
        [0.1540, 0.1611, 0.1679,  ..., 0.1532, 0.1587, 0.1608],
        [0.1566, 0.1620, 0.1692,  ..., 0.1557, 0.1595, 0.1637],
        [0.1552, 0.1641, 0.1657,  ..., 0.1561, 0.1602, 0.1617]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:31<00:09,  1.39it/s]

tensor([[0.1557, 0.1640, 0.1682,  ..., 0.1584, 0.1590, 0.1619],
        [0.1529, 0.1616, 0.1634,  ..., 0.1540, 0.1547, 0.1559],
        [0.1555, 0.1669, 0.1708,  ..., 0.1563, 0.1620, 0.1675],
        ...,
        [0.1465, 0.1576, 0.1609,  ..., 0.1513, 0.1512, 0.1514],
        [0.1497, 0.1565, 0.1626,  ..., 0.1548, 0.1531, 0.1584],
        [0.1498, 0.1614, 0.1634,  ..., 0.1544, 0.1597, 0.1591]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:08,  1.38it/s]

tensor([[0.1486, 0.1504, 0.1580,  ..., 0.1491, 0.1476, 0.1522],
        [0.1553, 0.1626, 0.1672,  ..., 0.1579, 0.1600, 0.1643],
        [0.1549, 0.1598, 0.1652,  ..., 0.1560, 0.1558, 0.1604],
        ...,
        [0.1534, 0.1629, 0.1622,  ..., 0.1558, 0.1609, 0.1579],
        [0.1571, 0.1638, 0.1700,  ..., 0.1547, 0.1606, 0.1656],
        [0.1553, 0.1596, 0.1688,  ..., 0.1550, 0.1573, 0.1596]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.40it/s]

tensor([[0.1541, 0.1634, 0.1658,  ..., 0.1556, 0.1602, 0.1635],
        [0.1486, 0.1558, 0.1587,  ..., 0.1521, 0.1517, 0.1531],
        [0.1469, 0.1539, 0.1571,  ..., 0.1477, 0.1526, 0.1539],
        ...,
        [0.1556, 0.1629, 0.1685,  ..., 0.1539, 0.1600, 0.1624],
        [0.1554, 0.1592, 0.1688,  ..., 0.1555, 0.1564, 0.1613],
        [0.1525, 0.1637, 0.1679,  ..., 0.1571, 0.1600, 0.1650]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:33<00:06,  1.44it/s]

tensor([[0.1546, 0.1607, 0.1675,  ..., 0.1548, 0.1569, 0.1610],
        [0.1448, 0.1440, 0.1544,  ..., 0.1443, 0.1400, 0.1464],
        [0.1579, 0.1641, 0.1711,  ..., 0.1564, 0.1610, 0.1657],
        ...,
        [0.1527, 0.1588, 0.1647,  ..., 0.1541, 0.1565, 0.1575],
        [0.1518, 0.1590, 0.1650,  ..., 0.1557, 0.1540, 0.1612],
        [0.1552, 0.1602, 0.1665,  ..., 0.1540, 0.1571, 0.1576]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:34<00:06,  1.40it/s]

tensor([[0.1507, 0.1591, 0.1605,  ..., 0.1547, 0.1514, 0.1551],
        [0.1420, 0.1462, 0.1528,  ..., 0.1417, 0.1444, 0.1473],
        [0.1473, 0.1530, 0.1570,  ..., 0.1485, 0.1486, 0.1544],
        ...,
        [0.1587, 0.1647, 0.1715,  ..., 0.1574, 0.1633, 0.1662],
        [0.1496, 0.1579, 0.1566,  ..., 0.1502, 0.1537, 0.1523],
        [0.1557, 0.1634, 0.1698,  ..., 0.1547, 0.1587, 0.1634]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.35it/s]

tensor([[0.1570, 0.1652, 0.1708,  ..., 0.1559, 0.1642, 0.1668],
        [0.1522, 0.1620, 0.1662,  ..., 0.1511, 0.1604, 0.1628],
        [0.1496, 0.1620, 0.1632,  ..., 0.1553, 0.1581, 0.1590],
        ...,
        [0.1499, 0.1564, 0.1610,  ..., 0.1464, 0.1524, 0.1550],
        [0.1502, 0.1568, 0.1618,  ..., 0.1534, 0.1541, 0.1578],
        [0.1523, 0.1608, 0.1621,  ..., 0.1538, 0.1591, 0.1592]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:35<00:05,  1.32it/s]

tensor([[0.1523, 0.1570, 0.1639,  ..., 0.1534, 0.1546, 0.1565],
        [0.1519, 0.1576, 0.1633,  ..., 0.1503, 0.1534, 0.1547],
        [0.1430, 0.1527, 0.1546,  ..., 0.1483, 0.1482, 0.1487],
        ...,
        [0.1553, 0.1612, 0.1661,  ..., 0.1563, 0.1594, 0.1597],
        [0.1589, 0.1659, 0.1674,  ..., 0.1582, 0.1617, 0.1612],
        [0.1582, 0.1654, 0.1709,  ..., 0.1581, 0.1622, 0.1663]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:36<00:04,  1.32it/s]

tensor([[0.1545, 0.1615, 0.1683,  ..., 0.1555, 0.1583, 0.1616],
        [0.1497, 0.1611, 0.1612,  ..., 0.1518, 0.1593, 0.1592],
        [0.1535, 0.1615, 0.1665,  ..., 0.1531, 0.1569, 0.1609],
        ...,
        [0.1506, 0.1558, 0.1626,  ..., 0.1524, 0.1502, 0.1592],
        [0.1451, 0.1580, 0.1579,  ..., 0.1528, 0.1583, 0.1545],
        [0.1510, 0.1570, 0.1627,  ..., 0.1510, 0.1514, 0.1550]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:37<00:03,  1.31it/s]

tensor([[0.1536, 0.1628, 0.1670,  ..., 0.1572, 0.1614, 0.1617],
        [0.1506, 0.1607, 0.1650,  ..., 0.1519, 0.1597, 0.1582],
        [0.1432, 0.1517, 0.1573,  ..., 0.1462, 0.1483, 0.1508],
        ...,
        [0.1597, 0.1673, 0.1712,  ..., 0.1560, 0.1633, 0.1652],
        [0.1560, 0.1645, 0.1686,  ..., 0.1578, 0.1620, 0.1658],
        [0.1493, 0.1580, 0.1606,  ..., 0.1506, 0.1517, 0.1551]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:37<00:03,  1.31it/s]

tensor([[0.1489, 0.1616, 0.1616,  ..., 0.1522, 0.1559, 0.1575],
        [0.1482, 0.1576, 0.1587,  ..., 0.1519, 0.1542, 0.1553],
        [0.1553, 0.1618, 0.1674,  ..., 0.1545, 0.1594, 0.1612],
        ...,
        [0.1574, 0.1650, 0.1690,  ..., 0.1565, 0.1634, 0.1656],
        [0.1582, 0.1644, 0.1721,  ..., 0.1569, 0.1626, 0.1660],
        [0.1410, 0.1434, 0.1501,  ..., 0.1411, 0.1413, 0.1461]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:38<00:02,  1.33it/s]

tensor([[0.1562, 0.1661, 0.1701,  ..., 0.1582, 0.1639, 0.1656],
        [0.1547, 0.1603, 0.1666,  ..., 0.1565, 0.1599, 0.1613],
        [0.1434, 0.1537, 0.1546,  ..., 0.1450, 0.1535, 0.1520],
        ...,
        [0.1564, 0.1603, 0.1681,  ..., 0.1591, 0.1616, 0.1628],
        [0.1498, 0.1604, 0.1611,  ..., 0.1543, 0.1567, 0.1578],
        [0.1544, 0.1634, 0.1673,  ..., 0.1574, 0.1644, 0.1617]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:39<00:01,  1.37it/s]

tensor([[0.1574, 0.1636, 0.1709,  ..., 0.1554, 0.1607, 0.1656],
        [0.1550, 0.1615, 0.1679,  ..., 0.1542, 0.1595, 0.1624],
        [0.1556, 0.1613, 0.1658,  ..., 0.1581, 0.1588, 0.1596],
        ...,
        [0.1425, 0.1450, 0.1544,  ..., 0.1462, 0.1471, 0.1483],
        [0.1473, 0.1559, 0.1597,  ..., 0.1472, 0.1508, 0.1535],
        [0.1529, 0.1631, 0.1630,  ..., 0.1526, 0.1558, 0.1558]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:40<00:00,  1.36it/s]

tensor([[0.1548, 0.1614, 0.1677,  ..., 0.1563, 0.1605, 0.1621],
        [0.1583, 0.1639, 0.1719,  ..., 0.1561, 0.1611, 0.1664],
        [0.1540, 0.1584, 0.1647,  ..., 0.1547, 0.1572, 0.1586],
        ...,
        [0.1522, 0.1596, 0.1630,  ..., 0.1566, 0.1554, 0.1568],
        [0.1420, 0.1536, 0.1538,  ..., 0.1440, 0.1528, 0.1518],
        [0.1582, 0.1665, 0.1723,  ..., 0.1574, 0.1633, 0.1662]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:40<00:00,  1.39it/s]
 12%|█▎        | 1/8 [00:00<00:02,  2.92it/s]

tensor([[0.1450, 0.1506, 0.1527,  ..., 0.1461, 0.1476, 0.1459],
        [0.1565, 0.1629, 0.1675,  ..., 0.1597, 0.1602, 0.1646],
        [0.1532, 0.1615, 0.1649,  ..., 0.1580, 0.1593, 0.1607],
        ...,
        [0.1535, 0.1621, 0.1672,  ..., 0.1547, 0.1617, 0.1624],
        [0.1533, 0.1622, 0.1662,  ..., 0.1560, 0.1576, 0.1623],
        [0.1570, 0.1643, 0.1703,  ..., 0.1570, 0.1632, 0.1650]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:02,  2.91it/s]

tensor([[0.1529, 0.1570, 0.1612,  ..., 0.1552, 0.1532, 0.1564],
        [0.1540, 0.1625, 0.1667,  ..., 0.1549, 0.1588, 0.1630],
        [0.1568, 0.1634, 0.1682,  ..., 0.1557, 0.1600, 0.1609],
        ...,
        [0.1503, 0.1624, 0.1632,  ..., 0.1529, 0.1571, 0.1580],
        [0.1537, 0.1631, 0.1666,  ..., 0.1558, 0.1596, 0.1609],
        [0.1507, 0.1601, 0.1654,  ..., 0.1548, 0.1543, 0.1578]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:01<00:01,  2.95it/s]

tensor([[0.1502, 0.1604, 0.1641,  ..., 0.1530, 0.1544, 0.1581],
        [0.1564, 0.1624, 0.1698,  ..., 0.1562, 0.1590, 0.1628],
        [0.1559, 0.1654, 0.1668,  ..., 0.1530, 0.1596, 0.1624],
        ...,
        [0.1554, 0.1632, 0.1693,  ..., 0.1546, 0.1622, 0.1635],
        [0.1519, 0.1606, 0.1616,  ..., 0.1554, 0.1568, 0.1601],
        [0.1521, 0.1560, 0.1622,  ..., 0.1512, 0.1529, 0.1560]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.06it/s]

tensor([[0.1542, 0.1576, 0.1643,  ..., 0.1544, 0.1545, 0.1587],
        [0.1599, 0.1673, 0.1761,  ..., 0.1579, 0.1651, 0.1688],
        [0.1525, 0.1636, 0.1697,  ..., 0.1565, 0.1581, 0.1644],
        ...,
        [0.1446, 0.1544, 0.1578,  ..., 0.1499, 0.1510, 0.1558],
        [0.1564, 0.1630, 0.1706,  ..., 0.1577, 0.1597, 0.1641],
        [0.1583, 0.1653, 0.1693,  ..., 0.1601, 0.1644, 0.1642]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.12it/s]

tensor([[0.1590, 0.1649, 0.1738,  ..., 0.1580, 0.1631, 0.1677],
        [0.1416, 0.1526, 0.1528,  ..., 0.1431, 0.1519, 0.1513],
        [0.1505, 0.1596, 0.1643,  ..., 0.1540, 0.1556, 0.1574],
        ...,
        [0.1565, 0.1684, 0.1695,  ..., 0.1559, 0.1640, 0.1632],
        [0.1508, 0.1581, 0.1625,  ..., 0.1538, 0.1553, 0.1575],
        [0.1511, 0.1610, 0.1661,  ..., 0.1544, 0.1578, 0.1594]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.10it/s]

tensor([[0.1553, 0.1634, 0.1697,  ..., 0.1591, 0.1607, 0.1650],
        [0.1543, 0.1572, 0.1631,  ..., 0.1520, 0.1550, 0.1552],
        [0.1551, 0.1611, 0.1686,  ..., 0.1532, 0.1572, 0.1664],
        ...,
        [0.1547, 0.1642, 0.1665,  ..., 0.1573, 0.1613, 0.1619],
        [0.1535, 0.1608, 0.1672,  ..., 0.1564, 0.1562, 0.1602],
        [0.1512, 0.1559, 0.1635,  ..., 0.1529, 0.1522, 0.1569]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  2.93it/s]

tensor([[0.1506, 0.1594, 0.1635,  ..., 0.1504, 0.1569, 0.1584],
        [0.1602, 0.1636, 0.1729,  ..., 0.1555, 0.1637, 0.1660],
        [0.1521, 0.1588, 0.1640,  ..., 0.1542, 0.1559, 0.1557],
        ...,
        [0.1572, 0.1601, 0.1693,  ..., 0.1537, 0.1583, 0.1620],
        [0.1503, 0.1595, 0.1619,  ..., 0.1516, 0.1533, 0.1553],
        [0.1551, 0.1617, 0.1677,  ..., 0.1562, 0.1587, 0.1627]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.98it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1528, 0.1628, 0.1658,  ..., 0.1571, 0.1604, 0.1604],
        [0.1561, 0.1638, 0.1676,  ..., 0.1578, 0.1620, 0.1628],
        [0.1552, 0.1625, 0.1679,  ..., 0.1557, 0.1597, 0.1624],
        ...,
        [0.1532, 0.1621, 0.1663,  ..., 0.1549, 0.1571, 0.1605],
        [0.1503, 0.1556, 0.1591,  ..., 0.1500, 0.1557, 0.1556],
        [0.1502, 0.1596, 0.1629,  ..., 0.1557, 0.1601, 0.1582]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 15, train_loss: -0.7853381633758545, valid_loss: -0.7611634135246277
Parameter containing:
tensor([[ 1.0001,  1.0001,  1.0001,  1.0001,  1.0001,  1.0000,  1.0003,  1.0000,
          1.0000,  1.0005, -0.4999, -0.4999, -0.5000, -0.4999, -0.4999, -0.4999,
         -0.4999, -0.5000, -0.4999, -0.5000],
     

  2%|▏         | 1/57 [00:00<00:42,  1.33it/s]

tensor([[0.1528, 0.1598, 0.1655,  ..., 0.1541, 0.1559, 0.1593],
        [0.1534, 0.1623, 0.1639,  ..., 0.1515, 0.1526, 0.1578],
        [0.1441, 0.1566, 0.1562,  ..., 0.1478, 0.1559, 0.1550],
        ...,
        [0.1575, 0.1649, 0.1690,  ..., 0.1561, 0.1606, 0.1618],
        [0.1514, 0.1607, 0.1626,  ..., 0.1538, 0.1565, 0.1574],
        [0.1533, 0.1624, 0.1664,  ..., 0.1567, 0.1619, 0.1654]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:41,  1.34it/s]

tensor([[0.1552, 0.1586, 0.1639,  ..., 0.1538, 0.1564, 0.1595],
        [0.1551, 0.1634, 0.1679,  ..., 0.1550, 0.1590, 0.1587],
        [0.1479, 0.1599, 0.1599,  ..., 0.1544, 0.1572, 0.1578],
        ...,
        [0.1547, 0.1625, 0.1640,  ..., 0.1557, 0.1573, 0.1598],
        [0.1527, 0.1622, 0.1676,  ..., 0.1558, 0.1603, 0.1594],
        [0.1565, 0.1617, 0.1697,  ..., 0.1531, 0.1576, 0.1598]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:41,  1.30it/s]

tensor([[0.1520, 0.1599, 0.1647,  ..., 0.1553, 0.1580, 0.1601],
        [0.1510, 0.1594, 0.1630,  ..., 0.1539, 0.1553, 0.1576],
        [0.1485, 0.1602, 0.1607,  ..., 0.1527, 0.1572, 0.1575],
        ...,
        [0.1514, 0.1584, 0.1631,  ..., 0.1555, 0.1546, 0.1553],
        [0.1516, 0.1584, 0.1626,  ..., 0.1535, 0.1542, 0.1560],
        [0.1495, 0.1599, 0.1641,  ..., 0.1559, 0.1585, 0.1603]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:03<00:42,  1.24it/s]

tensor([[0.1527, 0.1619, 0.1642,  ..., 0.1545, 0.1597, 0.1605],
        [0.1550, 0.1663, 0.1708,  ..., 0.1589, 0.1635, 0.1646],
        [0.1539, 0.1601, 0.1650,  ..., 0.1546, 0.1565, 0.1636],
        ...,
        [0.1552, 0.1594, 0.1656,  ..., 0.1543, 0.1560, 0.1594],
        [0.1556, 0.1609, 0.1691,  ..., 0.1556, 0.1568, 0.1620],
        [0.1575, 0.1650, 0.1705,  ..., 0.1553, 0.1635, 0.1664]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:41,  1.26it/s]

tensor([[0.1595, 0.1647, 0.1715,  ..., 0.1566, 0.1651, 0.1654],
        [0.1540, 0.1644, 0.1655,  ..., 0.1536, 0.1579, 0.1621],
        [0.1538, 0.1627, 0.1690,  ..., 0.1530, 0.1581, 0.1629],
        ...,
        [0.1554, 0.1603, 0.1704,  ..., 0.1549, 0.1607, 0.1660],
        [0.1575, 0.1672, 0.1691,  ..., 0.1578, 0.1621, 0.1640],
        [0.1345, 0.1416, 0.1420,  ..., 0.1365, 0.1387, 0.1395]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:40,  1.25it/s]

tensor([[0.1559, 0.1609, 0.1670,  ..., 0.1539, 0.1574, 0.1593],
        [0.1558, 0.1625, 0.1687,  ..., 0.1554, 0.1583, 0.1630],
        [0.1507, 0.1586, 0.1621,  ..., 0.1501, 0.1594, 0.1586],
        ...,
        [0.1520, 0.1600, 0.1642,  ..., 0.1531, 0.1546, 0.1580],
        [0.1517, 0.1567, 0.1656,  ..., 0.1533, 0.1527, 0.1576],
        [0.1537, 0.1597, 0.1641,  ..., 0.1540, 0.1565, 0.1571]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:41,  1.21it/s]

tensor([[0.1580, 0.1627, 0.1710,  ..., 0.1540, 0.1599, 0.1631],
        [0.1522, 0.1585, 0.1607,  ..., 0.1499, 0.1535, 0.1574],
        [0.1469, 0.1558, 0.1568,  ..., 0.1496, 0.1548, 0.1550],
        ...,
        [0.1519, 0.1579, 0.1623,  ..., 0.1531, 0.1577, 0.1560],
        [0.1571, 0.1625, 0.1691,  ..., 0.1537, 0.1594, 0.1654],
        [0.1529, 0.1601, 0.1631,  ..., 0.1516, 0.1553, 0.1588]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:06<00:39,  1.23it/s]

tensor([[0.1539, 0.1579, 0.1651,  ..., 0.1545, 0.1556, 0.1579],
        [0.1528, 0.1612, 0.1666,  ..., 0.1582, 0.1575, 0.1602],
        [0.1500, 0.1582, 0.1626,  ..., 0.1536, 0.1557, 0.1565],
        ...,
        [0.1551, 0.1628, 0.1683,  ..., 0.1568, 0.1616, 0.1658],
        [0.1565, 0.1641, 0.1702,  ..., 0.1564, 0.1624, 0.1647],
        [0.1565, 0.1670, 0.1689,  ..., 0.1577, 0.1623, 0.1661]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:07<00:36,  1.30it/s]

tensor([[0.1519, 0.1613, 0.1619,  ..., 0.1529, 0.1561, 0.1582],
        [0.1533, 0.1663, 0.1677,  ..., 0.1563, 0.1651, 0.1637],
        [0.1565, 0.1595, 0.1641,  ..., 0.1549, 0.1549, 0.1575],
        ...,
        [0.1583, 0.1630, 0.1700,  ..., 0.1593, 0.1608, 0.1640],
        [0.1528, 0.1588, 0.1642,  ..., 0.1538, 0.1547, 0.1575],
        [0.1562, 0.1615, 0.1668,  ..., 0.1541, 0.1549, 0.1618]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:35,  1.32it/s]

tensor([[0.1516, 0.1606, 0.1633,  ..., 0.1534, 0.1539, 0.1557],
        [0.1584, 0.1680, 0.1718,  ..., 0.1587, 0.1648, 0.1669],
        [0.1514, 0.1576, 0.1623,  ..., 0.1522, 0.1548, 0.1577],
        ...,
        [0.1508, 0.1594, 0.1598,  ..., 0.1534, 0.1569, 0.1579],
        [0.1598, 0.1649, 0.1745,  ..., 0.1571, 0.1664, 0.1676],
        [0.1517, 0.1573, 0.1638,  ..., 0.1525, 0.1542, 0.1583]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:08<00:35,  1.30it/s]

tensor([[0.1531, 0.1587, 0.1666,  ..., 0.1537, 0.1560, 0.1593],
        [0.1553, 0.1644, 0.1695,  ..., 0.1560, 0.1635, 0.1638],
        [0.1522, 0.1621, 0.1679,  ..., 0.1561, 0.1617, 0.1618],
        ...,
        [0.1529, 0.1631, 0.1672,  ..., 0.1536, 0.1606, 0.1644],
        [0.1546, 0.1621, 0.1672,  ..., 0.1561, 0.1590, 0.1637],
        [0.1546, 0.1612, 0.1656,  ..., 0.1549, 0.1571, 0.1604]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:09<00:35,  1.27it/s]

tensor([[0.1520, 0.1584, 0.1615,  ..., 0.1519, 0.1578, 0.1561],
        [0.1552, 0.1640, 0.1702,  ..., 0.1573, 0.1622, 0.1642],
        [0.1548, 0.1640, 0.1665,  ..., 0.1542, 0.1571, 0.1572],
        ...,
        [0.1534, 0.1598, 0.1649,  ..., 0.1552, 0.1551, 0.1597],
        [0.1515, 0.1610, 0.1653,  ..., 0.1566, 0.1557, 0.1606],
        [0.1562, 0.1675, 0.1698,  ..., 0.1617, 0.1673, 0.1655]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:10<00:33,  1.31it/s]

tensor([[0.1558, 0.1642, 0.1684,  ..., 0.1576, 0.1619, 0.1656],
        [0.1523, 0.1596, 0.1645,  ..., 0.1523, 0.1553, 0.1572],
        [0.1547, 0.1586, 0.1667,  ..., 0.1540, 0.1548, 0.1596],
        ...,
        [0.1510, 0.1626, 0.1654,  ..., 0.1543, 0.1597, 0.1615],
        [0.1560, 0.1604, 0.1676,  ..., 0.1557, 0.1565, 0.1624],
        [0.1515, 0.1628, 0.1650,  ..., 0.1559, 0.1572, 0.1631]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:31,  1.35it/s]

tensor([[0.1534, 0.1599, 0.1669,  ..., 0.1556, 0.1563, 0.1581],
        [0.1555, 0.1662, 0.1674,  ..., 0.1589, 0.1602, 0.1608],
        [0.1561, 0.1643, 0.1709,  ..., 0.1565, 0.1625, 0.1642],
        ...,
        [0.1501, 0.1561, 0.1618,  ..., 0.1542, 0.1530, 0.1540],
        [0.1555, 0.1622, 0.1689,  ..., 0.1560, 0.1605, 0.1644],
        [0.1599, 0.1663, 0.1702,  ..., 0.1561, 0.1642, 0.1634]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:11<00:30,  1.38it/s]

tensor([[0.1523, 0.1573, 0.1639,  ..., 0.1537, 0.1536, 0.1576],
        [0.1566, 0.1617, 0.1701,  ..., 0.1587, 0.1620, 0.1638],
        [0.1483, 0.1616, 0.1612,  ..., 0.1549, 0.1577, 0.1602],
        ...,
        [0.1488, 0.1546, 0.1592,  ..., 0.1503, 0.1504, 0.1534],
        [0.1470, 0.1551, 0.1563,  ..., 0.1520, 0.1507, 0.1520],
        [0.1551, 0.1636, 0.1699,  ..., 0.1535, 0.1603, 0.1613]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:12<00:29,  1.40it/s]

tensor([[0.1469, 0.1576, 0.1541,  ..., 0.1491, 0.1516, 0.1497],
        [0.1578, 0.1639, 0.1720,  ..., 0.1570, 0.1620, 0.1673],
        [0.1542, 0.1611, 0.1631,  ..., 0.1549, 0.1563, 0.1581],
        ...,
        [0.1583, 0.1660, 0.1710,  ..., 0.1586, 0.1640, 0.1638],
        [0.1486, 0.1558, 0.1614,  ..., 0.1519, 0.1544, 0.1554],
        [0.1528, 0.1609, 0.1643,  ..., 0.1551, 0.1583, 0.1593]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:27,  1.43it/s]

tensor([[0.1573, 0.1622, 0.1690,  ..., 0.1559, 0.1598, 0.1648],
        [0.1544, 0.1626, 0.1647,  ..., 0.1571, 0.1610, 0.1589],
        [0.1541, 0.1626, 0.1665,  ..., 0.1574, 0.1635, 0.1625],
        ...,
        [0.1455, 0.1521, 0.1552,  ..., 0.1501, 0.1503, 0.1489],
        [0.1477, 0.1538, 0.1591,  ..., 0.1513, 0.1541, 0.1513],
        [0.1549, 0.1640, 0.1693,  ..., 0.1566, 0.1626, 0.1632]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:26,  1.45it/s]

tensor([[0.1555, 0.1598, 0.1670,  ..., 0.1563, 0.1571, 0.1608],
        [0.1546, 0.1606, 0.1649,  ..., 0.1545, 0.1562, 0.1599],
        [0.1468, 0.1570, 0.1582,  ..., 0.1491, 0.1507, 0.1531],
        ...,
        [0.1524, 0.1563, 0.1622,  ..., 0.1531, 0.1532, 0.1550],
        [0.1531, 0.1583, 0.1646,  ..., 0.1550, 0.1546, 0.1608],
        [0.1529, 0.1609, 0.1656,  ..., 0.1535, 0.1599, 0.1614]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:14<00:26,  1.45it/s]

tensor([[0.1478, 0.1583, 0.1569,  ..., 0.1502, 0.1533, 0.1530],
        [0.1508, 0.1623, 0.1637,  ..., 0.1548, 0.1588, 0.1588],
        [0.1559, 0.1626, 0.1695,  ..., 0.1555, 0.1592, 0.1630],
        ...,
        [0.1482, 0.1586, 0.1604,  ..., 0.1519, 0.1561, 0.1561],
        [0.1535, 0.1604, 0.1652,  ..., 0.1541, 0.1564, 0.1594],
        [0.1512, 0.1587, 0.1605,  ..., 0.1524, 0.1583, 0.1564]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:25,  1.47it/s]

tensor([[0.1562, 0.1651, 0.1696,  ..., 0.1568, 0.1653, 0.1646],
        [0.1534, 0.1627, 0.1649,  ..., 0.1522, 0.1597, 0.1616],
        [0.1573, 0.1644, 0.1721,  ..., 0.1550, 0.1619, 0.1661],
        ...,
        [0.1548, 0.1622, 0.1684,  ..., 0.1537, 0.1561, 0.1603],
        [0.1507, 0.1605, 0.1632,  ..., 0.1526, 0.1559, 0.1569],
        [0.1559, 0.1639, 0.1686,  ..., 0.1554, 0.1623, 0.1616]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:23,  1.50it/s]

tensor([[0.1538, 0.1593, 0.1664,  ..., 0.1552, 0.1564, 0.1593],
        [0.1479, 0.1583, 0.1593,  ..., 0.1495, 0.1570, 0.1567],
        [0.1494, 0.1564, 0.1621,  ..., 0.1525, 0.1525, 0.1557],
        ...,
        [0.1518, 0.1603, 0.1651,  ..., 0.1552, 0.1574, 0.1588],
        [0.1540, 0.1591, 0.1650,  ..., 0.1562, 0.1564, 0.1607],
        [0.1477, 0.1536, 0.1568,  ..., 0.1491, 0.1504, 0.1514]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:16<00:23,  1.51it/s]

tensor([[0.1513, 0.1606, 0.1641,  ..., 0.1555, 0.1547, 0.1595],
        [0.1592, 0.1641, 0.1722,  ..., 0.1585, 0.1652, 0.1662],
        [0.1560, 0.1625, 0.1688,  ..., 0.1572, 0.1616, 0.1635],
        ...,
        [0.1537, 0.1632, 0.1685,  ..., 0.1552, 0.1632, 0.1622],
        [0.1548, 0.1620, 0.1656,  ..., 0.1540, 0.1576, 0.1584],
        [0.1589, 0.1639, 0.1708,  ..., 0.1580, 0.1635, 0.1633]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:22,  1.50it/s]

tensor([[0.1530, 0.1611, 0.1665,  ..., 0.1561, 0.1586, 0.1637],
        [0.1538, 0.1602, 0.1659,  ..., 0.1543, 0.1578, 0.1592],
        [0.1539, 0.1605, 0.1669,  ..., 0.1540, 0.1565, 0.1602],
        ...,
        [0.1504, 0.1586, 0.1608,  ..., 0.1506, 0.1566, 0.1582],
        [0.1546, 0.1654, 0.1717,  ..., 0.1570, 0.1631, 0.1653],
        [0.1540, 0.1634, 0.1664,  ..., 0.1572, 0.1570, 0.1606]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:21,  1.50it/s]

tensor([[0.1568, 0.1634, 0.1714,  ..., 0.1605, 0.1601, 0.1651],
        [0.1530, 0.1583, 0.1607,  ..., 0.1542, 0.1548, 0.1565],
        [0.1488, 0.1610, 0.1596,  ..., 0.1515, 0.1570, 0.1560],
        ...,
        [0.1525, 0.1580, 0.1648,  ..., 0.1527, 0.1555, 0.1571],
        [0.1524, 0.1616, 0.1652,  ..., 0.1560, 0.1559, 0.1571],
        [0.1527, 0.1623, 0.1641,  ..., 0.1533, 0.1592, 0.1604]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:21,  1.50it/s]

tensor([[0.1503, 0.1639, 0.1633,  ..., 0.1552, 0.1612, 0.1613],
        [0.1534, 0.1597, 0.1654,  ..., 0.1548, 0.1587, 0.1608],
        [0.1537, 0.1576, 0.1651,  ..., 0.1560, 0.1559, 0.1585],
        ...,
        [0.1518, 0.1611, 0.1643,  ..., 0.1505, 0.1540, 0.1573],
        [0.1496, 0.1580, 0.1593,  ..., 0.1516, 0.1542, 0.1552],
        [0.1493, 0.1561, 0.1609,  ..., 0.1536, 0.1533, 0.1534]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:21,  1.46it/s]

tensor([[0.1472, 0.1592, 0.1610,  ..., 0.1510, 0.1570, 0.1567],
        [0.1544, 0.1629, 0.1661,  ..., 0.1605, 0.1617, 0.1603],
        [0.1563, 0.1641, 0.1697,  ..., 0.1538, 0.1597, 0.1631],
        ...,
        [0.1570, 0.1656, 0.1710,  ..., 0.1552, 0.1627, 0.1673],
        [0.1525, 0.1586, 0.1634,  ..., 0.1544, 0.1542, 0.1545],
        [0.1498, 0.1565, 0.1589,  ..., 0.1518, 0.1514, 0.1526]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:20,  1.47it/s]

tensor([[0.1547, 0.1637, 0.1689,  ..., 0.1541, 0.1620, 0.1643],
        [0.1517, 0.1584, 0.1634,  ..., 0.1537, 0.1557, 0.1571],
        [0.1514, 0.1588, 0.1618,  ..., 0.1515, 0.1549, 0.1563],
        ...,
        [0.1458, 0.1545, 0.1579,  ..., 0.1504, 0.1516, 0.1493],
        [0.1528, 0.1650, 0.1651,  ..., 0.1545, 0.1615, 0.1620],
        [0.1464, 0.1524, 0.1553,  ..., 0.1479, 0.1491, 0.1508]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:19,  1.48it/s]

tensor([[0.1555, 0.1631, 0.1693,  ..., 0.1552, 0.1594, 0.1633],
        [0.1483, 0.1534, 0.1587,  ..., 0.1513, 0.1530, 0.1517],
        [0.1456, 0.1506, 0.1538,  ..., 0.1445, 0.1466, 0.1528],
        ...,
        [0.1528, 0.1582, 0.1658,  ..., 0.1535, 0.1577, 0.1572],
        [0.1465, 0.1594, 0.1579,  ..., 0.1509, 0.1551, 0.1558],
        [0.1566, 0.1632, 0.1697,  ..., 0.1576, 0.1625, 0.1639]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:18,  1.50it/s]

tensor([[0.1513, 0.1606, 0.1641,  ..., 0.1537, 0.1558, 0.1568],
        [0.1497, 0.1572, 0.1602,  ..., 0.1506, 0.1513, 0.1543],
        [0.1539, 0.1607, 0.1673,  ..., 0.1542, 0.1576, 0.1592],
        ...,
        [0.1544, 0.1597, 0.1669,  ..., 0.1558, 0.1569, 0.1587],
        [0.1376, 0.1494, 0.1481,  ..., 0.1430, 0.1431, 0.1448],
        [0.1492, 0.1580, 0.1622,  ..., 0.1503, 0.1552, 0.1586]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:17,  1.50it/s]

tensor([[0.1543, 0.1576, 0.1650,  ..., 0.1549, 0.1544, 0.1585],
        [0.1518, 0.1570, 0.1629,  ..., 0.1531, 0.1529, 0.1584],
        [0.1571, 0.1609, 0.1702,  ..., 0.1540, 0.1584, 0.1624],
        ...,
        [0.1537, 0.1662, 0.1653,  ..., 0.1565, 0.1622, 0.1611],
        [0.1564, 0.1653, 0.1684,  ..., 0.1557, 0.1619, 0.1638],
        [0.1545, 0.1641, 0.1649,  ..., 0.1572, 0.1607, 0.1600]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:17,  1.51it/s]

tensor([[0.1560, 0.1666, 0.1676,  ..., 0.1565, 0.1626, 0.1636],
        [0.1525, 0.1580, 0.1631,  ..., 0.1542, 0.1537, 0.1584],
        [0.1513, 0.1557, 0.1635,  ..., 0.1534, 0.1532, 0.1567],
        ...,
        [0.1531, 0.1639, 0.1653,  ..., 0.1549, 0.1600, 0.1599],
        [0.1518, 0.1618, 0.1638,  ..., 0.1552, 0.1563, 0.1600],
        [0.1524, 0.1601, 0.1634,  ..., 0.1531, 0.1563, 0.1570]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:16,  1.53it/s]

tensor([[0.1545, 0.1632, 0.1681,  ..., 0.1567, 0.1600, 0.1633],
        [0.1506, 0.1611, 0.1646,  ..., 0.1549, 0.1588, 0.1592],
        [0.1509, 0.1568, 0.1613,  ..., 0.1526, 0.1548, 0.1587],
        ...,
        [0.1553, 0.1589, 0.1679,  ..., 0.1556, 0.1566, 0.1613],
        [0.1414, 0.1486, 0.1508,  ..., 0.1434, 0.1439, 0.1443],
        [0.1530, 0.1606, 0.1651,  ..., 0.1531, 0.1560, 0.1579]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:15,  1.53it/s]

tensor([[0.1492, 0.1570, 0.1603,  ..., 0.1556, 0.1560, 0.1569],
        [0.1548, 0.1589, 0.1654,  ..., 0.1544, 0.1541, 0.1594],
        [0.1507, 0.1603, 0.1639,  ..., 0.1513, 0.1573, 0.1592],
        ...,
        [0.1497, 0.1582, 0.1593,  ..., 0.1503, 0.1548, 0.1559],
        [0.1538, 0.1609, 0.1643,  ..., 0.1547, 0.1578, 0.1611],
        [0.1562, 0.1643, 0.1695,  ..., 0.1560, 0.1621, 0.1641]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:15,  1.53it/s]

tensor([[0.1567, 0.1645, 0.1720,  ..., 0.1565, 0.1629, 0.1658],
        [0.1505, 0.1573, 0.1639,  ..., 0.1510, 0.1502, 0.1551],
        [0.1556, 0.1600, 0.1681,  ..., 0.1551, 0.1579, 0.1636],
        ...,
        [0.1547, 0.1634, 0.1682,  ..., 0.1574, 0.1606, 0.1645],
        [0.1380, 0.1444, 0.1482,  ..., 0.1430, 0.1406, 0.1426],
        [0.1524, 0.1591, 0.1627,  ..., 0.1512, 0.1561, 0.1569]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.53it/s]

tensor([[0.1563, 0.1619, 0.1698,  ..., 0.1554, 0.1595, 0.1670],
        [0.1511, 0.1603, 0.1612,  ..., 0.1524, 0.1558, 0.1552],
        [0.1553, 0.1653, 0.1691,  ..., 0.1580, 0.1616, 0.1628],
        ...,
        [0.1491, 0.1579, 0.1611,  ..., 0.1530, 0.1577, 0.1576],
        [0.1512, 0.1571, 0.1632,  ..., 0.1521, 0.1520, 0.1544],
        [0.1585, 0.1636, 0.1703,  ..., 0.1562, 0.1639, 0.1656]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:14,  1.48it/s]

tensor([[0.1499, 0.1584, 0.1598,  ..., 0.1543, 0.1509, 0.1544],
        [0.1510, 0.1582, 0.1618,  ..., 0.1542, 0.1547, 0.1555],
        [0.1499, 0.1563, 0.1595,  ..., 0.1504, 0.1514, 0.1527],
        ...,
        [0.1526, 0.1619, 0.1663,  ..., 0.1564, 0.1595, 0.1600],
        [0.1453, 0.1535, 0.1550,  ..., 0.1446, 0.1485, 0.1495],
        [0.1556, 0.1641, 0.1670,  ..., 0.1578, 0.1629, 0.1616]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:13,  1.47it/s]

tensor([[0.1568, 0.1686, 0.1688,  ..., 0.1601, 0.1645, 0.1647],
        [0.1514, 0.1647, 0.1642,  ..., 0.1533, 0.1605, 0.1613],
        [0.1475, 0.1543, 0.1611,  ..., 0.1483, 0.1490, 0.1553],
        ...,
        [0.1525, 0.1560, 0.1629,  ..., 0.1540, 0.1548, 0.1558],
        [0.1512, 0.1554, 0.1637,  ..., 0.1516, 0.1521, 0.1532],
        [0.1559, 0.1612, 0.1684,  ..., 0.1535, 0.1600, 0.1602]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:12,  1.50it/s]

tensor([[0.1542, 0.1576, 0.1659,  ..., 0.1541, 0.1547, 0.1578],
        [0.1521, 0.1608, 0.1635,  ..., 0.1545, 0.1581, 0.1587],
        [0.1513, 0.1599, 0.1621,  ..., 0.1551, 0.1566, 0.1608],
        ...,
        [0.1545, 0.1583, 0.1668,  ..., 0.1524, 0.1554, 0.1564],
        [0.1549, 0.1639, 0.1671,  ..., 0.1555, 0.1626, 0.1634],
        [0.1542, 0.1606, 0.1667,  ..., 0.1542, 0.1549, 0.1578]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:11,  1.51it/s]

tensor([[0.1514, 0.1598, 0.1643,  ..., 0.1533, 0.1556, 0.1566],
        [0.1539, 0.1619, 0.1678,  ..., 0.1570, 0.1586, 0.1630],
        [0.1541, 0.1679, 0.1659,  ..., 0.1574, 0.1619, 0.1627],
        ...,
        [0.1543, 0.1632, 0.1658,  ..., 0.1568, 0.1568, 0.1594],
        [0.1558, 0.1626, 0.1680,  ..., 0.1553, 0.1615, 0.1662],
        [0.1515, 0.1585, 0.1624,  ..., 0.1509, 0.1546, 0.1597]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:28<00:11,  1.51it/s]

tensor([[0.1570, 0.1626, 0.1697,  ..., 0.1573, 0.1598, 0.1625],
        [0.1525, 0.1619, 0.1672,  ..., 0.1549, 0.1592, 0.1621],
        [0.1498, 0.1554, 0.1615,  ..., 0.1524, 0.1531, 0.1564],
        ...,
        [0.1530, 0.1625, 0.1654,  ..., 0.1536, 0.1600, 0.1600],
        [0.1541, 0.1593, 0.1643,  ..., 0.1534, 0.1574, 0.1609],
        [0.1464, 0.1546, 0.1607,  ..., 0.1497, 0.1512, 0.1525]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:28<00:10,  1.51it/s]

tensor([[0.1515, 0.1588, 0.1620,  ..., 0.1515, 0.1554, 0.1564],
        [0.1529, 0.1568, 0.1643,  ..., 0.1537, 0.1526, 0.1555],
        [0.1541, 0.1637, 0.1700,  ..., 0.1551, 0.1609, 0.1619],
        ...,
        [0.1526, 0.1598, 0.1628,  ..., 0.1546, 0.1551, 0.1613],
        [0.1514, 0.1623, 0.1634,  ..., 0.1541, 0.1565, 0.1574],
        [0.1600, 0.1666, 0.1697,  ..., 0.1570, 0.1638, 0.1646]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:29<00:09,  1.53it/s]

tensor([[0.1441, 0.1516, 0.1556,  ..., 0.1494, 0.1475, 0.1493],
        [0.1515, 0.1611, 0.1616,  ..., 0.1531, 0.1580, 0.1587],
        [0.1525, 0.1587, 0.1650,  ..., 0.1545, 0.1559, 0.1600],
        ...,
        [0.1557, 0.1635, 0.1702,  ..., 0.1547, 0.1589, 0.1614],
        [0.1557, 0.1644, 0.1677,  ..., 0.1543, 0.1597, 0.1619],
        [0.1558, 0.1625, 0.1678,  ..., 0.1532, 0.1580, 0.1607]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:30<00:09,  1.53it/s]

tensor([[0.1568, 0.1647, 0.1709,  ..., 0.1558, 0.1646, 0.1642],
        [0.1492, 0.1565, 0.1599,  ..., 0.1518, 0.1524, 0.1561],
        [0.1517, 0.1597, 0.1651,  ..., 0.1547, 0.1577, 0.1578],
        ...,
        [0.1509, 0.1607, 0.1616,  ..., 0.1533, 0.1583, 0.1588],
        [0.1499, 0.1596, 0.1615,  ..., 0.1520, 0.1565, 0.1566],
        [0.1582, 0.1656, 0.1712,  ..., 0.1575, 0.1655, 0.1673]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:30<00:08,  1.53it/s]

tensor([[0.1511, 0.1609, 0.1626,  ..., 0.1531, 0.1592, 0.1591],
        [0.1505, 0.1535, 0.1606,  ..., 0.1515, 0.1506, 0.1523],
        [0.1509, 0.1596, 0.1615,  ..., 0.1543, 0.1593, 0.1561],
        ...,
        [0.1569, 0.1664, 0.1716,  ..., 0.1605, 0.1672, 0.1649],
        [0.1456, 0.1500, 0.1557,  ..., 0.1508, 0.1461, 0.1509],
        [0.1514, 0.1626, 0.1653,  ..., 0.1575, 0.1606, 0.1605]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:31<00:07,  1.51it/s]

tensor([[0.1547, 0.1613, 0.1675,  ..., 0.1555, 0.1578, 0.1600],
        [0.1508, 0.1606, 0.1641,  ..., 0.1564, 0.1573, 0.1605],
        [0.1551, 0.1660, 0.1679,  ..., 0.1547, 0.1598, 0.1612],
        ...,
        [0.1513, 0.1614, 0.1632,  ..., 0.1522, 0.1595, 0.1598],
        [0.1459, 0.1554, 0.1580,  ..., 0.1492, 0.1497, 0.1516],
        [0.1352, 0.1435, 0.1478,  ..., 0.1413, 0.1414, 0.1464]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:32<00:07,  1.50it/s]

tensor([[0.1540, 0.1636, 0.1673,  ..., 0.1562, 0.1569, 0.1600],
        [0.1572, 0.1628, 0.1676,  ..., 0.1567, 0.1634, 0.1618],
        [0.1473, 0.1522, 0.1565,  ..., 0.1485, 0.1489, 0.1530],
        ...,
        [0.1541, 0.1604, 0.1675,  ..., 0.1536, 0.1569, 0.1607],
        [0.1570, 0.1628, 0.1693,  ..., 0.1558, 0.1587, 0.1621],
        [0.1459, 0.1583, 0.1552,  ..., 0.1500, 0.1518, 0.1527]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:32<00:06,  1.48it/s]

tensor([[0.1468, 0.1530, 0.1566,  ..., 0.1538, 0.1527, 0.1535],
        [0.1506, 0.1579, 0.1632,  ..., 0.1519, 0.1516, 0.1554],
        [0.1541, 0.1619, 0.1679,  ..., 0.1568, 0.1583, 0.1636],
        ...,
        [0.1455, 0.1535, 0.1558,  ..., 0.1517, 0.1498, 0.1504],
        [0.1514, 0.1589, 0.1620,  ..., 0.1579, 0.1581, 0.1572],
        [0.1549, 0.1599, 0.1691,  ..., 0.1565, 0.1628, 0.1623]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:33<00:06,  1.49it/s]

tensor([[0.1534, 0.1614, 0.1645,  ..., 0.1522, 0.1546, 0.1561],
        [0.1558, 0.1646, 0.1696,  ..., 0.1562, 0.1607, 0.1639],
        [0.1511, 0.1618, 0.1629,  ..., 0.1534, 0.1577, 0.1596],
        ...,
        [0.1547, 0.1620, 0.1673,  ..., 0.1554, 0.1572, 0.1608],
        [0.1548, 0.1649, 0.1701,  ..., 0.1546, 0.1623, 0.1668],
        [0.1488, 0.1575, 0.1579,  ..., 0.1513, 0.1542, 0.1558]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:34<00:05,  1.51it/s]

tensor([[0.1503, 0.1577, 0.1593,  ..., 0.1510, 0.1567, 0.1566],
        [0.1507, 0.1543, 0.1629,  ..., 0.1502, 0.1496, 0.1550],
        [0.1571, 0.1640, 0.1711,  ..., 0.1564, 0.1636, 0.1644],
        ...,
        [0.1593, 0.1667, 0.1675,  ..., 0.1614, 0.1627, 0.1610],
        [0.1536, 0.1605, 0.1636,  ..., 0.1522, 0.1557, 0.1579],
        [0.1533, 0.1639, 0.1651,  ..., 0.1528, 0.1626, 0.1597]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:34<00:04,  1.51it/s]

tensor([[0.1562, 0.1661, 0.1685,  ..., 0.1559, 0.1632, 0.1622],
        [0.1512, 0.1603, 0.1617,  ..., 0.1542, 0.1569, 0.1545],
        [0.1519, 0.1634, 0.1667,  ..., 0.1562, 0.1621, 0.1642],
        ...,
        [0.1543, 0.1594, 0.1663,  ..., 0.1545, 0.1564, 0.1617],
        [0.1550, 0.1616, 0.1677,  ..., 0.1568, 0.1567, 0.1614],
        [0.1512, 0.1619, 0.1634,  ..., 0.1558, 0.1598, 0.1605]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:35<00:03,  1.50it/s]

tensor([[0.1539, 0.1639, 0.1657,  ..., 0.1569, 0.1577, 0.1607],
        [0.1561, 0.1645, 0.1706,  ..., 0.1563, 0.1583, 0.1619],
        [0.1511, 0.1606, 0.1618,  ..., 0.1538, 0.1590, 0.1590],
        ...,
        [0.1494, 0.1565, 0.1624,  ..., 0.1526, 0.1553, 0.1566],
        [0.1523, 0.1591, 0.1632,  ..., 0.1525, 0.1576, 0.1571],
        [0.1493, 0.1562, 0.1623,  ..., 0.1532, 0.1547, 0.1565]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:36<00:03,  1.52it/s]

tensor([[0.1496, 0.1580, 0.1602,  ..., 0.1526, 0.1537, 0.1525],
        [0.1515, 0.1599, 0.1639,  ..., 0.1541, 0.1582, 0.1623],
        [0.1537, 0.1614, 0.1660,  ..., 0.1561, 0.1584, 0.1612],
        ...,
        [0.1551, 0.1607, 0.1677,  ..., 0.1548, 0.1621, 0.1625],
        [0.1539, 0.1565, 0.1657,  ..., 0.1547, 0.1557, 0.1593],
        [0.1562, 0.1624, 0.1687,  ..., 0.1553, 0.1582, 0.1625]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:36<00:02,  1.49it/s]

tensor([[0.1472, 0.1569, 0.1636,  ..., 0.1536, 0.1526, 0.1575],
        [0.1522, 0.1584, 0.1615,  ..., 0.1531, 0.1545, 0.1564],
        [0.1577, 0.1625, 0.1685,  ..., 0.1576, 0.1619, 0.1642],
        ...,
        [0.1528, 0.1614, 0.1653,  ..., 0.1537, 0.1553, 0.1581],
        [0.1464, 0.1526, 0.1559,  ..., 0.1463, 0.1494, 0.1534],
        [0.1503, 0.1611, 0.1622,  ..., 0.1531, 0.1594, 0.1600]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:37<00:02,  1.42it/s]

tensor([[0.1556, 0.1645, 0.1716,  ..., 0.1600, 0.1619, 0.1670],
        [0.1537, 0.1623, 0.1676,  ..., 0.1525, 0.1574, 0.1611],
        [0.1584, 0.1635, 0.1692,  ..., 0.1554, 0.1596, 0.1642],
        ...,
        [0.1551, 0.1589, 0.1673,  ..., 0.1534, 0.1561, 0.1590],
        [0.1526, 0.1661, 0.1669,  ..., 0.1558, 0.1584, 0.1609],
        [0.1525, 0.1605, 0.1688,  ..., 0.1558, 0.1602, 0.1611]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:38<00:01,  1.35it/s]

tensor([[0.1539, 0.1654, 0.1674,  ..., 0.1548, 0.1619, 0.1604],
        [0.1588, 0.1628, 0.1697,  ..., 0.1559, 0.1631, 0.1651],
        [0.1551, 0.1625, 0.1697,  ..., 0.1550, 0.1607, 0.1630],
        ...,
        [0.1515, 0.1634, 0.1651,  ..., 0.1529, 0.1589, 0.1606],
        [0.1563, 0.1615, 0.1693,  ..., 0.1549, 0.1602, 0.1656],
        [0.1556, 0.1627, 0.1664,  ..., 0.1532, 0.1577, 0.1639]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:39<00:00,  1.29it/s]

tensor([[0.1488, 0.1564, 0.1627,  ..., 0.1513, 0.1548, 0.1542],
        [0.1560, 0.1637, 0.1686,  ..., 0.1555, 0.1603, 0.1606],
        [0.1531, 0.1632, 0.1652,  ..., 0.1581, 0.1581, 0.1583],
        ...,
        [0.1546, 0.1608, 0.1709,  ..., 0.1549, 0.1572, 0.1645],
        [0.1543, 0.1617, 0.1685,  ..., 0.1536, 0.1587, 0.1605],
        [0.1523, 0.1603, 0.1645,  ..., 0.1570, 0.1592, 0.1614]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:40<00:00,  1.42it/s]
 12%|█▎        | 1/8 [00:00<00:02,  2.94it/s]

tensor([[0.1533, 0.1588, 0.1646,  ..., 0.1520, 0.1561, 0.1579],
        [0.1538, 0.1613, 0.1654,  ..., 0.1564, 0.1570, 0.1598],
        [0.1533, 0.1661, 0.1710,  ..., 0.1584, 0.1625, 0.1637],
        ...,
        [0.1539, 0.1600, 0.1666,  ..., 0.1526, 0.1602, 0.1595],
        [0.1532, 0.1637, 0.1653,  ..., 0.1565, 0.1635, 0.1639],
        [0.1584, 0.1639, 0.1716,  ..., 0.1569, 0.1630, 0.1676]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:02,  2.88it/s]

tensor([[0.1548, 0.1622, 0.1652,  ..., 0.1564, 0.1605, 0.1639],
        [0.1526, 0.1600, 0.1652,  ..., 0.1548, 0.1534, 0.1573],
        [0.1516, 0.1608, 0.1634,  ..., 0.1532, 0.1580, 0.1588],
        ...,
        [0.1527, 0.1576, 0.1650,  ..., 0.1540, 0.1545, 0.1568],
        [0.1511, 0.1591, 0.1623,  ..., 0.1508, 0.1569, 0.1535],
        [0.1550, 0.1632, 0.1683,  ..., 0.1544, 0.1603, 0.1630]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:01<00:01,  2.90it/s]

tensor([[0.1512, 0.1620, 0.1658,  ..., 0.1533, 0.1603, 0.1622],
        [0.1529, 0.1585, 0.1644,  ..., 0.1526, 0.1558, 0.1569],
        [0.1570, 0.1661, 0.1712,  ..., 0.1567, 0.1654, 0.1657],
        ...,
        [0.1555, 0.1618, 0.1680,  ..., 0.1546, 0.1630, 0.1606],
        [0.1494, 0.1541, 0.1612,  ..., 0.1519, 0.1514, 0.1560],
        [0.1506, 0.1569, 0.1618,  ..., 0.1543, 0.1523, 0.1540]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  2.89it/s]

tensor([[0.1517, 0.1583, 0.1609,  ..., 0.1529, 0.1565, 0.1592],
        [0.1480, 0.1577, 0.1604,  ..., 0.1495, 0.1559, 0.1562],
        [0.1576, 0.1642, 0.1688,  ..., 0.1548, 0.1610, 0.1627],
        ...,
        [0.1549, 0.1649, 0.1685,  ..., 0.1554, 0.1622, 0.1652],
        [0.1529, 0.1594, 0.1634,  ..., 0.1540, 0.1562, 0.1577],
        [0.1510, 0.1573, 0.1606,  ..., 0.1527, 0.1522, 0.1562]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:01,  2.93it/s]

tensor([[0.1534, 0.1600, 0.1665,  ..., 0.1572, 0.1592, 0.1613],
        [0.1534, 0.1615, 0.1655,  ..., 0.1570, 0.1579, 0.1596],
        [0.1560, 0.1635, 0.1705,  ..., 0.1569, 0.1604, 0.1664],
        ...,
        [0.1504, 0.1571, 0.1622,  ..., 0.1516, 0.1533, 0.1540],
        [0.1458, 0.1529, 0.1579,  ..., 0.1487, 0.1510, 0.1525],
        [0.1566, 0.1664, 0.1677,  ..., 0.1580, 0.1664, 0.1615]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:02<00:00,  2.94it/s]

tensor([[0.1519, 0.1587, 0.1615,  ..., 0.1528, 0.1561, 0.1580],
        [0.1553, 0.1628, 0.1683,  ..., 0.1544, 0.1602, 0.1603],
        [0.1605, 0.1671, 0.1712,  ..., 0.1589, 0.1618, 0.1631],
        ...,
        [0.1478, 0.1586, 0.1589,  ..., 0.1507, 0.1568, 0.1565],
        [0.1490, 0.1548, 0.1614,  ..., 0.1508, 0.1498, 0.1565],
        [0.1542, 0.1648, 0.1671,  ..., 0.1584, 0.1609, 0.1642]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  2.89it/s]

tensor([[0.1496, 0.1594, 0.1609,  ..., 0.1533, 0.1578, 0.1555],
        [0.1536, 0.1632, 0.1664,  ..., 0.1578, 0.1600, 0.1607],
        [0.1496, 0.1593, 0.1614,  ..., 0.1534, 0.1565, 0.1589],
        ...,
        [0.1558, 0.1642, 0.1702,  ..., 0.1581, 0.1622, 0.1621],
        [0.1422, 0.1549, 0.1540,  ..., 0.1456, 0.1545, 0.1533],
        [0.1516, 0.1628, 0.1633,  ..., 0.1545, 0.1556, 0.1582]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.89it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1552, 0.1691, 0.1715,  ..., 0.1581, 0.1624, 0.1655],
        [0.1493, 0.1590, 0.1623,  ..., 0.1552, 0.1565, 0.1582],
        [0.1531, 0.1574, 0.1635,  ..., 0.1525, 0.1522, 0.1554],
        ...,
        [0.1562, 0.1626, 0.1710,  ..., 0.1557, 0.1611, 0.1647],
        [0.1507, 0.1615, 0.1630,  ..., 0.1507, 0.1533, 0.1583],
        [0.1541, 0.1578, 0.1646,  ..., 0.1567, 0.1576, 0.1595]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 16, train_loss: -0.8156052827835083, valid_loss: -0.788493812084198
Parameter containing:
tensor([[ 1.0000,  1.0000,  1.0000,  1.0000,  1.0001,  1.0000,  1.0002,  1.0000,
          0.9999,  1.0005, -0.4999, -0.5000, -0.5001, -0.5000, -0.5000, -0.5000,
         -0.4999, -0.5001, -0.5000, -0.5001],
      

  2%|▏         | 1/57 [00:00<00:42,  1.32it/s]

tensor([[0.1601, 0.1656, 0.1720,  ..., 0.1559, 0.1617, 0.1650],
        [0.1529, 0.1583, 0.1666,  ..., 0.1532, 0.1554, 0.1613],
        [0.1590, 0.1673, 0.1709,  ..., 0.1575, 0.1650, 0.1667],
        ...,
        [0.1565, 0.1644, 0.1699,  ..., 0.1555, 0.1614, 0.1658],
        [0.1528, 0.1611, 0.1663,  ..., 0.1537, 0.1575, 0.1628],
        [0.1506, 0.1556, 0.1593,  ..., 0.1507, 0.1527, 0.1567]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:40,  1.34it/s]

tensor([[0.1488, 0.1597, 0.1592,  ..., 0.1534, 0.1544, 0.1551],
        [0.1477, 0.1577, 0.1594,  ..., 0.1504, 0.1510, 0.1516],
        [0.1549, 0.1639, 0.1693,  ..., 0.1594, 0.1608, 0.1624],
        ...,
        [0.1558, 0.1633, 0.1695,  ..., 0.1547, 0.1595, 0.1640],
        [0.1407, 0.1465, 0.1505,  ..., 0.1475, 0.1484, 0.1494],
        [0.1555, 0.1622, 0.1675,  ..., 0.1547, 0.1598, 0.1597]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:39,  1.36it/s]

tensor([[0.1502, 0.1573, 0.1610,  ..., 0.1497, 0.1526, 0.1562],
        [0.1534, 0.1617, 0.1657,  ..., 0.1536, 0.1613, 0.1626],
        [0.1560, 0.1615, 0.1650,  ..., 0.1546, 0.1578, 0.1575],
        ...,
        [0.1469, 0.1554, 0.1600,  ..., 0.1544, 0.1555, 0.1546],
        [0.1507, 0.1571, 0.1620,  ..., 0.1557, 0.1557, 0.1564],
        [0.1550, 0.1638, 0.1667,  ..., 0.1587, 0.1602, 0.1575]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:39,  1.35it/s]

tensor([[0.1552, 0.1585, 0.1660,  ..., 0.1549, 0.1554, 0.1597],
        [0.1545, 0.1621, 0.1682,  ..., 0.1553, 0.1611, 0.1620],
        [0.1521, 0.1631, 0.1656,  ..., 0.1565, 0.1604, 0.1587],
        ...,
        [0.1521, 0.1591, 0.1635,  ..., 0.1513, 0.1564, 0.1598],
        [0.1533, 0.1611, 0.1637,  ..., 0.1537, 0.1559, 0.1624],
        [0.1557, 0.1604, 0.1691,  ..., 0.1569, 0.1602, 0.1641]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:38,  1.36it/s]

tensor([[0.1565, 0.1647, 0.1702,  ..., 0.1577, 0.1618, 0.1633],
        [0.1549, 0.1622, 0.1654,  ..., 0.1555, 0.1621, 0.1641],
        [0.1537, 0.1601, 0.1654,  ..., 0.1529, 0.1573, 0.1599],
        ...,
        [0.1545, 0.1612, 0.1673,  ..., 0.1555, 0.1576, 0.1598],
        [0.1519, 0.1573, 0.1639,  ..., 0.1519, 0.1542, 0.1564],
        [0.1555, 0.1606, 0.1691,  ..., 0.1539, 0.1570, 0.1628]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:36,  1.41it/s]

tensor([[0.1524, 0.1635, 0.1682,  ..., 0.1533, 0.1593, 0.1594],
        [0.1553, 0.1625, 0.1682,  ..., 0.1536, 0.1584, 0.1619],
        [0.1564, 0.1630, 0.1713,  ..., 0.1572, 0.1607, 0.1649],
        ...,
        [0.1559, 0.1625, 0.1697,  ..., 0.1528, 0.1580, 0.1624],
        [0.1546, 0.1634, 0.1661,  ..., 0.1565, 0.1587, 0.1621],
        [0.1545, 0.1675, 0.1642,  ..., 0.1583, 0.1619, 0.1595]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:35,  1.41it/s]

tensor([[0.1554, 0.1644, 0.1700,  ..., 0.1544, 0.1581, 0.1626],
        [0.1471, 0.1543, 0.1577,  ..., 0.1499, 0.1492, 0.1516],
        [0.1512, 0.1576, 0.1669,  ..., 0.1560, 0.1547, 0.1583],
        ...,
        [0.1533, 0.1621, 0.1635,  ..., 0.1552, 0.1567, 0.1589],
        [0.1550, 0.1647, 0.1693,  ..., 0.1543, 0.1611, 0.1632],
        [0.1558, 0.1594, 0.1653,  ..., 0.1528, 0.1554, 0.1637]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.42it/s]

tensor([[0.1459, 0.1578, 0.1607,  ..., 0.1535, 0.1549, 0.1566],
        [0.1516, 0.1551, 0.1623,  ..., 0.1525, 0.1515, 0.1561],
        [0.1451, 0.1538, 0.1543,  ..., 0.1465, 0.1470, 0.1495],
        ...,
        [0.1535, 0.1613, 0.1661,  ..., 0.1509, 0.1557, 0.1601],
        [0.1471, 0.1595, 0.1604,  ..., 0.1497, 0.1555, 0.1562],
        [0.1555, 0.1629, 0.1679,  ..., 0.1552, 0.1620, 0.1607]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:32,  1.46it/s]

tensor([[0.1568, 0.1629, 0.1668,  ..., 0.1565, 0.1603, 0.1607],
        [0.1459, 0.1577, 0.1567,  ..., 0.1512, 0.1535, 0.1553],
        [0.1538, 0.1619, 0.1683,  ..., 0.1546, 0.1579, 0.1617],
        ...,
        [0.1520, 0.1569, 0.1612,  ..., 0.1520, 0.1527, 0.1563],
        [0.1569, 0.1634, 0.1718,  ..., 0.1543, 0.1642, 0.1668],
        [0.1490, 0.1518, 0.1590,  ..., 0.1490, 0.1469, 0.1500]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.49it/s]

tensor([[0.1486, 0.1545, 0.1591,  ..., 0.1495, 0.1507, 0.1510],
        [0.1585, 0.1689, 0.1692,  ..., 0.1573, 0.1638, 0.1645],
        [0.1520, 0.1610, 0.1642,  ..., 0.1566, 0.1587, 0.1601],
        ...,
        [0.1539, 0.1635, 0.1669,  ..., 0.1545, 0.1603, 0.1602],
        [0.1522, 0.1619, 0.1657,  ..., 0.1569, 0.1598, 0.1622],
        [0.1538, 0.1623, 0.1673,  ..., 0.1566, 0.1623, 0.1638]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:30,  1.50it/s]

tensor([[0.1586, 0.1674, 0.1679,  ..., 0.1601, 0.1659, 0.1626],
        [0.1490, 0.1567, 0.1604,  ..., 0.1523, 0.1501, 0.1521],
        [0.1549, 0.1662, 0.1690,  ..., 0.1580, 0.1619, 0.1620],
        ...,
        [0.1543, 0.1599, 0.1659,  ..., 0.1555, 0.1561, 0.1611],
        [0.1515, 0.1561, 0.1624,  ..., 0.1536, 0.1539, 0.1557],
        [0.1585, 0.1655, 0.1667,  ..., 0.1573, 0.1601, 0.1607]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:30,  1.47it/s]

tensor([[0.1512, 0.1579, 0.1578,  ..., 0.1487, 0.1524, 0.1537],
        [0.1506, 0.1582, 0.1617,  ..., 0.1526, 0.1553, 0.1545],
        [0.1553, 0.1635, 0.1688,  ..., 0.1589, 0.1609, 0.1654],
        ...,
        [0.1500, 0.1570, 0.1614,  ..., 0.1524, 0.1547, 0.1543],
        [0.1560, 0.1622, 0.1673,  ..., 0.1572, 0.1598, 0.1613],
        [0.1535, 0.1656, 0.1676,  ..., 0.1603, 0.1624, 0.1639]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:29,  1.48it/s]

tensor([[0.1563, 0.1650, 0.1691,  ..., 0.1560, 0.1626, 0.1618],
        [0.1540, 0.1663, 0.1702,  ..., 0.1574, 0.1622, 0.1624],
        [0.1509, 0.1570, 0.1623,  ..., 0.1502, 0.1525, 0.1542],
        ...,
        [0.1487, 0.1553, 0.1600,  ..., 0.1524, 0.1537, 0.1550],
        [0.1525, 0.1623, 0.1658,  ..., 0.1554, 0.1575, 0.1602],
        [0.1522, 0.1615, 0.1642,  ..., 0.1518, 0.1584, 0.1604]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:09<00:30,  1.41it/s]

tensor([[0.1510, 0.1604, 0.1601,  ..., 0.1563, 0.1566, 0.1577],
        [0.1534, 0.1603, 0.1671,  ..., 0.1522, 0.1563, 0.1579],
        [0.1481, 0.1508, 0.1589,  ..., 0.1481, 0.1487, 0.1509],
        ...,
        [0.1545, 0.1632, 0.1661,  ..., 0.1563, 0.1593, 0.1594],
        [0.1481, 0.1579, 0.1593,  ..., 0.1509, 0.1554, 0.1564],
        [0.1555, 0.1623, 0.1682,  ..., 0.1577, 0.1600, 0.1629]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:32,  1.31it/s]

tensor([[0.1479, 0.1593, 0.1600,  ..., 0.1533, 0.1567, 0.1587],
        [0.1529, 0.1570, 0.1644,  ..., 0.1564, 0.1570, 0.1590],
        [0.1546, 0.1627, 0.1660,  ..., 0.1565, 0.1592, 0.1591],
        ...,
        [0.1510, 0.1624, 0.1687,  ..., 0.1566, 0.1619, 0.1619],
        [0.1479, 0.1591, 0.1647,  ..., 0.1543, 0.1558, 0.1582],
        [0.1506, 0.1575, 0.1623,  ..., 0.1529, 0.1529, 0.1541]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:32,  1.24it/s]

tensor([[0.1583, 0.1630, 0.1721,  ..., 0.1565, 0.1603, 0.1617],
        [0.1515, 0.1620, 0.1645,  ..., 0.1533, 0.1558, 0.1574],
        [0.1513, 0.1585, 0.1626,  ..., 0.1514, 0.1562, 0.1593],
        ...,
        [0.1480, 0.1583, 0.1623,  ..., 0.1522, 0.1565, 0.1575],
        [0.1559, 0.1615, 0.1668,  ..., 0.1533, 0.1556, 0.1602],
        [0.1522, 0.1600, 0.1660,  ..., 0.1529, 0.1573, 0.1584]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:31,  1.28it/s]

tensor([[0.1584, 0.1626, 0.1686,  ..., 0.1574, 0.1609, 0.1636],
        [0.1546, 0.1664, 0.1654,  ..., 0.1562, 0.1598, 0.1604],
        [0.1519, 0.1594, 0.1635,  ..., 0.1534, 0.1578, 0.1582],
        ...,
        [0.1529, 0.1640, 0.1669,  ..., 0.1532, 0.1589, 0.1609],
        [0.1532, 0.1633, 0.1645,  ..., 0.1532, 0.1569, 0.1610],
        [0.1559, 0.1622, 0.1676,  ..., 0.1543, 0.1581, 0.1609]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:30,  1.26it/s]

tensor([[0.1494, 0.1567, 0.1603,  ..., 0.1521, 0.1544, 0.1547],
        [0.1491, 0.1577, 0.1599,  ..., 0.1502, 0.1560, 0.1565],
        [0.1535, 0.1634, 0.1669,  ..., 0.1551, 0.1595, 0.1631],
        ...,
        [0.1490, 0.1603, 0.1618,  ..., 0.1521, 0.1556, 0.1583],
        [0.1502, 0.1624, 0.1619,  ..., 0.1548, 0.1592, 0.1583],
        [0.1541, 0.1577, 0.1629,  ..., 0.1531, 0.1555, 0.1583]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:29,  1.30it/s]

tensor([[0.1494, 0.1552, 0.1609,  ..., 0.1499, 0.1519, 0.1509],
        [0.1484, 0.1582, 0.1608,  ..., 0.1513, 0.1555, 0.1565],
        [0.1498, 0.1600, 0.1626,  ..., 0.1528, 0.1540, 0.1576],
        ...,
        [0.1483, 0.1543, 0.1626,  ..., 0.1510, 0.1524, 0.1536],
        [0.1543, 0.1629, 0.1674,  ..., 0.1556, 0.1633, 0.1624],
        [0.1516, 0.1590, 0.1616,  ..., 0.1524, 0.1539, 0.1559]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:28,  1.29it/s]

tensor([[0.1468, 0.1570, 0.1584,  ..., 0.1493, 0.1539, 0.1558],
        [0.1552, 0.1634, 0.1695,  ..., 0.1584, 0.1626, 0.1624],
        [0.1489, 0.1587, 0.1580,  ..., 0.1512, 0.1562, 0.1560],
        ...,
        [0.1533, 0.1571, 0.1638,  ..., 0.1543, 0.1533, 0.1566],
        [0.1547, 0.1605, 0.1668,  ..., 0.1541, 0.1583, 0.1617],
        [0.1488, 0.1552, 0.1604,  ..., 0.1513, 0.1514, 0.1519]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:27,  1.33it/s]

tensor([[0.1535, 0.1646, 0.1650,  ..., 0.1561, 0.1651, 0.1607],
        [0.1556, 0.1611, 0.1680,  ..., 0.1565, 0.1562, 0.1605],
        [0.1562, 0.1616, 0.1679,  ..., 0.1548, 0.1592, 0.1613],
        ...,
        [0.1545, 0.1597, 0.1671,  ..., 0.1556, 0.1574, 0.1602],
        [0.1554, 0.1601, 0.1678,  ..., 0.1542, 0.1571, 0.1603],
        [0.1551, 0.1611, 0.1669,  ..., 0.1558, 0.1630, 0.1618]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:16<00:25,  1.36it/s]

tensor([[0.1537, 0.1607, 0.1645,  ..., 0.1539, 0.1553, 0.1569],
        [0.1549, 0.1647, 0.1654,  ..., 0.1553, 0.1585, 0.1621],
        [0.1481, 0.1567, 0.1584,  ..., 0.1493, 0.1541, 0.1562],
        ...,
        [0.1483, 0.1608, 0.1595,  ..., 0.1513, 0.1518, 0.1534],
        [0.1563, 0.1631, 0.1700,  ..., 0.1555, 0.1580, 0.1629],
        [0.1514, 0.1553, 0.1633,  ..., 0.1535, 0.1533, 0.1557]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:24,  1.38it/s]

tensor([[0.1509, 0.1590, 0.1641,  ..., 0.1523, 0.1526, 0.1561],
        [0.1501, 0.1545, 0.1589,  ..., 0.1496, 0.1492, 0.1511],
        [0.1517, 0.1606, 0.1670,  ..., 0.1524, 0.1585, 0.1582],
        ...,
        [0.1516, 0.1621, 0.1659,  ..., 0.1569, 0.1582, 0.1621],
        [0.1510, 0.1616, 0.1641,  ..., 0.1561, 0.1588, 0.1591],
        [0.1515, 0.1576, 0.1633,  ..., 0.1515, 0.1527, 0.1571]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:23,  1.39it/s]

tensor([[0.1548, 0.1618, 0.1670,  ..., 0.1562, 0.1590, 0.1594],
        [0.1563, 0.1653, 0.1727,  ..., 0.1578, 0.1620, 0.1650],
        [0.1577, 0.1652, 0.1691,  ..., 0.1588, 0.1609, 0.1607],
        ...,
        [0.1497, 0.1581, 0.1606,  ..., 0.1515, 0.1559, 0.1578],
        [0.1529, 0.1592, 0.1646,  ..., 0.1536, 0.1554, 0.1582],
        [0.1559, 0.1616, 0.1701,  ..., 0.1555, 0.1586, 0.1569]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:22,  1.43it/s]

tensor([[0.1490, 0.1563, 0.1617,  ..., 0.1507, 0.1513, 0.1539],
        [0.1538, 0.1579, 0.1679,  ..., 0.1539, 0.1557, 0.1602],
        [0.1577, 0.1655, 0.1727,  ..., 0.1571, 0.1680, 0.1664],
        ...,
        [0.1561, 0.1652, 0.1722,  ..., 0.1557, 0.1615, 0.1632],
        [0.1565, 0.1614, 0.1710,  ..., 0.1553, 0.1609, 0.1648],
        [0.1346, 0.1429, 0.1472,  ..., 0.1410, 0.1409, 0.1459]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:21,  1.45it/s]

tensor([[0.1513, 0.1578, 0.1619,  ..., 0.1531, 0.1512, 0.1548],
        [0.1527, 0.1563, 0.1639,  ..., 0.1532, 0.1549, 0.1579],
        [0.1529, 0.1566, 0.1648,  ..., 0.1528, 0.1554, 0.1555],
        ...,
        [0.1446, 0.1473, 0.1541,  ..., 0.1451, 0.1444, 0.1466],
        [0.1489, 0.1595, 0.1594,  ..., 0.1521, 0.1554, 0.1544],
        [0.1528, 0.1551, 0.1629,  ..., 0.1535, 0.1528, 0.1557]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:20,  1.48it/s]

tensor([[0.1540, 0.1632, 0.1660,  ..., 0.1569, 0.1597, 0.1617],
        [0.1513, 0.1585, 0.1628,  ..., 0.1516, 0.1547, 0.1545],
        [0.1555, 0.1653, 0.1703,  ..., 0.1585, 0.1642, 0.1659],
        ...,
        [0.1514, 0.1595, 0.1622,  ..., 0.1518, 0.1533, 0.1587],
        [0.1431, 0.1550, 0.1546,  ..., 0.1452, 0.1543, 0.1528],
        [0.1518, 0.1564, 0.1617,  ..., 0.1542, 0.1537, 0.1561]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:19,  1.49it/s]

tensor([[0.1556, 0.1625, 0.1690,  ..., 0.1542, 0.1588, 0.1633],
        [0.1459, 0.1548, 0.1561,  ..., 0.1511, 0.1493, 0.1504],
        [0.1506, 0.1605, 0.1624,  ..., 0.1516, 0.1570, 0.1549],
        ...,
        [0.1446, 0.1507, 0.1539,  ..., 0.1466, 0.1444, 0.1441],
        [0.1555, 0.1648, 0.1683,  ..., 0.1583, 0.1623, 0.1630],
        [0.1519, 0.1569, 0.1619,  ..., 0.1511, 0.1521, 0.1544]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:20<00:18,  1.50it/s]

tensor([[0.1501, 0.1626, 0.1657,  ..., 0.1571, 0.1607, 0.1636],
        [0.1524, 0.1612, 0.1638,  ..., 0.1566, 0.1562, 0.1601],
        [0.1561, 0.1619, 0.1678,  ..., 0.1577, 0.1617, 0.1624],
        ...,
        [0.1527, 0.1574, 0.1620,  ..., 0.1531, 0.1519, 0.1549],
        [0.1457, 0.1544, 0.1572,  ..., 0.1477, 0.1501, 0.1560],
        [0.1494, 0.1597, 0.1636,  ..., 0.1511, 0.1584, 0.1567]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:17,  1.51it/s]

tensor([[0.1542, 0.1602, 0.1665,  ..., 0.1559, 0.1570, 0.1608],
        [0.1565, 0.1627, 0.1697,  ..., 0.1555, 0.1629, 0.1642],
        [0.1486, 0.1526, 0.1597,  ..., 0.1508, 0.1481, 0.1527],
        ...,
        [0.1505, 0.1557, 0.1623,  ..., 0.1517, 0.1516, 0.1528],
        [0.1488, 0.1567, 0.1593,  ..., 0.1491, 0.1562, 0.1557],
        [0.1485, 0.1591, 0.1637,  ..., 0.1548, 0.1561, 0.1548]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:17,  1.53it/s]

tensor([[0.1441, 0.1525, 0.1560,  ..., 0.1465, 0.1479, 0.1489],
        [0.1509, 0.1594, 0.1629,  ..., 0.1548, 0.1579, 0.1570],
        [0.1519, 0.1629, 0.1652,  ..., 0.1575, 0.1600, 0.1588],
        ...,
        [0.1531, 0.1639, 0.1648,  ..., 0.1545, 0.1565, 0.1583],
        [0.1512, 0.1593, 0.1660,  ..., 0.1525, 0.1566, 0.1594],
        [0.1486, 0.1585, 0.1592,  ..., 0.1518, 0.1538, 0.1554]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:22<00:16,  1.52it/s]

tensor([[0.1506, 0.1609, 0.1649,  ..., 0.1521, 0.1582, 0.1592],
        [0.1498, 0.1570, 0.1613,  ..., 0.1534, 0.1542, 0.1564],
        [0.1503, 0.1583, 0.1595,  ..., 0.1483, 0.1504, 0.1559],
        ...,
        [0.1560, 0.1626, 0.1700,  ..., 0.1600, 0.1633, 0.1614],
        [0.1381, 0.1421, 0.1458,  ..., 0.1425, 0.1398, 0.1411],
        [0.1560, 0.1629, 0.1695,  ..., 0.1550, 0.1613, 0.1636]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:23<00:15,  1.52it/s]

tensor([[0.1489, 0.1564, 0.1596,  ..., 0.1501, 0.1521, 0.1572],
        [0.1531, 0.1604, 0.1655,  ..., 0.1546, 0.1570, 0.1602],
        [0.1546, 0.1582, 0.1626,  ..., 0.1528, 0.1529, 0.1581],
        ...,
        [0.1519, 0.1592, 0.1603,  ..., 0.1507, 0.1570, 0.1569],
        [0.1545, 0.1590, 0.1685,  ..., 0.1545, 0.1583, 0.1611],
        [0.1522, 0.1571, 0.1632,  ..., 0.1504, 0.1521, 0.1577]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:15,  1.48it/s]

tensor([[0.1524, 0.1632, 0.1677,  ..., 0.1547, 0.1583, 0.1610],
        [0.1481, 0.1536, 0.1612,  ..., 0.1528, 0.1533, 0.1544],
        [0.1558, 0.1617, 0.1680,  ..., 0.1549, 0.1619, 0.1621],
        ...,
        [0.1439, 0.1540, 0.1559,  ..., 0.1508, 0.1509, 0.1556],
        [0.1601, 0.1660, 0.1726,  ..., 0.1586, 0.1653, 0.1665],
        [0.1556, 0.1616, 0.1696,  ..., 0.1559, 0.1598, 0.1630]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:24<00:14,  1.47it/s]

tensor([[0.1540, 0.1625, 0.1653,  ..., 0.1560, 0.1590, 0.1632],
        [0.1567, 0.1636, 0.1702,  ..., 0.1574, 0.1608, 0.1662],
        [0.1592, 0.1671, 0.1694,  ..., 0.1587, 0.1620, 0.1643],
        ...,
        [0.1496, 0.1531, 0.1609,  ..., 0.1554, 0.1526, 0.1540],
        [0.1564, 0.1639, 0.1702,  ..., 0.1561, 0.1617, 0.1637],
        [0.1543, 0.1602, 0.1691,  ..., 0.1568, 0.1571, 0.1585]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:25<00:14,  1.49it/s]

tensor([[0.1543, 0.1586, 0.1662,  ..., 0.1544, 0.1565, 0.1576],
        [0.1558, 0.1636, 0.1679,  ..., 0.1561, 0.1649, 0.1603],
        [0.1549, 0.1588, 0.1680,  ..., 0.1549, 0.1571, 0.1606],
        ...,
        [0.1537, 0.1595, 0.1644,  ..., 0.1524, 0.1550, 0.1581],
        [0.1481, 0.1576, 0.1615,  ..., 0.1480, 0.1543, 0.1537],
        [0.1527, 0.1577, 0.1636,  ..., 0.1531, 0.1548, 0.1568]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:13,  1.51it/s]

tensor([[0.1495, 0.1578, 0.1619,  ..., 0.1518, 0.1537, 0.1555],
        [0.1528, 0.1612, 0.1637,  ..., 0.1531, 0.1598, 0.1589],
        [0.1520, 0.1589, 0.1630,  ..., 0.1550, 0.1571, 0.1619],
        ...,
        [0.1576, 0.1637, 0.1720,  ..., 0.1567, 0.1637, 0.1673],
        [0.1529, 0.1629, 0.1635,  ..., 0.1551, 0.1568, 0.1616],
        [0.1550, 0.1611, 0.1672,  ..., 0.1544, 0.1602, 0.1602]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:26<00:12,  1.51it/s]

tensor([[0.1453, 0.1559, 0.1566,  ..., 0.1481, 0.1546, 0.1549],
        [0.1556, 0.1618, 0.1710,  ..., 0.1554, 0.1609, 0.1635],
        [0.1533, 0.1607, 0.1683,  ..., 0.1533, 0.1590, 0.1601],
        ...,
        [0.1516, 0.1570, 0.1627,  ..., 0.1515, 0.1526, 0.1555],
        [0.1564, 0.1648, 0.1683,  ..., 0.1591, 0.1604, 0.1601],
        [0.1528, 0.1612, 0.1624,  ..., 0.1540, 0.1561, 0.1565]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:27<00:13,  1.33it/s]

tensor([[0.1513, 0.1551, 0.1629,  ..., 0.1533, 0.1536, 0.1561],
        [0.1498, 0.1630, 0.1612,  ..., 0.1498, 0.1572, 0.1587],
        [0.1532, 0.1617, 0.1672,  ..., 0.1556, 0.1611, 0.1601],
        ...,
        [0.1505, 0.1597, 0.1608,  ..., 0.1530, 0.1574, 0.1602],
        [0.1531, 0.1592, 0.1644,  ..., 0.1541, 0.1579, 0.1580],
        [0.1530, 0.1613, 0.1657,  ..., 0.1566, 0.1576, 0.1604]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:29<00:17,  1.01s/it]

tensor([[0.1485, 0.1576, 0.1598,  ..., 0.1492, 0.1500, 0.1558],
        [0.1517, 0.1599, 0.1644,  ..., 0.1557, 0.1538, 0.1607],
        [0.1489, 0.1594, 0.1610,  ..., 0.1515, 0.1558, 0.1578],
        ...,
        [0.1525, 0.1603, 0.1650,  ..., 0.1549, 0.1583, 0.1589],
        [0.1485, 0.1579, 0.1594,  ..., 0.1532, 0.1547, 0.1565],
        [0.1518, 0.1576, 0.1631,  ..., 0.1527, 0.1560, 0.1577]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:30<00:18,  1.19s/it]

tensor([[0.1506, 0.1586, 0.1640,  ..., 0.1515, 0.1573, 0.1602],
        [0.1472, 0.1589, 0.1607,  ..., 0.1533, 0.1562, 0.1591],
        [0.1518, 0.1589, 0.1644,  ..., 0.1544, 0.1570, 0.1586],
        ...,
        [0.1494, 0.1615, 0.1601,  ..., 0.1549, 0.1573, 0.1558],
        [0.1479, 0.1614, 0.1636,  ..., 0.1508, 0.1560, 0.1566],
        [0.1490, 0.1576, 0.1588,  ..., 0.1536, 0.1502, 0.1536]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:32<00:17,  1.18s/it]

tensor([[0.1551, 0.1615, 0.1682,  ..., 0.1541, 0.1583, 0.1616],
        [0.1530, 0.1619, 0.1654,  ..., 0.1531, 0.1569, 0.1629],
        [0.1507, 0.1587, 0.1625,  ..., 0.1525, 0.1553, 0.1593],
        ...,
        [0.1573, 0.1644, 0.1696,  ..., 0.1584, 0.1635, 0.1655],
        [0.1558, 0.1614, 0.1698,  ..., 0.1542, 0.1570, 0.1582],
        [0.1512, 0.1612, 0.1657,  ..., 0.1550, 0.1602, 0.1620]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:32<00:15,  1.11s/it]

tensor([[0.1501, 0.1573, 0.1606,  ..., 0.1554, 0.1539, 0.1555],
        [0.1521, 0.1580, 0.1652,  ..., 0.1574, 0.1562, 0.1577],
        [0.1512, 0.1570, 0.1583,  ..., 0.1484, 0.1532, 0.1566],
        ...,
        [0.1553, 0.1645, 0.1680,  ..., 0.1546, 0.1605, 0.1658],
        [0.1573, 0.1632, 0.1694,  ..., 0.1546, 0.1616, 0.1637],
        [0.1529, 0.1598, 0.1628,  ..., 0.1519, 0.1551, 0.1570]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:34<00:14,  1.10s/it]

tensor([[0.1540, 0.1606, 0.1646,  ..., 0.1547, 0.1567, 0.1580],
        [0.1508, 0.1600, 0.1622,  ..., 0.1529, 0.1553, 0.1552],
        [0.1526, 0.1598, 0.1647,  ..., 0.1505, 0.1519, 0.1604],
        ...,
        [0.1487, 0.1563, 0.1588,  ..., 0.1471, 0.1482, 0.1490],
        [0.1490, 0.1623, 0.1634,  ..., 0.1554, 0.1575, 0.1592],
        [0.1495, 0.1601, 0.1632,  ..., 0.1525, 0.1558, 0.1575]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:35<00:13,  1.09s/it]

tensor([[0.1533, 0.1594, 0.1648,  ..., 0.1539, 0.1579, 0.1579],
        [0.1543, 0.1608, 0.1681,  ..., 0.1537, 0.1581, 0.1607],
        [0.1508, 0.1569, 0.1628,  ..., 0.1532, 0.1545, 0.1572],
        ...,
        [0.1517, 0.1584, 0.1640,  ..., 0.1524, 0.1545, 0.1577],
        [0.1567, 0.1628, 0.1701,  ..., 0.1584, 0.1615, 0.1639],
        [0.1608, 0.1635, 0.1724,  ..., 0.1563, 0.1660, 0.1666]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:36<00:11,  1.05s/it]

tensor([[0.1505, 0.1589, 0.1630,  ..., 0.1522, 0.1544, 0.1567],
        [0.1534, 0.1621, 0.1645,  ..., 0.1559, 0.1576, 0.1605],
        [0.1502, 0.1597, 0.1623,  ..., 0.1535, 0.1557, 0.1568],
        ...,
        [0.1582, 0.1634, 0.1691,  ..., 0.1555, 0.1605, 0.1618],
        [0.1541, 0.1635, 0.1680,  ..., 0.1566, 0.1619, 0.1644],
        [0.1572, 0.1663, 0.1703,  ..., 0.1600, 0.1666, 0.1638]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:36<00:09,  1.04it/s]

tensor([[0.1522, 0.1650, 0.1645,  ..., 0.1537, 0.1580, 0.1607],
        [0.1542, 0.1643, 0.1690,  ..., 0.1546, 0.1606, 0.1616],
        [0.1528, 0.1619, 0.1662,  ..., 0.1544, 0.1578, 0.1610],
        ...,
        [0.1561, 0.1628, 0.1699,  ..., 0.1561, 0.1606, 0.1627],
        [0.1494, 0.1580, 0.1643,  ..., 0.1511, 0.1526, 0.1582],
        [0.1531, 0.1638, 0.1691,  ..., 0.1544, 0.1618, 0.1628]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:37<00:08,  1.10it/s]

tensor([[0.1527, 0.1639, 0.1689,  ..., 0.1531, 0.1629, 0.1634],
        [0.1523, 0.1570, 0.1621,  ..., 0.1523, 0.1542, 0.1571],
        [0.1493, 0.1574, 0.1603,  ..., 0.1557, 0.1548, 0.1551],
        ...,
        [0.1512, 0.1584, 0.1646,  ..., 0.1547, 0.1547, 0.1561],
        [0.1517, 0.1582, 0.1632,  ..., 0.1532, 0.1548, 0.1572],
        [0.1509, 0.1621, 0.1669,  ..., 0.1555, 0.1615, 0.1607]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:38<00:06,  1.15it/s]

tensor([[0.1543, 0.1604, 0.1675,  ..., 0.1527, 0.1560, 0.1620],
        [0.1588, 0.1621, 0.1681,  ..., 0.1599, 0.1616, 0.1613],
        [0.1512, 0.1602, 0.1645,  ..., 0.1536, 0.1566, 0.1603],
        ...,
        [0.1556, 0.1612, 0.1662,  ..., 0.1579, 0.1577, 0.1608],
        [0.1516, 0.1633, 0.1662,  ..., 0.1562, 0.1579, 0.1576],
        [0.1458, 0.1475, 0.1553,  ..., 0.1440, 0.1437, 0.1468]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:39<00:05,  1.19it/s]

tensor([[0.1518, 0.1559, 0.1633,  ..., 0.1546, 0.1552, 0.1564],
        [0.1513, 0.1610, 0.1635,  ..., 0.1558, 0.1585, 0.1606],
        [0.1531, 0.1640, 0.1644,  ..., 0.1557, 0.1571, 0.1614],
        ...,
        [0.1506, 0.1595, 0.1614,  ..., 0.1537, 0.1554, 0.1564],
        [0.1545, 0.1601, 0.1690,  ..., 0.1530, 0.1604, 0.1612],
        [0.1457, 0.1537, 0.1576,  ..., 0.1494, 0.1497, 0.1495]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:39<00:04,  1.22it/s]

tensor([[0.1559, 0.1622, 0.1694,  ..., 0.1553, 0.1604, 0.1624],
        [0.1568, 0.1611, 0.1675,  ..., 0.1577, 0.1613, 0.1601],
        [0.1523, 0.1582, 0.1629,  ..., 0.1525, 0.1538, 0.1561],
        ...,
        [0.1558, 0.1626, 0.1701,  ..., 0.1568, 0.1578, 0.1627],
        [0.1497, 0.1595, 0.1625,  ..., 0.1553, 0.1568, 0.1568],
        [0.1536, 0.1665, 0.1669,  ..., 0.1579, 0.1600, 0.1620]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:40<00:03,  1.28it/s]

tensor([[0.1497, 0.1608, 0.1664,  ..., 0.1528, 0.1581, 0.1570],
        [0.1509, 0.1593, 0.1611,  ..., 0.1516, 0.1539, 0.1572],
        [0.1536, 0.1586, 0.1669,  ..., 0.1536, 0.1564, 0.1592],
        ...,
        [0.1560, 0.1608, 0.1686,  ..., 0.1575, 0.1628, 0.1593],
        [0.1494, 0.1563, 0.1621,  ..., 0.1527, 0.1525, 0.1546],
        [0.1524, 0.1656, 0.1669,  ..., 0.1556, 0.1643, 0.1630]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:41<00:03,  1.33it/s]

tensor([[0.1529, 0.1624, 0.1654,  ..., 0.1573, 0.1586, 0.1624],
        [0.1482, 0.1551, 0.1595,  ..., 0.1533, 0.1528, 0.1522],
        [0.1488, 0.1585, 0.1623,  ..., 0.1517, 0.1571, 0.1576],
        ...,
        [0.1375, 0.1424, 0.1466,  ..., 0.1415, 0.1409, 0.1451],
        [0.1523, 0.1634, 0.1666,  ..., 0.1557, 0.1632, 0.1630],
        [0.1455, 0.1599, 0.1603,  ..., 0.1518, 0.1549, 0.1545]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:41<00:02,  1.37it/s]

tensor([[0.1472, 0.1569, 0.1630,  ..., 0.1536, 0.1513, 0.1554],
        [0.1553, 0.1645, 0.1674,  ..., 0.1559, 0.1633, 0.1619],
        [0.1553, 0.1623, 0.1648,  ..., 0.1548, 0.1638, 0.1598],
        ...,
        [0.1499, 0.1583, 0.1610,  ..., 0.1510, 0.1566, 0.1571],
        [0.1571, 0.1620, 0.1710,  ..., 0.1566, 0.1577, 0.1627],
        [0.1525, 0.1593, 0.1637,  ..., 0.1522, 0.1549, 0.1575]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:42<00:01,  1.39it/s]

tensor([[0.1536, 0.1650, 0.1670,  ..., 0.1560, 0.1621, 0.1602],
        [0.1483, 0.1560, 0.1630,  ..., 0.1516, 0.1546, 0.1574],
        [0.1501, 0.1613, 0.1650,  ..., 0.1548, 0.1597, 0.1617],
        ...,
        [0.1543, 0.1616, 0.1690,  ..., 0.1545, 0.1599, 0.1620],
        [0.1575, 0.1671, 0.1710,  ..., 0.1583, 0.1640, 0.1663],
        [0.1504, 0.1592, 0.1624,  ..., 0.1532, 0.1561, 0.1560]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:43<00:00,  1.40it/s]

tensor([[0.1528, 0.1600, 0.1678,  ..., 0.1522, 0.1558, 0.1607],
        [0.1555, 0.1630, 0.1646,  ..., 0.1565, 0.1628, 0.1614],
        [0.1554, 0.1659, 0.1703,  ..., 0.1545, 0.1603, 0.1631],
        ...,
        [0.1495, 0.1579, 0.1615,  ..., 0.1498, 0.1524, 0.1588],
        [0.1554, 0.1613, 0.1672,  ..., 0.1522, 0.1589, 0.1633],
        [0.1517, 0.1561, 0.1630,  ..., 0.1520, 0.1530, 0.1575]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:44<00:00,  1.29it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.29it/s]

tensor([[0.1561, 0.1625, 0.1676,  ..., 0.1571, 0.1601, 0.1608],
        [0.1514, 0.1543, 0.1630,  ..., 0.1520, 0.1521, 0.1520],
        [0.1551, 0.1617, 0.1686,  ..., 0.1543, 0.1576, 0.1615],
        ...,
        [0.1494, 0.1564, 0.1619,  ..., 0.1529, 0.1536, 0.1559],
        [0.1542, 0.1620, 0.1681,  ..., 0.1561, 0.1614, 0.1625],
        [0.1494, 0.1549, 0.1585,  ..., 0.1496, 0.1514, 0.1536]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.25it/s]

tensor([[0.1397, 0.1437, 0.1494,  ..., 0.1436, 0.1405, 0.1408],
        [0.1536, 0.1609, 0.1657,  ..., 0.1558, 0.1573, 0.1591],
        [0.1543, 0.1630, 0.1677,  ..., 0.1603, 0.1633, 0.1602],
        ...,
        [0.1543, 0.1624, 0.1677,  ..., 0.1538, 0.1596, 0.1623],
        [0.1571, 0.1607, 0.1692,  ..., 0.1558, 0.1610, 0.1634],
        [0.1583, 0.1683, 0.1716,  ..., 0.1585, 0.1640, 0.1640]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.12it/s]

tensor([[0.1523, 0.1586, 0.1644,  ..., 0.1526, 0.1559, 0.1558],
        [0.1475, 0.1569, 0.1580,  ..., 0.1509, 0.1533, 0.1527],
        [0.1517, 0.1632, 0.1643,  ..., 0.1561, 0.1574, 0.1594],
        ...,
        [0.1520, 0.1623, 0.1656,  ..., 0.1537, 0.1614, 0.1632],
        [0.1477, 0.1543, 0.1585,  ..., 0.1499, 0.1503, 0.1503],
        [0.1552, 0.1609, 0.1659,  ..., 0.1547, 0.1609, 0.1619]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.04it/s]

tensor([[0.1536, 0.1650, 0.1696,  ..., 0.1581, 0.1651, 0.1600],
        [0.1512, 0.1579, 0.1628,  ..., 0.1524, 0.1559, 0.1559],
        [0.1497, 0.1571, 0.1617,  ..., 0.1524, 0.1539, 0.1569],
        ...,
        [0.1521, 0.1603, 0.1631,  ..., 0.1537, 0.1601, 0.1578],
        [0.1451, 0.1523, 0.1566,  ..., 0.1489, 0.1474, 0.1491],
        [0.1492, 0.1611, 0.1596,  ..., 0.1495, 0.1547, 0.1552]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:01,  2.93it/s]

tensor([[0.1517, 0.1625, 0.1652,  ..., 0.1545, 0.1588, 0.1601],
        [0.1512, 0.1589, 0.1615,  ..., 0.1565, 0.1588, 0.1587],
        [0.1533, 0.1580, 0.1638,  ..., 0.1534, 0.1533, 0.1571],
        ...,
        [0.1535, 0.1636, 0.1687,  ..., 0.1549, 0.1626, 0.1643],
        [0.1509, 0.1575, 0.1647,  ..., 0.1539, 0.1547, 0.1595],
        [0.1505, 0.1554, 0.1590,  ..., 0.1515, 0.1548, 0.1564]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:02<00:00,  2.92it/s]

tensor([[0.1530, 0.1625, 0.1643,  ..., 0.1518, 0.1558, 0.1599],
        [0.1565, 0.1620, 0.1702,  ..., 0.1553, 0.1617, 0.1651],
        [0.1514, 0.1581, 0.1626,  ..., 0.1540, 0.1548, 0.1565],
        ...,
        [0.1533, 0.1569, 0.1639,  ..., 0.1559, 0.1566, 0.1585],
        [0.1507, 0.1597, 0.1637,  ..., 0.1519, 0.1576, 0.1549],
        [0.1566, 0.1638, 0.1713,  ..., 0.1540, 0.1631, 0.1638]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  2.80it/s]

tensor([[0.1447, 0.1528, 0.1555,  ..., 0.1463, 0.1500, 0.1491],
        [0.1586, 0.1653, 0.1703,  ..., 0.1565, 0.1647, 0.1656],
        [0.1576, 0.1664, 0.1691,  ..., 0.1569, 0.1641, 0.1639],
        ...,
        [0.1489, 0.1574, 0.1614,  ..., 0.1534, 0.1545, 0.1563],
        [0.1493, 0.1580, 0.1615,  ..., 0.1521, 0.1519, 0.1547],
        [0.1479, 0.1578, 0.1614,  ..., 0.1517, 0.1547, 0.1561]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  2.89it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1463, 0.1504, 0.1538,  ..., 0.1461, 0.1456, 0.1470],
        [0.1577, 0.1636, 0.1712,  ..., 0.1561, 0.1627, 0.1671],
        [0.1508, 0.1536, 0.1620,  ..., 0.1518, 0.1513, 0.1542],
        ...,
        [0.1514, 0.1595, 0.1664,  ..., 0.1578, 0.1583, 0.1579],
        [0.1509, 0.1574, 0.1621,  ..., 0.1534, 0.1540, 0.1564],
        [0.1504, 0.1578, 0.1603,  ..., 0.1514, 0.1556, 0.1577]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 17, train_loss: -0.8417783975601196, valid_loss: -0.813694953918457
Parameter containing:
tensor([[ 1.0000,  1.0000,  1.0000,  1.0000,  1.0000,  0.9999,  1.0002,  0.9999,
          0.9999,  1.0005, -0.5000, -0.5001, -0.5002, -0.5000, -0.5001, -0.5000,
         -0.5000, -0.5002, -0.5001, -0.5002],
      

  2%|▏         | 1/57 [00:00<00:44,  1.27it/s]

tensor([[0.1522, 0.1595, 0.1646,  ..., 0.1563, 0.1559, 0.1597],
        [0.1515, 0.1652, 0.1623,  ..., 0.1539, 0.1593, 0.1580],
        [0.1522, 0.1618, 0.1665,  ..., 0.1562, 0.1602, 0.1589],
        ...,
        [0.1536, 0.1630, 0.1676,  ..., 0.1563, 0.1623, 0.1627],
        [0.1460, 0.1555, 0.1577,  ..., 0.1489, 0.1530, 0.1557],
        [0.1569, 0.1628, 0.1713,  ..., 0.1558, 0.1620, 0.1641]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:42,  1.28it/s]

tensor([[0.1593, 0.1679, 0.1750,  ..., 0.1593, 0.1648, 0.1672],
        [0.1495, 0.1588, 0.1615,  ..., 0.1506, 0.1567, 0.1571],
        [0.1490, 0.1554, 0.1621,  ..., 0.1529, 0.1565, 0.1571],
        ...,
        [0.1490, 0.1556, 0.1600,  ..., 0.1521, 0.1534, 0.1576],
        [0.1492, 0.1567, 0.1609,  ..., 0.1506, 0.1557, 0.1577],
        [0.1468, 0.1562, 0.1580,  ..., 0.1500, 0.1517, 0.1511]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:41,  1.30it/s]

tensor([[0.1570, 0.1627, 0.1649,  ..., 0.1573, 0.1592, 0.1554],
        [0.1520, 0.1620, 0.1665,  ..., 0.1548, 0.1604, 0.1585],
        [0.1464, 0.1591, 0.1588,  ..., 0.1507, 0.1572, 0.1567],
        ...,
        [0.1544, 0.1647, 0.1679,  ..., 0.1553, 0.1636, 0.1627],
        [0.1512, 0.1581, 0.1623,  ..., 0.1534, 0.1560, 0.1565],
        [0.1458, 0.1560, 0.1564,  ..., 0.1487, 0.1541, 0.1540]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:03<00:39,  1.33it/s]

tensor([[0.1516, 0.1579, 0.1609,  ..., 0.1565, 0.1558, 0.1528],
        [0.1563, 0.1634, 0.1707,  ..., 0.1545, 0.1621, 0.1651],
        [0.1480, 0.1574, 0.1592,  ..., 0.1524, 0.1515, 0.1523],
        ...,
        [0.1518, 0.1580, 0.1654,  ..., 0.1546, 0.1549, 0.1581],
        [0.1503, 0.1614, 0.1642,  ..., 0.1538, 0.1570, 0.1570],
        [0.1514, 0.1610, 0.1661,  ..., 0.1563, 0.1583, 0.1611]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:38,  1.34it/s]

tensor([[0.1548, 0.1616, 0.1665,  ..., 0.1538, 0.1592, 0.1631],
        [0.1528, 0.1577, 0.1648,  ..., 0.1545, 0.1554, 0.1593],
        [0.1505, 0.1561, 0.1627,  ..., 0.1490, 0.1517, 0.1538],
        ...,
        [0.1431, 0.1551, 0.1548,  ..., 0.1460, 0.1543, 0.1534],
        [0.1533, 0.1610, 0.1643,  ..., 0.1543, 0.1573, 0.1608],
        [0.1571, 0.1655, 0.1711,  ..., 0.1581, 0.1620, 0.1650]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:36,  1.39it/s]

tensor([[0.1551, 0.1615, 0.1678,  ..., 0.1538, 0.1585, 0.1621],
        [0.1525, 0.1595, 0.1616,  ..., 0.1535, 0.1547, 0.1563],
        [0.1517, 0.1587, 0.1644,  ..., 0.1515, 0.1552, 0.1586],
        ...,
        [0.1474, 0.1542, 0.1609,  ..., 0.1508, 0.1534, 0.1555],
        [0.1508, 0.1597, 0.1682,  ..., 0.1564, 0.1569, 0.1584],
        [0.1535, 0.1569, 0.1639,  ..., 0.1532, 0.1545, 0.1562]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:05<00:34,  1.44it/s]

tensor([[0.1488, 0.1641, 0.1633,  ..., 0.1558, 0.1595, 0.1551],
        [0.1537, 0.1584, 0.1646,  ..., 0.1525, 0.1537, 0.1564],
        [0.1515, 0.1567, 0.1628,  ..., 0.1530, 0.1565, 0.1554],
        ...,
        [0.1522, 0.1623, 0.1668,  ..., 0.1557, 0.1597, 0.1625],
        [0.1530, 0.1641, 0.1643,  ..., 0.1542, 0.1616, 0.1604],
        [0.1472, 0.1546, 0.1575,  ..., 0.1529, 0.1524, 0.1525]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:33,  1.46it/s]

tensor([[0.1509, 0.1576, 0.1628,  ..., 0.1530, 0.1528, 0.1565],
        [0.1573, 0.1639, 0.1722,  ..., 0.1574, 0.1661, 0.1669],
        [0.1502, 0.1560, 0.1599,  ..., 0.1541, 0.1541, 0.1537],
        ...,
        [0.1475, 0.1543, 0.1577,  ..., 0.1512, 0.1517, 0.1545],
        [0.1517, 0.1586, 0.1636,  ..., 0.1523, 0.1547, 0.1556],
        [0.1560, 0.1611, 0.1680,  ..., 0.1554, 0.1577, 0.1628]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:32,  1.46it/s]

tensor([[0.1504, 0.1583, 0.1618,  ..., 0.1515, 0.1543, 0.1556],
        [0.1504, 0.1622, 0.1660,  ..., 0.1557, 0.1590, 0.1602],
        [0.1420, 0.1537, 0.1539,  ..., 0.1441, 0.1528, 0.1518],
        ...,
        [0.1543, 0.1606, 0.1690,  ..., 0.1534, 0.1591, 0.1618],
        [0.1492, 0.1573, 0.1608,  ..., 0.1504, 0.1568, 0.1560],
        [0.1553, 0.1594, 0.1679,  ..., 0.1580, 0.1563, 0.1583]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:07<00:32,  1.45it/s]

tensor([[0.1540, 0.1590, 0.1655,  ..., 0.1545, 0.1588, 0.1592],
        [0.1564, 0.1621, 0.1699,  ..., 0.1538, 0.1594, 0.1635],
        [0.1525, 0.1556, 0.1656,  ..., 0.1569, 0.1542, 0.1582],
        ...,
        [0.1537, 0.1619, 0.1679,  ..., 0.1548, 0.1564, 0.1619],
        [0.1505, 0.1593, 0.1618,  ..., 0.1538, 0.1567, 0.1567],
        [0.1554, 0.1613, 0.1678,  ..., 0.1571, 0.1602, 0.1611]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:32,  1.42it/s]

tensor([[0.1562, 0.1618, 0.1686,  ..., 0.1553, 0.1582, 0.1612],
        [0.1530, 0.1598, 0.1648,  ..., 0.1539, 0.1585, 0.1561],
        [0.1483, 0.1596, 0.1596,  ..., 0.1536, 0.1568, 0.1561],
        ...,
        [0.1474, 0.1537, 0.1580,  ..., 0.1503, 0.1503, 0.1539],
        [0.1495, 0.1586, 0.1602,  ..., 0.1545, 0.1522, 0.1560],
        [0.1530, 0.1596, 0.1650,  ..., 0.1553, 0.1554, 0.1601]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:32,  1.37it/s]

tensor([[0.1463, 0.1576, 0.1577,  ..., 0.1496, 0.1558, 0.1575],
        [0.1494, 0.1577, 0.1610,  ..., 0.1515, 0.1523, 0.1545],
        [0.1453, 0.1503, 0.1581,  ..., 0.1466, 0.1481, 0.1499],
        ...,
        [0.1457, 0.1564, 0.1580,  ..., 0.1501, 0.1544, 0.1545],
        [0.1530, 0.1581, 0.1639,  ..., 0.1525, 0.1568, 0.1552],
        [0.1541, 0.1626, 0.1678,  ..., 0.1561, 0.1586, 0.1646]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:32,  1.35it/s]

tensor([[0.1496, 0.1582, 0.1609,  ..., 0.1535, 0.1531, 0.1529],
        [0.1436, 0.1463, 0.1536,  ..., 0.1429, 0.1437, 0.1470],
        [0.1523, 0.1581, 0.1629,  ..., 0.1523, 0.1537, 0.1566],
        ...,
        [0.1476, 0.1586, 0.1616,  ..., 0.1521, 0.1559, 0.1600],
        [0.1470, 0.1541, 0.1578,  ..., 0.1492, 0.1512, 0.1509],
        [0.1508, 0.1624, 0.1612,  ..., 0.1524, 0.1584, 0.1550]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:31,  1.36it/s]

tensor([[0.1487, 0.1593, 0.1606,  ..., 0.1532, 0.1537, 0.1584],
        [0.1548, 0.1599, 0.1678,  ..., 0.1531, 0.1580, 0.1609],
        [0.1544, 0.1616, 0.1686,  ..., 0.1543, 0.1596, 0.1619],
        ...,
        [0.1524, 0.1567, 0.1633,  ..., 0.1533, 0.1522, 0.1566],
        [0.1509, 0.1560, 0.1602,  ..., 0.1528, 0.1522, 0.1551],
        [0.1550, 0.1590, 0.1662,  ..., 0.1548, 0.1574, 0.1616]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:31,  1.35it/s]

tensor([[0.1539, 0.1600, 0.1656,  ..., 0.1543, 0.1564, 0.1601],
        [0.1470, 0.1591, 0.1591,  ..., 0.1522, 0.1561, 0.1561],
        [0.1556, 0.1622, 0.1687,  ..., 0.1532, 0.1584, 0.1623],
        ...,
        [0.1523, 0.1578, 0.1618,  ..., 0.1529, 0.1524, 0.1563],
        [0.1515, 0.1578, 0.1628,  ..., 0.1526, 0.1538, 0.1546],
        [0.1486, 0.1565, 0.1630,  ..., 0.1514, 0.1557, 0.1573]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:30,  1.36it/s]

tensor([[0.1517, 0.1594, 0.1629,  ..., 0.1532, 0.1561, 0.1580],
        [0.1511, 0.1574, 0.1604,  ..., 0.1537, 0.1559, 0.1578],
        [0.1547, 0.1610, 0.1694,  ..., 0.1553, 0.1590, 0.1626],
        ...,
        [0.1531, 0.1581, 0.1650,  ..., 0.1516, 0.1564, 0.1577],
        [0.1488, 0.1577, 0.1597,  ..., 0.1504, 0.1561, 0.1565],
        [0.1516, 0.1570, 0.1646,  ..., 0.1532, 0.1548, 0.1581]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:28,  1.38it/s]

tensor([[0.1540, 0.1621, 0.1646,  ..., 0.1556, 0.1583, 0.1604],
        [0.1558, 0.1644, 0.1677,  ..., 0.1591, 0.1626, 0.1637],
        [0.1494, 0.1579, 0.1609,  ..., 0.1511, 0.1536, 0.1559],
        ...,
        [0.1538, 0.1647, 0.1683,  ..., 0.1577, 0.1637, 0.1632],
        [0.1513, 0.1617, 0.1640,  ..., 0.1570, 0.1605, 0.1591],
        [0.1415, 0.1462, 0.1522,  ..., 0.1470, 0.1463, 0.1454]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:13<00:29,  1.34it/s]

tensor([[0.1528, 0.1589, 0.1633,  ..., 0.1540, 0.1560, 0.1594],
        [0.1476, 0.1543, 0.1588,  ..., 0.1518, 0.1528, 0.1541],
        [0.1510, 0.1574, 0.1643,  ..., 0.1523, 0.1551, 0.1549],
        ...,
        [0.1489, 0.1588, 0.1609,  ..., 0.1530, 0.1556, 0.1570],
        [0.1479, 0.1553, 0.1580,  ..., 0.1516, 0.1528, 0.1550],
        [0.1513, 0.1606, 0.1632,  ..., 0.1577, 0.1573, 0.1583]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:28,  1.33it/s]

tensor([[0.1473, 0.1582, 0.1552,  ..., 0.1464, 0.1516, 0.1508],
        [0.1513, 0.1605, 0.1653,  ..., 0.1554, 0.1570, 0.1580],
        [0.1517, 0.1635, 0.1661,  ..., 0.1556, 0.1602, 0.1616],
        ...,
        [0.1539, 0.1593, 0.1666,  ..., 0.1532, 0.1562, 0.1595],
        [0.1491, 0.1580, 0.1609,  ..., 0.1516, 0.1532, 0.1546],
        [0.1555, 0.1635, 0.1675,  ..., 0.1557, 0.1646, 0.1598]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:26,  1.38it/s]

tensor([[0.1505, 0.1552, 0.1611,  ..., 0.1526, 0.1536, 0.1556],
        [0.1545, 0.1643, 0.1677,  ..., 0.1581, 0.1623, 0.1629],
        [0.1514, 0.1586, 0.1639,  ..., 0.1542, 0.1566, 0.1581],
        ...,
        [0.1486, 0.1554, 0.1584,  ..., 0.1494, 0.1532, 0.1526],
        [0.1523, 0.1600, 0.1630,  ..., 0.1553, 0.1557, 0.1577],
        [0.1521, 0.1657, 0.1664,  ..., 0.1556, 0.1581, 0.1605]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:25,  1.42it/s]

tensor([[0.1537, 0.1592, 0.1669,  ..., 0.1555, 0.1573, 0.1631],
        [0.1550, 0.1659, 0.1685,  ..., 0.1569, 0.1677, 0.1592],
        [0.1496, 0.1585, 0.1612,  ..., 0.1536, 0.1530, 0.1554],
        ...,
        [0.1549, 0.1599, 0.1662,  ..., 0.1534, 0.1587, 0.1592],
        [0.1527, 0.1651, 0.1667,  ..., 0.1581, 0.1631, 0.1602],
        [0.1444, 0.1514, 0.1540,  ..., 0.1478, 0.1478, 0.1504]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:24,  1.44it/s]

tensor([[0.1527, 0.1609, 0.1654,  ..., 0.1512, 0.1578, 0.1612],
        [0.1488, 0.1547, 0.1599,  ..., 0.1501, 0.1502, 0.1530],
        [0.1543, 0.1623, 0.1663,  ..., 0.1542, 0.1603, 0.1636],
        ...,
        [0.1536, 0.1626, 0.1671,  ..., 0.1533, 0.1594, 0.1605],
        [0.1550, 0.1620, 0.1691,  ..., 0.1554, 0.1612, 0.1637],
        [0.1536, 0.1581, 0.1660,  ..., 0.1526, 0.1568, 0.1615]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:23,  1.44it/s]

tensor([[0.1483, 0.1584, 0.1586,  ..., 0.1514, 0.1556, 0.1568],
        [0.1519, 0.1592, 0.1636,  ..., 0.1565, 0.1565, 0.1572],
        [0.1533, 0.1608, 0.1659,  ..., 0.1588, 0.1587, 0.1607],
        ...,
        [0.1530, 0.1650, 0.1645,  ..., 0.1540, 0.1598, 0.1610],
        [0.1510, 0.1605, 0.1619,  ..., 0.1532, 0.1579, 0.1600],
        [0.1512, 0.1636, 0.1635,  ..., 0.1541, 0.1594, 0.1607]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:23,  1.38it/s]

tensor([[0.1545, 0.1616, 0.1680,  ..., 0.1538, 0.1584, 0.1619],
        [0.1465, 0.1548, 0.1568,  ..., 0.1520, 0.1538, 0.1551],
        [0.1481, 0.1581, 0.1591,  ..., 0.1510, 0.1534, 0.1560],
        ...,
        [0.1526, 0.1602, 0.1661,  ..., 0.1521, 0.1592, 0.1599],
        [0.1480, 0.1566, 0.1615,  ..., 0.1561, 0.1528, 0.1567],
        [0.1540, 0.1605, 0.1679,  ..., 0.1517, 0.1575, 0.1606]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:24,  1.33it/s]

tensor([[0.1496, 0.1564, 0.1618,  ..., 0.1548, 0.1530, 0.1535],
        [0.1549, 0.1615, 0.1673,  ..., 0.1562, 0.1607, 0.1621],
        [0.1545, 0.1649, 0.1712,  ..., 0.1571, 0.1618, 0.1666],
        ...,
        [0.1519, 0.1622, 0.1676,  ..., 0.1572, 0.1563, 0.1601],
        [0.1592, 0.1649, 0.1712,  ..., 0.1556, 0.1612, 0.1641],
        [0.1459, 0.1505, 0.1562,  ..., 0.1449, 0.1470, 0.1507]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:18<00:23,  1.34it/s]

tensor([[0.1508, 0.1578, 0.1606,  ..., 0.1517, 0.1548, 0.1550],
        [0.1567, 0.1636, 0.1700,  ..., 0.1568, 0.1619, 0.1610],
        [0.1551, 0.1556, 0.1671,  ..., 0.1521, 0.1569, 0.1573],
        ...,
        [0.1503, 0.1618, 0.1610,  ..., 0.1553, 0.1599, 0.1562],
        [0.1470, 0.1589, 0.1584,  ..., 0.1507, 0.1557, 0.1575],
        [0.1518, 0.1617, 0.1668,  ..., 0.1541, 0.1576, 0.1604]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:21,  1.38it/s]

tensor([[0.1495, 0.1587, 0.1607,  ..., 0.1523, 0.1535, 0.1551],
        [0.1511, 0.1561, 0.1633,  ..., 0.1532, 0.1540, 0.1564],
        [0.1526, 0.1616, 0.1628,  ..., 0.1577, 0.1601, 0.1571],
        ...,
        [0.1401, 0.1447, 0.1487,  ..., 0.1445, 0.1429, 0.1450],
        [0.1534, 0.1606, 0.1674,  ..., 0.1529, 0.1577, 0.1594],
        [0.1501, 0.1553, 0.1626,  ..., 0.1490, 0.1518, 0.1538]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:21,  1.32it/s]

tensor([[0.1476, 0.1579, 0.1588,  ..., 0.1506, 0.1552, 0.1561],
        [0.1571, 0.1652, 0.1721,  ..., 0.1568, 0.1675, 0.1659],
        [0.1549, 0.1614, 0.1678,  ..., 0.1531, 0.1584, 0.1608],
        ...,
        [0.1510, 0.1588, 0.1624,  ..., 0.1534, 0.1556, 0.1583],
        [0.1500, 0.1558, 0.1631,  ..., 0.1506, 0.1538, 0.1539],
        [0.1520, 0.1603, 0.1649,  ..., 0.1559, 0.1557, 0.1572]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:21<00:21,  1.28it/s]

tensor([[0.1505, 0.1540, 0.1620,  ..., 0.1531, 0.1536, 0.1554],
        [0.1523, 0.1590, 0.1644,  ..., 0.1514, 0.1566, 0.1576],
        [0.1549, 0.1663, 0.1669,  ..., 0.1557, 0.1616, 0.1626],
        ...,
        [0.1469, 0.1540, 0.1585,  ..., 0.1503, 0.1483, 0.1484],
        [0.1519, 0.1570, 0.1626,  ..., 0.1528, 0.1547, 0.1566],
        [0.1443, 0.1560, 0.1566,  ..., 0.1485, 0.1545, 0.1552]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:22<00:21,  1.27it/s]

tensor([[0.1481, 0.1604, 0.1600,  ..., 0.1522, 0.1578, 0.1570],
        [0.1534, 0.1588, 0.1643,  ..., 0.1526, 0.1556, 0.1575],
        [0.1489, 0.1576, 0.1594,  ..., 0.1512, 0.1512, 0.1554],
        ...,
        [0.1505, 0.1555, 0.1620,  ..., 0.1520, 0.1527, 0.1543],
        [0.1536, 0.1594, 0.1646,  ..., 0.1536, 0.1561, 0.1582],
        [0.1540, 0.1573, 0.1656,  ..., 0.1518, 0.1557, 0.1578]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:20,  1.29it/s]

tensor([[0.1452, 0.1547, 0.1576,  ..., 0.1514, 0.1529, 0.1503],
        [0.1524, 0.1643, 0.1686,  ..., 0.1559, 0.1622, 0.1619],
        [0.1539, 0.1616, 0.1669,  ..., 0.1536, 0.1607, 0.1592],
        ...,
        [0.1452, 0.1509, 0.1543,  ..., 0.1449, 0.1476, 0.1496],
        [0.1521, 0.1605, 0.1682,  ..., 0.1547, 0.1591, 0.1638],
        [0.1468, 0.1542, 0.1574,  ..., 0.1507, 0.1539, 0.1525]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:23<00:19,  1.29it/s]

tensor([[0.1507, 0.1556, 0.1620,  ..., 0.1512, 0.1531, 0.1541],
        [0.1500, 0.1598, 0.1613,  ..., 0.1587, 0.1612, 0.1570],
        [0.1547, 0.1601, 0.1651,  ..., 0.1579, 0.1568, 0.1594],
        ...,
        [0.1463, 0.1558, 0.1572,  ..., 0.1505, 0.1501, 0.1547],
        [0.1549, 0.1612, 0.1677,  ..., 0.1527, 0.1578, 0.1632],
        [0.1551, 0.1582, 0.1657,  ..., 0.1543, 0.1555, 0.1582]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:24<00:18,  1.27it/s]

tensor([[0.1483, 0.1589, 0.1601,  ..., 0.1481, 0.1542, 0.1533],
        [0.1554, 0.1595, 0.1685,  ..., 0.1540, 0.1579, 0.1629],
        [0.1535, 0.1594, 0.1667,  ..., 0.1537, 0.1554, 0.1616],
        ...,
        [0.1567, 0.1615, 0.1667,  ..., 0.1540, 0.1566, 0.1588],
        [0.1461, 0.1528, 0.1551,  ..., 0.1505, 0.1480, 0.1483],
        [0.1538, 0.1621, 0.1693,  ..., 0.1560, 0.1641, 0.1648]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:25<00:17,  1.32it/s]

tensor([[0.1511, 0.1620, 0.1628,  ..., 0.1552, 0.1592, 0.1611],
        [0.1516, 0.1571, 0.1591,  ..., 0.1512, 0.1548, 0.1572],
        [0.1469, 0.1566, 0.1586,  ..., 0.1497, 0.1503, 0.1509],
        ...,
        [0.1451, 0.1531, 0.1570,  ..., 0.1485, 0.1492, 0.1489],
        [0.1498, 0.1565, 0.1617,  ..., 0.1541, 0.1532, 0.1542],
        [0.1486, 0.1583, 0.1633,  ..., 0.1510, 0.1537, 0.1594]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:25<00:16,  1.30it/s]

tensor([[0.1459, 0.1556, 0.1571,  ..., 0.1491, 0.1545, 0.1549],
        [0.1496, 0.1599, 0.1613,  ..., 0.1524, 0.1545, 0.1573],
        [0.1521, 0.1596, 0.1622,  ..., 0.1522, 0.1546, 0.1587],
        ...,
        [0.1477, 0.1539, 0.1589,  ..., 0.1533, 0.1517, 0.1533],
        [0.1499, 0.1621, 0.1618,  ..., 0.1525, 0.1608, 0.1591],
        [0.1513, 0.1557, 0.1636,  ..., 0.1524, 0.1557, 0.1568]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:26<00:16,  1.27it/s]

tensor([[0.1517, 0.1596, 0.1629,  ..., 0.1520, 0.1562, 0.1600],
        [0.1494, 0.1623, 0.1603,  ..., 0.1505, 0.1591, 0.1558],
        [0.1567, 0.1621, 0.1684,  ..., 0.1545, 0.1600, 0.1639],
        ...,
        [0.1462, 0.1565, 0.1566,  ..., 0.1499, 0.1534, 0.1531],
        [0.1502, 0.1552, 0.1612,  ..., 0.1502, 0.1517, 0.1575],
        [0.1548, 0.1646, 0.1696,  ..., 0.1581, 0.1639, 0.1652]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:27<00:15,  1.26it/s]

tensor([[0.1541, 0.1615, 0.1672,  ..., 0.1533, 0.1586, 0.1608],
        [0.1535, 0.1597, 0.1665,  ..., 0.1557, 0.1580, 0.1613],
        [0.1557, 0.1616, 0.1682,  ..., 0.1539, 0.1581, 0.1621],
        ...,
        [0.1493, 0.1575, 0.1609,  ..., 0.1501, 0.1532, 0.1551],
        [0.1503, 0.1545, 0.1621,  ..., 0.1512, 0.1528, 0.1544],
        [0.1532, 0.1568, 0.1633,  ..., 0.1495, 0.1527, 0.1581]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:28<00:14,  1.27it/s]

tensor([[0.1510, 0.1613, 0.1628,  ..., 0.1572, 0.1616, 0.1597],
        [0.1518, 0.1537, 0.1639,  ..., 0.1505, 0.1534, 0.1578],
        [0.1520, 0.1576, 0.1653,  ..., 0.1541, 0.1566, 0.1576],
        ...,
        [0.1542, 0.1603, 0.1669,  ..., 0.1542, 0.1548, 0.1599],
        [0.1501, 0.1567, 0.1604,  ..., 0.1536, 0.1529, 0.1560],
        [0.1462, 0.1561, 0.1577,  ..., 0.1502, 0.1541, 0.1553]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:28<00:13,  1.31it/s]

tensor([[0.1513, 0.1581, 0.1660,  ..., 0.1538, 0.1550, 0.1592],
        [0.1519, 0.1602, 0.1619,  ..., 0.1536, 0.1552, 0.1555],
        [0.1561, 0.1632, 0.1684,  ..., 0.1555, 0.1629, 0.1609],
        ...,
        [0.1530, 0.1581, 0.1652,  ..., 0.1542, 0.1553, 0.1592],
        [0.1544, 0.1637, 0.1688,  ..., 0.1545, 0.1599, 0.1628],
        [0.1519, 0.1576, 0.1644,  ..., 0.1555, 0.1576, 0.1590]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:29<00:12,  1.31it/s]

tensor([[0.1493, 0.1526, 0.1592,  ..., 0.1513, 0.1502, 0.1519],
        [0.1505, 0.1570, 0.1660,  ..., 0.1529, 0.1565, 0.1586],
        [0.1565, 0.1624, 0.1696,  ..., 0.1546, 0.1596, 0.1620],
        ...,
        [0.1518, 0.1609, 0.1658,  ..., 0.1509, 0.1585, 0.1575],
        [0.1549, 0.1640, 0.1694,  ..., 0.1590, 0.1619, 0.1630],
        [0.1562, 0.1575, 0.1683,  ..., 0.1531, 0.1573, 0.1602]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:30<00:12,  1.28it/s]

tensor([[0.1545, 0.1622, 0.1682,  ..., 0.1541, 0.1586, 0.1620],
        [0.1517, 0.1587, 0.1634,  ..., 0.1517, 0.1561, 0.1590],
        [0.1581, 0.1629, 0.1682,  ..., 0.1563, 0.1609, 0.1614],
        ...,
        [0.1532, 0.1591, 0.1675,  ..., 0.1532, 0.1588, 0.1611],
        [0.1499, 0.1628, 0.1620,  ..., 0.1575, 0.1612, 0.1548],
        [0.1491, 0.1564, 0.1590,  ..., 0.1501, 0.1535, 0.1540]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:31<00:11,  1.30it/s]

tensor([[0.1482, 0.1508, 0.1578,  ..., 0.1484, 0.1467, 0.1520],
        [0.1543, 0.1616, 0.1652,  ..., 0.1524, 0.1567, 0.1625],
        [0.1523, 0.1591, 0.1661,  ..., 0.1569, 0.1562, 0.1606],
        ...,
        [0.1504, 0.1574, 0.1597,  ..., 0.1509, 0.1532, 0.1538],
        [0.1541, 0.1644, 0.1651,  ..., 0.1560, 0.1613, 0.1610],
        [0.1519, 0.1583, 0.1654,  ..., 0.1539, 0.1565, 0.1597]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:32<00:10,  1.31it/s]

tensor([[0.1576, 0.1624, 0.1712,  ..., 0.1566, 0.1601, 0.1639],
        [0.1536, 0.1587, 0.1657,  ..., 0.1534, 0.1556, 0.1605],
        [0.1566, 0.1601, 0.1667,  ..., 0.1570, 0.1585, 0.1598],
        ...,
        [0.1564, 0.1632, 0.1717,  ..., 0.1553, 0.1622, 0.1658],
        [0.1492, 0.1542, 0.1605,  ..., 0.1507, 0.1515, 0.1532],
        [0.1557, 0.1672, 0.1681,  ..., 0.1587, 0.1642, 0.1620]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:32<00:09,  1.33it/s]

tensor([[0.1539, 0.1610, 0.1686,  ..., 0.1530, 0.1569, 0.1601],
        [0.1503, 0.1609, 0.1632,  ..., 0.1505, 0.1561, 0.1577],
        [0.1506, 0.1587, 0.1640,  ..., 0.1508, 0.1551, 0.1589],
        ...,
        [0.1491, 0.1562, 0.1591,  ..., 0.1521, 0.1529, 0.1534],
        [0.1521, 0.1588, 0.1653,  ..., 0.1544, 0.1572, 0.1603],
        [0.1442, 0.1561, 0.1541,  ..., 0.1486, 0.1525, 0.1501]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:33<00:08,  1.36it/s]

tensor([[0.1556, 0.1617, 0.1697,  ..., 0.1542, 0.1611, 0.1629],
        [0.1605, 0.1664, 0.1696,  ..., 0.1588, 0.1643, 0.1622],
        [0.1566, 0.1626, 0.1705,  ..., 0.1583, 0.1633, 0.1640],
        ...,
        [0.1528, 0.1624, 0.1636,  ..., 0.1551, 0.1609, 0.1579],
        [0.1578, 0.1634, 0.1699,  ..., 0.1557, 0.1635, 0.1632],
        [0.1531, 0.1608, 0.1661,  ..., 0.1543, 0.1579, 0.1587]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:34<00:08,  1.35it/s]

tensor([[0.1481, 0.1561, 0.1614,  ..., 0.1508, 0.1546, 0.1551],
        [0.1513, 0.1572, 0.1615,  ..., 0.1521, 0.1562, 0.1591],
        [0.1552, 0.1640, 0.1706,  ..., 0.1564, 0.1620, 0.1635],
        ...,
        [0.1517, 0.1569, 0.1616,  ..., 0.1512, 0.1527, 0.1537],
        [0.1547, 0.1615, 0.1684,  ..., 0.1556, 0.1591, 0.1629],
        [0.1553, 0.1649, 0.1678,  ..., 0.1558, 0.1619, 0.1622]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:35<00:07,  1.31it/s]

tensor([[0.1430, 0.1552, 0.1546,  ..., 0.1460, 0.1538, 0.1531],
        [0.1499, 0.1608, 0.1636,  ..., 0.1536, 0.1550, 0.1576],
        [0.1579, 0.1635, 0.1702,  ..., 0.1562, 0.1594, 0.1633],
        ...,
        [0.1456, 0.1592, 0.1579,  ..., 0.1491, 0.1536, 0.1542],
        [0.1508, 0.1566, 0.1626,  ..., 0.1526, 0.1531, 0.1566],
        [0.1507, 0.1594, 0.1628,  ..., 0.1499, 0.1546, 0.1558]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:35<00:07,  1.27it/s]

tensor([[0.1476, 0.1561, 0.1607,  ..., 0.1491, 0.1496, 0.1509],
        [0.1519, 0.1597, 0.1630,  ..., 0.1555, 0.1555, 0.1568],
        [0.1470, 0.1525, 0.1562,  ..., 0.1515, 0.1489, 0.1496],
        ...,
        [0.1526, 0.1607, 0.1635,  ..., 0.1518, 0.1567, 0.1580],
        [0.1481, 0.1587, 0.1571,  ..., 0.1500, 0.1535, 0.1535],
        [0.1548, 0.1601, 0.1667,  ..., 0.1531, 0.1569, 0.1623]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:36<00:06,  1.32it/s]

tensor([[0.1471, 0.1569, 0.1594,  ..., 0.1504, 0.1544, 0.1551],
        [0.1565, 0.1627, 0.1694,  ..., 0.1560, 0.1604, 0.1643],
        [0.1458, 0.1568, 0.1595,  ..., 0.1517, 0.1546, 0.1562],
        ...,
        [0.1519, 0.1589, 0.1643,  ..., 0.1557, 0.1556, 0.1589],
        [0.1476, 0.1576, 0.1592,  ..., 0.1520, 0.1561, 0.1545],
        [0.1481, 0.1538, 0.1588,  ..., 0.1498, 0.1501, 0.1541]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:37<00:05,  1.34it/s]

tensor([[0.1543, 0.1589, 0.1644,  ..., 0.1532, 0.1564, 0.1603],
        [0.1511, 0.1611, 0.1632,  ..., 0.1558, 0.1571, 0.1577],
        [0.1468, 0.1528, 0.1584,  ..., 0.1494, 0.1490, 0.1510],
        ...,
        [0.1488, 0.1574, 0.1604,  ..., 0.1497, 0.1529, 0.1527],
        [0.1454, 0.1547, 0.1560,  ..., 0.1480, 0.1521, 0.1506],
        [0.1471, 0.1546, 0.1565,  ..., 0.1494, 0.1518, 0.1523]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:38<00:04,  1.33it/s]

tensor([[0.1542, 0.1646, 0.1662,  ..., 0.1548, 0.1606, 0.1617],
        [0.1527, 0.1604, 0.1655,  ..., 0.1525, 0.1561, 0.1589],
        [0.1472, 0.1518, 0.1596,  ..., 0.1499, 0.1513, 0.1518],
        ...,
        [0.1510, 0.1620, 0.1645,  ..., 0.1549, 0.1592, 0.1603],
        [0.1503, 0.1596, 0.1599,  ..., 0.1532, 0.1565, 0.1589],
        [0.1509, 0.1560, 0.1626,  ..., 0.1547, 0.1524, 0.1578]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:38<00:03,  1.34it/s]

tensor([[0.1504, 0.1575, 0.1605,  ..., 0.1528, 0.1549, 0.1543],
        [0.1524, 0.1623, 0.1656,  ..., 0.1531, 0.1588, 0.1592],
        [0.1522, 0.1617, 0.1645,  ..., 0.1565, 0.1609, 0.1565],
        ...,
        [0.1469, 0.1542, 0.1584,  ..., 0.1503, 0.1525, 0.1520],
        [0.1506, 0.1597, 0.1625,  ..., 0.1529, 0.1564, 0.1580],
        [0.1514, 0.1590, 0.1632,  ..., 0.1513, 0.1543, 0.1555]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:39<00:02,  1.37it/s]

tensor([[0.1561, 0.1647, 0.1685,  ..., 0.1580, 0.1578, 0.1613],
        [0.1530, 0.1592, 0.1640,  ..., 0.1545, 0.1564, 0.1610],
        [0.1479, 0.1570, 0.1600,  ..., 0.1549, 0.1564, 0.1545],
        ...,
        [0.1531, 0.1593, 0.1626,  ..., 0.1509, 0.1550, 0.1570],
        [0.1527, 0.1584, 0.1656,  ..., 0.1543, 0.1547, 0.1594],
        [0.1545, 0.1600, 0.1682,  ..., 0.1553, 0.1577, 0.1607]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:40<00:02,  1.34it/s]

tensor([[0.1469, 0.1545, 0.1576,  ..., 0.1529, 0.1508, 0.1506],
        [0.1537, 0.1566, 0.1631,  ..., 0.1520, 0.1528, 0.1561],
        [0.1527, 0.1625, 0.1657,  ..., 0.1530, 0.1560, 0.1599],
        ...,
        [0.1476, 0.1534, 0.1593,  ..., 0.1503, 0.1511, 0.1540],
        [0.1494, 0.1578, 0.1608,  ..., 0.1508, 0.1545, 0.1566],
        [0.1497, 0.1565, 0.1615,  ..., 0.1518, 0.1544, 0.1546]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:41<00:01,  1.32it/s]

tensor([[0.1519, 0.1614, 0.1655,  ..., 0.1515, 0.1571, 0.1589],
        [0.1511, 0.1547, 0.1625,  ..., 0.1537, 0.1540, 0.1555],
        [0.1508, 0.1587, 0.1612,  ..., 0.1518, 0.1518, 0.1520],
        ...,
        [0.1496, 0.1607, 0.1607,  ..., 0.1533, 0.1621, 0.1565],
        [0.1507, 0.1592, 0.1627,  ..., 0.1520, 0.1554, 0.1571],
        [0.1541, 0.1609, 0.1669,  ..., 0.1522, 0.1573, 0.1594]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:41<00:00,  1.34it/s]

tensor([[0.1494, 0.1587, 0.1609,  ..., 0.1519, 0.1544, 0.1553],
        [0.1529, 0.1569, 0.1655,  ..., 0.1504, 0.1527, 0.1560],
        [0.1501, 0.1560, 0.1617,  ..., 0.1526, 0.1540, 0.1543],
        ...,
        [0.1513, 0.1597, 0.1640,  ..., 0.1546, 0.1569, 0.1570],
        [0.1584, 0.1645, 0.1731,  ..., 0.1559, 0.1621, 0.1632],
        [0.1470, 0.1530, 0.1582,  ..., 0.1497, 0.1512, 0.1504]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:42<00:00,  1.34it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.32it/s]

tensor([[0.1511, 0.1598, 0.1628,  ..., 0.1546, 0.1567, 0.1584],
        [0.1526, 0.1608, 0.1646,  ..., 0.1522, 0.1557, 0.1575],
        [0.1484, 0.1562, 0.1598,  ..., 0.1511, 0.1517, 0.1528],
        ...,
        [0.1448, 0.1528, 0.1535,  ..., 0.1474, 0.1482, 0.1492],
        [0.1510, 0.1603, 0.1649,  ..., 0.1561, 0.1556, 0.1563],
        [0.1515, 0.1575, 0.1629,  ..., 0.1532, 0.1564, 0.1575]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.27it/s]

tensor([[0.1459, 0.1533, 0.1557,  ..., 0.1474, 0.1504, 0.1483],
        [0.1542, 0.1612, 0.1678,  ..., 0.1536, 0.1582, 0.1609],
        [0.1493, 0.1569, 0.1618,  ..., 0.1521, 0.1553, 0.1555],
        ...,
        [0.1516, 0.1593, 0.1644,  ..., 0.1499, 0.1535, 0.1556],
        [0.1476, 0.1578, 0.1605,  ..., 0.1512, 0.1554, 0.1531],
        [0.1555, 0.1621, 0.1671,  ..., 0.1566, 0.1595, 0.1602]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.27it/s]

tensor([[0.1505, 0.1569, 0.1620,  ..., 0.1516, 0.1549, 0.1549],
        [0.1513, 0.1602, 0.1623,  ..., 0.1536, 0.1572, 0.1598],
        [0.1529, 0.1601, 0.1663,  ..., 0.1520, 0.1566, 0.1591],
        ...,
        [0.1461, 0.1540, 0.1574,  ..., 0.1488, 0.1492, 0.1506],
        [0.1533, 0.1596, 0.1670,  ..., 0.1522, 0.1559, 0.1641],
        [0.1504, 0.1568, 0.1609,  ..., 0.1524, 0.1513, 0.1537]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.27it/s]

tensor([[0.1513, 0.1570, 0.1636,  ..., 0.1516, 0.1525, 0.1563],
        [0.1516, 0.1597, 0.1664,  ..., 0.1535, 0.1592, 0.1595],
        [0.1537, 0.1620, 0.1671,  ..., 0.1539, 0.1624, 0.1610],
        ...,
        [0.1516, 0.1619, 0.1657,  ..., 0.1561, 0.1569, 0.1586],
        [0.1542, 0.1582, 0.1669,  ..., 0.1554, 0.1570, 0.1615],
        [0.1504, 0.1578, 0.1624,  ..., 0.1543, 0.1564, 0.1570]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.27it/s]

tensor([[0.1515, 0.1640, 0.1701,  ..., 0.1571, 0.1612, 0.1641],
        [0.1512, 0.1596, 0.1632,  ..., 0.1521, 0.1584, 0.1609],
        [0.1536, 0.1617, 0.1669,  ..., 0.1554, 0.1579, 0.1604],
        ...,
        [0.1557, 0.1633, 0.1695,  ..., 0.1546, 0.1603, 0.1638],
        [0.1545, 0.1634, 0.1680,  ..., 0.1569, 0.1630, 0.1621],
        [0.1547, 0.1611, 0.1687,  ..., 0.1546, 0.1594, 0.1609]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.29it/s]

tensor([[0.1586, 0.1629, 0.1698,  ..., 0.1575, 0.1593, 0.1633],
        [0.1515, 0.1595, 0.1643,  ..., 0.1530, 0.1545, 0.1591],
        [0.1501, 0.1570, 0.1617,  ..., 0.1504, 0.1523, 0.1570],
        ...,
        [0.1530, 0.1613, 0.1660,  ..., 0.1548, 0.1600, 0.1588],
        [0.1513, 0.1598, 0.1646,  ..., 0.1489, 0.1560, 0.1608],
        [0.1509, 0.1629, 0.1629,  ..., 0.1539, 0.1582, 0.1598]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.32it/s]

tensor([[0.1507, 0.1622, 0.1637,  ..., 0.1539, 0.1603, 0.1591],
        [0.1518, 0.1600, 0.1645,  ..., 0.1547, 0.1563, 0.1568],
        [0.1535, 0.1588, 0.1671,  ..., 0.1523, 0.1578, 0.1564],
        ...,
        [0.1556, 0.1643, 0.1671,  ..., 0.1535, 0.1567, 0.1601],
        [0.1560, 0.1627, 0.1695,  ..., 0.1562, 0.1606, 0.1604],
        [0.1450, 0.1519, 0.1563,  ..., 0.1495, 0.1486, 0.1508]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.21it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1510, 0.1618, 0.1680,  ..., 0.1555, 0.1570, 0.1623],
        [0.1538, 0.1599, 0.1699,  ..., 0.1556, 0.1589, 0.1605],
        [0.1534, 0.1602, 0.1658,  ..., 0.1559, 0.1586, 0.1599],
        ...,
        [0.1522, 0.1633, 0.1641,  ..., 0.1542, 0.1593, 0.1614],
        [0.1508, 0.1594, 0.1635,  ..., 0.1519, 0.1560, 0.1595],
        [0.1541, 0.1613, 0.1678,  ..., 0.1531, 0.1580, 0.1609]],
       grad_fn=<LogBackward>)
[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 18, train_loss: -0.8635023832321167, valid_loss: -0.8395612239837646
Parameter containing:
tensor([[ 0.9999,  0.9999,  0.9999,  0.9999,  0.9999,  0.9998,  1.0001,  0.9998,
          0.9998,  1.0004, -0.5001, -0.5002, -0.5002, -0.5001, -0.5002, -0.5001,
         -0.5001, -0.5003, -0.5001, -0.5002],
     

  2%|▏         | 1/57 [00:00<00:40,  1.39it/s]

tensor([[0.1545, 0.1568, 0.1639,  ..., 0.1537, 0.1548, 0.1581],
        [0.1557, 0.1646, 0.1704,  ..., 0.1597, 0.1614, 0.1647],
        [0.1522, 0.1591, 0.1657,  ..., 0.1531, 0.1586, 0.1595],
        ...,
        [0.1501, 0.1558, 0.1638,  ..., 0.1519, 0.1540, 0.1560],
        [0.1499, 0.1601, 0.1614,  ..., 0.1526, 0.1565, 0.1554],
        [0.1537, 0.1610, 0.1662,  ..., 0.1554, 0.1576, 0.1626]],
       grad_fn=<LogBackward>)


  4%|▎         | 2/57 [00:01<00:38,  1.42it/s]

tensor([[0.1483, 0.1543, 0.1618,  ..., 0.1491, 0.1527, 0.1551],
        [0.1500, 0.1571, 0.1584,  ..., 0.1513, 0.1540, 0.1554],
        [0.1494, 0.1555, 0.1595,  ..., 0.1513, 0.1538, 0.1533],
        ...,
        [0.1493, 0.1603, 0.1623,  ..., 0.1556, 0.1562, 0.1553],
        [0.1425, 0.1474, 0.1544,  ..., 0.1422, 0.1454, 0.1490],
        [0.1514, 0.1610, 0.1632,  ..., 0.1550, 0.1577, 0.1591]],
       grad_fn=<LogBackward>)


  5%|▌         | 3/57 [00:02<00:37,  1.44it/s]

tensor([[0.1547, 0.1607, 0.1657,  ..., 0.1527, 0.1547, 0.1591],
        [0.1496, 0.1569, 0.1608,  ..., 0.1517, 0.1527, 0.1521],
        [0.1521, 0.1609, 0.1667,  ..., 0.1540, 0.1568, 0.1579],
        ...,
        [0.1467, 0.1541, 0.1558,  ..., 0.1480, 0.1486, 0.1488],
        [0.1477, 0.1576, 0.1582,  ..., 0.1503, 0.1544, 0.1540],
        [0.1458, 0.1558, 0.1585,  ..., 0.1528, 0.1538, 0.1564]],
       grad_fn=<LogBackward>)


  7%|▋         | 4/57 [00:02<00:36,  1.45it/s]

tensor([[0.1503, 0.1592, 0.1619,  ..., 0.1519, 0.1552, 0.1580],
        [0.1447, 0.1556, 0.1534,  ..., 0.1498, 0.1511, 0.1501],
        [0.1549, 0.1623, 0.1679,  ..., 0.1539, 0.1579, 0.1618],
        ...,
        [0.1489, 0.1559, 0.1586,  ..., 0.1499, 0.1498, 0.1509],
        [0.1526, 0.1561, 0.1626,  ..., 0.1551, 0.1545, 0.1573],
        [0.1517, 0.1588, 0.1655,  ..., 0.1519, 0.1545, 0.1556]],
       grad_fn=<LogBackward>)


  9%|▉         | 5/57 [00:03<00:37,  1.39it/s]

tensor([[0.1496, 0.1621, 0.1647,  ..., 0.1528, 0.1581, 0.1598],
        [0.1469, 0.1584, 0.1576,  ..., 0.1514, 0.1567, 0.1560],
        [0.1456, 0.1524, 0.1608,  ..., 0.1485, 0.1483, 0.1528],
        ...,
        [0.1511, 0.1609, 0.1631,  ..., 0.1531, 0.1573, 0.1598],
        [0.1490, 0.1560, 0.1605,  ..., 0.1485, 0.1512, 0.1550],
        [0.1500, 0.1569, 0.1593,  ..., 0.1515, 0.1520, 0.1511]],
       grad_fn=<LogBackward>)


 11%|█         | 6/57 [00:04<00:37,  1.37it/s]

tensor([[0.1509, 0.1624, 0.1646,  ..., 0.1566, 0.1577, 0.1604],
        [0.1491, 0.1577, 0.1592,  ..., 0.1537, 0.1562, 0.1556],
        [0.1494, 0.1567, 0.1634,  ..., 0.1528, 0.1525, 0.1572],
        ...,
        [0.1435, 0.1505, 0.1528,  ..., 0.1449, 0.1464, 0.1468],
        [0.1385, 0.1423, 0.1479,  ..., 0.1436, 0.1398, 0.1438],
        [0.1527, 0.1616, 0.1642,  ..., 0.1572, 0.1586, 0.1597]],
       grad_fn=<LogBackward>)


 12%|█▏        | 7/57 [00:04<00:36,  1.39it/s]

tensor([[0.1488, 0.1595, 0.1603,  ..., 0.1510, 0.1566, 0.1565],
        [0.1530, 0.1627, 0.1682,  ..., 0.1567, 0.1602, 0.1625],
        [0.1458, 0.1570, 0.1571,  ..., 0.1459, 0.1499, 0.1506],
        ...,
        [0.1529, 0.1585, 0.1608,  ..., 0.1528, 0.1544, 0.1586],
        [0.1499, 0.1583, 0.1619,  ..., 0.1526, 0.1554, 0.1552],
        [0.1507, 0.1585, 0.1622,  ..., 0.1506, 0.1557, 0.1567]],
       grad_fn=<LogBackward>)


 14%|█▍        | 8/57 [00:05<00:34,  1.43it/s]

tensor([[0.1511, 0.1583, 0.1613,  ..., 0.1526, 0.1558, 0.1556],
        [0.1551, 0.1599, 0.1658,  ..., 0.1532, 0.1569, 0.1595],
        [0.1523, 0.1564, 0.1636,  ..., 0.1530, 0.1561, 0.1565],
        ...,
        [0.1359, 0.1439, 0.1494,  ..., 0.1424, 0.1430, 0.1447],
        [0.1542, 0.1624, 0.1661,  ..., 0.1555, 0.1581, 0.1605],
        [0.1554, 0.1592, 0.1671,  ..., 0.1553, 0.1581, 0.1598]],
       grad_fn=<LogBackward>)


 16%|█▌        | 9/57 [00:06<00:33,  1.45it/s]

tensor([[0.1521, 0.1602, 0.1642,  ..., 0.1512, 0.1563, 0.1576],
        [0.1508, 0.1583, 0.1630,  ..., 0.1561, 0.1542, 0.1565],
        [0.1524, 0.1569, 0.1674,  ..., 0.1521, 0.1558, 0.1579],
        ...,
        [0.1562, 0.1632, 0.1713,  ..., 0.1567, 0.1625, 0.1646],
        [0.1520, 0.1596, 0.1652,  ..., 0.1524, 0.1560, 0.1575],
        [0.1556, 0.1630, 0.1700,  ..., 0.1553, 0.1616, 0.1629]],
       grad_fn=<LogBackward>)


 18%|█▊        | 10/57 [00:06<00:31,  1.47it/s]

tensor([[0.1511, 0.1579, 0.1659,  ..., 0.1521, 0.1551, 0.1573],
        [0.1551, 0.1611, 0.1667,  ..., 0.1569, 0.1590, 0.1605],
        [0.1554, 0.1625, 0.1675,  ..., 0.1555, 0.1610, 0.1604],
        ...,
        [0.1526, 0.1563, 0.1644,  ..., 0.1511, 0.1547, 0.1580],
        [0.1497, 0.1559, 0.1636,  ..., 0.1526, 0.1537, 0.1521],
        [0.1526, 0.1602, 0.1648,  ..., 0.1553, 0.1580, 0.1595]],
       grad_fn=<LogBackward>)


 19%|█▉        | 11/57 [00:07<00:31,  1.47it/s]

tensor([[0.1551, 0.1610, 0.1646,  ..., 0.1538, 0.1593, 0.1602],
        [0.1538, 0.1634, 0.1677,  ..., 0.1559, 0.1585, 0.1619],
        [0.1523, 0.1599, 0.1659,  ..., 0.1538, 0.1582, 0.1600],
        ...,
        [0.1519, 0.1620, 0.1679,  ..., 0.1554, 0.1585, 0.1655],
        [0.1505, 0.1579, 0.1651,  ..., 0.1542, 0.1570, 0.1609],
        [0.1555, 0.1614, 0.1694,  ..., 0.1541, 0.1592, 0.1634]],
       grad_fn=<LogBackward>)


 21%|██        | 12/57 [00:08<00:30,  1.46it/s]

tensor([[0.1507, 0.1572, 0.1613,  ..., 0.1541, 0.1570, 0.1610],
        [0.1477, 0.1539, 0.1594,  ..., 0.1477, 0.1504, 0.1515],
        [0.1537, 0.1640, 0.1698,  ..., 0.1576, 0.1641, 0.1669],
        ...,
        [0.1532, 0.1590, 0.1645,  ..., 0.1541, 0.1576, 0.1621],
        [0.1540, 0.1608, 0.1667,  ..., 0.1522, 0.1571, 0.1592],
        [0.1510, 0.1629, 0.1645,  ..., 0.1532, 0.1602, 0.1581]],
       grad_fn=<LogBackward>)


 23%|██▎       | 13/57 [00:09<00:33,  1.33it/s]

tensor([[0.1522, 0.1589, 0.1645,  ..., 0.1539, 0.1557, 0.1584],
        [0.1512, 0.1594, 0.1617,  ..., 0.1521, 0.1558, 0.1559],
        [0.1472, 0.1533, 0.1584,  ..., 0.1528, 0.1512, 0.1527],
        ...,
        [0.1488, 0.1567, 0.1587,  ..., 0.1496, 0.1520, 0.1545],
        [0.1547, 0.1622, 0.1675,  ..., 0.1571, 0.1604, 0.1600],
        [0.1501, 0.1545, 0.1606,  ..., 0.1531, 0.1526, 0.1556]],
       grad_fn=<LogBackward>)


 25%|██▍       | 14/57 [00:10<00:32,  1.32it/s]

tensor([[0.1519, 0.1588, 0.1668,  ..., 0.1513, 0.1566, 0.1591],
        [0.1541, 0.1625, 0.1666,  ..., 0.1559, 0.1595, 0.1588],
        [0.1519, 0.1613, 0.1650,  ..., 0.1536, 0.1567, 0.1597],
        ...,
        [0.1525, 0.1599, 0.1638,  ..., 0.1517, 0.1551, 0.1568],
        [0.1499, 0.1585, 0.1624,  ..., 0.1533, 0.1549, 0.1565],
        [0.1479, 0.1547, 0.1591,  ..., 0.1520, 0.1541, 0.1553]],
       grad_fn=<LogBackward>)


 26%|██▋       | 15/57 [00:10<00:31,  1.35it/s]

tensor([[0.1526, 0.1609, 0.1645,  ..., 0.1550, 0.1575, 0.1581],
        [0.1517, 0.1578, 0.1630,  ..., 0.1532, 0.1559, 0.1581],
        [0.1519, 0.1605, 0.1643,  ..., 0.1543, 0.1566, 0.1591],
        ...,
        [0.1502, 0.1572, 0.1615,  ..., 0.1551, 0.1536, 0.1551],
        [0.1515, 0.1645, 0.1637,  ..., 0.1531, 0.1572, 0.1599],
        [0.1491, 0.1516, 0.1595,  ..., 0.1507, 0.1505, 0.1514]],
       grad_fn=<LogBackward>)


 28%|██▊       | 16/57 [00:11<00:30,  1.36it/s]

tensor([[0.1481, 0.1536, 0.1604,  ..., 0.1513, 0.1521, 0.1521],
        [0.1519, 0.1570, 0.1636,  ..., 0.1521, 0.1548, 0.1579],
        [0.1562, 0.1638, 0.1711,  ..., 0.1555, 0.1644, 0.1651],
        ...,
        [0.1500, 0.1603, 0.1626,  ..., 0.1560, 0.1610, 0.1568],
        [0.1536, 0.1641, 0.1664,  ..., 0.1585, 0.1633, 0.1642],
        [0.1540, 0.1614, 0.1668,  ..., 0.1550, 0.1583, 0.1600]],
       grad_fn=<LogBackward>)


 30%|██▉       | 17/57 [00:12<00:29,  1.35it/s]

tensor([[0.1489, 0.1567, 0.1590,  ..., 0.1518, 0.1522, 0.1544],
        [0.1500, 0.1594, 0.1617,  ..., 0.1500, 0.1562, 0.1581],
        [0.1550, 0.1615, 0.1663,  ..., 0.1546, 0.1576, 0.1597],
        ...,
        [0.1541, 0.1559, 0.1636,  ..., 0.1508, 0.1548, 0.1591],
        [0.1510, 0.1611, 0.1618,  ..., 0.1538, 0.1598, 0.1592],
        [0.1521, 0.1590, 0.1635,  ..., 0.1529, 0.1568, 0.1580]],
       grad_fn=<LogBackward>)


 32%|███▏      | 18/57 [00:12<00:28,  1.38it/s]

tensor([[0.1493, 0.1556, 0.1619,  ..., 0.1520, 0.1534, 0.1558],
        [0.1450, 0.1566, 0.1558,  ..., 0.1476, 0.1537, 0.1548],
        [0.1418, 0.1502, 0.1515,  ..., 0.1496, 0.1467, 0.1519],
        ...,
        [0.1461, 0.1589, 0.1575,  ..., 0.1496, 0.1550, 0.1558],
        [0.1532, 0.1597, 0.1676,  ..., 0.1530, 0.1588, 0.1607],
        [0.1504, 0.1575, 0.1616,  ..., 0.1507, 0.1570, 0.1597]],
       grad_fn=<LogBackward>)


 33%|███▎      | 19/57 [00:13<00:28,  1.32it/s]

tensor([[0.1514, 0.1576, 0.1619,  ..., 0.1498, 0.1530, 0.1540],
        [0.1478, 0.1595, 0.1604,  ..., 0.1498, 0.1543, 0.1579],
        [0.1452, 0.1564, 0.1568,  ..., 0.1490, 0.1546, 0.1562],
        ...,
        [0.1510, 0.1583, 0.1640,  ..., 0.1542, 0.1568, 0.1583],
        [0.1473, 0.1538, 0.1578,  ..., 0.1490, 0.1511, 0.1537],
        [0.1509, 0.1564, 0.1616,  ..., 0.1525, 0.1547, 0.1567]],
       grad_fn=<LogBackward>)


 35%|███▌      | 20/57 [00:14<00:28,  1.31it/s]

tensor([[0.1514, 0.1602, 0.1663,  ..., 0.1508, 0.1582, 0.1591],
        [0.1408, 0.1503, 0.1540,  ..., 0.1450, 0.1474, 0.1501],
        [0.1559, 0.1644, 0.1693,  ..., 0.1540, 0.1627, 0.1626],
        ...,
        [0.1509, 0.1578, 0.1624,  ..., 0.1539, 0.1562, 0.1582],
        [0.1560, 0.1649, 0.1678,  ..., 0.1552, 0.1613, 0.1631],
        [0.1502, 0.1576, 0.1618,  ..., 0.1507, 0.1531, 0.1566]],
       grad_fn=<LogBackward>)


 37%|███▋      | 21/57 [00:15<00:26,  1.37it/s]

tensor([[0.1552, 0.1629, 0.1671,  ..., 0.1565, 0.1589, 0.1615],
        [0.1494, 0.1583, 0.1627,  ..., 0.1514, 0.1573, 0.1587],
        [0.1515, 0.1613, 0.1620,  ..., 0.1572, 0.1574, 0.1580],
        ...,
        [0.1546, 0.1610, 0.1674,  ..., 0.1523, 0.1583, 0.1628],
        [0.1519, 0.1556, 0.1603,  ..., 0.1505, 0.1528, 0.1549],
        [0.1439, 0.1465, 0.1533,  ..., 0.1479, 0.1437, 0.1461]],
       grad_fn=<LogBackward>)


 39%|███▊      | 22/57 [00:15<00:25,  1.39it/s]

tensor([[0.1508, 0.1554, 0.1603,  ..., 0.1546, 0.1536, 0.1548],
        [0.1438, 0.1569, 0.1561,  ..., 0.1474, 0.1550, 0.1555],
        [0.1503, 0.1531, 0.1584,  ..., 0.1489, 0.1524, 0.1525],
        ...,
        [0.1558, 0.1611, 0.1702,  ..., 0.1554, 0.1589, 0.1611],
        [0.1520, 0.1615, 0.1654,  ..., 0.1540, 0.1593, 0.1597],
        [0.1511, 0.1553, 0.1629,  ..., 0.1564, 0.1556, 0.1567]],
       grad_fn=<LogBackward>)


 40%|████      | 23/57 [00:16<00:23,  1.42it/s]

tensor([[0.1489, 0.1561, 0.1588,  ..., 0.1498, 0.1528, 0.1532],
        [0.1486, 0.1541, 0.1601,  ..., 0.1502, 0.1511, 0.1526],
        [0.1500, 0.1567, 0.1633,  ..., 0.1502, 0.1521, 0.1579],
        ...,
        [0.1511, 0.1618, 0.1624,  ..., 0.1535, 0.1600, 0.1553],
        [0.1562, 0.1638, 0.1721,  ..., 0.1552, 0.1658, 0.1660],
        [0.1546, 0.1595, 0.1687,  ..., 0.1539, 0.1595, 0.1624]],
       grad_fn=<LogBackward>)


 42%|████▏     | 24/57 [00:17<00:25,  1.30it/s]

tensor([[0.1506, 0.1564, 0.1621,  ..., 0.1516, 0.1528, 0.1581],
        [0.1519, 0.1583, 0.1632,  ..., 0.1529, 0.1570, 0.1603],
        [0.1499, 0.1568, 0.1631,  ..., 0.1532, 0.1548, 0.1573],
        ...,
        [0.1439, 0.1524, 0.1528,  ..., 0.1471, 0.1459, 0.1515],
        [0.1538, 0.1609, 0.1664,  ..., 0.1533, 0.1586, 0.1588],
        [0.1487, 0.1552, 0.1604,  ..., 0.1524, 0.1527, 0.1537]],
       grad_fn=<LogBackward>)


 44%|████▍     | 25/57 [00:18<00:25,  1.26it/s]

tensor([[0.1582, 0.1675, 0.1715,  ..., 0.1580, 0.1633, 0.1669],
        [0.1488, 0.1532, 0.1597,  ..., 0.1505, 0.1505, 0.1520],
        [0.1528, 0.1636, 0.1673,  ..., 0.1558, 0.1613, 0.1612],
        ...,
        [0.1490, 0.1575, 0.1597,  ..., 0.1500, 0.1532, 0.1543],
        [0.1509, 0.1623, 0.1625,  ..., 0.1536, 0.1560, 0.1550],
        [0.1458, 0.1568, 0.1589,  ..., 0.1512, 0.1540, 0.1543]],
       grad_fn=<LogBackward>)


 46%|████▌     | 26/57 [00:19<00:24,  1.28it/s]

tensor([[0.1488, 0.1531, 0.1626,  ..., 0.1496, 0.1508, 0.1530],
        [0.1485, 0.1592, 0.1616,  ..., 0.1524, 0.1573, 0.1566],
        [0.1539, 0.1632, 0.1660,  ..., 0.1552, 0.1586, 0.1593],
        ...,
        [0.1521, 0.1577, 0.1634,  ..., 0.1535, 0.1544, 0.1589],
        [0.1502, 0.1596, 0.1612,  ..., 0.1504, 0.1546, 0.1555],
        [0.1504, 0.1571, 0.1619,  ..., 0.1534, 0.1539, 0.1579]],
       grad_fn=<LogBackward>)


 47%|████▋     | 27/57 [00:19<00:23,  1.28it/s]

tensor([[0.1550, 0.1603, 0.1688,  ..., 0.1541, 0.1579, 0.1628],
        [0.1503, 0.1564, 0.1601,  ..., 0.1517, 0.1546, 0.1549],
        [0.1547, 0.1645, 0.1674,  ..., 0.1556, 0.1618, 0.1623],
        ...,
        [0.1497, 0.1582, 0.1617,  ..., 0.1517, 0.1546, 0.1572],
        [0.1511, 0.1610, 0.1623,  ..., 0.1547, 0.1569, 0.1568],
        [0.1428, 0.1506, 0.1558,  ..., 0.1477, 0.1487, 0.1494]],
       grad_fn=<LogBackward>)


 49%|████▉     | 28/57 [00:20<00:22,  1.29it/s]

tensor([[0.1539, 0.1610, 0.1652,  ..., 0.1530, 0.1560, 0.1588],
        [0.1444, 0.1481, 0.1538,  ..., 0.1491, 0.1446, 0.1489],
        [0.1562, 0.1632, 0.1677,  ..., 0.1591, 0.1615, 0.1598],
        ...,
        [0.1524, 0.1547, 0.1623,  ..., 0.1502, 0.1523, 0.1544],
        [0.1501, 0.1586, 0.1621,  ..., 0.1499, 0.1533, 0.1557],
        [0.1488, 0.1553, 0.1593,  ..., 0.1502, 0.1514, 0.1530]],
       grad_fn=<LogBackward>)


 51%|█████     | 29/57 [00:21<00:21,  1.32it/s]

tensor([[0.1553, 0.1618, 0.1679,  ..., 0.1547, 0.1607, 0.1637],
        [0.1543, 0.1638, 0.1671,  ..., 0.1540, 0.1604, 0.1600],
        [0.1527, 0.1621, 0.1667,  ..., 0.1547, 0.1586, 0.1607],
        ...,
        [0.1507, 0.1567, 0.1626,  ..., 0.1512, 0.1533, 0.1544],
        [0.1509, 0.1567, 0.1643,  ..., 0.1513, 0.1531, 0.1591],
        [0.1448, 0.1525, 0.1564,  ..., 0.1500, 0.1482, 0.1510]],
       grad_fn=<LogBackward>)


 53%|█████▎    | 30/57 [00:21<00:19,  1.37it/s]

tensor([[0.1558, 0.1632, 0.1719,  ..., 0.1555, 0.1607, 0.1633],
        [0.1549, 0.1633, 0.1640,  ..., 0.1562, 0.1629, 0.1647],
        [0.1470, 0.1551, 0.1572,  ..., 0.1470, 0.1494, 0.1535],
        ...,
        [0.1412, 0.1455, 0.1494,  ..., 0.1425, 0.1418, 0.1440],
        [0.1564, 0.1627, 0.1693,  ..., 0.1559, 0.1607, 0.1639],
        [0.1524, 0.1580, 0.1636,  ..., 0.1519, 0.1554, 0.1545]],
       grad_fn=<LogBackward>)


 54%|█████▍    | 31/57 [00:22<00:18,  1.41it/s]

tensor([[0.1522, 0.1617, 0.1677,  ..., 0.1535, 0.1596, 0.1645],
        [0.1493, 0.1600, 0.1624,  ..., 0.1520, 0.1573, 0.1577],
        [0.1567, 0.1631, 0.1710,  ..., 0.1574, 0.1628, 0.1645],
        ...,
        [0.1504, 0.1574, 0.1604,  ..., 0.1530, 0.1539, 0.1548],
        [0.1541, 0.1604, 0.1676,  ..., 0.1572, 0.1612, 0.1631],
        [0.1569, 0.1629, 0.1700,  ..., 0.1562, 0.1634, 0.1636]],
       grad_fn=<LogBackward>)


 56%|█████▌    | 32/57 [00:23<00:17,  1.43it/s]

tensor([[0.1513, 0.1645, 0.1632,  ..., 0.1549, 0.1617, 0.1587],
        [0.1529, 0.1595, 0.1664,  ..., 0.1529, 0.1590, 0.1600],
        [0.1474, 0.1560, 0.1584,  ..., 0.1503, 0.1515, 0.1548],
        ...,
        [0.1501, 0.1570, 0.1617,  ..., 0.1505, 0.1543, 0.1563],
        [0.1498, 0.1619, 0.1594,  ..., 0.1516, 0.1576, 0.1574],
        [0.1499, 0.1632, 0.1631,  ..., 0.1546, 0.1610, 0.1579]],
       grad_fn=<LogBackward>)


 58%|█████▊    | 33/57 [00:24<00:17,  1.41it/s]

tensor([[0.1444, 0.1527, 0.1551,  ..., 0.1480, 0.1496, 0.1530],
        [0.1522, 0.1596, 0.1652,  ..., 0.1544, 0.1562, 0.1594],
        [0.1523, 0.1599, 0.1627,  ..., 0.1534, 0.1566, 0.1567],
        ...,
        [0.1430, 0.1509, 0.1527,  ..., 0.1454, 0.1463, 0.1472],
        [0.1541, 0.1611, 0.1674,  ..., 0.1539, 0.1573, 0.1613],
        [0.1553, 0.1625, 0.1689,  ..., 0.1553, 0.1616, 0.1637]],
       grad_fn=<LogBackward>)


 60%|█████▉    | 34/57 [00:24<00:16,  1.36it/s]

tensor([[0.1583, 0.1652, 0.1712,  ..., 0.1597, 0.1633, 0.1661],
        [0.1494, 0.1600, 0.1618,  ..., 0.1506, 0.1561, 0.1565],
        [0.1536, 0.1637, 0.1637,  ..., 0.1538, 0.1594, 0.1599],
        ...,
        [0.1474, 0.1551, 0.1564,  ..., 0.1491, 0.1475, 0.1481],
        [0.1510, 0.1603, 0.1647,  ..., 0.1557, 0.1600, 0.1586],
        [0.1561, 0.1615, 0.1684,  ..., 0.1558, 0.1600, 0.1611]],
       grad_fn=<LogBackward>)


 61%|██████▏   | 35/57 [00:25<00:16,  1.35it/s]

tensor([[0.1526, 0.1575, 0.1639,  ..., 0.1546, 0.1556, 0.1588],
        [0.1515, 0.1550, 0.1620,  ..., 0.1513, 0.1530, 0.1548],
        [0.1494, 0.1584, 0.1589,  ..., 0.1524, 0.1511, 0.1571],
        ...,
        [0.1482, 0.1595, 0.1630,  ..., 0.1530, 0.1574, 0.1586],
        [0.1501, 0.1598, 0.1636,  ..., 0.1517, 0.1545, 0.1551],
        [0.1525, 0.1620, 0.1651,  ..., 0.1552, 0.1623, 0.1600]],
       grad_fn=<LogBackward>)


 63%|██████▎   | 36/57 [00:26<00:15,  1.38it/s]

tensor([[0.1521, 0.1567, 0.1641,  ..., 0.1531, 0.1537, 0.1569],
        [0.1518, 0.1589, 0.1648,  ..., 0.1541, 0.1563, 0.1585],
        [0.1530, 0.1612, 0.1650,  ..., 0.1534, 0.1562, 0.1622],
        ...,
        [0.1513, 0.1595, 0.1620,  ..., 0.1516, 0.1553, 0.1593],
        [0.1535, 0.1644, 0.1697,  ..., 0.1550, 0.1591, 0.1638],
        [0.1517, 0.1595, 0.1641,  ..., 0.1517, 0.1563, 0.1566]],
       grad_fn=<LogBackward>)


 65%|██████▍   | 37/57 [00:26<00:14,  1.42it/s]

tensor([[0.1486, 0.1581, 0.1623,  ..., 0.1502, 0.1535, 0.1531],
        [0.1467, 0.1558, 0.1592,  ..., 0.1469, 0.1498, 0.1529],
        [0.1543, 0.1612, 0.1668,  ..., 0.1537, 0.1595, 0.1604],
        ...,
        [0.1491, 0.1553, 0.1596,  ..., 0.1506, 0.1521, 0.1516],
        [0.1527, 0.1607, 0.1647,  ..., 0.1558, 0.1563, 0.1595],
        [0.1482, 0.1554, 0.1604,  ..., 0.1493, 0.1505, 0.1537]],
       grad_fn=<LogBackward>)


 67%|██████▋   | 38/57 [00:27<00:13,  1.37it/s]

tensor([[0.1527, 0.1637, 0.1662,  ..., 0.1520, 0.1605, 0.1648],
        [0.1551, 0.1606, 0.1670,  ..., 0.1546, 0.1619, 0.1614],
        [0.1452, 0.1532, 0.1585,  ..., 0.1517, 0.1531, 0.1492],
        ...,
        [0.1510, 0.1610, 0.1629,  ..., 0.1538, 0.1552, 0.1589],
        [0.1553, 0.1603, 0.1689,  ..., 0.1542, 0.1608, 0.1631],
        [0.1555, 0.1631, 0.1675,  ..., 0.1555, 0.1586, 0.1595]],
       grad_fn=<LogBackward>)


 68%|██████▊   | 39/57 [00:28<00:14,  1.28it/s]

tensor([[0.1494, 0.1559, 0.1596,  ..., 0.1509, 0.1503, 0.1547],
        [0.1469, 0.1565, 0.1586,  ..., 0.1482, 0.1505, 0.1538],
        [0.1459, 0.1531, 0.1572,  ..., 0.1500, 0.1494, 0.1515],
        ...,
        [0.1517, 0.1585, 0.1628,  ..., 0.1528, 0.1544, 0.1573],
        [0.1432, 0.1556, 0.1549,  ..., 0.1465, 0.1540, 0.1542],
        [0.1502, 0.1535, 0.1622,  ..., 0.1519, 0.1528, 0.1562]],
       grad_fn=<LogBackward>)


 70%|███████   | 40/57 [00:29<00:13,  1.24it/s]

tensor([[0.1540, 0.1617, 0.1651,  ..., 0.1537, 0.1599, 0.1592],
        [0.1532, 0.1607, 0.1667,  ..., 0.1552, 0.1590, 0.1640],
        [0.1492, 0.1571, 0.1617,  ..., 0.1540, 0.1555, 0.1553],
        ...,
        [0.1543, 0.1637, 0.1704,  ..., 0.1548, 0.1602, 0.1615],
        [0.1513, 0.1624, 0.1656,  ..., 0.1551, 0.1621, 0.1619],
        [0.1543, 0.1601, 0.1655,  ..., 0.1570, 0.1594, 0.1578]],
       grad_fn=<LogBackward>)


 72%|███████▏  | 41/57 [00:30<00:13,  1.22it/s]

tensor([[0.1472, 0.1562, 0.1583,  ..., 0.1494, 0.1545, 0.1551],
        [0.1469, 0.1530, 0.1576,  ..., 0.1491, 0.1494, 0.1517],
        [0.1473, 0.1523, 0.1586,  ..., 0.1511, 0.1498, 0.1531],
        ...,
        [0.1386, 0.1485, 0.1508,  ..., 0.1440, 0.1444, 0.1453],
        [0.1510, 0.1600, 0.1649,  ..., 0.1512, 0.1602, 0.1610],
        [0.1533, 0.1599, 0.1658,  ..., 0.1529, 0.1565, 0.1610]],
       grad_fn=<LogBackward>)


 74%|███████▎  | 42/57 [00:31<00:12,  1.21it/s]

tensor([[0.1507, 0.1558, 0.1613,  ..., 0.1535, 0.1543, 0.1560],
        [0.1454, 0.1562, 0.1579,  ..., 0.1512, 0.1505, 0.1532],
        [0.1469, 0.1543, 0.1588,  ..., 0.1525, 0.1525, 0.1556],
        ...,
        [0.1524, 0.1616, 0.1670,  ..., 0.1566, 0.1593, 0.1607],
        [0.1454, 0.1536, 0.1583,  ..., 0.1477, 0.1502, 0.1522],
        [0.1517, 0.1633, 0.1651,  ..., 0.1550, 0.1591, 0.1586]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 43/57 [00:31<00:11,  1.22it/s]

tensor([[0.1463, 0.1572, 0.1571,  ..., 0.1497, 0.1542, 0.1554],
        [0.1568, 0.1630, 0.1688,  ..., 0.1582, 0.1597, 0.1613],
        [0.1413, 0.1527, 0.1528,  ..., 0.1433, 0.1519, 0.1511],
        ...,
        [0.1504, 0.1561, 0.1615,  ..., 0.1514, 0.1538, 0.1540],
        [0.1452, 0.1544, 0.1594,  ..., 0.1520, 0.1545, 0.1553],
        [0.1548, 0.1622, 0.1687,  ..., 0.1558, 0.1597, 0.1619]],
       grad_fn=<LogBackward>)


 77%|███████▋  | 44/57 [00:32<00:10,  1.25it/s]

tensor([[0.1501, 0.1569, 0.1626,  ..., 0.1517, 0.1531, 0.1546],
        [0.1511, 0.1594, 0.1623,  ..., 0.1560, 0.1580, 0.1592],
        [0.1479, 0.1520, 0.1589,  ..., 0.1492, 0.1509, 0.1502],
        ...,
        [0.1402, 0.1523, 0.1540,  ..., 0.1488, 0.1501, 0.1507],
        [0.1524, 0.1631, 0.1642,  ..., 0.1541, 0.1554, 0.1572],
        [0.1483, 0.1582, 0.1602,  ..., 0.1507, 0.1530, 0.1583]],
       grad_fn=<LogBackward>)


 79%|███████▉  | 45/57 [00:33<00:09,  1.27it/s]

tensor([[0.1493, 0.1555, 0.1600,  ..., 0.1526, 0.1518, 0.1555],
        [0.1512, 0.1619, 0.1644,  ..., 0.1541, 0.1584, 0.1608],
        [0.1525, 0.1569, 0.1655,  ..., 0.1522, 0.1549, 0.1573],
        ...,
        [0.1544, 0.1605, 0.1667,  ..., 0.1529, 0.1592, 0.1590],
        [0.1564, 0.1640, 0.1697,  ..., 0.1582, 0.1662, 0.1614],
        [0.1563, 0.1636, 0.1703,  ..., 0.1540, 0.1612, 0.1624]],
       grad_fn=<LogBackward>)


 81%|████████  | 46/57 [00:34<00:08,  1.30it/s]

tensor([[0.1555, 0.1606, 0.1686,  ..., 0.1545, 0.1599, 0.1642],
        [0.1579, 0.1632, 0.1728,  ..., 0.1557, 0.1650, 0.1656],
        [0.1550, 0.1626, 0.1676,  ..., 0.1558, 0.1599, 0.1611],
        ...,
        [0.1538, 0.1623, 0.1674,  ..., 0.1578, 0.1614, 0.1630],
        [0.1503, 0.1567, 0.1649,  ..., 0.1545, 0.1545, 0.1565],
        [0.1507, 0.1612, 0.1619,  ..., 0.1520, 0.1579, 0.1583]],
       grad_fn=<LogBackward>)


 82%|████████▏ | 47/57 [00:34<00:07,  1.35it/s]

tensor([[0.1505, 0.1564, 0.1619,  ..., 0.1526, 0.1531, 0.1552],
        [0.1561, 0.1607, 0.1682,  ..., 0.1544, 0.1603, 0.1589],
        [0.1476, 0.1538, 0.1598,  ..., 0.1490, 0.1489, 0.1511],
        ...,
        [0.1524, 0.1620, 0.1649,  ..., 0.1539, 0.1563, 0.1572],
        [0.1388, 0.1416, 0.1487,  ..., 0.1408, 0.1406, 0.1388],
        [0.1480, 0.1623, 0.1617,  ..., 0.1506, 0.1567, 0.1547]],
       grad_fn=<LogBackward>)


 84%|████████▍ | 48/57 [00:35<00:06,  1.40it/s]

tensor([[0.1463, 0.1563, 0.1588,  ..., 0.1508, 0.1533, 0.1549],
        [0.1564, 0.1614, 0.1672,  ..., 0.1544, 0.1617, 0.1610],
        [0.1505, 0.1563, 0.1627,  ..., 0.1518, 0.1510, 0.1568],
        ...,
        [0.1549, 0.1607, 0.1645,  ..., 0.1553, 0.1594, 0.1622],
        [0.1448, 0.1536, 0.1566,  ..., 0.1473, 0.1480, 0.1473],
        [0.1521, 0.1583, 0.1666,  ..., 0.1525, 0.1556, 0.1598]],
       grad_fn=<LogBackward>)


 86%|████████▌ | 49/57 [00:36<00:05,  1.40it/s]

tensor([[0.1472, 0.1606, 0.1600,  ..., 0.1507, 0.1526, 0.1534],
        [0.1501, 0.1599, 0.1612,  ..., 0.1565, 0.1551, 0.1576],
        [0.1465, 0.1572, 0.1573,  ..., 0.1499, 0.1532, 0.1508],
        ...,
        [0.1492, 0.1549, 0.1624,  ..., 0.1513, 0.1516, 0.1535],
        [0.1486, 0.1585, 0.1603,  ..., 0.1503, 0.1525, 0.1559],
        [0.1491, 0.1558, 0.1614,  ..., 0.1506, 0.1518, 0.1525]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 50/57 [00:37<00:05,  1.36it/s]

tensor([[0.1490, 0.1551, 0.1607,  ..., 0.1487, 0.1510, 0.1524],
        [0.1525, 0.1588, 0.1654,  ..., 0.1536, 0.1582, 0.1595],
        [0.1413, 0.1533, 0.1528,  ..., 0.1433, 0.1528, 0.1511],
        ...,
        [0.1467, 0.1591, 0.1620,  ..., 0.1537, 0.1557, 0.1593],
        [0.1506, 0.1566, 0.1616,  ..., 0.1536, 0.1517, 0.1562],
        [0.1495, 0.1536, 0.1608,  ..., 0.1511, 0.1531, 0.1540]],
       grad_fn=<LogBackward>)


 89%|████████▉ | 51/57 [00:37<00:04,  1.31it/s]

tensor([[0.1484, 0.1609, 0.1592,  ..., 0.1505, 0.1566, 0.1557],
        [0.1495, 0.1540, 0.1597,  ..., 0.1513, 0.1525, 0.1557],
        [0.1549, 0.1613, 0.1692,  ..., 0.1555, 0.1612, 0.1633],
        ...,
        [0.1529, 0.1639, 0.1657,  ..., 0.1577, 0.1597, 0.1608],
        [0.1541, 0.1566, 0.1631,  ..., 0.1525, 0.1541, 0.1555],
        [0.1525, 0.1593, 0.1671,  ..., 0.1514, 0.1560, 0.1600]],
       grad_fn=<LogBackward>)


 91%|█████████ | 52/57 [00:38<00:03,  1.33it/s]

tensor([[0.1465, 0.1563, 0.1564,  ..., 0.1469, 0.1481, 0.1524],
        [0.1497, 0.1587, 0.1631,  ..., 0.1527, 0.1567, 0.1570],
        [0.1476, 0.1588, 0.1617,  ..., 0.1517, 0.1549, 0.1590],
        ...,
        [0.1417, 0.1528, 0.1530,  ..., 0.1435, 0.1519, 0.1514],
        [0.1461, 0.1524, 0.1561,  ..., 0.1469, 0.1490, 0.1524],
        [0.1493, 0.1567, 0.1591,  ..., 0.1525, 0.1550, 0.1553]],
       grad_fn=<LogBackward>)


 93%|█████████▎| 53/57 [00:39<00:03,  1.31it/s]

tensor([[0.1478, 0.1549, 0.1595,  ..., 0.1528, 0.1518, 0.1540],
        [0.1478, 0.1587, 0.1608,  ..., 0.1499, 0.1572, 0.1585],
        [0.1537, 0.1593, 0.1672,  ..., 0.1521, 0.1566, 0.1614],
        ...,
        [0.1565, 0.1622, 0.1668,  ..., 0.1560, 0.1593, 0.1615],
        [0.1511, 0.1579, 0.1623,  ..., 0.1534, 0.1552, 0.1559],
        [0.1502, 0.1620, 0.1624,  ..., 0.1520, 0.1550, 0.1578]],
       grad_fn=<LogBackward>)


 95%|█████████▍| 54/57 [00:40<00:02,  1.32it/s]

tensor([[0.1443, 0.1530, 0.1564,  ..., 0.1474, 0.1501, 0.1492],
        [0.1478, 0.1558, 0.1599,  ..., 0.1511, 0.1551, 0.1529],
        [0.1506, 0.1521, 0.1595,  ..., 0.1465, 0.1510, 0.1506],
        ...,
        [0.1587, 0.1635, 0.1711,  ..., 0.1570, 0.1629, 0.1637],
        [0.1518, 0.1609, 0.1653,  ..., 0.1533, 0.1539, 0.1584],
        [0.1534, 0.1615, 0.1676,  ..., 0.1562, 0.1618, 0.1619]],
       grad_fn=<LogBackward>)


 96%|█████████▋| 55/57 [00:40<00:01,  1.33it/s]

tensor([[0.1538, 0.1589, 0.1675,  ..., 0.1536, 0.1585, 0.1606],
        [0.1496, 0.1568, 0.1632,  ..., 0.1527, 0.1541, 0.1557],
        [0.1501, 0.1584, 0.1628,  ..., 0.1514, 0.1554, 0.1545],
        ...,
        [0.1546, 0.1663, 0.1673,  ..., 0.1564, 0.1605, 0.1591],
        [0.1540, 0.1604, 0.1635,  ..., 0.1572, 0.1586, 0.1590],
        [0.1524, 0.1625, 0.1656,  ..., 0.1536, 0.1583, 0.1618]],
       grad_fn=<LogBackward>)


 98%|█████████▊| 56/57 [00:41<00:00,  1.32it/s]

tensor([[0.1504, 0.1589, 0.1629,  ..., 0.1539, 0.1609, 0.1581],
        [0.1527, 0.1569, 0.1642,  ..., 0.1538, 0.1531, 0.1568],
        [0.1503, 0.1569, 0.1609,  ..., 0.1525, 0.1561, 0.1574],
        ...,
        [0.1485, 0.1592, 0.1590,  ..., 0.1539, 0.1550, 0.1567],
        [0.1512, 0.1560, 0.1631,  ..., 0.1520, 0.1538, 0.1556],
        [0.1427, 0.1506, 0.1541,  ..., 0.1494, 0.1475, 0.1492]],
       grad_fn=<LogBackward>)


100%|██████████| 57/57 [00:42<00:00,  1.34it/s]
 12%|█▎        | 1/8 [00:00<00:02,  3.18it/s]

tensor([[0.1525, 0.1579, 0.1649,  ..., 0.1521, 0.1566, 0.1599],
        [0.1527, 0.1595, 0.1646,  ..., 0.1538, 0.1526, 0.1596],
        [0.1549, 0.1585, 0.1680,  ..., 0.1532, 0.1571, 0.1602],
        ...,
        [0.1521, 0.1591, 0.1663,  ..., 0.1528, 0.1554, 0.1580],
        [0.1523, 0.1597, 0.1648,  ..., 0.1517, 0.1576, 0.1596],
        [0.1457, 0.1581, 0.1567,  ..., 0.1474, 0.1506, 0.1519]],
       grad_fn=<LogBackward>)


 25%|██▌       | 2/8 [00:00<00:01,  3.19it/s]

tensor([[0.1410, 0.1489, 0.1526,  ..., 0.1485, 0.1465, 0.1476],
        [0.1460, 0.1524, 0.1580,  ..., 0.1470, 0.1484, 0.1527],
        [0.1479, 0.1574, 0.1584,  ..., 0.1529, 0.1553, 0.1537],
        ...,
        [0.1457, 0.1517, 0.1549,  ..., 0.1458, 0.1491, 0.1492],
        [0.1470, 0.1615, 0.1622,  ..., 0.1543, 0.1540, 0.1574],
        [0.1543, 0.1598, 0.1671,  ..., 0.1556, 0.1602, 0.1634]],
       grad_fn=<LogBackward>)


 38%|███▊      | 3/8 [00:00<00:01,  3.20it/s]

tensor([[0.1457, 0.1540, 0.1551,  ..., 0.1480, 0.1507, 0.1478],
        [0.1480, 0.1552, 0.1612,  ..., 0.1504, 0.1517, 0.1541],
        [0.1482, 0.1574, 0.1592,  ..., 0.1505, 0.1541, 0.1586],
        ...,
        [0.1531, 0.1594, 0.1665,  ..., 0.1543, 0.1543, 0.1589],
        [0.1500, 0.1550, 0.1619,  ..., 0.1521, 0.1516, 0.1543],
        [0.1511, 0.1630, 0.1633,  ..., 0.1525, 0.1582, 0.1591]],
       grad_fn=<LogBackward>)


 50%|█████     | 4/8 [00:01<00:01,  3.17it/s]

tensor([[0.1523, 0.1595, 0.1658,  ..., 0.1552, 0.1581, 0.1588],
        [0.1513, 0.1585, 0.1658,  ..., 0.1504, 0.1560, 0.1586],
        [0.1548, 0.1608, 0.1669,  ..., 0.1540, 0.1580, 0.1626],
        ...,
        [0.1478, 0.1543, 0.1593,  ..., 0.1514, 0.1514, 0.1528],
        [0.1550, 0.1618, 0.1666,  ..., 0.1560, 0.1590, 0.1596],
        [0.1511, 0.1581, 0.1623,  ..., 0.1522, 0.1528, 0.1558]],
       grad_fn=<LogBackward>)


 62%|██████▎   | 5/8 [00:01<00:00,  3.23it/s]

tensor([[0.1562, 0.1606, 0.1666,  ..., 0.1524, 0.1622, 0.1620],
        [0.1507, 0.1543, 0.1629,  ..., 0.1528, 0.1520, 0.1569],
        [0.1473, 0.1555, 0.1605,  ..., 0.1543, 0.1548, 0.1544],
        ...,
        [0.1553, 0.1603, 0.1691,  ..., 0.1544, 0.1598, 0.1610],
        [0.1464, 0.1546, 0.1585,  ..., 0.1499, 0.1505, 0.1549],
        [0.1446, 0.1548, 0.1579,  ..., 0.1514, 0.1512, 0.1515]],
       grad_fn=<LogBackward>)


 75%|███████▌  | 6/8 [00:01<00:00,  3.27it/s]

tensor([[0.1499, 0.1548, 0.1628,  ..., 0.1530, 0.1528, 0.1556],
        [0.1475, 0.1553, 0.1589,  ..., 0.1505, 0.1510, 0.1519],
        [0.1519, 0.1578, 0.1635,  ..., 0.1516, 0.1531, 0.1553],
        ...,
        [0.1508, 0.1590, 0.1634,  ..., 0.1546, 0.1582, 0.1585],
        [0.1554, 0.1611, 0.1695,  ..., 0.1577, 0.1607, 0.1629],
        [0.1546, 0.1602, 0.1667,  ..., 0.1563, 0.1577, 0.1621]],
       grad_fn=<LogBackward>)


 88%|████████▊ | 7/8 [00:02<00:00,  3.32it/s]

tensor([[0.1447, 0.1572, 0.1565,  ..., 0.1483, 0.1542, 0.1554],
        [0.1530, 0.1588, 0.1650,  ..., 0.1551, 0.1578, 0.1587],
        [0.1536, 0.1616, 0.1669,  ..., 0.1524, 0.1585, 0.1614],
        ...,
        [0.1508, 0.1557, 0.1623,  ..., 0.1521, 0.1528, 0.1548],
        [0.1465, 0.1574, 0.1573,  ..., 0.1512, 0.1553, 0.1544],
        [0.1490, 0.1588, 0.1623,  ..., 0.1502, 0.1561, 0.1576]],
       grad_fn=<LogBackward>)


100%|██████████| 8/8 [00:02<00:00,  3.27it/s]
  0%|          | 0/57 [00:00<?, ?it/s]

tensor([[0.1516, 0.1604, 0.1643,  ..., 0.1543, 0.1600, 0.1627],
        [0.1417, 0.1528, 0.1530,  ..., 0.1435, 0.1519, 0.1514],
        [0.1470, 0.1514, 0.1550,  ..., 0.1465, 0.1470, 0.1478],
        ...,
        [0.1512, 0.1608, 0.1635,  ..., 0.1536, 0.1572, 0.1610],
        [0.1500, 0.1587, 0.1613,  ..., 0.1562, 0.1561, 0.1570],
        [0.1538, 0.1600, 0.1684,  ..., 0.1525, 0.1579, 0.1627]],
       grad_fn=<LogBackward>)
Updating prototype representation
[inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf]


  2%|▏         | 1/57 [00:00<00:16,  3.37it/s]

[tensor(5.7983), tensor(5.4279), tensor(5.2881), tensor(5.4953), tensor(5.1244), tensor(5.2778), tensor(5.6761), tensor(5.2884), tensor(5.6932), tensor(3.3036), tensor(5.8282), tensor(5.4963), tensor(5.8298), tensor(5.6146), tensor(5.9273), tensor(5.5602), tensor(5.3607), tensor(5.7934), tensor(5.5810), tensor(5.6194)]


  4%|▎         | 2/57 [00:00<00:16,  3.30it/s]

[tensor(5.7978), tensor(5.4279), tensor(5.2846), tensor(5.4953), tensor(5.1244), tensor(5.2233), tensor(5.6558), tensor(5.2884), tensor(5.6886), tensor(3.2946), tensor(5.7899), tensor(5.4963), tensor(5.8298), tensor(5.6146), tensor(5.9273), tensor(5.5602), tensor(5.3607), tensor(5.7934), tensor(5.5388), tensor(5.5972)]


  5%|▌         | 3/57 [00:01<00:17,  3.04it/s]

[tensor(5.7978), tensor(5.4279), tensor(5.2644), tensor(5.4953), tensor(5.1244), tensor(5.2233), tensor(5.6558), tensor(5.2884), tensor(5.6886), tensor(3.2903), tensor(5.7743), tensor(5.4935), tensor(5.7990), tensor(5.6146), tensor(5.9273), tensor(5.5602), tensor(5.3607), tensor(5.7934), tensor(5.5388), tensor(5.5972)]


  7%|▋         | 4/57 [00:01<00:17,  3.02it/s]

[tensor(5.7978), tensor(5.4279), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2233), tensor(5.6558), tensor(5.2771), tensor(5.6886), tensor(3.2903), tensor(5.7743), tensor(5.4935), tensor(5.7990), tensor(5.6146), tensor(5.9273), tensor(5.5602), tensor(5.3607), tensor(5.7934), tensor(5.5388), tensor(5.5972)]


  9%|▉         | 5/57 [00:01<00:17,  2.92it/s]

[tensor(5.7978), tensor(5.4279), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2233), tensor(5.6558), tensor(5.2528), tensor(5.6886), tensor(3.2903), tensor(5.7743), tensor(5.4935), tensor(5.7743), tensor(5.6146), tensor(5.9273), tensor(5.5602), tensor(5.3533), tensor(5.7934), tensor(5.5388), tensor(5.5972)]


 11%|█         | 6/57 [00:02<00:17,  2.93it/s]

[tensor(5.7978), tensor(5.4279), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2233), tensor(5.6558), tensor(5.2528), tensor(5.6886), tensor(3.2903), tensor(5.7743), tensor(5.4935), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3533), tensor(5.7934), tensor(5.5388), tensor(5.5505)]


 12%|█▏        | 7/57 [00:02<00:17,  2.93it/s]

[tensor(5.7883), tensor(5.4279), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2528), tensor(5.6248), tensor(3.2903), tensor(5.7743), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3533), tensor(5.7921), tensor(5.5388), tensor(5.5505)]


 14%|█▍        | 8/57 [00:02<00:17,  2.83it/s]

[tensor(5.7883), tensor(5.3654), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2528), tensor(5.6248), tensor(3.2903), tensor(5.7743), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3533), tensor(5.7921), tensor(5.5388), tensor(5.5505)]


 16%|█▌        | 9/57 [00:03<00:16,  2.84it/s]

[tensor(5.7883), tensor(5.3654), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2528), tensor(5.6248), tensor(3.2903), tensor(5.7743), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3533), tensor(5.7921), tensor(5.5388), tensor(5.5505)]


 18%|█▊        | 10/57 [00:03<00:16,  2.85it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2528), tensor(5.6248), tensor(3.2903), tensor(5.7743), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3533), tensor(5.7921), tensor(5.5388), tensor(5.5505)]


 19%|█▉        | 11/57 [00:03<00:16,  2.83it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4474), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.7598), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3512), tensor(5.7921), tensor(5.5388), tensor(5.5038)]


 21%|██        | 12/57 [00:04<00:15,  2.82it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6558), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.7277), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3512), tensor(5.7921), tensor(5.5388), tensor(5.5038)]


 23%|██▎       | 13/57 [00:04<00:15,  2.90it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4890), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3512), tensor(5.7921), tensor(5.5388), tensor(5.5038)]


 25%|██▍       | 14/57 [00:04<00:14,  2.94it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4877), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.3512), tensor(5.7921), tensor(5.5388), tensor(5.5038)]


 26%|██▋       | 15/57 [00:05<00:14,  2.96it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4877), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.2943), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 28%|██▊       | 16/57 [00:05<00:13,  3.08it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4877), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.2943), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 30%|██▉       | 17/57 [00:05<00:12,  3.10it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4877), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.2943), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 32%|███▏      | 18/57 [00:06<00:12,  3.03it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.6146), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 33%|███▎      | 19/57 [00:06<00:12,  3.02it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2644), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6248), tensor(3.2903), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.6020), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 35%|███▌      | 20/57 [00:06<00:11,  3.13it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2640), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2843), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.6020), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 37%|███▋      | 21/57 [00:07<00:11,  3.08it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2640), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2843), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 39%|███▊      | 22/57 [00:07<00:11,  3.13it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.2640), tensor(5.4109), tensor(5.1244), tensor(5.2184), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2843), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 40%|████      | 23/57 [00:07<00:10,  3.19it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 42%|████▏     | 24/57 [00:08<00:10,  3.15it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 44%|████▍     | 25/57 [00:08<00:10,  3.15it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 46%|████▌     | 26/57 [00:08<00:09,  3.17it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 47%|████▋     | 27/57 [00:08<00:09,  3.15it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 49%|████▉     | 28/57 [00:09<00:09,  3.21it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5198), tensor(5.5038)]


 51%|█████     | 29/57 [00:09<00:08,  3.36it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7743), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7644), tensor(5.5041), tensor(5.5038)]


 53%|█████▎    | 30/57 [00:09<00:07,  3.42it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6864), tensor(5.4445), tensor(5.7704), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2658), tensor(5.7620), tensor(5.5041), tensor(5.5038)]


 54%|█████▍    | 31/57 [00:10<00:07,  3.27it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 56%|█████▌    | 32/57 [00:10<00:07,  3.24it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 58%|█████▊    | 33/57 [00:10<00:07,  3.15it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2839), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 60%|█████▉    | 34/57 [00:11<00:07,  3.12it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 61%|██████▏   | 35/57 [00:11<00:06,  3.16it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 63%|██████▎   | 36/57 [00:11<00:06,  3.11it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 65%|██████▍   | 37/57 [00:12<00:06,  3.12it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 67%|██████▋   | 38/57 [00:12<00:05,  3.25it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 68%|██████▊   | 39/57 [00:12<00:05,  3.32it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 70%|███████   | 40/57 [00:12<00:05,  3.27it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 72%|███████▏  | 41/57 [00:13<00:04,  3.28it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 74%|███████▎  | 42/57 [00:13<00:04,  3.35it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 75%|███████▌  | 43/57 [00:13<00:04,  3.20it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7611), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 77%|███████▋  | 44/57 [00:14<00:04,  3.11it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7290), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 79%|███████▉  | 45/57 [00:14<00:03,  3.03it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7290), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 81%|████████  | 46/57 [00:14<00:03,  3.03it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7290), tensor(5.5840), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 82%|████████▏ | 47/57 [00:15<00:03,  3.03it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 84%|████████▍ | 48/57 [00:15<00:02,  3.05it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 86%|████████▌ | 49/57 [00:15<00:02,  3.04it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 88%|████████▊ | 50/57 [00:16<00:02,  3.05it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6306), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 89%|████████▉ | 51/57 [00:16<00:01,  3.05it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 91%|█████████ | 52/57 [00:16<00:01,  2.99it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 93%|█████████▎| 53/57 [00:17<00:01,  3.11it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 95%|█████████▍| 54/57 [00:17<00:00,  3.08it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 96%|█████████▋| 55/57 [00:17<00:00,  3.16it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


 98%|█████████▊| 56/57 [00:18<00:00,  3.12it/s]

[tensor(5.7473), tensor(5.3654), tensor(5.1902), tensor(5.4109), tensor(5.1244), tensor(5.1691), tensor(5.6235), tensor(5.2234), tensor(5.6081), tensor(3.2698), tensor(5.6052), tensor(5.4445), tensor(5.7171), tensor(5.5838), tensor(5.8420), tensor(5.5210), tensor(5.2636), tensor(5.7620), tensor(5.4826), tensor(5.5038)]


100%|██████████| 57/57 [00:18<00:00,  3.09it/s]


[tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128), tensor(128)]
Epoch: 19, train_loss: -0.8884828090667725, valid_loss: -0.8602588772773743
Parameter containing:
tensor([[ 0.9998,  0.9998,  0.9998,  0.9998,  0.9999,  0.9997,  1.0000,  0.9997,
          0.9997,  1.0004, -0.5002, -0.5003, -0.5003, -0.5002, -0.5003, -0.5002,
         -0.5001, -0.5004, -0.5002, -0.5003],
        [-0.4998, -0.4998, -0.4998, -0.4998, -0.4999, -0.4997, -0.5000, -0.4997,
         -0.4997, -0.5004,  1.0002,  1.0003,  1.0003,  1.0002,  1.0003,  1.0002,
          1.0001,  1.0004,  1.0002,  1.0003]], requires_grad=True)
tensor([[0.3266, 0.3047, 0.2609,  ..., 0.2714, 0.2553, 0.2548],
        [0.3812, 0.3181, 0.3391,  ..., 0.4155, 0.3493, 0.3148],
        [0.4078, 0.3192, 0.3648,  ..., 0.3984, 0.3528, 0.3762],
        ...,

In [57]:
# Sanity check: feed the first entry of test[0] through run.model and pick,
# for that row, the smallest value in the second output together with its index.
# NOTE(review): min_dis presumably holds one distance per prototype - confirm
# against the model's forward().
check = test[0][0:1]  # 0:1 slice (rather than [0]) keeps the leading dimension
out, min_dis = run.model(check)
# Tensor.min(dim=1) returns (minimum values, their indices) along dim 1.
value, proto_id = min_dis.min(dim=1)


tensor([[0.3266, 0.3047, 0.2609, 0.2267, 0.2845, 0.2609, 0.2763, 0.3001, 0.2796,
         0.3776, 0.2701, 0.2667, 0.3372, 0.3004, 0.2598, 0.2597, 0.2553, 0.2714,
         0.2553, 0.2548]], grad_fn=<LogBackward>)


In [26]:
import pandas as pd

# Merge each run's stats into the matching sampled-parameter dict, so that
# every entry of `param_samples` carries both the configuration and its results.
# dict.update() is used instead of an inner `for key, val in ...` loop on
# purpose: a loop variable named `val` would rebind the notebook-global `val`
# that the data-loading cell assigns (`train, val, test = torch.load(...)`),
# silently replacing the validation split for every cell run afterwards.
for i in range(N):
    param_samples[i].update(stats[i])

# Persist the merged results; the file name encodes the number of samples N.
torch.save(param_samples, 'results/Proto_experiment_no_push_N'+str(N)+'.pkl')

# Tabular view of the merged results for sorting and inspection.
df = pd.DataFrame(param_samples)

In [28]:
# Show the results table ordered by the 'auroc' column, highest value first.
df.sort_values(by='auroc', ascending=False)

Unnamed: 0,BATCH_SIZE,EPOCHS,OPTIMIZER,LEARNING_RATE,LOSS,EARLY_STOPPING,PATIENCE,MIN_DELTA,MAXPOOL,OBO_SIZES,PROTOTYPE_NUM,DROPOUT,HIDDEN_SIZES,KERNEL_SIZES,epoch_stopped,auroc,auprc,acc,f1
4,256,50,<class 'torch.optim.adam.Adam'>,1e-05,protop_loss_0.3_0.1,True,3,0.0005,2,"[256, 256]",20,0.5,"[128, 256, 512]","[7, 3, 3]",,0.792926,0.404332,0.777673,0.440476
1,512,40,<class 'torch.optim.adam.Adam'>,0.0005,protop_loss_1_0.5,True,2,0.0001,2,"[256, 256]",10,0.3,"[128, 256, 512]","[7, 5, 3]",16.0,0.779959,0.433967,0.832545,0.476331
8,128,40,<class 'torch.optim.adam.Adam'>,5e-05,protop_loss_0.3_0.1,True,3,0.005,2,[256],20,0.6,"[128, 256]","[9, 7]",23.0,0.753227,0.404382,0.839877,0.46902
2,256,50,<class 'torch.optim.adam.Adam'>,0.0001,protop_loss_0.5_0.1,True,3,0.0001,2,"[512, 256]",20,0.2,"[128, 256, 512]","[7, 5, 3]",24.0,0.749045,0.399017,0.828997,0.480976
3,256,40,<class 'torch.optim.adam.Adam'>,0.0005,protop_loss_0.3_0.1,True,3,0.0005,2,"[512, 256]",20,0.2,"[128, 256, 512]","[9, 7, 3]",18.0,0.741501,0.369021,0.790445,0.47199
5,512,40,<class 'torch.optim.adam.Adam'>,5e-05,protop_loss_0.1_0.1,True,2,5e-05,2,[512],10,0.1,"[256, 512]","[7, 7]",8.0,0.71921,0.289019,0.843425,0.032164
7,128,20,<class 'torch.optim.adam.Adam'>,1e-05,protop_loss_1_0.5,True,2,0.0005,2,[512],10,0.1,"[256, 512]","[7, 3]",12.0,0.647657,0.230045,0.838931,0.107471
0,512,40,<class 'torch.optim.adam.Adam'>,1e-05,protop_loss_1_1,True,2,0.0001,2,"[512, 256]",20,0.3,"[128, 256, 512]","[5, 5, 5]",8.0,0.618752,0.220073,0.8465,0.026987
6,256,50,<class 'torch.optim.adam.Adam'>,0.0005,protop_loss_0.1_0.1,True,3,0.005,2,[256],20,0.3,"[128, 256]","[9, 7]",9.0,0.536037,0.143266,0.618496,0.021831
9,128,20,<class 'torch.optim.adam.Adam'>,1e-05,protop_loss_0.1_0.1,True,3,0.001,2,[512],10,0.1,"[128, 256]","[9, 3]",9.0,0.464464,0.13088,0.296831,0.247913


In [8]:
# Construct a fresh ProtICU instance and run the first 14 entries of test[0]
# through it.
# NOTE(review): the positional arguments presumably are (input shape, number of
# classes, hidden sizes, kernel sizes, max-pool size, 1x1 layer sizes,
# per-prototype class labels, dropout) - confirm against models/ProtICU.py.
model = ProtICU(
    test[0].shape,
    2,
    [64, 128],
    [5, 3],
    2,
    [256, 128],
    [0] * 5 + [1] * 5,  # ten entries: five 0s followed by five 1s
    0.2,
)
out, min_dis = model(test[0][:14])

In [None]:
# Load a results file from disk (presumably the output of an earlier N=30 run).
# NOTE(review): the saving cell writes 'results/Proto_experiment_no_push_N<N>.pkl'
# (underscore between "no" and "push"); confirm this 'nopush' name is the intended file.
# NOTE: torch.load unpickles its input, so only load files from a trusted source.
# This also rebinds `check`, which the "# Check" cell uses for a slice of test[0].
check = torch.load('results/Proto_experiment_nopush_N30.pkl')