In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.learner import *
from fastai.column_data import *

In [2]:
path='data/ml-latest-small/'

In [3]:
ratings = pd.read_csv(path+'ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
movies = pd.read_csv(path+'movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
val_idxs = get_cv_idxs(len(ratings)) # cross-validation indexes, since we don't have a separate validation set.
wd=2e-4 # weight decay (for the optimizer we're using)
n_factors = 50 # dimensionality of the embedding matrix

In [6]:
a = T([[1., 2], [3, 4]])
b = T([[2., 2], [10, 10]])
a,b

(
  1  2
  3  4
 [torch.FloatTensor of size 2x2], 
   2   2
  10  10
 [torch.FloatTensor of size 2x2])

In [7]:
a*b


  2   4
 30  40
[torch.FloatTensor of size 2x2]

In [8]:
(a*b).sum(1)


  6
 70
[torch.FloatTensor of size 2]

In [9]:
class DotProduct(nn.Module):
    """pytorch module to calculate 1-d dot product"""
    def forward(self, u, m): return (u*m).sum(1)

In [10]:
model = DotProduct()

In [11]:
model(a, b)


  6
 70
[torch.FloatTensor of size 2]

Because the `userId`s and `movieId`s may not be contiguous, we get the unique ones of each, enumerate them, and store the enumeration index in a dict. We then replace the `userId` and `movieId` fields in the ratings dataframe with the new contiguous integers. 

In [12]:
u_uniq = ratings.userId.unique()
user2idx = {o:i for i,o in enumerate(u_uniq)}
ratings.userId = ratings.userId.apply(lambda x: user2idx[x])

m_uniq = ratings.movieId.unique()

movie2idx = {o:i for i,o in enumerate(m_uniq)}
ratings.movieId = ratings.movieId.apply(lambda x: movie2idx[x])

n_users = int(ratings.userId.nunique())
n_movies = int(ratings.movieId.nunique())

In [13]:
n_users,n_movies

(671, 9066)

The actual embedding matrix is a variable, not a tensor. The difference between the two is that a variable can perform automatic differentiation. To pull the tensor out of a variable, you get its `data` attribute. In PyTorch, all of the tensor functions have a version with an underscore at the end, e.g. `uniform` has `uniform_` in
```
self.u.weight.data.uniform_(0,0.05)
```
which enables you to calculate values in place

In [14]:
class EmbeddingDot(nn.Module):
    def __init__(self, n_users, n_movies):
        super().__init__()
        self.u = nn.Embedding(n_users, n_factors)
        self.m = nn.Embedding(n_movies, n_factors)
        self.u.weight.data.uniform_(0,0.05) # weight initialization
        self.m.weight.data.uniform_(0,0.05)
    
    def forward(self, cats, conts):
        users, movies = cats[:,0], cats[:,1]
        u, m = self.u(users), self.m(movies)
        return (u*m).sum(1)

In [15]:
x = ratings.drop(['rating', 'timestamp'], axis=1)
y = ratings['rating'].values.astype(np.float32)

In [16]:
data = ColumnarModelData.from_data_frame(path, val_idxs, x, y, ['userId', 'movieId'], 64)

In [17]:
wd = 1e-5
lr = 1e-1
model = EmbeddingDot(n_users, n_movies).cuda()
opt = optim.SGD(model.parameters(), lr, weight_decay=wd, momentum=0.9)

`fit` is a fastai library function that loops through each minibatch to fit a model. If you don't want to perform your own training loop but want to avoid as much of `fastai` as possible, this is the function to call directly. However, you won't have access to fastai tools such as SGDR, differential learning rates etc.

In [18]:
fit(model, data, 10, opt, F.mse_loss)

[ 0.       1.67627  1.63617]                                   
[ 1.       1.13087  1.30792]                                   
[ 2.       0.90835  1.22542]                                    
[ 3.       0.76516  1.20123]                                    
[ 4.       0.72462  1.17098]                                    
[ 5.       0.61049  1.16034]                                    
[ 6.       0.48634  1.16154]                                    
[ 7.      0.4008  1.1639]                                       
[ 8.       0.35235  1.17702]                                    
[ 9.       0.27996  1.19229]                                    



manual learning rate annealing

In [20]:
set_lrs(opt, 0.01)

In [21]:
fit(model, data, 10, opt, F.mse_loss)

[ 0.       0.105    1.22152]                                    
[ 1.       0.09901  1.22067]                                     
[ 2.       0.10646  1.22151]                                     
[ 3.       0.09281  1.22359]                                     
[ 4.       0.09455  1.22489]                                     
[ 5.       0.09407  1.22691]                                     
[ 6.       0.09106  1.22806]                                     
[ 7.       0.08558  1.2298 ]                                     
[ 8.       0.0834   1.23103]                                     
[ 9.       0.08504  1.23302]                                     



In [22]:
min_rating, max_rating = ratings.rating.min(), ratings.rating.max()
min_rating, max_rating

(0.5, 5.0)

In [23]:
def get_emb(ni, nf):
    """takes a number of inputs and a number of factors 
    (number of rows,columns in the embedding matrix), and returns an 
    initialized embedding matrix"""
    e = nn.Embedding(ni, nf)
    e.weight.data.uniform_(-0.01, 0.01)
    return e

In [24]:
class EmbeddingDotBias(nn.Module):
    def __init__(self, n_users, n_movies):
        super().__init__()
        (self.u, self.m, self.ub, self.mb) = [get_emb(*o) for o in [
            (n_users, n_factors), (n_movies, n_factors), 
            (n_users, 1), (n_movies, 1)
        ]]
    def forward(self, cats, conts):
        users, movies = cats[:,0], cats[:,1]
        um = (self.u(users) * self.m(movies)).sum(1)
        res = um + self.ub(users).squeeze() + self.mb(movies).squeeze()
        res = F.sigmoid(res) * (max_rating - min_rating) + min_rating
        return res

In [25]:
wd = 2e-4
lr = 1e-1
model = EmbeddingDotBias(n_users, n_movies).cuda()
opt = optim.SGD(model.parameters(), lr, weight_decay=wd, momentum=0.9)

In [27]:
fit(model, data, 10, opt, F.mse_loss)

[ 0.       0.75627  0.80457]                                    
[ 1.       0.7806   0.80371]                                    
[ 2.       0.7487   0.79998]                                    
[ 3.       0.72433  0.79548]                                    
[ 4.       0.67474  0.7894 ]                                    
[ 5.       0.64464  0.78774]                                    
[ 6.       0.62606  0.78304]                                    
[ 7.       0.62189  0.78066]                                    
[ 8.       0.57876  0.77787]                                    
[ 9.       0.56653  0.77536]                                    



In [23]:
class EmbeddingNet(nn.Module):
    def __init__(self, n_users, n_movies, nh=10):
        super().__init__()
        (self.u, self.m) = [get_emb(*o) for o in [
            (n_users, n_factors), (n_movies, n_factors)
        ]]
        self.lin1 = nn.Linear(n_factors*2, nh)
        self.lin2 = nn.Linear(10, 1)
    
    def forward(self, cats, conts):
        users, movies = cats[:,0], cats[:,1]
        x = torch.cat([self.u(users), self.m(movies)], dim=1)

        if self.training:
            x = F.dropout(torch.cat([self.u(users), self.m(movies)], dim=1), 0.75)
            x = F.dropout(F.relu(self.lin1(x)), 0.75)
        return F.sigmoid(self.lin2(x)) * (max_rating - min_rating + 1) + min_rating - 0.5

In [24]:
wd = 5e-4
lr = 1e-2
model = EmbeddingNet(n_users, n_movies, 10).cuda()
opt = optim.SGD(model.parameters(), lr, weight_decay=wd, momentum=0.9)

In [26]:
model.training = True
fit(model, data, 10, opt, F.mse_loss)


  0%|          | 0/1251 [00:00<?, ?it/s][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.723][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.744][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.9]  [A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.938][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.909][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.9]  [A
  0%|          | 6/1251 [00:00<00:21, 58.62it/s, loss=0.9][A
  0%|          | 6/1251 [00:00<00:22, 54.56it/s, loss=0.914][A
  0%|          | 6/1251 [00:00<00:24, 50.33it/s, loss=0.965][A
  0%|          | 6/1251 [00:00<00:26, 46.39it/s, loss=0.935][A
  0%|          | 6/1251 [00:00<00:28, 43.57it/s, loss=0.916][A
  0%|          | 6/1251 [00:00<00:30, 40.96it/s, loss=0.913][A
  0%|          | 6/1251 [00:00<00:31, 39.27it/s, loss=0.918][A
  0%|          | 6/1251 [00:00<00:33, 37.14it/s, loss=0.894][A
  0%|          | 6/1251 [00:00<00:34, 35.58it/s, loss=0.88] [A
  0%|          | 6/1251 [00:00<00:36, 34.00it

  9%|▉         | 115/1251 [00:01<00:13, 82.68it/s, loss=0.817][A
  9%|▉         | 115/1251 [00:01<00:13, 81.81it/s, loss=0.82] [A
  9%|▉         | 115/1251 [00:01<00:14, 80.97it/s, loss=0.817][A
  9%|▉         | 115/1251 [00:01<00:14, 80.08it/s, loss=0.824][A
  9%|▉         | 115/1251 [00:01<00:14, 79.28it/s, loss=0.818][A
  9%|▉         | 115/1251 [00:01<00:14, 78.57it/s, loss=0.813][A
  9%|▉         | 115/1251 [00:01<00:14, 77.77it/s, loss=0.812][A
  9%|▉         | 115/1251 [00:01<00:14, 76.99it/s, loss=0.81] [A
  9%|▉         | 115/1251 [00:01<00:14, 76.25it/s, loss=0.808][A
 10%|▉         | 123/1251 [00:01<00:13, 81.16it/s, loss=0.808][A
 10%|▉         | 123/1251 [00:01<00:14, 80.41it/s, loss=0.809][A
 10%|▉         | 123/1251 [00:01<00:14, 79.61it/s, loss=0.816][A
 10%|▉         | 123/1251 [00:01<00:14, 78.82it/s, loss=0.815][A
 10%|▉         | 123/1251 [00:01<00:14, 78.22it/s, loss=0.815][A
 10%|▉         | 123/1251 [00:01<00:14, 77.43it/s, loss=0.817][A
 10%|▉    

 17%|█▋        | 218/1251 [00:02<00:13, 76.26it/s, loss=0.807][A
 17%|█▋        | 218/1251 [00:02<00:13, 76.15it/s, loss=0.806][A
 17%|█▋        | 218/1251 [00:02<00:13, 76.03it/s, loss=0.809][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.89it/s, loss=0.81] [A
 17%|█▋        | 218/1251 [00:02<00:13, 75.76it/s, loss=0.805][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.67it/s, loss=0.813][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.58it/s, loss=0.818][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.50it/s, loss=0.818][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.40it/s, loss=0.817][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.29it/s, loss=0.813][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.20it/s, loss=0.811][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.11it/s, loss=0.809][A
 17%|█▋        | 218/1251 [00:02<00:13, 75.01it/s, loss=0.812][A
 17%|█▋        | 218/1251 [00:02<00:13, 74.90it/s, loss=0.809][A
 17%|█▋        | 218/1251 [00:02<00:13, 74.82it/s, loss=0.816][A
 17%|█▋   

 26%|██▌       | 322/1251 [00:03<00:09, 96.57it/s, loss=0.804][A
 26%|██▌       | 322/1251 [00:03<00:09, 96.46it/s, loss=0.809][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.89it/s, loss=0.809][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.76it/s, loss=0.811][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.64it/s, loss=0.814][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.50it/s, loss=0.81] [A
 28%|██▊       | 347/1251 [00:03<00:08, 103.37it/s, loss=0.806][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.24it/s, loss=0.806][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.14it/s, loss=0.802][A
 28%|██▊       | 347/1251 [00:03<00:08, 103.03it/s, loss=0.803][A
 28%|██▊       | 347/1251 [00:03<00:08, 102.90it/s, loss=0.804][A
 28%|██▊       | 347/1251 [00:03<00:08, 102.77it/s, loss=0.805][A
 28%|██▊       | 347/1251 [00:03<00:08, 102.66it/s, loss=0.803][A
 28%|██▊       | 347/1251 [00:03<00:08, 102.54it/s, loss=0.801][A
 28%|██▊       | 347/1251 [00:03<00:08, 102.43it/s, loss=0.802]

 36%|███▌      | 450/1251 [00:03<00:06, 118.68it/s, loss=0.824][A
 36%|███▌      | 450/1251 [00:03<00:06, 118.56it/s, loss=0.817][A
 36%|███▌      | 450/1251 [00:03<00:06, 118.44it/s, loss=0.814][A
 36%|███▌      | 450/1251 [00:03<00:06, 118.30it/s, loss=0.812][A
 36%|███▌      | 450/1251 [00:03<00:06, 118.16it/s, loss=0.818][A
 36%|███▌      | 450/1251 [00:03<00:06, 118.05it/s, loss=0.818][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.93it/s, loss=0.821][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.79it/s, loss=0.823][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.63it/s, loss=0.823][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.52it/s, loss=0.825][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.42it/s, loss=0.826][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.34it/s, loss=0.82] [A
 36%|███▌      | 450/1251 [00:03<00:06, 117.25it/s, loss=0.825][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.13it/s, loss=0.824][A
 36%|███▌      | 450/1251 [00:03<00:06, 117.04it/s, loss=0.825

 46%|████▋     | 579/1251 [00:04<00:04, 135.48it/s, loss=0.791][A
 46%|████▋     | 579/1251 [00:04<00:04, 135.35it/s, loss=0.789][A
 46%|████▋     | 579/1251 [00:04<00:04, 135.24it/s, loss=0.791][A
 46%|████▋     | 579/1251 [00:04<00:04, 135.12it/s, loss=0.79] [A
 46%|████▋     | 579/1251 [00:04<00:04, 134.99it/s, loss=0.789][A
 46%|████▋     | 579/1251 [00:04<00:04, 134.87it/s, loss=0.798][A
 46%|████▋     | 579/1251 [00:04<00:04, 134.75it/s, loss=0.797][A
 46%|████▋     | 579/1251 [00:04<00:04, 134.63it/s, loss=0.792][A
 46%|████▋     | 579/1251 [00:04<00:04, 134.51it/s, loss=0.795][A
 46%|████▋     | 579/1251 [00:04<00:05, 134.38it/s, loss=0.8]  [A
 46%|████▋     | 579/1251 [00:04<00:05, 134.28it/s, loss=0.802][A
 46%|████▋     | 579/1251 [00:04<00:05, 134.15it/s, loss=0.804][A
 46%|████▋     | 579/1251 [00:04<00:05, 134.03it/s, loss=0.815][A
 46%|████▋     | 579/1251 [00:04<00:05, 133.92it/s, loss=0.817][A
 46%|████▋     | 579/1251 [00:04<00:05, 133.78it/s, loss=0.817

 54%|█████▍    | 679/1251 [00:04<00:04, 142.05it/s, loss=0.814][A
 54%|█████▍    | 679/1251 [00:04<00:04, 141.93it/s, loss=0.818][A
 54%|█████▍    | 679/1251 [00:04<00:04, 141.81it/s, loss=0.82] [A
 54%|█████▍    | 679/1251 [00:04<00:04, 141.64it/s, loss=0.816][A
 54%|█████▍    | 679/1251 [00:04<00:04, 141.51it/s, loss=0.816][A
 54%|█████▍    | 679/1251 [00:04<00:04, 141.39it/s, loss=0.812][A
 56%|█████▌    | 703/1251 [00:04<00:03, 146.35it/s, loss=0.812][A
 56%|█████▌    | 703/1251 [00:04<00:03, 146.22it/s, loss=0.809][A
 56%|█████▌    | 703/1251 [00:04<00:03, 146.03it/s, loss=0.804][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.89it/s, loss=0.801][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.75it/s, loss=0.799][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.60it/s, loss=0.798][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.48it/s, loss=0.793][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.34it/s, loss=0.795][A
 56%|█████▌    | 703/1251 [00:04<00:03, 145.19it/s, loss=0.794

 64%|██████▎   | 797/1251 [00:05<00:03, 150.14it/s, loss=0.809][A
 64%|██████▎   | 797/1251 [00:05<00:03, 150.04it/s, loss=0.808][A
 64%|██████▎   | 797/1251 [00:05<00:03, 149.94it/s, loss=0.803][A
 64%|██████▎   | 797/1251 [00:05<00:03, 149.82it/s, loss=0.809][A
 64%|██████▎   | 797/1251 [00:05<00:03, 149.69it/s, loss=0.822][A
 64%|██████▎   | 797/1251 [00:05<00:03, 149.57it/s, loss=0.82] [A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.84it/s, loss=0.82][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.74it/s, loss=0.82][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.62it/s, loss=0.828][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.49it/s, loss=0.827][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.38it/s, loss=0.831][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.25it/s, loss=0.838][A
 66%|██████▌   | 820/1251 [00:05<00:02, 153.11it/s, loss=0.839][A
 66%|██████▌   | 820/1251 [00:05<00:02, 152.98it/s, loss=0.837][A
 66%|██████▌   | 820/1251 [00:05<00:02, 152.89it/s, loss=0.835]

 74%|███████▎  | 920/1251 [00:05<00:02, 158.74it/s, loss=0.807][A
 74%|███████▎  | 920/1251 [00:05<00:02, 158.59it/s, loss=0.808][A
 74%|███████▎  | 920/1251 [00:05<00:02, 158.39it/s, loss=0.806][A
 74%|███████▎  | 920/1251 [00:05<00:02, 158.21it/s, loss=0.809][A
 74%|███████▎  | 920/1251 [00:05<00:02, 158.08it/s, loss=0.807][A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.90it/s, loss=0.811][A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.75it/s, loss=0.805][A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.64it/s, loss=0.81] [A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.48it/s, loss=0.815][A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.34it/s, loss=0.811][A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.21it/s, loss=0.81] [A
 74%|███████▎  | 920/1251 [00:05<00:02, 157.09it/s, loss=0.814][A
 74%|███████▎  | 920/1251 [00:05<00:02, 156.91it/s, loss=0.809][A
 75%|███████▌  | 944/1251 [00:05<00:01, 160.97it/s, loss=0.809][A
 75%|███████▌  | 944/1251 [00:05<00:01, 160.87it/s, loss=0.806

 83%|████████▎ | 1033/1251 [00:06<00:01, 162.38it/s, loss=0.807][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 162.29it/s, loss=0.811][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 162.19it/s, loss=0.814][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 162.07it/s, loss=0.815][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.96it/s, loss=0.812][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.86it/s, loss=0.814][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.76it/s, loss=0.821][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.64it/s, loss=0.819][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.52it/s, loss=0.818][A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.44it/s, loss=0.82] [A
 83%|████████▎ | 1033/1251 [00:06<00:01, 161.34it/s, loss=0.828][A
 85%|████████▍ | 1058/1251 [00:06<00:01, 165.20it/s, loss=0.828][A
 85%|████████▍ | 1058/1251 [00:06<00:01, 165.10it/s, loss=0.831][A
 85%|████████▍ | 1058/1251 [00:06<00:01, 165.01it/s, loss=0.828][A
 85%|████████▍ | 1058/1251 [00:06<00:01, 164.92i

 92%|█████████▏| 1152/1251 [00:06<00:00, 167.93it/s, loss=0.801][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.85it/s, loss=0.803][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.78it/s, loss=0.801][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.71it/s, loss=0.807][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.61it/s, loss=0.807][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.51it/s, loss=0.806][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.41it/s, loss=0.806][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.31it/s, loss=0.808][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.19it/s, loss=0.807][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.09it/s, loss=0.809][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 167.01it/s, loss=0.806][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 166.93it/s, loss=0.802][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 166.80it/s, loss=0.802][A
 92%|█████████▏| 1152/1251 [00:06<00:00, 166.68it/s, loss=0.804][A
 94%|█████████▍| 1176/1251 [00:06<00:00, 170.10i

RuntimeError: size mismatch at /home/bmn/src/ai.learn/pytorch/pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:244

Adam = exponentially weighted moving average?

In [34]:
set_lrs(opt, 0.001)

In [36]:
fit(model, data, 10, opt, F.mse_loss)


  0%|          | 0/1251 [00:00<?, ?it/s][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.431][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.501][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.493][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.539][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.528][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.568][A
  0%|          | 0/1251 [00:00<?, ?it/s, loss=0.563][A
  1%|          | 7/1251 [00:00<00:18, 65.83it/s, loss=0.563][A
  1%|          | 7/1251 [00:00<00:20, 61.42it/s, loss=0.583][A
  1%|          | 7/1251 [00:00<00:21, 57.58it/s, loss=0.599][A
  1%|          | 7/1251 [00:00<00:23, 52.83it/s, loss=0.595][A
  1%|          | 7/1251 [00:00<00:25, 49.50it/s, loss=0.593][A
  1%|          | 7/1251 [00:00<00:26, 47.00it/s, loss=0.587][A
  1%|          | 7/1251 [00:00<00:27, 44.79it/s, loss=0.572][A
  1%|          | 7/1251 [00:00<00:28, 43.03it/s, loss=0.563][A
  1%|          | 7/1251 [00:00<00:30, 40.87it/s, lo

 18%|█▊        | 225/1251 [00:01<00:06, 150.75it/s, loss=0.587][A
 18%|█▊        | 225/1251 [00:01<00:06, 150.30it/s, loss=0.588][A
 18%|█▊        | 225/1251 [00:01<00:06, 149.84it/s, loss=0.588][A
 18%|█▊        | 225/1251 [00:01<00:06, 149.42it/s, loss=0.583][A
 18%|█▊        | 225/1251 [00:01<00:06, 148.99it/s, loss=0.582][A
 18%|█▊        | 225/1251 [00:01<00:06, 148.46it/s, loss=0.582][A
 18%|█▊        | 225/1251 [00:01<00:06, 148.02it/s, loss=0.579][A
 18%|█▊        | 225/1251 [00:01<00:06, 147.61it/s, loss=0.579][A
 18%|█▊        | 225/1251 [00:01<00:06, 147.24it/s, loss=0.58] [A
 18%|█▊        | 225/1251 [00:01<00:06, 146.80it/s, loss=0.581][A
 18%|█▊        | 225/1251 [00:01<00:07, 146.44it/s, loss=0.587][A
 18%|█▊        | 225/1251 [00:01<00:07, 146.02it/s, loss=0.587][A
 18%|█▊        | 225/1251 [00:01<00:07, 145.65it/s, loss=0.584][A
 18%|█▊        | 225/1251 [00:01<00:07, 145.24it/s, loss=0.589][A
 18%|█▊        | 225/1251 [00:01<00:07, 144.46it/s, loss=0.586

 37%|███▋      | 463/1251 [00:02<00:04, 184.82it/s, loss=0.581][A
 37%|███▋      | 463/1251 [00:02<00:04, 184.51it/s, loss=0.578][A
 37%|███▋      | 463/1251 [00:02<00:04, 184.19it/s, loss=0.576][A
 37%|███▋      | 463/1251 [00:02<00:04, 183.90it/s, loss=0.578][A
 37%|███▋      | 463/1251 [00:02<00:04, 183.63it/s, loss=0.578][A
 37%|███▋      | 463/1251 [00:02<00:04, 183.34it/s, loss=0.58] [A
 37%|███▋      | 463/1251 [00:02<00:04, 183.07it/s, loss=0.581][A
 37%|███▋      | 463/1251 [00:02<00:04, 182.77it/s, loss=0.579][A
 37%|███▋      | 463/1251 [00:02<00:04, 182.44it/s, loss=0.584][A
 37%|███▋      | 463/1251 [00:02<00:04, 182.15it/s, loss=0.581][A
 37%|███▋      | 463/1251 [00:02<00:04, 181.86it/s, loss=0.584][A
 37%|███▋      | 463/1251 [00:02<00:04, 181.55it/s, loss=0.587][A
 37%|███▋      | 463/1251 [00:02<00:04, 181.26it/s, loss=0.585][A
 37%|███▋      | 463/1251 [00:02<00:04, 180.95it/s, loss=0.588][A
 37%|███▋      | 463/1251 [00:02<00:04, 180.59it/s, loss=0.586

 54%|█████▍    | 678/1251 [00:03<00:02, 192.50it/s, loss=0.598][A
 54%|█████▍    | 678/1251 [00:03<00:02, 192.29it/s, loss=0.597][A
 54%|█████▍    | 678/1251 [00:03<00:02, 192.11it/s, loss=0.596][A
 56%|█████▋    | 705/1251 [00:03<00:02, 199.67it/s, loss=0.596][A
 56%|█████▋    | 705/1251 [00:03<00:02, 199.41it/s, loss=0.594][A
 56%|█████▋    | 705/1251 [00:03<00:02, 199.14it/s, loss=0.599][A
 56%|█████▋    | 705/1251 [00:03<00:02, 198.94it/s, loss=0.602][A
 56%|█████▋    | 705/1251 [00:03<00:02, 198.74it/s, loss=0.604][A
 56%|█████▋    | 705/1251 [00:03<00:02, 198.50it/s, loss=0.604][A
 56%|█████▋    | 705/1251 [00:03<00:02, 198.33it/s, loss=0.606][A
 56%|█████▋    | 705/1251 [00:03<00:02, 198.14it/s, loss=0.61] [A
 56%|█████▋    | 705/1251 [00:03<00:02, 197.94it/s, loss=0.612][A
 56%|█████▋    | 705/1251 [00:03<00:02, 197.70it/s, loss=0.609][A
 56%|█████▋    | 705/1251 [00:03<00:02, 197.44it/s, loss=0.609][A
 56%|█████▋    | 705/1251 [00:03<00:02, 197.23it/s, loss=0.609

 74%|███████▎  | 922/1251 [00:04<00:01, 202.18it/s, loss=0.591][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.95it/s, loss=0.595][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.80it/s, loss=0.595][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.63it/s, loss=0.597][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.48it/s, loss=0.596][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.30it/s, loss=0.598][A
 74%|███████▎  | 922/1251 [00:04<00:01, 201.13it/s, loss=0.596][A
 74%|███████▎  | 922/1251 [00:04<00:01, 200.91it/s, loss=0.596][A
 74%|███████▎  | 922/1251 [00:04<00:01, 200.76it/s, loss=0.6]  [A
 74%|███████▎  | 922/1251 [00:04<00:01, 200.53it/s, loss=0.605][A
 76%|███████▌  | 946/1251 [00:04<00:01, 205.67it/s, loss=0.605][A
 76%|███████▌  | 946/1251 [00:04<00:01, 205.48it/s, loss=0.606][A
 76%|███████▌  | 946/1251 [00:04<00:01, 205.24it/s, loss=0.604][A
 76%|███████▌  | 946/1251 [00:04<00:01, 205.02it/s, loss=0.604][A
 76%|███████▌  | 946/1251 [00:04<00:01, 204.80it/s, loss=0.603

 93%|█████████▎| 1167/1251 [00:05<00:00, 206.49it/s, loss=0.612][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 206.34it/s, loss=0.607][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 206.21it/s, loss=0.608][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 206.02it/s, loss=0.605][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.87it/s, loss=0.606][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.72it/s, loss=0.601][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.57it/s, loss=0.6]  [A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.38it/s, loss=0.603][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.22it/s, loss=0.601][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 205.04it/s, loss=0.6]  [A
 93%|█████████▎| 1167/1251 [00:05<00:00, 204.84it/s, loss=0.601][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 204.69it/s, loss=0.6]  [A
 93%|█████████▎| 1167/1251 [00:05<00:00, 204.53it/s, loss=0.596][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 204.39it/s, loss=0.596][A
 93%|█████████▎| 1167/1251 [00:05<00:00, 204.19i

 10%|█         | 129/1251 [00:01<00:11, 96.70it/s, loss=0.574][A
 10%|█         | 129/1251 [00:01<00:11, 96.39it/s, loss=0.58] [A
 10%|█         | 129/1251 [00:01<00:11, 96.00it/s, loss=0.58][A
 10%|█         | 129/1251 [00:01<00:11, 95.74it/s, loss=0.583][A
 10%|█         | 129/1251 [00:01<00:11, 95.50it/s, loss=0.582][A
 10%|█         | 129/1251 [00:01<00:11, 95.24it/s, loss=0.58] [A
 10%|█         | 129/1251 [00:01<00:11, 94.92it/s, loss=0.581][A
 10%|█         | 129/1251 [00:01<00:11, 94.65it/s, loss=0.584][A
 10%|█         | 129/1251 [00:01<00:11, 94.37it/s, loss=0.586][A
 12%|█▏        | 154/1251 [00:01<00:09, 112.55it/s, loss=0.586][A
 12%|█▏        | 154/1251 [00:01<00:09, 112.26it/s, loss=0.589][A
 12%|█▏        | 154/1251 [00:01<00:09, 111.89it/s, loss=0.591][A
 12%|█▏        | 154/1251 [00:01<00:09, 111.62it/s, loss=0.594][A
 12%|█▏        | 154/1251 [00:01<00:09, 111.28it/s, loss=0.596][A
 12%|█▏        | 154/1251 [00:01<00:09, 111.00it/s, loss=0.593][A
 12%|

 29%|██▉       | 364/1251 [00:02<00:05, 154.16it/s, loss=0.62] [A
 29%|██▉       | 364/1251 [00:02<00:05, 153.95it/s, loss=0.621][A
 29%|██▉       | 364/1251 [00:02<00:05, 153.76it/s, loss=0.626][A
 29%|██▉       | 364/1251 [00:02<00:05, 153.55it/s, loss=0.629][A
 29%|██▉       | 364/1251 [00:02<00:05, 153.20it/s, loss=0.629][A
 29%|██▉       | 364/1251 [00:02<00:05, 152.89it/s, loss=0.626][A
 29%|██▉       | 364/1251 [00:02<00:05, 152.65it/s, loss=0.626][A
 29%|██▉       | 364/1251 [00:02<00:05, 152.36it/s, loss=0.623][A
 31%|███       | 387/1251 [00:02<00:05, 161.86it/s, loss=0.623][A
 31%|███       | 387/1251 [00:02<00:05, 161.64it/s, loss=0.621][A
 31%|███       | 387/1251 [00:02<00:05, 161.40it/s, loss=0.615][A
 31%|███       | 387/1251 [00:02<00:05, 161.10it/s, loss=0.616][A
 31%|███       | 387/1251 [00:02<00:05, 160.78it/s, loss=0.618][A
 31%|███       | 387/1251 [00:02<00:05, 160.50it/s, loss=0.622][A
 31%|███       | 387/1251 [00:02<00:05, 160.26it/s, loss=0.623

 48%|████▊     | 598/1251 [00:03<00:03, 175.99it/s, loss=0.615][A
 48%|████▊     | 598/1251 [00:03<00:03, 175.78it/s, loss=0.61] [A
 48%|████▊     | 598/1251 [00:03<00:03, 175.53it/s, loss=0.612][A
 48%|████▊     | 598/1251 [00:03<00:03, 175.30it/s, loss=0.609][A
 48%|████▊     | 598/1251 [00:03<00:03, 175.11it/s, loss=0.608][A
 48%|████▊     | 598/1251 [00:03<00:03, 174.91it/s, loss=0.61] [A
 48%|████▊     | 598/1251 [00:03<00:03, 174.69it/s, loss=0.609][A
 48%|████▊     | 598/1251 [00:03<00:03, 174.50it/s, loss=0.609][A
 48%|████▊     | 598/1251 [00:03<00:03, 174.32it/s, loss=0.609][A
 50%|████▉     | 622/1251 [00:03<00:03, 181.24it/s, loss=0.609][A
 50%|████▉     | 622/1251 [00:03<00:03, 181.02it/s, loss=0.614][A
 50%|████▉     | 622/1251 [00:03<00:03, 180.74it/s, loss=0.619][A
 50%|████▉     | 622/1251 [00:03<00:03, 180.54it/s, loss=0.622][A
 50%|████▉     | 622/1251 [00:03<00:03, 180.34it/s, loss=0.624][A
 50%|████▉     | 622/1251 [00:03<00:03, 180.15it/s, loss=0.619

 66%|██████▌   | 827/1251 [00:04<00:02, 184.24it/s, loss=0.607][A
 66%|██████▌   | 827/1251 [00:04<00:02, 184.06it/s, loss=0.607][A
 68%|██████▊   | 849/1251 [00:04<00:02, 188.90it/s, loss=0.607][A
 68%|██████▊   | 849/1251 [00:04<00:02, 188.73it/s, loss=0.606][A
 68%|██████▊   | 849/1251 [00:04<00:02, 188.55it/s, loss=0.604][A
 68%|██████▊   | 849/1251 [00:04<00:02, 188.32it/s, loss=0.608][A
 68%|██████▊   | 849/1251 [00:04<00:02, 188.12it/s, loss=0.608][A
 68%|██████▊   | 849/1251 [00:04<00:02, 187.92it/s, loss=0.608][A
 68%|██████▊   | 849/1251 [00:04<00:02, 187.68it/s, loss=0.609][A
 68%|██████▊   | 849/1251 [00:04<00:02, 187.50it/s, loss=0.612][A
 68%|██████▊   | 849/1251 [00:04<00:02, 187.27it/s, loss=0.611][A
 68%|██████▊   | 849/1251 [00:04<00:02, 187.08it/s, loss=0.618][A
 68%|██████▊   | 849/1251 [00:04<00:02, 186.90it/s, loss=0.616][A
 68%|██████▊   | 849/1251 [00:04<00:02, 186.78it/s, loss=0.614][A
 68%|██████▊   | 849/1251 [00:04<00:02, 186.62it/s, loss=0.614

 85%|████████▍ | 1061/1251 [00:05<00:01, 189.39it/s, loss=0.611][A
 85%|████████▍ | 1061/1251 [00:05<00:01, 189.21it/s, loss=0.61] [A
 85%|████████▍ | 1061/1251 [00:05<00:01, 189.10it/s, loss=0.609][A
 85%|████████▍ | 1061/1251 [00:05<00:01, 188.96it/s, loss=0.604][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.83it/s, loss=0.604][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.64it/s, loss=0.609][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.49it/s, loss=0.607][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.36it/s, loss=0.606][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.22it/s, loss=0.604][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 192.02it/s, loss=0.602][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.86it/s, loss=0.598][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.71it/s, loss=0.598][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.57it/s, loss=0.594][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.36it/s, loss=0.596][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.24i

  4%|▍         | 53/1251 [00:00<00:12, 95.10it/s, loss=0.604][A
  4%|▍         | 53/1251 [00:00<00:12, 93.92it/s, loss=0.601][A
  4%|▍         | 53/1251 [00:00<00:12, 92.35it/s, loss=0.598][A
  4%|▍         | 53/1251 [00:00<00:13, 91.35it/s, loss=0.596][A
  4%|▍         | 53/1251 [00:00<00:13, 89.79it/s, loss=0.596][A
  4%|▍         | 53/1251 [00:00<00:13, 88.68it/s, loss=0.595][A
  4%|▍         | 53/1251 [00:00<00:13, 87.38it/s, loss=0.599][A
  4%|▍         | 53/1251 [00:00<00:13, 86.31it/s, loss=0.6]  [A
  4%|▍         | 53/1251 [00:00<00:14, 85.07it/s, loss=0.601][A
  5%|▌         | 66/1251 [00:00<00:11, 105.60it/s, loss=0.601][A
  5%|▌         | 66/1251 [00:00<00:11, 104.38it/s, loss=0.604][A
  5%|▌         | 66/1251 [00:00<00:11, 102.91it/s, loss=0.603][A
  5%|▌         | 66/1251 [00:00<00:11, 101.34it/s, loss=0.605][A
  5%|▌         | 66/1251 [00:00<00:11, 100.15it/s, loss=0.606][A
  5%|▌         | 66/1251 [00:00<00:11, 99.27it/s, loss=0.605] [A
  5%|▌         | 66

 23%|██▎       | 283/1251 [00:01<00:06, 155.84it/s, loss=0.611][A
 23%|██▎       | 283/1251 [00:01<00:06, 155.40it/s, loss=0.609][A
 23%|██▎       | 283/1251 [00:01<00:06, 155.04it/s, loss=0.61] [A
 23%|██▎       | 283/1251 [00:01<00:06, 154.73it/s, loss=0.608][A
 23%|██▎       | 283/1251 [00:01<00:06, 154.23it/s, loss=0.605][A
 23%|██▎       | 283/1251 [00:01<00:06, 153.75it/s, loss=0.605][A
 23%|██▎       | 283/1251 [00:01<00:06, 153.41it/s, loss=0.607][A
 23%|██▎       | 283/1251 [00:01<00:06, 153.03it/s, loss=0.608][A
 23%|██▎       | 283/1251 [00:01<00:06, 152.58it/s, loss=0.608][A
 23%|██▎       | 283/1251 [00:01<00:06, 152.09it/s, loss=0.609][A
 23%|██▎       | 283/1251 [00:01<00:06, 151.61it/s, loss=0.607][A
 23%|██▎       | 283/1251 [00:01<00:06, 151.17it/s, loss=0.603][A
 23%|██▎       | 283/1251 [00:01<00:06, 150.83it/s, loss=0.599][A
 23%|██▎       | 283/1251 [00:01<00:06, 150.35it/s, loss=0.601][A
 24%|██▍       | 304/1251 [00:01<00:05, 161.34it/s, loss=0.601

 40%|████      | 502/1251 [00:02<00:04, 172.72it/s, loss=0.603][A
 42%|████▏     | 525/1251 [00:02<00:04, 180.53it/s, loss=0.603][A
 42%|████▏     | 525/1251 [00:02<00:04, 180.31it/s, loss=0.6]  [A
 42%|████▏     | 525/1251 [00:02<00:04, 180.02it/s, loss=0.599][A
 42%|████▏     | 525/1251 [00:02<00:04, 179.71it/s, loss=0.596][A
 42%|████▏     | 525/1251 [00:02<00:04, 179.49it/s, loss=0.6]  [A
 42%|████▏     | 525/1251 [00:02<00:04, 179.28it/s, loss=0.598][A
 42%|████▏     | 525/1251 [00:02<00:04, 179.04it/s, loss=0.596][A
 42%|████▏     | 525/1251 [00:02<00:04, 178.70it/s, loss=0.594][A
 42%|████▏     | 525/1251 [00:02<00:04, 178.47it/s, loss=0.592][A
 42%|████▏     | 525/1251 [00:02<00:04, 178.21it/s, loss=0.591][A
 42%|████▏     | 525/1251 [00:02<00:04, 177.95it/s, loss=0.591][A
 42%|████▏     | 525/1251 [00:02<00:04, 177.62it/s, loss=0.588][A
 42%|████▏     | 525/1251 [00:02<00:04, 177.32it/s, loss=0.586][A
 42%|████▏     | 525/1251 [00:02<00:04, 177.05it/s, loss=0.583

 60%|█████▉    | 745/1251 [00:04<00:02, 185.56it/s, loss=0.595][A
 60%|█████▉    | 745/1251 [00:04<00:02, 185.34it/s, loss=0.592][A
 60%|█████▉    | 745/1251 [00:04<00:02, 185.14it/s, loss=0.595][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.97it/s, loss=0.594][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.77it/s, loss=0.594][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.62it/s, loss=0.595][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.43it/s, loss=0.593][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.29it/s, loss=0.594][A
 60%|█████▉    | 745/1251 [00:04<00:02, 184.15it/s, loss=0.596][A
 60%|█████▉    | 745/1251 [00:04<00:02, 183.98it/s, loss=0.596][A
 60%|█████▉    | 745/1251 [00:04<00:02, 183.75it/s, loss=0.597][A
 60%|█████▉    | 745/1251 [00:04<00:02, 183.52it/s, loss=0.595][A
 60%|█████▉    | 745/1251 [00:04<00:02, 183.27it/s, loss=0.604][A
 62%|██████▏   | 770/1251 [00:04<00:02, 189.35it/s, loss=0.604][A
 62%|██████▏   | 770/1251 [00:04<00:02, 189.13it/s, loss=0.605

 78%|███████▊  | 974/1251 [00:05<00:01, 191.88it/s, loss=0.59] [A
 78%|███████▊  | 974/1251 [00:05<00:01, 191.72it/s, loss=0.587][A
 78%|███████▊  | 974/1251 [00:05<00:01, 191.54it/s, loss=0.595][A
 78%|███████▊  | 974/1251 [00:05<00:01, 191.35it/s, loss=0.599][A
 78%|███████▊  | 974/1251 [00:05<00:01, 191.21it/s, loss=0.599][A
 78%|███████▊  | 974/1251 [00:05<00:01, 191.04it/s, loss=0.598][A
 78%|███████▊  | 974/1251 [00:05<00:01, 190.88it/s, loss=0.597][A
 80%|███████▉  | 998/1251 [00:05<00:01, 195.52it/s, loss=0.597][A
 80%|███████▉  | 998/1251 [00:05<00:01, 195.37it/s, loss=0.596][A
 80%|███████▉  | 998/1251 [00:05<00:01, 195.25it/s, loss=0.594][A
 80%|███████▉  | 998/1251 [00:05<00:01, 195.12it/s, loss=0.593][A
 80%|███████▉  | 998/1251 [00:05<00:01, 194.95it/s, loss=0.592][A
 80%|███████▉  | 998/1251 [00:05<00:01, 194.77it/s, loss=0.591][A
 80%|███████▉  | 998/1251 [00:05<00:01, 194.61it/s, loss=0.592][A
 80%|███████▉  | 998/1251 [00:05<00:01, 194.45it/s, loss=0.59]

 96%|█████████▌| 1202/1251 [00:06<00:00, 195.60it/s, loss=0.612][A
 96%|█████████▌| 1202/1251 [00:06<00:00, 195.47it/s, loss=0.613][A
 96%|█████████▌| 1202/1251 [00:06<00:00, 195.37it/s, loss=0.614][A
 96%|█████████▌| 1202/1251 [00:06<00:00, 195.24it/s, loss=0.616][A
 96%|█████████▌| 1202/1251 [00:06<00:00, 195.13it/s, loss=0.613][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 199.15it/s, loss=0.613][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 199.02it/s, loss=0.611][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.90it/s, loss=0.615][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.76it/s, loss=0.613][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.61it/s, loss=0.617][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.46it/s, loss=0.617][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.32it/s, loss=0.615][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.20it/s, loss=0.616][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 198.05it/s, loss=0.619][A
 98%|█████████▊| 1227/1251 [00:06<00:00, 197.93i

 15%|█▌        | 191/1251 [00:01<00:08, 125.80it/s, loss=0.576][A
 15%|█▌        | 191/1251 [00:01<00:08, 125.47it/s, loss=0.581][A
 15%|█▌        | 191/1251 [00:01<00:08, 125.14it/s, loss=0.584][A
 15%|█▌        | 191/1251 [00:01<00:08, 124.80it/s, loss=0.585][A
 15%|█▌        | 191/1251 [00:01<00:08, 124.37it/s, loss=0.583][A
 15%|█▌        | 191/1251 [00:01<00:08, 123.98it/s, loss=0.583][A
 15%|█▌        | 191/1251 [00:01<00:08, 123.69it/s, loss=0.586][A
 15%|█▌        | 191/1251 [00:01<00:08, 123.36it/s, loss=0.589][A
 15%|█▌        | 191/1251 [00:01<00:08, 123.00it/s, loss=0.591][A
 15%|█▌        | 191/1251 [00:01<00:08, 122.62it/s, loss=0.586][A
 15%|█▌        | 191/1251 [00:01<00:08, 122.35it/s, loss=0.585][A
 15%|█▌        | 191/1251 [00:01<00:08, 121.99it/s, loss=0.586][A
 17%|█▋        | 213/1251 [00:01<00:07, 135.92it/s, loss=0.586][A
 17%|█▋        | 213/1251 [00:01<00:07, 135.48it/s, loss=0.583][A
 17%|█▋        | 213/1251 [00:01<00:07, 135.06it/s, loss=0.581

 33%|███▎      | 418/1251 [00:02<00:05, 162.35it/s, loss=0.566][A
 33%|███▎      | 418/1251 [00:02<00:05, 162.13it/s, loss=0.566][A
 33%|███▎      | 418/1251 [00:02<00:05, 161.84it/s, loss=0.566][A
 33%|███▎      | 418/1251 [00:02<00:05, 161.55it/s, loss=0.565][A
 33%|███▎      | 418/1251 [00:02<00:05, 161.31it/s, loss=0.566][A
 33%|███▎      | 418/1251 [00:02<00:05, 161.11it/s, loss=0.566][A
 33%|███▎      | 418/1251 [00:02<00:05, 160.91it/s, loss=0.566][A
 35%|███▌      | 442/1251 [00:02<00:04, 170.07it/s, loss=0.566][A
 35%|███▌      | 442/1251 [00:02<00:04, 169.83it/s, loss=0.565][A
 35%|███▌      | 442/1251 [00:02<00:04, 169.59it/s, loss=0.568][A
 35%|███▌      | 442/1251 [00:02<00:04, 169.36it/s, loss=0.571][A
 35%|███▌      | 442/1251 [00:02<00:04, 169.08it/s, loss=0.575][A
 35%|███▌      | 442/1251 [00:02<00:04, 168.73it/s, loss=0.577][A
 35%|███▌      | 442/1251 [00:02<00:04, 168.48it/s, loss=0.577][A
 35%|███▌      | 442/1251 [00:02<00:04, 168.26it/s, loss=0.578

 53%|█████▎    | 665/1251 [00:04<00:04, 134.31it/s, loss=0.593][A
 53%|█████▎    | 665/1251 [00:04<00:04, 133.90it/s, loss=0.594][A
 53%|█████▎    | 665/1251 [00:04<00:04, 133.52it/s, loss=0.593][A
 53%|█████▎    | 665/1251 [00:05<00:04, 132.96it/s, loss=0.595][A
 53%|█████▎    | 665/1251 [00:05<00:04, 132.53it/s, loss=0.596][A
 53%|█████▎    | 665/1251 [00:05<00:04, 132.10it/s, loss=0.601][A
 53%|█████▎    | 665/1251 [00:05<00:04, 131.75it/s, loss=0.604][A
 53%|█████▎    | 665/1251 [00:05<00:04, 131.35it/s, loss=0.604][A
 54%|█████▎    | 672/1251 [00:05<00:04, 132.55it/s, loss=0.604][A
 54%|█████▎    | 672/1251 [00:05<00:04, 132.11it/s, loss=0.602][A
 54%|█████▎    | 672/1251 [00:05<00:04, 131.69it/s, loss=0.6]  [A
 54%|█████▎    | 672/1251 [00:05<00:04, 131.31it/s, loss=0.602][A
 54%|█████▎    | 672/1251 [00:05<00:04, 130.95it/s, loss=0.597][A
 54%|█████▎    | 672/1251 [00:05<00:04, 130.59it/s, loss=0.606][A
 54%|█████▎    | 672/1251 [00:05<00:04, 130.20it/s, loss=0.603

 71%|███████   | 883/1251 [00:06<00:02, 143.24it/s, loss=0.607][A
 71%|███████   | 883/1251 [00:06<00:02, 143.13it/s, loss=0.606][A
 71%|███████   | 883/1251 [00:06<00:02, 143.02it/s, loss=0.611][A
 71%|███████   | 883/1251 [00:06<00:02, 142.93it/s, loss=0.611][A
 71%|███████   | 883/1251 [00:06<00:02, 142.85it/s, loss=0.613][A
 71%|███████   | 883/1251 [00:06<00:02, 142.75it/s, loss=0.614][A
 71%|███████   | 883/1251 [00:06<00:02, 142.67it/s, loss=0.609][A
 71%|███████   | 883/1251 [00:06<00:02, 142.57it/s, loss=0.606][A
 71%|███████   | 883/1251 [00:06<00:02, 142.44it/s, loss=0.606][A
 72%|███████▏  | 906/1251 [00:06<00:02, 146.10it/s, loss=0.606][A
 72%|███████▏  | 906/1251 [00:06<00:02, 145.98it/s, loss=0.602][A
 72%|███████▏  | 906/1251 [00:06<00:02, 145.89it/s, loss=0.608][A
 72%|███████▏  | 906/1251 [00:06<00:02, 145.75it/s, loss=0.605][A
 72%|███████▏  | 906/1251 [00:06<00:02, 145.63it/s, loss=0.601][A
 72%|███████▏  | 906/1251 [00:06<00:02, 145.55it/s, loss=0.595

 89%|████████▊ | 1109/1251 [00:07<00:00, 153.37it/s, loss=0.601][A
 89%|████████▊ | 1109/1251 [00:07<00:00, 153.27it/s, loss=0.602][A
 89%|████████▊ | 1109/1251 [00:07<00:00, 153.11it/s, loss=0.6]  [A
 90%|█████████ | 1132/1251 [00:07<00:00, 156.23it/s, loss=0.6][A
 90%|█████████ | 1132/1251 [00:07<00:00, 156.10it/s, loss=0.599][A
 90%|█████████ | 1132/1251 [00:07<00:00, 156.00it/s, loss=0.598][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.91it/s, loss=0.599][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.83it/s, loss=0.599][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.74it/s, loss=0.599][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.63it/s, loss=0.594][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.54it/s, loss=0.594][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.42it/s, loss=0.59] [A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.33it/s, loss=0.593][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.25it/s, loss=0.598][A
 90%|█████████ | 1132/1251 [00:07<00:00, 155.16it/

  8%|▊         | 95/1251 [00:01<00:13, 84.46it/s, loss=0.589][A
  8%|▊         | 95/1251 [00:01<00:13, 84.15it/s, loss=0.591][A
  8%|▊         | 95/1251 [00:01<00:13, 83.89it/s, loss=0.594][A
  8%|▊         | 95/1251 [00:01<00:13, 83.56it/s, loss=0.601][A
  8%|▊         | 95/1251 [00:01<00:13, 83.28it/s, loss=0.599][A
  8%|▊         | 95/1251 [00:01<00:13, 82.96it/s, loss=0.596][A
  8%|▊         | 95/1251 [00:01<00:13, 82.71it/s, loss=0.599][A
  8%|▊         | 95/1251 [00:01<00:14, 82.41it/s, loss=0.598][A
  8%|▊         | 95/1251 [00:01<00:14, 82.07it/s, loss=0.595][A
  8%|▊         | 95/1251 [00:01<00:14, 81.79it/s, loss=0.595][A
  8%|▊         | 95/1251 [00:01<00:14, 81.46it/s, loss=0.598][A
  8%|▊         | 95/1251 [00:01<00:14, 81.14it/s, loss=0.599][A
  8%|▊         | 95/1251 [00:01<00:14, 80.83it/s, loss=0.598][A
 10%|▉         | 119/1251 [00:01<00:11, 101.06it/s, loss=0.598][A
 10%|▉         | 119/1251 [00:01<00:11, 100.66it/s, loss=0.595][A
 10%|▉         | 119/

 26%|██▌       | 323/1251 [00:02<00:06, 148.27it/s, loss=0.585][A
 26%|██▌       | 323/1251 [00:02<00:06, 147.97it/s, loss=0.588][A
 26%|██▌       | 323/1251 [00:02<00:06, 147.60it/s, loss=0.592][A
 26%|██▌       | 323/1251 [00:02<00:06, 147.33it/s, loss=0.59] [A
 26%|██▌       | 323/1251 [00:02<00:06, 147.08it/s, loss=0.587][A
 26%|██▌       | 323/1251 [00:02<00:06, 146.79it/s, loss=0.591][A
 28%|██▊       | 346/1251 [00:02<00:05, 157.01it/s, loss=0.591][A
 28%|██▊       | 346/1251 [00:02<00:05, 156.71it/s, loss=0.595][A
 28%|██▊       | 346/1251 [00:02<00:05, 156.44it/s, loss=0.59] [A
 28%|██▊       | 346/1251 [00:02<00:05, 156.20it/s, loss=0.593][A
 28%|██▊       | 346/1251 [00:02<00:05, 155.85it/s, loss=0.59] [A
 28%|██▊       | 346/1251 [00:02<00:05, 155.54it/s, loss=0.59][A
 28%|██▊       | 346/1251 [00:02<00:05, 155.26it/s, loss=0.589][A
 28%|██▊       | 346/1251 [00:02<00:05, 154.78it/s, loss=0.589][A
 28%|██▊       | 346/1251 [00:02<00:05, 154.56it/s, loss=0.59] 

 44%|████▍     | 552/1251 [00:03<00:04, 171.13it/s, loss=0.607][A
 44%|████▍     | 552/1251 [00:03<00:04, 170.93it/s, loss=0.609][A
 46%|████▌     | 576/1251 [00:03<00:03, 178.30it/s, loss=0.609][A
 46%|████▌     | 576/1251 [00:03<00:03, 178.13it/s, loss=0.607][A
 46%|████▌     | 576/1251 [00:03<00:03, 177.92it/s, loss=0.606][A
 46%|████▌     | 576/1251 [00:03<00:03, 177.67it/s, loss=0.602][A
 46%|████▌     | 576/1251 [00:03<00:03, 177.41it/s, loss=0.602][A
 46%|████▌     | 576/1251 [00:03<00:03, 177.12it/s, loss=0.604][A
 46%|████▌     | 576/1251 [00:03<00:03, 176.94it/s, loss=0.604][A
 46%|████▌     | 576/1251 [00:03<00:03, 176.72it/s, loss=0.602][A
 46%|████▌     | 576/1251 [00:03<00:03, 176.50it/s, loss=0.598][A
 46%|████▌     | 576/1251 [00:03<00:03, 176.21it/s, loss=0.594][A
 46%|████▌     | 576/1251 [00:03<00:03, 175.99it/s, loss=0.594][A
 46%|████▌     | 576/1251 [00:03<00:03, 175.70it/s, loss=0.596][A
 46%|████▌     | 576/1251 [00:03<00:03, 175.47it/s, loss=0.597

 65%|██████▍   | 807/1251 [00:04<00:02, 188.29it/s, loss=0.592][A
 65%|██████▍   | 807/1251 [00:04<00:02, 188.09it/s, loss=0.589][A
 65%|██████▍   | 807/1251 [00:04<00:02, 187.91it/s, loss=0.592][A
 65%|██████▍   | 807/1251 [00:04<00:02, 187.72it/s, loss=0.596][A
 65%|██████▍   | 807/1251 [00:04<00:02, 187.51it/s, loss=0.597][A
 65%|██████▍   | 807/1251 [00:04<00:02, 187.33it/s, loss=0.594][A
 65%|██████▍   | 807/1251 [00:04<00:02, 187.07it/s, loss=0.592][A
 65%|██████▍   | 807/1251 [00:04<00:02, 186.88it/s, loss=0.595][A
 65%|██████▍   | 807/1251 [00:04<00:02, 186.71it/s, loss=0.597][A
 65%|██████▍   | 807/1251 [00:04<00:02, 186.47it/s, loss=0.598][A
 65%|██████▍   | 807/1251 [00:04<00:02, 186.29it/s, loss=0.597][A
 65%|██████▍   | 807/1251 [00:04<00:02, 186.09it/s, loss=0.6]  [A
 65%|██████▍   | 807/1251 [00:04<00:02, 185.91it/s, loss=0.599][A
 65%|██████▍   | 807/1251 [00:04<00:02, 185.75it/s, loss=0.605][A
 65%|██████▍   | 807/1251 [00:04<00:02, 185.54it/s, loss=0.607

 82%|████████▏ | 1031/1251 [00:05<00:01, 191.83it/s, loss=0.597][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.70it/s, loss=0.596][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.58it/s, loss=0.595][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.42it/s, loss=0.591][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.31it/s, loss=0.592][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.17it/s, loss=0.594][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 191.05it/s, loss=0.591][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.90it/s, loss=0.59] [A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.77it/s, loss=0.593][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.63it/s, loss=0.593][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.50it/s, loss=0.587][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.38it/s, loss=0.586][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.24it/s, loss=0.587][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 190.13it/s, loss=0.592][A
 82%|████████▏ | 1031/1251 [00:05<00:01, 189.97i

  1%|          | 14/1251 [00:00<00:23, 51.80it/s, loss=0.604][A
  1%|          | 14/1251 [00:00<00:24, 50.35it/s, loss=0.6]  [A
  1%|          | 14/1251 [00:00<00:25, 48.45it/s, loss=0.596][A
  1%|          | 14/1251 [00:00<00:26, 46.22it/s, loss=0.598][A
  1%|          | 14/1251 [00:00<00:27, 44.78it/s, loss=0.598][A
  2%|▏         | 23/1251 [00:00<00:16, 72.89it/s, loss=0.598][A
  2%|▏         | 23/1251 [00:00<00:17, 71.18it/s, loss=0.598][A
  2%|▏         | 23/1251 [00:00<00:17, 69.34it/s, loss=0.599][A
  2%|▏         | 23/1251 [00:00<00:18, 67.26it/s, loss=0.597][A
  2%|▏         | 23/1251 [00:00<00:18, 65.36it/s, loss=0.597][A
  2%|▏         | 23/1251 [00:00<00:19, 63.79it/s, loss=0.595][A
  2%|▏         | 23/1251 [00:00<00:19, 62.28it/s, loss=0.594][A
  2%|▏         | 23/1251 [00:00<00:20, 60.41it/s, loss=0.595][A
  2%|▏         | 23/1251 [00:00<00:20, 58.97it/s, loss=0.593][A
  2%|▏         | 23/1251 [00:00<00:21, 57.46it/s, loss=0.59] [A
  2%|▏         | 23/1251 

 18%|█▊        | 231/1251 [00:01<00:07, 132.59it/s, loss=0.591][A
 18%|█▊        | 231/1251 [00:01<00:07, 132.26it/s, loss=0.591][A
 18%|█▊        | 231/1251 [00:01<00:07, 131.87it/s, loss=0.596][A
 18%|█▊        | 231/1251 [00:01<00:07, 131.56it/s, loss=0.603][A
 20%|██        | 255/1251 [00:01<00:06, 145.09it/s, loss=0.603][A
 20%|██        | 255/1251 [00:01<00:06, 144.76it/s, loss=0.605][A
 20%|██        | 255/1251 [00:01<00:06, 144.38it/s, loss=0.605][A
 20%|██        | 255/1251 [00:01<00:06, 143.99it/s, loss=0.605][A
 20%|██        | 255/1251 [00:01<00:06, 143.67it/s, loss=0.604][A
 20%|██        | 255/1251 [00:01<00:06, 143.41it/s, loss=0.605][A
 20%|██        | 255/1251 [00:01<00:06, 143.08it/s, loss=0.601][A
 20%|██        | 255/1251 [00:01<00:06, 142.82it/s, loss=0.594][A
 20%|██        | 255/1251 [00:01<00:06, 142.35it/s, loss=0.594][A
 20%|██        | 255/1251 [00:01<00:07, 142.01it/s, loss=0.593][A
 20%|██        | 255/1251 [00:01<00:07, 141.59it/s, loss=0.59]

 39%|███▉      | 485/1251 [00:02<00:04, 173.40it/s, loss=0.593][A
 39%|███▉      | 485/1251 [00:02<00:04, 173.11it/s, loss=0.591][A
 39%|███▉      | 485/1251 [00:02<00:04, 172.85it/s, loss=0.593][A
 39%|███▉      | 485/1251 [00:02<00:04, 172.60it/s, loss=0.591][A
 39%|███▉      | 485/1251 [00:02<00:04, 172.27it/s, loss=0.59] [A
 39%|███▉      | 485/1251 [00:02<00:04, 172.02it/s, loss=0.591][A
 39%|███▉      | 485/1251 [00:02<00:04, 171.69it/s, loss=0.593][A
 39%|███▉      | 485/1251 [00:02<00:04, 171.40it/s, loss=0.596][A
 39%|███▉      | 485/1251 [00:02<00:04, 171.02it/s, loss=0.598][A
 39%|███▉      | 485/1251 [00:02<00:04, 170.77it/s, loss=0.595][A
 39%|███▉      | 485/1251 [00:02<00:04, 170.54it/s, loss=0.601][A
 39%|███▉      | 485/1251 [00:02<00:04, 170.27it/s, loss=0.602][A
 39%|███▉      | 485/1251 [00:02<00:04, 169.98it/s, loss=0.601][A
 39%|███▉      | 485/1251 [00:02<00:04, 169.69it/s, loss=0.596][A
 39%|███▉      | 485/1251 [00:02<00:04, 169.40it/s, loss=0.595

 57%|█████▋    | 714/1251 [00:03<00:02, 183.74it/s, loss=0.585][A
 57%|█████▋    | 714/1251 [00:03<00:02, 183.51it/s, loss=0.583][A
 57%|█████▋    | 714/1251 [00:03<00:02, 183.33it/s, loss=0.579][A
 57%|█████▋    | 714/1251 [00:03<00:02, 183.01it/s, loss=0.577][A
 57%|█████▋    | 714/1251 [00:03<00:02, 182.81it/s, loss=0.58] [A
 57%|█████▋    | 714/1251 [00:03<00:02, 182.61it/s, loss=0.582][A
 57%|█████▋    | 714/1251 [00:03<00:02, 182.34it/s, loss=0.585][A
 57%|█████▋    | 714/1251 [00:03<00:02, 182.11it/s, loss=0.588][A
 57%|█████▋    | 714/1251 [00:03<00:02, 181.90it/s, loss=0.589][A
 57%|█████▋    | 714/1251 [00:03<00:02, 181.75it/s, loss=0.585][A
 57%|█████▋    | 714/1251 [00:03<00:02, 181.53it/s, loss=0.584][A
 57%|█████▋    | 714/1251 [00:03<00:02, 181.29it/s, loss=0.58] [A
 57%|█████▋    | 714/1251 [00:03<00:02, 181.01it/s, loss=0.585][A
 57%|█████▋    | 714/1251 [00:03<00:02, 180.75it/s, loss=0.587][A
 57%|█████▋    | 714/1251 [00:03<00:02, 180.57it/s, loss=0.586

 75%|███████▍  | 932/1251 [00:05<00:01, 185.74it/s, loss=0.59] [A
 75%|███████▍  | 932/1251 [00:05<00:01, 185.58it/s, loss=0.597][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.90it/s, loss=0.597][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.74it/s, loss=0.596][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.52it/s, loss=0.596][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.37it/s, loss=0.592][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.20it/s, loss=0.594][A
 76%|███████▋  | 954/1251 [00:05<00:01, 189.03it/s, loss=0.589][A
 76%|███████▋  | 954/1251 [00:05<00:01, 188.77it/s, loss=0.585][A
 76%|███████▋  | 954/1251 [00:05<00:01, 188.58it/s, loss=0.583][A
 76%|███████▋  | 954/1251 [00:05<00:01, 188.43it/s, loss=0.581][A
 76%|███████▋  | 954/1251 [00:05<00:01, 188.26it/s, loss=0.582][A
 76%|███████▋  | 954/1251 [00:05<00:01, 188.11it/s, loss=0.583][A
 76%|███████▋  | 954/1251 [00:05<00:01, 187.93it/s, loss=0.577][A
 76%|███████▋  | 954/1251 [00:05<00:01, 187.75it/s, loss=0.578

 94%|█████████▍| 1177/1251 [00:06<00:00, 193.00it/s, loss=0.596][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.88it/s, loss=0.593][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.76it/s, loss=0.597][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.62it/s, loss=0.601][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.48it/s, loss=0.599][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.36it/s, loss=0.595][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.22it/s, loss=0.593][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 192.11it/s, loss=0.595][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.98it/s, loss=0.601][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.87it/s, loss=0.605][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.78it/s, loss=0.607][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.65it/s, loss=0.606][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.54it/s, loss=0.605][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.37it/s, loss=0.606][A
 94%|█████████▍| 1177/1251 [00:06<00:00, 191.25i

 12%|█▏        | 145/1251 [00:01<00:10, 110.10it/s, loss=0.591][A
 12%|█▏        | 145/1251 [00:01<00:10, 109.81it/s, loss=0.592][A
 12%|█▏        | 145/1251 [00:01<00:10, 109.45it/s, loss=0.597][A
 12%|█▏        | 145/1251 [00:01<00:10, 109.16it/s, loss=0.595][A
 12%|█▏        | 145/1251 [00:01<00:10, 108.87it/s, loss=0.592][A
 12%|█▏        | 145/1251 [00:01<00:10, 108.51it/s, loss=0.59] [A
 12%|█▏        | 145/1251 [00:01<00:10, 108.18it/s, loss=0.592][A
 12%|█▏        | 145/1251 [00:01<00:10, 107.92it/s, loss=0.59] [A
 12%|█▏        | 145/1251 [00:01<00:10, 107.36it/s, loss=0.589][A
 12%|█▏        | 145/1251 [00:01<00:10, 107.14it/s, loss=0.587][A
 12%|█▏        | 145/1251 [00:01<00:10, 106.85it/s, loss=0.584][A
 14%|█▎        | 172/1251 [00:01<00:08, 126.59it/s, loss=0.584][A
 14%|█▎        | 172/1251 [00:01<00:08, 126.26it/s, loss=0.582][A
 14%|█▎        | 172/1251 [00:01<00:08, 125.97it/s, loss=0.586][A
 14%|█▎        | 172/1251 [00:01<00:08, 125.65it/s, loss=0.585

 30%|███       | 378/1251 [00:02<00:05, 159.02it/s, loss=0.596][A
 30%|███       | 378/1251 [00:02<00:05, 158.67it/s, loss=0.6]  [A
 30%|███       | 378/1251 [00:02<00:05, 158.40it/s, loss=0.601][A
 30%|███       | 378/1251 [00:02<00:05, 158.15it/s, loss=0.604][A
 30%|███       | 378/1251 [00:02<00:05, 157.86it/s, loss=0.605][A
 32%|███▏      | 400/1251 [00:02<00:05, 166.92it/s, loss=0.605][A
 32%|███▏      | 400/1251 [00:02<00:05, 166.62it/s, loss=0.61] [A
 32%|███▏      | 400/1251 [00:02<00:05, 166.33it/s, loss=0.605][A
 32%|███▏      | 400/1251 [00:02<00:05, 166.09it/s, loss=0.608][A
 32%|███▏      | 400/1251 [00:02<00:05, 165.83it/s, loss=0.611][A
 32%|███▏      | 400/1251 [00:02<00:05, 165.57it/s, loss=0.611][A
 32%|███▏      | 400/1251 [00:02<00:05, 165.34it/s, loss=0.61] [A
 32%|███▏      | 400/1251 [00:02<00:05, 165.11it/s, loss=0.618][A
 32%|███▏      | 400/1251 [00:02<00:05, 164.89it/s, loss=0.615][A
 32%|███▏      | 400/1251 [00:02<00:05, 164.57it/s, loss=0.613

 50%|████▉     | 621/1251 [00:03<00:03, 184.28it/s, loss=0.604][A
 50%|████▉     | 621/1251 [00:03<00:03, 184.05it/s, loss=0.608][A
 50%|████▉     | 621/1251 [00:03<00:03, 183.83it/s, loss=0.606][A
 50%|████▉     | 621/1251 [00:03<00:03, 183.62it/s, loss=0.603][A
 50%|████▉     | 621/1251 [00:03<00:03, 183.42it/s, loss=0.597][A
 50%|████▉     | 621/1251 [00:03<00:03, 183.23it/s, loss=0.595][A
 50%|████▉     | 621/1251 [00:03<00:03, 183.01it/s, loss=0.596][A
 50%|████▉     | 621/1251 [00:03<00:03, 182.74it/s, loss=0.593][A
 50%|████▉     | 621/1251 [00:03<00:03, 182.52it/s, loss=0.595][A
 50%|████▉     | 621/1251 [00:03<00:03, 182.31it/s, loss=0.594][A
 50%|████▉     | 621/1251 [00:03<00:03, 182.08it/s, loss=0.591][A
 50%|████▉     | 621/1251 [00:03<00:03, 181.82it/s, loss=0.59] [A
 50%|████▉     | 621/1251 [00:03<00:03, 181.64it/s, loss=0.589][A
 50%|████▉     | 621/1251 [00:03<00:03, 181.46it/s, loss=0.591][A
 50%|████▉     | 621/1251 [00:03<00:03, 181.27it/s, loss=0.588

 69%|██████▉   | 861/1251 [00:04<00:02, 194.70it/s, loss=0.585][A
 69%|██████▉   | 861/1251 [00:04<00:02, 194.52it/s, loss=0.587][A
 69%|██████▉   | 861/1251 [00:04<00:02, 194.28it/s, loss=0.595][A
 69%|██████▉   | 861/1251 [00:04<00:02, 194.03it/s, loss=0.592][A
 69%|██████▉   | 861/1251 [00:04<00:02, 193.84it/s, loss=0.594][A
 69%|██████▉   | 861/1251 [00:04<00:02, 193.64it/s, loss=0.593][A
 69%|██████▉   | 861/1251 [00:04<00:02, 193.42it/s, loss=0.595][A
 69%|██████▉   | 861/1251 [00:04<00:02, 193.23it/s, loss=0.597][A
 69%|██████▉   | 861/1251 [00:04<00:02, 193.05it/s, loss=0.594][A
 69%|██████▉   | 861/1251 [00:04<00:02, 192.77it/s, loss=0.591][A
 69%|██████▉   | 861/1251 [00:04<00:02, 192.54it/s, loss=0.597][A
 69%|██████▉   | 861/1251 [00:04<00:02, 192.36it/s, loss=0.595][A
 69%|██████▉   | 861/1251 [00:04<00:02, 192.08it/s, loss=0.599][A
 69%|██████▉   | 861/1251 [00:04<00:02, 191.90it/s, loss=0.595][A
 69%|██████▉   | 861/1251 [00:04<00:02, 191.72it/s, loss=0.597

 87%|████████▋ | 1083/1251 [00:05<00:00, 191.75it/s, loss=0.61] [A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.57it/s, loss=0.61][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.41it/s, loss=0.617][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.27it/s, loss=0.624][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 191.11it/s, loss=0.624][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.96it/s, loss=0.624][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.82it/s, loss=0.625][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.66it/s, loss=0.62] [A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.52it/s, loss=0.619][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.38it/s, loss=0.625][A
 87%|████████▋ | 1083/1251 [00:05<00:00, 190.21it/s, loss=0.623][A
 88%|████████▊ | 1105/1251 [00:05<00:00, 193.99it/s, loss=0.623][A
 88%|████████▊ | 1105/1251 [00:05<00:00, 193.82it/s, loss=0.62] [A
 88%|████████▊ | 1105/1251 [00:05<00:00, 193.70it/s, loss=0.619][A
 88%|████████▊ | 1105/1251 [00:05<00:00, 193.54it

  6%|▌         | 71/1251 [00:00<00:13, 84.51it/s, loss=0.602][A
  6%|▌         | 71/1251 [00:00<00:14, 83.61it/s, loss=0.603][A
  6%|▌         | 71/1251 [00:00<00:14, 82.42it/s, loss=0.602][A
  6%|▌         | 71/1251 [00:00<00:14, 81.31it/s, loss=0.601][A
  6%|▌         | 71/1251 [00:00<00:14, 79.82it/s, loss=0.604][A
  6%|▌         | 71/1251 [00:00<00:14, 79.03it/s, loss=0.603][A
  6%|▌         | 71/1251 [00:00<00:15, 78.15it/s, loss=0.601][A
  6%|▌         | 71/1251 [00:00<00:15, 77.34it/s, loss=0.6]  [A
  6%|▌         | 71/1251 [00:00<00:15, 76.55it/s, loss=0.598][A
  6%|▌         | 71/1251 [00:00<00:15, 75.95it/s, loss=0.597][A
  6%|▌         | 71/1251 [00:00<00:15, 75.11it/s, loss=0.6]  [A
  6%|▋         | 81/1251 [00:00<00:13, 85.42it/s, loss=0.6][A
  6%|▋         | 81/1251 [00:00<00:13, 84.14it/s, loss=0.595][A
  6%|▋         | 81/1251 [00:00<00:14, 83.31it/s, loss=0.595][A
  6%|▋         | 81/1251 [00:00<00:14, 82.43it/s, loss=0.597][A
  6%|▋         | 81/1251 [0

 24%|██▍       | 306/1251 [00:01<00:06, 155.62it/s, loss=0.586][A
 24%|██▍       | 306/1251 [00:01<00:06, 155.34it/s, loss=0.583][A
 24%|██▍       | 306/1251 [00:01<00:06, 155.06it/s, loss=0.583][A
 24%|██▍       | 306/1251 [00:01<00:06, 154.76it/s, loss=0.586][A
 24%|██▍       | 306/1251 [00:01<00:06, 154.43it/s, loss=0.584][A
 24%|██▍       | 306/1251 [00:01<00:06, 154.12it/s, loss=0.588][A
 24%|██▍       | 306/1251 [00:01<00:06, 153.76it/s, loss=0.591][A
 24%|██▍       | 306/1251 [00:01<00:06, 153.49it/s, loss=0.59] [A
 24%|██▍       | 306/1251 [00:01<00:06, 153.14it/s, loss=0.588][A
 24%|██▍       | 306/1251 [00:02<00:06, 152.74it/s, loss=0.59] [A
 24%|██▍       | 306/1251 [00:02<00:06, 152.35it/s, loss=0.589][A
 24%|██▍       | 306/1251 [00:02<00:06, 152.03it/s, loss=0.589][A
 24%|██▍       | 306/1251 [00:02<00:06, 151.72it/s, loss=0.591][A
 24%|██▍       | 306/1251 [00:02<00:06, 151.36it/s, loss=0.591][A
 24%|██▍       | 306/1251 [00:02<00:06, 150.99it/s, loss=0.588

 42%|████▏     | 528/1251 [00:02<00:04, 178.65it/s, loss=0.578][A
 42%|████▏     | 528/1251 [00:02<00:04, 178.40it/s, loss=0.58] [A
 42%|████▏     | 528/1251 [00:02<00:04, 178.18it/s, loss=0.587][A
 42%|████▏     | 528/1251 [00:02<00:04, 177.97it/s, loss=0.587][A
 42%|████▏     | 528/1251 [00:02<00:04, 177.73it/s, loss=0.593][A
 42%|████▏     | 528/1251 [00:02<00:04, 177.49it/s, loss=0.59] [A
 42%|████▏     | 528/1251 [00:02<00:04, 177.23it/s, loss=0.591][A
 42%|████▏     | 528/1251 [00:02<00:04, 177.03it/s, loss=0.592][A
 42%|████▏     | 528/1251 [00:02<00:04, 176.78it/s, loss=0.591][A
 42%|████▏     | 528/1251 [00:02<00:04, 176.48it/s, loss=0.591][A
 42%|████▏     | 528/1251 [00:02<00:04, 176.25it/s, loss=0.596][A
 42%|████▏     | 528/1251 [00:02<00:04, 176.01it/s, loss=0.593][A
 42%|████▏     | 528/1251 [00:03<00:04, 175.81it/s, loss=0.587][A
 42%|████▏     | 528/1251 [00:03<00:04, 175.58it/s, loss=0.585][A
 44%|████▍     | 554/1251 [00:03<00:03, 184.15it/s, loss=0.585

 62%|██████▏   | 775/1251 [00:03<00:02, 196.37it/s, loss=0.601][A
 62%|██████▏   | 775/1251 [00:03<00:02, 196.18it/s, loss=0.599][A
 62%|██████▏   | 775/1251 [00:03<00:02, 195.95it/s, loss=0.602][A
 62%|██████▏   | 775/1251 [00:03<00:02, 195.73it/s, loss=0.605][A
 62%|██████▏   | 775/1251 [00:03<00:02, 195.55it/s, loss=0.603][A
 62%|██████▏   | 775/1251 [00:03<00:02, 195.37it/s, loss=0.603][A
 62%|██████▏   | 775/1251 [00:03<00:02, 195.17it/s, loss=0.602][A
 62%|██████▏   | 775/1251 [00:03<00:02, 194.95it/s, loss=0.604][A
 62%|██████▏   | 775/1251 [00:03<00:02, 194.73it/s, loss=0.602][A
 62%|██████▏   | 775/1251 [00:03<00:02, 194.53it/s, loss=0.6]  [A
 62%|██████▏   | 775/1251 [00:03<00:02, 194.30it/s, loss=0.598][A
 62%|██████▏   | 775/1251 [00:03<00:02, 194.08it/s, loss=0.597][A
 62%|██████▏   | 775/1251 [00:03<00:02, 193.92it/s, loss=0.602][A
 62%|██████▏   | 775/1251 [00:04<00:02, 193.74it/s, loss=0.6]  [A
 62%|██████▏   | 775/1251 [00:04<00:02, 193.51it/s, loss=0.602

 80%|███████▉  | 999/1251 [00:04<00:01, 202.91it/s, loss=0.593][A
 80%|███████▉  | 999/1251 [00:04<00:01, 202.77it/s, loss=0.592][A
 80%|███████▉  | 999/1251 [00:04<00:01, 202.61it/s, loss=0.59] [A
 80%|███████▉  | 999/1251 [00:04<00:01, 202.43it/s, loss=0.588][A
 80%|███████▉  | 999/1251 [00:04<00:01, 202.24it/s, loss=0.585][A
 80%|███████▉  | 999/1251 [00:04<00:01, 202.04it/s, loss=0.586][A
 80%|███████▉  | 999/1251 [00:04<00:01, 201.81it/s, loss=0.585][A
 80%|███████▉  | 999/1251 [00:04<00:01, 201.52it/s, loss=0.58] [A
 80%|███████▉  | 999/1251 [00:04<00:01, 201.30it/s, loss=0.577][A
 80%|███████▉  | 999/1251 [00:04<00:01, 201.08it/s, loss=0.581][A
 80%|███████▉  | 999/1251 [00:04<00:01, 200.83it/s, loss=0.58] [A
 80%|███████▉  | 999/1251 [00:04<00:01, 200.68it/s, loss=0.581][A
 80%|███████▉  | 999/1251 [00:04<00:01, 200.53it/s, loss=0.579][A
 80%|███████▉  | 999/1251 [00:04<00:01, 200.27it/s, loss=0.582][A
 80%|███████▉  | 999/1251 [00:04<00:01, 200.09it/s, loss=0.582

 98%|█████████▊| 1221/1251 [00:05<00:00, 206.91it/s, loss=0.629][A
 98%|█████████▊| 1221/1251 [00:05<00:00, 206.77it/s, loss=0.625][A
 98%|█████████▊| 1221/1251 [00:05<00:00, 206.61it/s, loss=0.628][A
 98%|█████████▊| 1221/1251 [00:05<00:00, 206.47it/s, loss=0.63] [A
 98%|█████████▊| 1221/1251 [00:05<00:00, 206.21it/s, loss=0.628][A
 98%|█████████▊| 1221/1251 [00:05<00:00, 206.01it/s, loss=0.627][A
100%|█████████▉| 1246/1251 [00:05<00:00, 210.17it/s, loss=0.627][A
100%|█████████▉| 1246/1251 [00:05<00:00, 210.04it/s, loss=0.622][A
100%|█████████▉| 1246/1251 [00:05<00:00, 209.89it/s, loss=0.624][A
100%|█████████▉| 1246/1251 [00:05<00:00, 209.75it/s, loss=0.623][A
100%|█████████▉| 1246/1251 [00:05<00:00, 209.62it/s, loss=0.623][A
100%|█████████▉| 1246/1251 [00:05<00:00, 209.48it/s, loss=0.619][A
                                                                [A[ 7.       0.61891  0.79937]

  0%|          | 0/1251 [00:00<?, ?it/s][A
  0%|          | 0/1251 [00:00<?, ?it/s, lo

 17%|█▋        | 217/1251 [00:01<00:07, 147.52it/s, loss=0.581][A
 17%|█▋        | 217/1251 [00:01<00:07, 147.13it/s, loss=0.581][A
 17%|█▋        | 217/1251 [00:01<00:07, 146.69it/s, loss=0.579][A
 17%|█▋        | 217/1251 [00:01<00:07, 146.34it/s, loss=0.575][A
 17%|█▋        | 217/1251 [00:01<00:07, 145.96it/s, loss=0.576][A
 17%|█▋        | 217/1251 [00:01<00:07, 145.54it/s, loss=0.572][A
 17%|█▋        | 217/1251 [00:01<00:07, 145.17it/s, loss=0.571][A
 17%|█▋        | 217/1251 [00:01<00:07, 144.73it/s, loss=0.574][A
 17%|█▋        | 217/1251 [00:01<00:07, 144.36it/s, loss=0.575][A
 17%|█▋        | 217/1251 [00:01<00:07, 144.01it/s, loss=0.576][A
 17%|█▋        | 217/1251 [00:01<00:07, 143.58it/s, loss=0.579][A
 17%|█▋        | 217/1251 [00:01<00:07, 143.13it/s, loss=0.579][A
 17%|█▋        | 217/1251 [00:01<00:07, 142.76it/s, loss=0.581][A
 17%|█▋        | 217/1251 [00:01<00:07, 142.45it/s, loss=0.583][A
 17%|█▋        | 217/1251 [00:01<00:07, 142.09it/s, loss=0.58]

 35%|███▌      | 442/1251 [00:02<00:04, 181.12it/s, loss=0.599][A
 35%|███▌      | 442/1251 [00:02<00:04, 180.79it/s, loss=0.598][A
 35%|███▌      | 442/1251 [00:02<00:04, 180.42it/s, loss=0.597][A
 35%|███▌      | 442/1251 [00:02<00:04, 180.13it/s, loss=0.6]  [A
 35%|███▌      | 442/1251 [00:02<00:04, 179.85it/s, loss=0.598][A
 35%|███▌      | 442/1251 [00:02<00:04, 179.51it/s, loss=0.597][A
 35%|███▌      | 442/1251 [00:02<00:04, 178.99it/s, loss=0.597][A
 35%|███▌      | 442/1251 [00:02<00:04, 178.67it/s, loss=0.599][A
 35%|███▌      | 442/1251 [00:02<00:04, 178.40it/s, loss=0.597][A
 35%|███▌      | 442/1251 [00:02<00:04, 178.09it/s, loss=0.597][A
 35%|███▌      | 442/1251 [00:02<00:04, 177.83it/s, loss=0.592][A
 35%|███▌      | 442/1251 [00:02<00:04, 177.59it/s, loss=0.593][A
 37%|███▋      | 467/1251 [00:02<00:04, 187.53it/s, loss=0.593][A
 37%|███▋      | 467/1251 [00:02<00:04, 187.24it/s, loss=0.592][A
 37%|███▋      | 467/1251 [00:02<00:04, 186.92it/s, loss=0.592

 53%|█████▎    | 668/1251 [00:03<00:02, 195.92it/s, loss=0.585][A
 53%|█████▎    | 668/1251 [00:03<00:02, 195.68it/s, loss=0.59] [A
 53%|█████▎    | 668/1251 [00:03<00:02, 195.41it/s, loss=0.592][A
 55%|█████▌    | 693/1251 [00:03<00:02, 202.61it/s, loss=0.592][A
 55%|█████▌    | 693/1251 [00:03<00:02, 202.38it/s, loss=0.59] [A
 55%|█████▌    | 693/1251 [00:03<00:02, 202.13it/s, loss=0.587][A
 55%|█████▌    | 693/1251 [00:03<00:02, 201.91it/s, loss=0.587][A
 55%|█████▌    | 693/1251 [00:03<00:02, 201.66it/s, loss=0.589][A
 55%|█████▌    | 693/1251 [00:03<00:02, 201.42it/s, loss=0.59] [A
 55%|█████▌    | 693/1251 [00:03<00:02, 201.20it/s, loss=0.588][A
 55%|█████▌    | 693/1251 [00:03<00:02, 200.97it/s, loss=0.588][A
 55%|█████▌    | 693/1251 [00:03<00:02, 200.73it/s, loss=0.586][A
 55%|█████▌    | 693/1251 [00:03<00:02, 200.50it/s, loss=0.583][A
 55%|█████▌    | 693/1251 [00:03<00:02, 200.24it/s, loss=0.585][A
 55%|█████▌    | 693/1251 [00:03<00:02, 199.97it/s, loss=0.582

 74%|███████▍  | 924/1251 [00:04<00:01, 211.42it/s, loss=0.62] [A
 74%|███████▍  | 924/1251 [00:04<00:01, 211.25it/s, loss=0.621][A
 74%|███████▍  | 924/1251 [00:04<00:01, 211.10it/s, loss=0.629][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.94it/s, loss=0.634][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.77it/s, loss=0.637][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.63it/s, loss=0.638][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.49it/s, loss=0.638][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.31it/s, loss=0.639][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.15it/s, loss=0.639][A
 74%|███████▍  | 924/1251 [00:04<00:01, 210.01it/s, loss=0.636][A
 74%|███████▍  | 924/1251 [00:04<00:01, 209.80it/s, loss=0.638][A
 74%|███████▍  | 924/1251 [00:04<00:01, 209.62it/s, loss=0.636][A
 74%|███████▍  | 924/1251 [00:04<00:01, 209.43it/s, loss=0.638][A
 74%|███████▍  | 924/1251 [00:04<00:01, 209.28it/s, loss=0.638][A
 74%|███████▍  | 924/1251 [00:04<00:01, 209.08it/s, loss=0.638

 92%|█████████▏| 1157/1251 [00:05<00:00, 217.80it/s, loss=0.572][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 217.65it/s, loss=0.576][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 217.49it/s, loss=0.575][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 217.32it/s, loss=0.574][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 217.16it/s, loss=0.576][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 217.00it/s, loss=0.577][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 216.80it/s, loss=0.574][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 216.63it/s, loss=0.571][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 216.45it/s, loss=0.569][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 216.29it/s, loss=0.567][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 216.12it/s, loss=0.576][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 215.96it/s, loss=0.576][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 215.79it/s, loss=0.574][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 215.64it/s, loss=0.577][A
 92%|█████████▏| 1157/1251 [00:05<00:00, 215.49i

  9%|▉         | 115/1251 [00:01<00:11, 102.08it/s, loss=0.578][A
  9%|▉         | 115/1251 [00:01<00:11, 101.76it/s, loss=0.581][A
  9%|▉         | 115/1251 [00:01<00:11, 101.40it/s, loss=0.579][A
  9%|▉         | 115/1251 [00:01<00:11, 101.03it/s, loss=0.577][A
 11%|█▏        | 141/1251 [00:01<00:08, 123.68it/s, loss=0.577][A
 11%|█▏        | 141/1251 [00:01<00:09, 123.09it/s, loss=0.576][A
 11%|█▏        | 141/1251 [00:01<00:09, 122.52it/s, loss=0.576][A
 11%|█▏        | 141/1251 [00:01<00:09, 122.09it/s, loss=0.574][A
 11%|█▏        | 141/1251 [00:01<00:09, 121.69it/s, loss=0.575][A
 11%|█▏        | 141/1251 [00:01<00:09, 121.27it/s, loss=0.579][A
 11%|█▏        | 141/1251 [00:01<00:09, 120.89it/s, loss=0.578][A
 11%|█▏        | 141/1251 [00:01<00:09, 120.55it/s, loss=0.577][A
 11%|█▏        | 141/1251 [00:01<00:09, 120.09it/s, loss=0.58] [A
 11%|█▏        | 141/1251 [00:01<00:09, 119.71it/s, loss=0.582][A
 11%|█▏        | 141/1251 [00:01<00:09, 119.36it/s, loss=0.583

 28%|██▊       | 355/1251 [00:02<00:05, 174.26it/s, loss=0.582][A
 28%|██▊       | 355/1251 [00:02<00:05, 173.92it/s, loss=0.58] [A
 28%|██▊       | 355/1251 [00:02<00:05, 173.56it/s, loss=0.578][A
 28%|██▊       | 355/1251 [00:02<00:05, 173.20it/s, loss=0.577][A
 28%|██▊       | 355/1251 [00:02<00:05, 172.81it/s, loss=0.586][A
 28%|██▊       | 355/1251 [00:02<00:05, 172.48it/s, loss=0.586][A
 28%|██▊       | 355/1251 [00:02<00:05, 172.20it/s, loss=0.586][A
 28%|██▊       | 355/1251 [00:02<00:05, 171.76it/s, loss=0.588][A
 28%|██▊       | 355/1251 [00:02<00:05, 171.43it/s, loss=0.584][A
 30%|███       | 381/1251 [00:02<00:04, 183.83it/s, loss=0.584][A
 30%|███       | 381/1251 [00:02<00:04, 183.37it/s, loss=0.583][A
 30%|███       | 381/1251 [00:02<00:04, 183.00it/s, loss=0.581][A
 30%|███       | 381/1251 [00:02<00:04, 182.61it/s, loss=0.581][A
 30%|███       | 381/1251 [00:02<00:04, 182.16it/s, loss=0.585][A
 30%|███       | 381/1251 [00:02<00:04, 181.81it/s, loss=0.583

 47%|████▋     | 588/1251 [00:02<00:03, 196.93it/s, loss=0.605][A
 47%|████▋     | 588/1251 [00:02<00:03, 196.68it/s, loss=0.605][A
 47%|████▋     | 588/1251 [00:02<00:03, 196.42it/s, loss=0.604][A
 47%|████▋     | 588/1251 [00:02<00:03, 196.15it/s, loss=0.606][A
 47%|████▋     | 588/1251 [00:03<00:03, 195.86it/s, loss=0.61] [A
 47%|████▋     | 588/1251 [00:03<00:03, 195.55it/s, loss=0.607][A
 47%|████▋     | 588/1251 [00:03<00:03, 195.33it/s, loss=0.603][A
 49%|████▉     | 614/1251 [00:03<00:03, 203.88it/s, loss=0.603][A
 49%|████▉     | 614/1251 [00:03<00:03, 203.65it/s, loss=0.604][A
 49%|████▉     | 614/1251 [00:03<00:03, 203.41it/s, loss=0.6]  [A
 49%|████▉     | 614/1251 [00:03<00:03, 203.08it/s, loss=0.595][A
 49%|████▉     | 614/1251 [00:03<00:03, 202.77it/s, loss=0.591][A
 49%|████▉     | 614/1251 [00:03<00:03, 202.54it/s, loss=0.587][A
 49%|████▉     | 614/1251 [00:03<00:03, 202.24it/s, loss=0.588][A
 49%|████▉     | 614/1251 [00:03<00:03, 201.94it/s, loss=0.59]

 67%|██████▋   | 839/1251 [00:03<00:01, 211.80it/s, loss=0.595][A
 67%|██████▋   | 839/1251 [00:03<00:01, 211.60it/s, loss=0.592][A
 67%|██████▋   | 839/1251 [00:03<00:01, 211.39it/s, loss=0.595][A
 67%|██████▋   | 839/1251 [00:03<00:01, 211.14it/s, loss=0.592][A
 67%|██████▋   | 839/1251 [00:03<00:01, 210.90it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:03<00:01, 210.65it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:03<00:01, 210.46it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:03<00:01, 210.25it/s, loss=0.582][A
 67%|██████▋   | 839/1251 [00:03<00:01, 210.04it/s, loss=0.58] [A
 67%|██████▋   | 839/1251 [00:03<00:01, 209.84it/s, loss=0.58][A
 67%|██████▋   | 839/1251 [00:04<00:01, 209.58it/s, loss=0.579][A
 67%|██████▋   | 839/1251 [00:04<00:01, 209.34it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:04<00:01, 209.14it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:04<00:01, 208.90it/s, loss=0.587][A
 67%|██████▋   | 839/1251 [00:04<00:01, 208.60it/s, loss=0.591]

 85%|████████▌ | 1067/1251 [00:04<00:00, 216.61it/s, loss=0.609][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 216.45it/s, loss=0.607][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 216.28it/s, loss=0.608][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 216.10it/s, loss=0.603][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.95it/s, loss=0.602][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.81it/s, loss=0.607][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.68it/s, loss=0.604][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.53it/s, loss=0.605][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.38it/s, loss=0.607][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.23it/s, loss=0.608][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 215.07it/s, loss=0.605][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 214.93it/s, loss=0.607][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 214.78it/s, loss=0.607][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 214.57it/s, loss=0.606][A
 85%|████████▌ | 1067/1251 [00:04<00:00, 214.38i