# Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [27]:
import random

In [2]:
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
# from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.linear_model import LogisticRegression
# from sklearn.neural_network import MLPClassifier, MLPRegressor
# from sklearn.svm import SVC
# from sklearn.dummy import DummyClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold

In [3]:
from imblearn.under_sampling import RandomUnderSampler

In [4]:
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.pipeline import Pipeline

In [5]:
from collections import Counter

In [6]:
import wandb

In [7]:
from sklearn import preprocessing

In [8]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

In [9]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EXS

In [10]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import torch.nn as nn
import torch.nn.functional as F

In [43]:
import torch.optim as optim

In [11]:
torch.__version__

'1.2.0'

In [12]:
from tqdm.notebook import tqdm

# Function

In [13]:
# Function(Utility)

# check Duplicate file
def checkDuplicateFile(file_path):
    import os
    if os.path.isfile(file_path):
        print("Caution: File existed!")
        ans = input("Do you want to cover it?(Y/others)")
        if ans == "Y":
            return False
        else:
            print("Canceled....")
            return True
    else:
        return False


# Function(Use)

# Test for checking ICU_id missing in Lab_1103_csv
def getMissingIDinLab(Lab_file, show=True):
    test = sorted(Lab_file.ICU_id.unique())
    s = 0
    error_list = list()
    for i in test:
        #         print(i)
        s += 1
        if s != i:
            if show:
                print(f"error! : {s}")
            error_list.append(s)
            s += 1
    if show:
        print(f"Missing ID Result: {error_list}")
    if show:
        print(f"Missing Length:{len(error_list)}")
    return error_list

# store Dataframe to CSV


def store2CSV(data, target_name, target_loc_prefix='./'):
    file_path = target_loc_prefix+target_name+".csv"
    if checkDuplicateFile(file_path):
        print("store2CSV failed")
        return
    data.to_csv(file_path)
    print("store2CSV Successful!")

# store Datastruc. to pickle


def store2Pickle(data, target_name, target_loc_prefix='./'):
    import pickle
    file_path = target_loc_prefix+target_name+'.pickle'
    if checkDuplicateFile(file_path):
        print("store2Pickle failed")
        return
    with open(file_path, 'wb') as f:
        pickle.dump(data, f)
    print("store2Pickle Successful!")


def readFPickle(target_name, target_loc_prefix='./'):
    import pickle
    file_name = target_loc_prefix+target_name+'.pickle'
    with open(file_name, 'rb') as f:
        temp = pickle.load(f)
    return temp

# Function(Data preprocessing)

#Function: 補值
# 將針對輸入的df_data直接進行inplace插補
# 須確保df_data的缺失值位置有放np.nan


def handleMissing(df_data, df_feature, outFeature=["outcome"], cate_astype="int"):
    for featureName in df_data.columns:
        if featureName not in outFeature:
            if df_data[featureName].isna().sum() == 0:
                print(f"{featureName}: Not need to fill.")
                continue
            else:
                # 先去看是連續與否 (1代表連續,0代表離散)
                kindValue = df_feature.loc[df_feature["features name"]
                                           == featureName, "kind"].values[0]
                if kindValue == 1:
                    # continuous
                    # mean filling
                    targetMean = df_data[featureName].mean()
                    df_data[featureName].fillna(value=targetMean, inplace=True)
                    print(f"{featureName}: Fill, Continuous.")

                else:
                    # categorical
                    # mode filling
                    targetMode = df_data[featureName].mode()[0]
                    df_data[featureName].fillna(value=targetMode, inplace=True)
                    df_data[featureName] = df_data[featureName].astype(
                        cate_astype)
                    print(
                        f"{featureName}: Fill, Categorical. (astype to {cate_astype})")

    print("---handleMissing Finish---")

# plot hist
# filtered_data need to check not have nan


def plotHist(df_data, target, outcome="outcome", bins=20):
    filtered_data = pd.concat([df_data["outcome"], df_data[target]], axis=1)
    filtered_data = filtered_data.dropna()
    print(filtered_data.isna().sum())
    plt.hist(filtered_data.loc[filtered_data[outcome]
             == 0, target], bins=bins, alpha=0.5, label='0')
    plt.hist(filtered_data.loc[filtered_data[outcome]
             == 1, target], bins=bins, alpha=0.5, label='1')
    plt.xlabel(target)
    plt.ylabel('count')
    plt.legend(title=outcome)

# plot countplot
# filtered_data need to check not have nan


def plotCountplot(df_data, target, outcome="outcome"):
    filtered_data = pd.concat([df_data["outcome"], df_data[target]], axis=1)
    filtered_data = filtered_data.dropna()
    print(filtered_data.isna().sum())
    sns.countplot(x=target, hue=outcome, data=filtered_data)

# plot boxplot
# filtered_data need to check not have nan


def plotBoxplot(df_data, target, outcome="outcome"):
    filtered_data = pd.concat([df_data["outcome"], df_data[target]], axis=1)
    filtered_data = filtered_data.dropna()
    print(filtered_data.isna().sum())
    sns.boxplot(x=filtered_data[target], data=filtered_data)

# Function(Model)


def serializeModel(model, modelName, featureNum):
    initial_type = [('float_input', FloatTensorType([None, featureNum]))]
    onx = convert_sklearn(model, initial_types=initial_type)
    with open(modelName + ".onnx", "wb") as f:
        f.write(onx.SerializeToString())


def modelPredict(modelName, testData):
    sess = rt.InferenceSession(modelName + '.onnx')  # load the onnx
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name
    pred_onx = sess.run([label_name], {input_name: testData.astype(np.float32)})[
        0]  # predict testData
    print(pred_onx)

In [14]:
def get_data(X, y):
    X = torch.from_numpy(X).float()#transform the data from numpy to torch
    y = torch.from_numpy(y).long()
    
    dataset = TensorDataset(X,y)
    
    return dataset


def make_loader(dataset, batch_size):
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=batch_size, 
                                         shuffle=True,
                                         pin_memory=True, num_workers=2)
    return loader

In [45]:
# ConvNet(self, kernel_size->list, input_size, classes=2)
def make(config, X_train, y_train, X_val, y_val):
    # Make the data
    # X_train, y_train, X_val, y_val is dataframe
    # now to ndarray
    X_train = X_train.values
    y_train = y_train.values
    X_val = X_val.values
    y_val = y_val.values
    train, test = get_data(X_train, y_train), get_data(X_val, y_val)
    train_loader = make_loader(train, batch_size=config.batch_size)
    test_loader = make_loader(test, batch_size=config.batch_size)

    # Make the model
    model = ConvNet(config.kernel_size, config.feature_size ,config.classes).to(device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
#     optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    return model, train_loader, test_loader, criterion, optimizer

In [39]:
def model_pipeline(hyperparameters, X_train, y_train, X_val, y_val):

    # tell wandb to get started
    with wandb.init(project="DataMining_Project2", entity="oscarchencs10", config=hyperparameters):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config

        # make the model, data, and optimization problem
        model, train_loader, test_loader, criterion, optimizer = make(config, X_train, y_train, X_val, y_val)
        print(model)

        # and use them to train the model
        train(model, train_loader, criterion, optimizer, config)

        # and test its final performance
        test(model, test_loader)

    return model

In [17]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after " + str(example_ct).zfill(5) + f" examples: {loss:.3f}")

# Model Architecture

In [72]:
# Conventional and convolutional neural network

class ConvNet(nn.Module):
    def __init__(self, kernel_size, input_size, classes=2):
        super(ConvNet, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=6, kernel_size=kernel_size[0]),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1))
        self.layer2 = nn.Sequential(
            nn.Conv1d(in_channels=6, out_channels=16, kernel_size=kernel_size[1]),
            nn.ReLU(),)
        
        self.flatten = nn.Flatten()
        
        self.fc = nn.Sequential(
            nn.Linear(16*(input_size-kernel_size[-1]+1), 120),
            nn.ReLU(),
            nn.Linear(120, 32),
            nn.ReLU(),
            nn.Linear(32, classes),
        )
        
    def forward(self, x):
#         print(x.shape)
        x = torch.reshape(x, (-1,1, 40))
#         print(x.shape)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.flatten(out)
        out = self.fc(out)
#         print(out.shape)
        return out

In [19]:
def train(model, loader, criterion, optimizer, config):
    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)

    # Run training and track with wandb
    total_batches = len(loader) * config.epochs
    example_ct = 0  # number of examples seen
    batch_ct = 0
    for epoch in tqdm(range(config.epochs)):
        for _, (data_row, labels) in enumerate(loader):

            loss = train_batch(data_row, labels, model, optimizer, criterion)
            example_ct +=  len(data_row)
            batch_ct += 1

            # Report metrics every 25th batch
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)


def train_batch(data_row, labels, model, optimizer, criterion):
    data_row, labels = data_row.to(device), labels.to(device)
    
    # Forward pass ➡
    outputs = model(data_row)
    loss = criterion(outputs, labels)
    
    # Backward pass ⬅
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss

In [20]:
def test(model, test_loader):
    model.eval()

    # Run the model on some test examples
    with torch.no_grad():
        correct, total = 0, 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Accuracy of the model on the {total} " +
              f"test images: {100 * correct / total}%")
        
        wandb.log({"test_accuracy": correct / total})

#     # Save the model in the exchangeable ONNX format
#     torch.onnx.export(model, images, "model.onnx")
#     wandb.save("model.onnx")

# Load Data

In [21]:
exp13_X_train = readFPickle("data/exp13/exp13_X_train")
exp13_X_train

Unnamed: 0,AGE,SEX,LOS,Joint,Drain,Cemented,Commercial_ALBC,Non_commercial_ALBC,cci_index,elx_index,...,Coagulopathy,Fluid and Electrolyte Disorders,Blood Loss Anemia,Deficiency Anemia,Anemia,Alcohol Abuse,Drug Abuse,Psychoses,Depression,Psyciatric disorder
25753,66,0,6,0,1,1,1,0,4,3,...,0,0,0,0,0,0,0,0,1,1
628,61,0,10,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
26083,80,1,2,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
37780,67,0,6,0,1,1,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
28632,62,0,4,1,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,70,0,7,0,1,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
44732,72,0,3,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
38158,68,0,6,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
860,31,1,4,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
exp13_y_train = readFPickle("data/exp13/exp13_y_train")
exp13_y_train

25753    0
628      0
26083    0
37780    1
28632    0
        ..
11284    0
44732    1
38158    1
860      0
15795    0
Name: outcome, Length: 45627, dtype: int64

In [23]:
exp13_X_val = readFPickle("data/exp13/exp13_X_val")
exp13_X_val

Unnamed: 0,AGE,SEX,LOS,Joint,Drain,Cemented,Commercial_ALBC,Non_commercial_ALBC,cci_index,elx_index,...,Coagulopathy,Fluid and Electrolyte Disorders,Blood Loss Anemia,Deficiency Anemia,Anemia,Alcohol Abuse,Drug Abuse,Psychoses,Depression,Psyciatric disorder
286,72.0,0,13,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
40555,78.0,1,3,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
28125,69.0,0,8,1,1,0,0,0,2,2,...,0,0,0,0,0,0,0,0,0,0
44616,72.0,1,7,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49606,80.0,0,3,1,0,0,0,0,2,3,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23499,81.0,1,5,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
47449,71.0,0,7,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
10480,55.0,0,3,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
35724,73.0,0,7,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
exp13_y_val = readFPickle("data/exp13/exp13_y_val")
exp13_y_val

286      1
40555    0
28125    0
44616    0
49606    0
        ..
23499    0
47449    0
10480    0
35724    0
5180     0
Name: outcome, Length: 8037, dtype: int64

# Random Seed

In [28]:
# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# GPU Setting

In [29]:
# Device configuration
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [30]:
torch.cuda.current_device()

0

In [31]:
torch.cuda.device_count()

4

In [32]:
torch.cuda.get_device_name(1)

'Tesla K40m'

In [33]:
torch.cuda.set_device(1)

In [34]:
torch.cuda.current_device()

1

# Config

In [70]:
config = dict(
    epochs=100,
    classes=2,
    kernel_size=[1, 1],
    feature_size=40,
    batch_size=128,
    learning_rate=0.005,
    dataset="PJI",
    architecture="CNN")

# Main

In [71]:
# Build, train and analyze the model with the pipeline
model = model_pipeline(config, exp13_X_train, exp13_y_train, exp13_X_val, exp13_y_val)

ConvNet(
  (layer1): Sequential(
    (0): Conv1d(1, 6, kernel_size=(1,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv1d(6, 16, kernel_size=(1,), stride=(1,))
    (1): ReLU()
  )
  (flatten): Flatten()
  (fc): Sequential(
    (0): Linear(in_features=640, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=2, bias=True)
  )
)


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 03072 examples: 0.546
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 47803 examples: 0.666
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 51003 examples: 0.587
torch.Size

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 92534 examples: 0.568
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 95734 examples: 0.634
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 137265 examples: 0.627
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 140465 examples: 0.619
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 185196 examples: 0.542
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 229927 examples: 0.696
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 233127 examples: 0.571
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 274658 examples: 0.639
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 277858 examples: 0.572
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 322589 examples: 0.609
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 367320 examples: 0.586
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 370520 ex

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 412051 examples: 0.586
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 415251 examples: 0.635
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 456782 examples: 0.582
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 459982 examples: 0.609
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 504713 examples: 0.551
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 549444 examples: 0.587
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 552644 examples: 0.614
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 594175 examples: 0.590
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 597375 examples: 0.604
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
Loss after 638906 examples: 0.624
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 642106 examples: 0.582
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 686837 examples: 0.611
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 731568 examples: 0.585
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 734768 exa

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 776299 examples: 0.614
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 779499 examples: 0.605
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 824230 examples: 0.572
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 868961 examples: 0.570
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 872161 examples: 0.532
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 913692 examples: 0.498
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 916892 examples: 0.610
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
Loss after 958423 examples: 0.644
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 961623 examples: 0.534
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Si

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1006354 examples: 0.552
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1051085 examples: 0.643
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1054285 examples: 0.578
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1095816 examples: 0.523
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1099016 examples: 0.642
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1143747 examples: 0.593
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1188478 examples: 0.697
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1191678 examples: 0.566
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1233209 examples: 0.604
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1236409 examples: 0.534
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1277940 examples: 0.600
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1281140 examples: 0.527
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1325871 examples: 0.565
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1370602 examples: 0.548
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1373802 examples: 0.516
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1415333 examples: 0.495
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1418533 examples: 0.558
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1463264 examples: 0.547
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1507995 examples: 0.584
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1511195 

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1552726 examples: 0.439
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1555926 examples: 0.453
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1597457 examples: 0.668
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1600657 examples: 0.504
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1645388 examples: 0.579
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1690119 examples: 0.528
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1693319 examples: 0.574
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1734850 examples: 0.544
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1738050 examples: 0.466
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
Loss after 1779581 examples: 0.539
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1782781 examples: 0.425
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1827512 examples: 0.420
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1872243 examples: 0.462
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1875443 examples: 0.496
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1916974 examples: 0.404
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1920174 examples: 0.611
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 1964905 examples: 0.408
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2009636 examples: 0.468
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2012836 examples: 0.450
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2054367 examples: 0.431
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2057567 examples: 0.482
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2099098 examples: 0.395
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2102298 examples: 0.373
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2147029 examples: 0.386
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2191760 examples: 0.461
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2236491 examples: 0.474
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2239691 examples: 0.396
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2284422 examples: 0.396
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2329153 examples: 0.367
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2332353 examples: 0.358
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2373884 examples: 0.410
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2377084 examples: 0.382
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2418615 examples: 0.495
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2421815 examples: 0.438
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2466546 examples: 0.460
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2511277 examples: 0.435
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2514477 examples: 0.441
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2556008 examples: 0.384
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2559208 examples: 0.472
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2603939 examples: 0.401
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2648670 examples: 0.402
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2651870 

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2693401 examples: 0.373
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2696601 examples: 0.384
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2738132 examples: 0.421
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2741332 examples: 0.436
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2782932 examples: 0.418
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2786063 examples: 0.448
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2830794 examples: 0.533
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2875525 examples: 0.444
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
Loss after 2920256 examples: 0.442
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2923456 examples: 0.403
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 2968187 examples: 0.469
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3012918 examples: 0.370
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3016118 examples: 0.371
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3057649 examples: 0.410
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3060849 examples: 0.392
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3105580 examples: 0.409
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3150311 examples: 0.454
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3153511 examples: 0.468
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3195042 examples: 0.436
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3198242 examples: 0.408
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3239773 examples: 0.373
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3242973 examples: 0.325
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3287704 examples: 0.403
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3332435 examples: 0.411
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3335635 examples: 0.415
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3377166 examples: 0.387
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3380366 examples: 0.344
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

Loss after 3421966 examples: 0.412
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3425097 examples: 0.289
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3469828 examples: 0.383
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3473028 examples: 0.371
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3514559 examples: 0.304
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3517759 examples: 0.295
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3559290 examples: 0.405
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3562490 examples: 0.385
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3607221 examples: 0.382
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3651952 examples: 0.326
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3655152 examples: 0.320
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3696683 examples: 0.378
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3699883 examples: 0.391
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([59, 40])
Loss after 3741414 examples: 0.179
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3744614 examples: 0.344
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3789345 examples: 0.333
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3834076 examples: 0.443
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3837276 examples: 0.332
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3878807 examples: 0.358
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3882007 examples: 0.406
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3926738 examples: 0.463
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3971469 examples: 0.414
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 3974669 examples: 0.390
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4016200 examples: 0.345
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4019400 examples: 0.305
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
Loss after 4060931 examples: 0.407
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4064131 examples: 0.319
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4108862 examples: 0.386
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4153593 examples: 0.409
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4156793 examples: 0.369
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4198324 examples: 0.354
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4201524 examples: 0.581
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4246255 examples: 0.436
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4290986 examples: 0.352
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4294186 examples: 0.419
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4335717 examples: 0.453
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4338917 examples: 0.375
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4380448 examples: 0.362
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4383648 examples: 0.404
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4428379 examples: 0.330
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4473110 examples: 0.328
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4476310 examples: 0.392
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4517841 examples: 0.357
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4521041 examples: 0.326
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.S

torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
Loss after 4562641 examples: 0.581
torch.Size([59, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40])
torch.Size([128, 40]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▆▆▆▆▇▇▇▅▆▄▅▇▅█▅▄▅▄▃▃▄▄▃▂▅▃▂▄▃▂▃▃▁▄▃▂▁▃▁▂
test_accuracy,▁

0,1
epoch,99.0
loss,0.58101
test_accuracy,0.92323
