In [1]:
import copy
import numpy as np
import os
import torch
import torch.nn as nn
import torch.utils.data as utils
import torch.nn.functional as F
import higher
import pickle
from torchvision import datasets, transforms
import argparse
import tqdm

from dataloaders import get_data
from models import *
from sklearn.metrics import accuracy_score, precision_score

## The student model

Here we define the student model to be used to train on cifar-10 dataset. <br />
The output is the 10 dimensional softmax output

In [2]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.leaky_relu(F.max_pool2d(self.conv1(x), 2), negative_slope=0.1)
        x = F.leaky_relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2), negative_slope=0.1)
        x = x.view(-1, 320)
        x = F.leaky_relu(self.fc1(x), negative_slope=0.1)
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.softmax(x, dim=1)

sample_student = Net()
preds = sample_student(torch.randn((1,1, 28,28)))
print(torch.sum(preds))
del sample_student
del preds

tensor(1.0000, grad_fn=<SumBackward0>)


## Teacher Model

We define the teacher model which calculates the weighted cross entropy loss on <br />


In [16]:
class Teacher(nn.Module):
    def __init__(self, classes=10, st_dim=13, hidden_dim=16):
        super(Teacher, self).__init__()
        self.V = torch.nn.Parameter(torch.randn(hidden_dim,st_dim))
        self.V.requires_grad = True
        self.W = torch.nn.Parameter(torch.randn(classes,classes,hidden_dim))
        self.W.requires_grad = True
        self.phi = torch.nn.Parameter(0.1 * torch.randn(classes,classes) + torch.eye(classes))
        self.phi.requires_grad = True
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
    def forward(self, st, pw, y):
        '''
        W_channel_weights = self.softmax(torch.matmul(st, self.V.T))
        phi = torch.matmul(self.W, W_channel_weights)
        logpw_weights = torch.matmul(y, phi)
        loss = torch.mean(-1 * self.sigmoid(logpw_weights*torch.log(pw)))
        '''
        loss = torch.mean(-1 * self.sigmoid(torch.matmul(y, self.phi) * torch.log(pw)))
        
        return loss

In [17]:
def get_loss(student, teacher, x, y, state=None, val=False):
    if val:
        # this is the loss function that the teacher tries to minimize
        return  -1 * torch.mean(torch.sum(y * student(x), dim=1))
    else:
        # this is the loss function given by the teacher to optimize the student 
        # in the inner meta loop
        preds = student(x)
        student_loss = teacher(state, preds, y)
        return student_loss

In [18]:
teacher = Teacher()
student = Net()

st = torch.randn((13)).float()
sample_input = torch.randn((5, 1, 28,28))
pw = student(sample_input)
y = F.one_hot(torch.randint(0, 10, (5,)), num_classes=10).float()

result = teacher(st, pw, y)
#result.backward()
loss = get_loss(student, teacher, sample_input, y , st, val=False)
loss.backward()
# print( "V mean grad", torch.mean(torch.abs(teacher.V.grad)), "V max grad", torch.max(torch.abs(teacher.V.grad)) )
print("phi mean grad", torch.mean(torch.abs(teacher.phi.grad)), "V max grad", torch.max(torch.abs(teacher.phi.grad)) )
print(get_loss(student, teacher, sample_input, y , st, val=True))

del student
del teacher

phi mean grad tensor(0.0053) V max grad tensor(0.0139)
tensor(-0.0966, grad_fn=<MulBackward0>)


In [19]:
def get_metrics(student, train_dl, val_dl):
    train_preds = []
    train_gt = []
    val_preds = []
    val_gt = []
    # running the loop for training dataset
    for i, (x,y) in enumerate(train_dl):
        y_pred = torch.max(F.softmax(student(x), dim=1), 1)[1]
        train_preds.append(y_pred)
        train_gt.append(y)

    # running the loop for validation dataset
    for i, (x,y) in enumerate(val_dl):
        y_pred = torch.max(F.softmax(student(x), dim=1), 1)[1]
        val_preds.append(y_pred)
        val_gt.append(y)

    train_preds = torch.cat(train_preds, dim=0).detach().numpy()
    train_gt = torch.cat(train_gt, dim=0).detach().numpy()
    val_preds = torch.cat(val_preds, dim=0).detach().numpy()
    val_gt = torch.cat(val_gt, dim=0).detach().numpy()

    train_acc = accuracy_score(train_gt, train_preds, normalize=True)
    val_acc = accuracy_score(val_gt, val_preds, normalize=True)
    prescision = precision_score(val_gt, val_preds, average=None)
    return torch.cat([torch.tensor([train_acc]), torch.tensor([val_acc]), torch.tensor(prescision)],0)

In [20]:
train_loader, val_loader, test_loader = get_data(dataset="mnist")

In [21]:
def do_train_student(student, optimizer, teacher, train_dl, val_dl, iter=0,inner_steps=100):
    student.train()
    teacher.train()
    metrics = None# get_metrics(student, train_dl, val_dl).float()

    for i, (x,y) in enumerate(train_dl):
        x = x.float()
        y = F.one_hot(y, num_classes=10).float()

        # try to get the state vector here
        state = None # torch.cat([torch.tensor([float(iter/20.0)]), metrics])

        student_loss = get_loss(student, teacher, x, y, state=state, val=False)
        loss = student_loss
        if torch.isnan(student_loss):
            if torch.sum(student(x) <= 0):
                print("non positive value inside log found hence nan loss")
            print("=" * 30)
        # higher library exposes this interface for zero_grad on optimizer, loss.backward(), and optim.step()
        else:
            optimizer.step(loss)

        print('Train student: Step ', i, "loss value is", student_loss)
        if i>100:
            break
    return student

In [22]:
def do_train_teacher(teacher, optimizer, student, train_dl):
    lossdict = {
        'teach_loss' : [],
    }

    student.train()
    teacher.train()
    netloss = None
    for i, (x,y) in enumerate(train_dl):
        x = x.float()
        y = F.one_hot(y, num_classes=10).float()
        # Set val=True to get unweighted loss
        teacher_loss = get_loss(student, teacher, x, y, i, val=True)

        # Teacher is trained to minimise the student's classification error loss after training
        if netloss is None:
            netloss = teacher_loss
        else:
            netloss += teacher_loss

        print('Train Teacher: Step ', i, "  Teacher loss this step is:", teacher_loss)
        if i>50:
            break

    # Accumulate losses and do the step after we process outer_steps number of batches.
    netloss = netloss/(i+1)
    netloss.backward(retain_graph=True)
    return lossdict, teacher

In [28]:
def train(train_dl, val_dl, test_dl, num_steps):
    student = Net()
    teacher = Teacher()

    teacher_optim = torch.optim.Adam(teacher.parameters(), lr=1e-2)

    stud_state_dict = copy.deepcopy(student.state_dict())

    for ep in range(num_steps):
        student.load_state_dict(stud_state_dict, strict=True)
        stud_optim = torch.optim.Adam(student.parameters(), lr=1e-4)

        teacher_optim.zero_grad()
        print("inside epoch before higher optim")
        # The outer loop optimiser (teacher optimiser) does not change the student params, so copy_initial_weights can be True or False
        with higher.innerloop_ctx(student, stud_optim, copy_initial_weights=False) as (fnet, diffopt):
            fnet = do_train_student(fnet, diffopt, teacher, train_dl, val_dl)
            # Note: do_train_student does not edit the .grad field of teacher parameters
            # Therefore, no need to clear teacher grads before do_train_teacher.
            teacher_ld, teacher = do_train_teacher(teacher, teacher_optim, fnet, val_dl)
            student.load_state_dict(fnet.state_dict())
            # print( "V mean grad", torch.mean(torch.abs(teacher.V.grad)), "V max grad", torch.max(torch.abs(teacher.V.grad)) )
            print("phi mean grad", torch.mean(torch.abs(teacher.phi.grad)), "V max grad", torch.max(torch.abs(teacher.phi.grad)) )

            grad = student.conv1.weight.grad
            print( "student conv1 mean grad", torch.mean(torch.abs(grad)), "student conv1 max grad", torch.max(torch.abs(grad)) )

        teacher_optim.step()
        print(teacher.phi)

        #stud_train_ld = update_lossdict(stud_train_ld, student_ld)
        #teacher_train_ld = update_lossdict(teacher_train_ld, teacher_ld)

        print("Teacher training step: ", ep)

        #tld = eval_student(student, test_dl)
        #stud_test_ld = update_lossdict(stud_test_ld, tld)
        #vld = eval_student(student, val_dl)
        #stud_val_ld = update_lossdict(stud_val_ld, vld)

        #train_ep = eval_student(student, train_dl)
        #stud_train_ld = update_lossdict(stud_train_ld, train_ep) 

    return teacher, student

In [29]:
teacher, student = train(train_loader, val_loader, test_loader, num_steps=1000)

inside epoch before higher optim
Train student: Step  0 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  1 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  2 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4618, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  12 loss 

Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1345, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1131, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.0932, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1008, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1392, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.0823, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1153, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1453, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1420, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.0935, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.0967, g

Train student: Step  41 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4586, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4586, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4600, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4592

Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1007, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1058, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1019, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1006, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1008, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1091, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1178, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.0843, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.0953, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1445, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0083) V max grad tensor(0.0224)
student conv1 m

Train student: Step  78 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4604, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4597

Train student: Step  7 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4532, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4597, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4585, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4546, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4556, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4549, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4566, g

Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1208, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1161, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1255, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1355, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1304, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1317, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1096, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1253, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1218, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.1273, grad_fn=<MulBackward0>)
Train Teacher: Step  27   Teacher loss this step is: tensor(-0.1187, g

Train student: Step  47 loss value is tensor(-0.4560, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4486, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4595, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4539, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4550, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4609

phi mean grad tensor(0.0044) V max grad tensor(0.0198)
student conv1 mean grad tensor(0.0115) student conv1 max grad tensor(0.0476)
test
Parameter containing:
tensor([[ 0.7647,  0.0116,  0.0101, -0.0715,  0.0129, -0.0598,  0.0863, -0.0497,
         -0.0328, -0.1347],
        [-0.0203,  0.9778, -0.0719, -0.1097, -0.1310, -0.1664,  0.0095,  0.0025,
         -0.0509,  0.0384],
        [-0.0294,  0.1043,  0.9661, -0.0635, -0.0822, -0.0895, -0.1316, -0.0987,
          0.2171, -0.1279],
        [-0.0464,  0.0728,  0.0313,  0.6871,  0.0864,  0.1238, -0.0284,  0.2399,
          0.1316,  0.0282],
        [ 0.0491, -0.0755,  0.0941, -0.0858,  1.1564, -0.0103,  0.1696, -0.0672,
          0.0174,  0.1592],
        [ 0.1570, -0.0082,  0.1384,  0.2153, -0.0664,  1.0868,  0.0478,  0.2095,
         -0.2833,  0.1149],
        [ 0.2022,  0.1099,  0.0093,  0.1815,  0.0289,  0.2032,  0.9152,  0.0199,
         -0.0577, -0.0441],
        [-0.0530,  0.1018,  0.1312, -0.0425,  0.0508, -0.0903, -0.0680,  0.896

Train student: Step  88 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4575, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4630, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4593, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4585, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4547, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4556, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4631, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.451

Train student: Step  7 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4566, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4530, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4508, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4531, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4528, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4512, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4537, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4549, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4535, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4537, g

Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1038, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1226, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1345, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1099, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1542, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1186, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1184, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1119, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1224, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1174, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1131, g

Train student: Step  44 loss value is tensor(-0.4598, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4524, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4525, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4564, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4533

phi mean grad tensor(0.0030) V max grad tensor(0.0173)
student conv1 mean grad tensor(0.0199) student conv1 max grad tensor(0.0797)
test
Parameter containing:
tensor([[ 7.8056e-01, -9.1245e-03,  3.2472e-03, -4.9743e-02, -1.1100e-02,
         -6.0237e-02,  7.1021e-02, -3.0263e-02, -2.4469e-02, -1.5333e-01],
        [-3.8542e-02,  9.5449e-01, -4.9988e-02, -1.1297e-01, -1.4334e-01,
         -1.6666e-01, -1.0814e-02,  7.5259e-04, -3.0175e-02,  3.5068e-02],
        [-1.0526e-02,  1.0418e-01,  9.6060e-01, -5.9215e-02, -9.8650e-02,
         -9.1715e-02, -1.4354e-01, -8.5316e-02,  2.2412e-01, -1.4156e-01],
        [-2.4899e-02,  7.5931e-02,  4.7625e-02,  6.7895e-01,  6.8772e-02,
          1.1853e-01, -4.1248e-02,  2.5410e-01,  1.3815e-01,  1.9099e-02],
        [ 5.4374e-02, -7.5741e-02,  9.6246e-02, -8.8446e-02,  1.1582e+00,
         -2.1339e-02,  1.5446e-01, -3.9893e-02,  3.6471e-02,  1.4414e-01],
        [ 1.7647e-01, -3.9325e-03,  1.3743e-01,  2.1882e-01, -8.2473e-02,
          1.0944e+00, 

Train student: Step  82 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4568, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4607, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4556, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4613, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4545

Train student: Step  6 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4550, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4554, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4533, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4597, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4535, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4540, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4534, gr

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1089, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1272, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1353, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1186, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1062, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1193, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1297, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1052, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1138, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1414, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1287, gra

Train student: Step  36 loss value is tensor(-0.4597, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4506, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4552, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4555, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4593

Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1372, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1180, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1260, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1254, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1093, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1534, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1331, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1299, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0044) V max grad tensor(0.0124)
student conv1 mean grad tensor(0.0290) student conv1 max grad tensor(0.1114)
test
Parameter containing:
tensor([[ 8.0399e-01, -3.3265e-02, -1.5754e-02, -3.8462e-02, -2.8357e-02,
         -4.5124e-02,  

Train student: Step  75 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4541, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4650

Train student: Step  6 loss value is tensor(-0.4546, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4575, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4582, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4526, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4584, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4586, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4603, gr

Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1216, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1118, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1166, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1151, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1035, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1412, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1192, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1109, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1222, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1291, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1453, g

Train student: Step  35 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4593, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4540, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4638

Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1448, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1574, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1256, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1159, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1347, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1479, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1107, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1144, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1258, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1226, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1301, g

Train student: Step  67 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4579, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4631, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4593, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4572, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4569, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4652

Train student: Step  6 loss value is tensor(-0.4582, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4598, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4554, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4578, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4569, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4599, gr

Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1116, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1271, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1263, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1078, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1429, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1265, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1305, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1199, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1043, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1292, g

Train student: Step  41 loss value is tensor(-0.4568, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4604, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4578, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4579

Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1432, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1201, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1326, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0042) V max grad tensor(0.0117)
student conv1 mean grad tensor(0.0536) student conv1 max grad tensor(0.1944)
test
Parameter containing:
tensor([[ 8.6085e-01, -7.9654e-02, -6.8103e-02, -4.7055e-02, -4.6559e-02,
         -2.4624e-02,  4.2425e-02, -5.3885e-02, -5.4147e-02, -1.7728e-01],
        [-1.2124e-01,  8.7746e-01, -2.2594e-02, -1.7843e-01, -1.1734e-01,
         -1.2420e-01, -6.2816e-02, -6.0287e-02,  6.7354e-04,  5.3840e-02],
        [-1.4969e-02,  1.1227e-01,  9.8555e-01, -1.1409e-01, -8.9516e-02,
         -5.2701e-02, -1.5092e-01, -1.1280e-01,  2.0326e-01, -1.3546e-01],
        [ 4.6526e-03,  9.2779e-02,  3.7549e-02,  6.9258e-01,  7.4049e-02,
          1.4495e-01, -3.9811e-02,  2.264

Train student: Step  84 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4519, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4622, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4573

Train student: Step  6 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4585, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4572, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4552, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4597, gr

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1286, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1566, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1645, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1393, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1100, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1573, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1457, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1190, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1265, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1668, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1579, gr

Train student: Step  35 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4612, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4630

Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1155, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1287, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1450, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1612, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1173, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1482, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1543, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1172, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1418, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1324, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1309, g

Train student: Step  72 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4740, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4660, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4765

Train student: Step  5 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4587, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4595, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4603, gra

Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1372, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1428, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1544, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1289, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1547, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1281, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1376, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1819, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1295, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1190, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1425, grad

Train student: Step  34 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4646, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4575, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4617

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1804, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1560, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1576, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1136, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1314, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1427, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1385, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1056, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1180, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1061, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1316, g

Train student: Step  68 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4698, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4814, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4598, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4673

Train student: Step  6 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4578, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4636, gr

Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1596, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1306, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1537, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1232, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1615, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1073, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1640, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1345, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1084, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.1828, grad_fn=<MulBackward0>)
Train Teacher: Step  27   Teacher loss this step is: tensor(-0.1398, g

Train student: Step  46 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4744

phi mean grad tensor(0.0040) V max grad tensor(0.0214)
student conv1 mean grad tensor(0.0998) student conv1 max grad tensor(0.3526)
test
Parameter containing:
tensor([[ 0.9449, -0.1513, -0.1284, -0.0229, -0.1041, -0.0512,  0.0293, -0.1083,
         -0.1084, -0.1935],
        [-0.2082,  0.8187,  0.0113, -0.2130, -0.1079, -0.1375, -0.0976, -0.1323,
         -0.0111,  0.0527],
        [-0.0208,  0.1222,  1.0356, -0.1227, -0.0890, -0.0494, -0.1277, -0.1480,
          0.1421, -0.1027],
        [ 0.0367,  0.1273,  0.0374,  0.7132,  0.0617,  0.1161,  0.0028,  0.1629,
          0.0708,  0.0263],
        [-0.0153, -0.0512,  0.0105, -0.1478,  1.1113, -0.0406,  0.0872,  0.0726,
          0.0405,  0.1133],
        [ 0.2379,  0.0423,  0.0241,  0.1284, -0.0797,  1.1178,  0.0837,  0.1488,
         -0.1644,  0.1287],
        [ 0.2617,  0.1408,  0.0117,  0.0592, -0.0162,  0.1741,  0.9435, -0.1048,
         -0.0077, -0.0247],
        [-0.0444,  0.0152, -0.0054, -0.1177,  0.0169, -0.0993, -0.0976,  0.862

Train student: Step  88 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4798, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4574, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4809, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.468

Train student: Step  13 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4630, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  20 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  21 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  22 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  23 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  24 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  25 loss value is tensor(-0.4663

Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1488, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1809, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1479, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1477, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1626, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1181, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1601, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.1525, grad_fn=<MulBackward0>)
Train Teacher: Step  27   Teacher loss this step is: tensor(-0.0812, grad_fn=<MulBackward0>)
Train Teacher: Step  28   Teacher loss this step is: tensor(-0.1250, grad_fn=<MulBackward0>)
Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1669, g

Train student: Step  53 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4774, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4678

phi mean grad tensor(0.0041) V max grad tensor(0.0141)
student conv1 mean grad tensor(0.1165) student conv1 max grad tensor(0.4075)
test
Parameter containing:
tensor([[ 0.9697, -0.1781, -0.1431, -0.0020, -0.1270, -0.0550,  0.0200, -0.1078,
         -0.1133, -0.2075],
        [-0.2383,  0.8102,  0.0219, -0.2114, -0.1114, -0.1388, -0.1114, -0.1418,
         -0.0121,  0.0400],
        [-0.0309,  0.1155,  1.0519, -0.1076, -0.0932, -0.0413, -0.1247, -0.1370,
          0.1285, -0.0970],
        [ 0.0377,  0.1295,  0.0425,  0.7062,  0.0508,  0.1117,  0.0153,  0.1606,
          0.0663,  0.0194],
        [-0.0297, -0.0511,  0.0066, -0.1297,  1.1246, -0.0440,  0.0748,  0.0963,
          0.0389,  0.0950],
        [ 0.2406,  0.0401,  0.0144,  0.1345, -0.0887,  1.1311,  0.0923,  0.1493,
         -0.1430,  0.1192],
        [ 0.2607,  0.1338,  0.0142,  0.0689, -0.0283,  0.1671,  0.9587, -0.0999,
          0.0016, -0.0276],
        [-0.0567,  0.0028, -0.0178, -0.1088,  0.0035, -0.1003, -0.0944,  0.852

Train student: Step  88 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4859, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4801, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4773, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.466

Train student: Step  13 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  20 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  21 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  22 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  23 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  24 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  25 loss value is tensor(-0.4667

Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1855, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1335, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1332, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1136, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1648, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1603, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1500, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1664, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1325, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.1812, grad_fn=<MulBackward0>)
Train Teacher: Step  27   Teacher loss this step is: tensor(-0.1672, g

Train student: Step  46 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4660, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4698, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4672, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4721

phi mean grad tensor(0.0059) V max grad tensor(0.0207)
student conv1 mean grad tensor(0.1343) student conv1 max grad tensor(0.4642)
test
Parameter containing:
tensor([[ 0.9941, -0.2034, -0.1610,  0.0105, -0.1449, -0.0603,  0.0020, -0.1032,
         -0.1151, -0.2202],
        [-0.2666,  0.8067,  0.0216, -0.2160, -0.1086, -0.1405, -0.1302, -0.1444,
         -0.0183,  0.0247],
        [-0.0388,  0.1061,  1.0757, -0.0983, -0.0899, -0.0343, -0.1305, -0.1186,
          0.1112, -0.0943],
        [ 0.0409,  0.1312,  0.0420,  0.7102,  0.0463,  0.1048,  0.0181,  0.1641,
          0.0599,  0.0091],
        [-0.0411, -0.0510, -0.0023, -0.1183,  1.1333, -0.0494,  0.0548,  0.1216,
          0.0331,  0.0743],
        [ 0.2446,  0.0384,  0.0013,  0.1314, -0.0910,  1.1474,  0.0903,  0.1545,
         -0.1226,  0.1062],
        [ 0.2619,  0.1264,  0.0091,  0.0704, -0.0325,  0.1580,  0.9839, -0.0887,
          0.0077, -0.0319],
        [-0.0687, -0.0099, -0.0344, -0.1078, -0.0033, -0.1016, -0.0980,  0.842

Train student: Step  88 loss value is tensor(-0.4698, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4815, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4836, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4831, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.471

Train student: Step  13 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4774, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  20 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  21 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  22 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  23 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  24 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  25 loss value is tensor(-0.4732

Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1594, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1585, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1475, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1155, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1529, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1702, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1710, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1852, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.2039, grad_fn=<MulBackward0>)
Train Teacher: Step  25   Teacher loss this step is: tensor(-0.1450, grad_fn=<MulBackward0>)
Train Teacher: Step  26   Teacher loss this step is: tensor(-0.1215, g

Train student: Step  41 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4774, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4783, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4774, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4777, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4715

Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1521, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1345, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.0799, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1422, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1426, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0062) V max grad tensor(0.0310)
student conv1 mean grad tensor(0.1516) student conv1 max grad tensor(0.5220)
test
Parameter containing:
tensor([[ 1.0179e+00, -2.2406e-01, -1.7871e-01,  1.5655e-02, -1.6069e-01,
         -6.3537e-02, -1.1592e-02, -1.1251e-01, -1.1198e-01, -2.1972e-01],
        [-2.9272e-01,  8.0573e-01,  1.6262e-02, -2.2527e-01, -1.0525e-01,
         -1.4058e-01, -1.4350e-01, -1.5684e-01, -2.4910e-02,  2.1891e-02],
        [-4.4125e-02,  9.9482e-02,  1.1020e+00, -9.4505e-02, -8.6226e-02,
    

Train student: Step  76 loss value is tensor(-0.4767, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4846, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4902, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4809, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4813, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4728

Train student: Step  6 loss value is tensor(-0.4782, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4665, gr

Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1747, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1252, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.0924, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1701, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1305, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1659, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1257, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1755, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1636, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1301, g

Train student: Step  46 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4613, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4793, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4721

Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1670, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1326, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1087, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1161, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1638, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1635, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1437, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1553, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1271, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0056) V max grad tensor(0.0141)
student conv1 mean grad tensor(0.1704) student conv1 max grad tensor(0.5839)
test
Parameter containing:
tens

Train student: Step  78 loss value is tensor(-0.4829, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4788, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4820, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4794, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4785, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4870, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4824, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4713

Train student: Step  5 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4706, gra

Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1434, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1282, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1385, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1641, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1358, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1587, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1326, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1393, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1243, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1430, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1721, grad

Train student: Step  32 loss value is tensor(-0.4752, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4865, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4779, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4693, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4694

Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1610, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1478, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.2158, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1424, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1504, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1716, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1664, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.2012, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1531, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.2057, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1022, g

Train student: Step  65 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4771, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4801, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4821, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4777, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4814

Train student: Step  4 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4815, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4732, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4749, grad

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1255, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1664, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1359, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1369, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1582, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1428, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1332, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1231, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1823, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1370, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1413, gra

Train student: Step  38 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4749

Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1772, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1570, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1367, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1218, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.0981, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1602, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1881, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1186, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1694, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1721, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1172, g

Train student: Step  68 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4812, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4887, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4891, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4804, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4749

Train student: Step  4 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4752, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4768, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4649, grad

Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1840, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1825, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1510, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1720, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1360, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1423, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1426, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1780, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1181, grad_fn=<MulBackward0>)
Train Teacher: Step  23   Teacher loss this step is: tensor(-0.1477, grad_fn=<MulBackward0>)
Train Teacher: Step  24   Teacher loss this step is: tensor(-0.1297, g

Train student: Step  42 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4607, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4862, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4771

Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1227, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1504, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.0985, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1702, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1925, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1296, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1484, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1327, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0070) V max grad tensor(0.0169)
student conv1 mean grad tensor(0.2251) student conv1 max grad tensor(0.7624)
test
Parameter containing:
tensor([[ 1.0895, -0.2922, -0.2154,  0.0209, -0.2052, -0.0746, -0.0534, -0.1605,
         -0.0814

Train student: Step  81 loss value is tensor(-0.4772, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4798, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4830, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4828, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4877, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4830, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4954, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4743

Train student: Step  3 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4811, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4651, grad_

Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1852, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1006, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1660, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1689, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1587, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1302, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1183, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1425, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1576, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1647, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1352, g

Train student: Step  38 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4771, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4827, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4768, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4803, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4708

Train Teacher: Step  39   Teacher loss this step is: tensor(-0.2001, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1382, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1717, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1372, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1489, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1663, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.2082, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1343, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1316, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1324, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1438, g

Train student: Step  68 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4823, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4790, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4811, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4756, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4824, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4817, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4908, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4694

Train student: Step  3 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4810, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4779, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4834, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4731, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4743, grad_

Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1855, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1415, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1815, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1325, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1431, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1483, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1781, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1368, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1381, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1345, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1473, grad_f

Train student: Step  33 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4719

Train Teacher: Step  33   Teacher loss this step is: tensor(-0.2117, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1563, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1795, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1682, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1492, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1585, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1490, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1246, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1528, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1472, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1521, g

Train student: Step  67 loss value is tensor(-0.4777, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4886, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4808, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4848, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4820, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4672

Train student: Step  2 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4816, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4685, grad_f

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1282, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1911, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1123, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1305, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1142, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1292, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1534, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1532, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1147, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1341, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1131, gr

Train student: Step  33 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4607, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4793, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4585, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4731

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1570, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1755, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1381, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1564, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1417, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1726, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.2163, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1785, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1452, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1464, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1884, g

Train student: Step  65 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4892, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4760, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4870, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4767, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4833, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4712

Train student: Step  2 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4693, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4803, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4695, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4634, grad_f

Train Teacher: Step  4   Teacher loss this step is: tensor(-0.1619, grad_fn=<MulBackward0>)
Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1601, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1096, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1702, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1881, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1468, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1053, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1391, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1538, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1310, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1872, grad_fn

Train student: Step  32 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4693, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4819, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4778, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4796

Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1874, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1161, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1358, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1698, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1657, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1575, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1174, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1593, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1626, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1703, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1661, g

Train student: Step  61 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4872, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4788, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4868, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4757

Train student: Step  3 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4566, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4776, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4739, grad_

Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1668, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.0859, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1558, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1415, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1381, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.2090, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1464, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1840, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1112, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1517, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1733, grad

Train student: Step  35 loss value is tensor(-0.4827, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4825, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4745

Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1307, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1465, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1605, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1952, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1233, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1278, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1608, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1450, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1608, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1483, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1441, g

Train student: Step  68 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4839, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4809, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4813, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4820, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4833, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4817, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4780

Train student: Step  3 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4786, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4664, grad_

Train Teacher: Step  4   Teacher loss this step is: tensor(-0.1683, grad_fn=<MulBackward0>)
Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1627, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1326, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1489, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.0963, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1234, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1384, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1473, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1400, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1397, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1520, grad_fn

Train student: Step  32 loss value is tensor(-0.4848, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4836, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4676

Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1684, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1794, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1959, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1371, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1300, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1760, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1290, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1431, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1392, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1496, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1280, g

Train student: Step  66 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4773, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4719

Train student: Step  2 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4672, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4639, grad_f

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1461, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1569, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1584, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1162, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1752, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1564, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1568, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1410, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1516, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1482, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1701, gr

Train student: Step  32 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4768, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4695, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4639

Train Teacher: Step  31   Teacher loss this step is: tensor(-0.2000, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1447, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1862, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1382, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1780, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1601, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1847, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1672, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1779, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1720, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1276, g

Train student: Step  58 loss value is tensor(-0.4836, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4739, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4811, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4760

Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1916, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0111) V max grad tensor(0.0503)
student conv1 mean grad tensor(0.3639) student conv1 max grad tensor(1.2083)
test
Parameter containing:
tensor([[ 1.0941, -0.3473, -0.2487,  0.0152, -0.2416, -0.0990, -0.0889, -0.2115,
         -0.0412, -0.1855],
        [-0.4715,  0.8634,  0.0018, -0.2882, -0.1272, -0.1406, -0.1841, -0.2211,
         -0.0936, -0.0298],
        [-0.0523,  0.0688,  1.3158, -0.0672, -0.0214,  0.0374, -0.0535, -0.0795,
         -0.0150,  0.0186],
        [ 0.1003,  0.1635,  0.1005,  0.8654,  0.0494,  0.0737,  0.1228,  0.0908,
          0.0417,  0.0451],
        [-0.0434, -0.0273,  0.0023, -0.0392,  1.2960, -0.0634, -0.0081,  0.1651,
          0.0075, -0.0038],
        [ 0.3168,  0.0631,  0.0056,  0.0665, -0.0785,  1.3365,  0.1067,  0.0652,
          0.0155,  0.0912],
        [ 0.3156,  0.0864,  0.0282,  0.0490, -0.0116,  0.1107,  1.2405, -0.1091,
          0.0

Train student: Step  88 loss value is tensor(-0.4796, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4890, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4778, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4909, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4914, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.5012, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4850, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.481

Train student: Step  7 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4794, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4660, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4771, g

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1821, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1882, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1407, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1093, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1651, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1181, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1725, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1484, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1716, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.2086, gra

Train student: Step  31 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4834, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4766, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4718

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1753, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1741, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1940, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1352, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1682, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1305, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1417, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1883, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1579, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1290, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1429, g

Train student: Step  61 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4732, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4600

Train student: Step  3 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4575, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4622, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4526, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4698, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4546, grad_

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1715, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1914, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1702, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1993, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1399, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1482, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1544, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1496, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1761, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1357, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1253, g

Train student: Step  36 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4646, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4646

Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1523, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1847, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1848, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1572, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1602, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1606, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1749, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1520, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1304, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1039, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1736, g

Train student: Step  67 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4783, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4758, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4834, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4783, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4734

Train student: Step  2 loss value is tensor(-0.4586, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4568, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4513, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4821, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4594, grad_f

Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1363, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1961, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.0997, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1418, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1577, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1206, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1686, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1711, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1581, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1578, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1405, grad_

Train student: Step  31 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4600, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4705

Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1419, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1323, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1524, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1624, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1205, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1465, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1071, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1416, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1382, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1614, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1564, g

Train student: Step  63 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4865, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4905, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4887, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4665

Train student: Step  3 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4577, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4587, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4783, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4530, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4656, grad_

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1516, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1624, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1064, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1716, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1196, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1376, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1942, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1524, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1729, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1604, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1597, g

Train student: Step  35 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4758, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4804, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4559

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1273, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1799, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.0960, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1738, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2050, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1512, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1182, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1257, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1608, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1736, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1677, g

Train student: Step  61 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4791, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4791

Train student: Step  2 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4631, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4574, grad_f

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1248, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1751, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1562, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1568, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1688, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1012, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1580, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1230, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1885, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1681, gra

Train student: Step  34 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4806, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4678

Train Teacher: Step  31   Teacher loss this step is: tensor(-0.2096, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1546, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1543, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1592, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1386, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1653, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1466, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1264, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.0907, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1537, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1870, g

Train student: Step  63 loss value is tensor(-0.4877, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4798, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4790, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4786, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4896

Train student: Step  3 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4697, grad_

Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1527, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1740, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1541, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1848, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1424, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1141, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1700, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1086, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1751, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1096, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1581, g

Train student: Step  39 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4630, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4668

Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1274, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1465, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1920, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1486, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1610, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1443, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1804, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1894, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1326, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1747, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1587, g

Train student: Step  71 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4803, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4861, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4809, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4844, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4789

Train student: Step  2 loss value is tensor(-0.4752, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4523, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4573, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4814, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4508, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4660, grad_f

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1872, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.2112, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1429, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1712, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1479, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1540, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1336, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1299, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1120, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1392, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1817, g

Train student: Step  34 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4579, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4772, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4764, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4592, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4719, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4744

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1140, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1908, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1287, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1708, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1170, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1364, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1274, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1449, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1493, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1502, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1871, g

Train student: Step  62 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4812, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4818, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4740, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4730

Train student: Step  3 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4660, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4592, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4635, grad_

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1458, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1396, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1436, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1676, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1351, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1522, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1826, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1279, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1665, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1748, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1556, gra

Train student: Step  39 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4593, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4735

Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1838, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1589, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1199, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1373, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1362, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1915, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1375, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1564, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1295, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1473, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1978, g

Train student: Step  67 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4833, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4894

Train student: Step  2 loss value is tensor(-0.4766, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4534, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4641, grad_f

Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1404, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1344, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1788, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1179, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1189, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1728, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1655, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1655, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1904, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1440, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.2249, grad_

Train student: Step  28 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  29 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  30 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4532, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4502

Train Teacher: Step  27   Teacher loss this step is: tensor(-0.1303, grad_fn=<MulBackward0>)
Train Teacher: Step  28   Teacher loss this step is: tensor(-0.1962, grad_fn=<MulBackward0>)
Train Teacher: Step  29   Teacher loss this step is: tensor(-0.2279, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1308, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1523, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1702, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1770, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1906, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.2527, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2248, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1452, g

Train student: Step  53 loss value is tensor(-0.4569, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4790, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4801

Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1200, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1618, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1510, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1844, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0075) V max grad tensor(0.0178)
student conv1 mean grad tensor(0.5451) student conv1 max grad tensor(1.7970)
test
Parameter containing:
tensor([[ 1.1371e+00, -3.7474e-01, -2.6621e-01, -2.0362e-02, -2.1960e-01,
         -1.2523e-01, -1.0692e-01, -2.7439e-01, -2.3949e-02, -1.6451e-01],
        [-5.5503e-01,  9.7315e-01, -7.9372e-02, -2.7240e-01, -1.7967e-01,
         -1.3596e-01, -1.8023e-01, -2.1628e-01, -1.4172e-01, -4.8893e-02],
        [-6.9932e-02,  5.1489e-02,  1.4761e+00, -3.7024e-02,  2.3967e-03,
          3.1818e-02, -1.6035e-02, -3.6440e-02, -4.4715e-02,  2.5823e-02],
        [ 1.0432e-01, 

Train student: Step  80 loss value is tensor(-0.4859, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4841, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4613, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4821, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4830, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4858, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4777

Train student: Step  2 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4540, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4584, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4721, grad_f

Train Teacher: Step  4   Teacher loss this step is: tensor(-0.1201, grad_fn=<MulBackward0>)
Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1785, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1935, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1692, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1390, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1357, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1605, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1592, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1310, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1223, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1451, grad_fn

Train student: Step  33 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4559, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4724

Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1149, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.2081, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1594, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1685, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1122, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.2226, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1458, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1554, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1283, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1499, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1212, g

Train student: Step  64 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4817, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4807, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4676

Train student: Step  3 loss value is tensor(-0.4758, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4613, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4525, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4600, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4521, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4555, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4656, grad_

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1670, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1608, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1657, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.0978, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1525, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1698, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1919, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1457, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1317, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1701, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1728, gr

Train student: Step  33 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4770, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4796, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4863, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4739, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4572

Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1919, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1184, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1679, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1832, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1556, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.2226, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1517, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1694, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1155, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1494, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1711, g

Train student: Step  73 loss value is tensor(-0.4618, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4582, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4778, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4813, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4898, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4957, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4567

Train student: Step  4 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4756, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4561, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4595, grad

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1907, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1764, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1867, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1278, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1839, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1606, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1969, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1479, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1725, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1078, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1405, gra

Train student: Step  36 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4806, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4796, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4542, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4760, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4690

Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1270, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1253, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1824, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1497, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1965, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1465, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1526, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1253, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1773, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1673, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1648, g

Train student: Step  69 loss value is tensor(-0.4767, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4528, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4806, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4889, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4863

Train student: Step  4 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4483, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4557, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4851, grad

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1593, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1782, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1622, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1242, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1533, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1459, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1274, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1858, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1611, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1527, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1472, gr

Train student: Step  34 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4771, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4570, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4529, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4790

Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1926, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1588, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.2237, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1645, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1227, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1695, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1661, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1409, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1396, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1246, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1325, g

Train student: Step  60 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4793, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4852, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4791, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4634

Train student: Step  4 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4731, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4722, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4603, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4570, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4776, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4574, grad

Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1465, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1468, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1527, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1583, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1265, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.2357, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1909, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1270, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1675, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1655, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.2017, g

Train student: Step  36 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4693, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4756, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4674

Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1618, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1644, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1494, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1262, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1992, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1567, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1590, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1785, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1611, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1533, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1348, g

Train student: Step  67 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4768, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4555, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4776, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4790, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4927

Train student: Step  2 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4580, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4591, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4557, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4600, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4598, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4786, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4594, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4732, grad_f

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1486, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1560, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1680, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.2063, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1666, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1350, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1955, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1711, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1413, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.0999, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1560, gr

Train student: Step  32 loss value is tensor(-0.4814, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4552, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4794, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4618

Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1552, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1917, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1403, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1462, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1810, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1435, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1733, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1729, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1709, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1575, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1133, g

Train student: Step  62 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4839, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4800, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4646, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4670

Train student: Step  3 loss value is tensor(-0.4579, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4592, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4629, grad_

Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1655, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1894, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1723, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1830, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1337, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1980, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1625, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1476, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1730, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1219, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1842, gr

Train student: Step  32 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4660, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4764, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4682, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4485, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4758, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4618

Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1457, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.2012, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.2113, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1828, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1510, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1639, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2126, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1409, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1432, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1598, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1350, g

Train student: Step  59 loss value is tensor(-0.4870, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4805, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4823, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4786, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4837, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4519, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4585, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4755

phi mean grad tensor(0.0046) V max grad tensor(0.0131)
student conv1 mean grad tensor(0.6944) student conv1 max grad tensor(2.2790)
test
Parameter containing:
tensor([[ 1.1957, -0.3841, -0.2681, -0.0527, -0.1929, -0.1384, -0.1121, -0.3168,
         -0.0127, -0.1550],
        [-0.5899,  1.0178, -0.1213, -0.2754, -0.2089, -0.1426, -0.1836, -0.2004,
         -0.1716, -0.0857],
        [-0.0767,  0.0533,  1.5799, -0.0381,  0.0048,  0.0260, -0.0191, -0.0159,
         -0.0432,  0.0251],
        [ 0.1136,  0.1544,  0.1126,  1.1940,  0.0859,  0.0877,  0.1136,  0.0912,
          0.0687,  0.0765],
        [-0.0566, -0.0194, -0.0135,  0.0267,  1.5664, -0.0606,  0.0061,  0.1827,
          0.0280, -0.0122],
        [ 0.3367,  0.0552,  0.0161,  0.0599, -0.0135,  1.5273,  0.0745,  0.0147,
          0.0574,  0.0778],
        [ 0.3098,  0.0648,  0.0390,  0.0497,  0.0174,  0.0859,  1.5267, -0.1056,
          0.0604,  0.0420],
        [-0.1775, -0.0881, -0.1322, -0.1368, -0.1006, -0.0836, -0.0424,  1.007

Train student: Step  89 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4861, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4866, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4803, grad_fn=<MeanBackward0>)
Train student: Step  98 loss value is tensor(-0.4920, grad_fn=<MeanBackward0>)
Train student: Step  99 loss value is tensor(-0.4877, grad_fn=<MeanBackward0>)
Train student: Step  100 loss value is tensor(-0.4925, grad_fn=<MeanBackward0>)
Train student: Step  101 loss value is tensor(-0.48

Train student: Step  12 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4732, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4552, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  20 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  21 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  22 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  23 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  24 loss value is tensor(-0.4760

Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1853, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1664, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.2013, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1288, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1366, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1929, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.2419, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1623, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1806, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1105, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1744, g

Train student: Step  40 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4776, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4578, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4712

Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1088, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1205, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1432, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1798, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1262, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1952, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1846, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1563, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1535, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1429, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0116) V max grad tensor(0.0424)
student conv1 m

Train student: Step  76 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4819, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4811, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4660

Train student: Step  3 loss value is tensor(-0.4580, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4676, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4587, grad_

Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1341, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1603, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1597, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1357, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1423, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1926, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1220, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1558, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1249, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1421, grad_fn=<MulBackward0>)
Train Teacher: Step  22   Teacher loss this step is: tensor(-0.1184, g

Train student: Step  38 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4742

Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1479, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1627, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1522, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1363, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1510, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1218, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1072, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1683, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1300, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1772, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1723, g

Train student: Step  63 loss value is tensor(-0.4866, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4631, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.5037, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4794

Train student: Step  3 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4572, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4865, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4513, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4643, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4768, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4723, grad_

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1615, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1762, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1673, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1565, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1365, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1566, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1679, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1837, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1676, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.0922, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1593, g

Train student: Step  35 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4672, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4523, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4571

Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1187, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1420, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1688, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1679, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1264, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1443, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1694, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1865, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1718, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1043, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1214, g

Train student: Step  66 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4750, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4627, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4763, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4854, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4760, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4561

Train student: Step  3 loss value is tensor(-0.4766, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4695, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4752, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4527, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4528, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4705, grad_

Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1677, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1882, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1643, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1329, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1506, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1747, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1849, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.2080, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1312, grad_fn=<MulBackward0>)
Train Teacher: Step  21   Teacher loss this step is: tensor(-0.1351, g

Train student: Step  42 loss value is tensor(-0.4632, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4802, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4830, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  52 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  53 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4629

Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1971, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1387, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1528, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1381, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1764, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1308, grad_fn=<MulBackward0>)
Train Teacher: Step  49   Teacher loss this step is: tensor(-0.1531, grad_fn=<MulBackward0>)
Train Teacher: Step  50   Teacher loss this step is: tensor(-0.1664, grad_fn=<MulBackward0>)
Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1442, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0063) V max grad tensor(0.0189)
student conv1 mean grad tensor(0.7778) student conv1 max grad tensor(2.5481)
test
Parameter containing:
tens

Train student: Step  77 loss value is tensor(-0.4830, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4807, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4774, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4818, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4800, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4955, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4946

Train student: Step  2 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4587, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4759, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4822, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4801, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4583, grad_f

Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1696, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1989, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1674, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1562, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1497, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.2107, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.2129, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1379, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1301, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.2084, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1471, grad_f

Train student: Step  34 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4849, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4693, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4755, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4732, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4732, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4595

Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1690, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1153, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1520, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1557, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1953, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1294, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1856, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1535, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1367, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1337, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.2063, g

Train student: Step  67 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4816, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4743, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4812, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4854, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4775

Train student: Step  3 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4471, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4726, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4566, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4730, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4656, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4581, grad_

Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1877, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1413, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1571, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1855, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1415, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1491, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1670, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1558, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1687, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1440, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1667, grad_

Train student: Step  31 loss value is tensor(-0.4748, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4569, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4764, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4823, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4777, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4463

Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1974, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1718, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1334, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1671, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1874, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1648, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1406, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1427, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.2163, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1162, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1636, g

Train student: Step  55 loss value is tensor(-0.4592, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4785, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4740, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4501, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4759

Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1727, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0070) V max grad tensor(0.0154)
student conv1 mean grad tensor(0.8279) student conv1 max grad tensor(2.7042)
test
Parameter containing:
tensor([[ 1.2419e+00, -3.8841e-01, -2.4872e-01, -4.8954e-02, -1.7558e-01,
         -1.3957e-01, -1.1712e-01, -3.4809e-01, -1.4093e-02, -1.4449e-01],
        [-6.1417e-01,  1.0465e+00, -1.2063e-01, -2.7485e-01, -2.1136e-01,
         -1.5385e-01, -1.7962e-01, -1.9789e-01, -2.0268e-01, -1.1175e-01],
        [-8.2079e-02,  5.9753e-02,  1.6445e+00, -3.1672e-02,  8.8994e-03,
          1.8795e-02, -2.3225e-02, -1.2231e-02, -4.8939e-02,  3.2075e-02],
        [ 1.1789e-01,  1.5120e-01,  1.3461e-01,  1.2989e+00,  1.0103e-01,
          9.3542e-02,  9.7361e-02,  8.5018e-02,  7.3091e-02,  9.5302e-02],
        [-5.8874e-02, -8.0637e-03,  8.2590e-03,  4.1681e-02,  1.6604e+00,
         -4.7104e-02,  4.0535e-03,  1.7664e-01,  2.3361e-02, -1.0304e-02],
   

Train student: Step  81 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  82 loss value is tensor(-0.4840, grad_fn=<MeanBackward0>)
Train student: Step  83 loss value is tensor(-0.4838, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4783, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4916, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4824, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4711

Train student: Step  3 loss value is tensor(-0.4491, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4551, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4715, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4671, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4575, grad_

Train Teacher: Step  6   Teacher loss this step is: tensor(-0.2268, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1728, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1397, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1764, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1555, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1828, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1484, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1779, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1736, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1587, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.2140, grad_

Train student: Step  30 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4815, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4696, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4794, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4703

Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1366, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1554, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1852, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1793, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1959, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2015, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.2056, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1827, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1179, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1408, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1175, g

Train student: Step  64 loss value is tensor(-0.4786, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4572, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4509, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4764, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4687

Train student: Step  1 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  2 loss value is tensor(-0.4664, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4775, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4748, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4638, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4739, grad_fn

Train Teacher: Step  2   Teacher loss this step is: tensor(-0.1499, grad_fn=<MulBackward0>)
Train Teacher: Step  3   Teacher loss this step is: tensor(-0.2121, grad_fn=<MulBackward0>)
Train Teacher: Step  4   Teacher loss this step is: tensor(-0.1820, grad_fn=<MulBackward0>)
Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1910, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1725, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1317, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1212, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1409, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1684, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1854, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.2032, grad_fn=<

Train student: Step  30 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4628, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4539, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4590, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4727, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4861, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4601, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4592

Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1467, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1734, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1690, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1198, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1471, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1683, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1405, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1633, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1759, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1674, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1554, g

Train student: Step  63 loss value is tensor(-0.4672, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4810, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4604, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4822, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4736, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4615, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4813, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4820, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4692

Train student: Step  2 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4653, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4622, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4557, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4581, grad_f

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1612, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1710, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1895, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1662, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1893, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1578, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.2239, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1181, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1417, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1755, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.1694, g

Train student: Step  38 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4570, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4748, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4624, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4611, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4812, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4712, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4753, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4748

Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1428, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1477, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1938, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1428, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.2182, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1628, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1855, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1895, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1596, grad_fn=<MulBackward0>)
Train Teacher: Step  47   Teacher loss this step is: tensor(-0.1435, grad_fn=<MulBackward0>)
Train Teacher: Step  48   Teacher loss this step is: tensor(-0.1427, g

Train student: Step  67 loss value is tensor(-0.4629, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4835, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4777, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4949, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4861, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4595, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4865, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4746, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4748, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4987

Train student: Step  2 loss value is tensor(-0.4683, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4599, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4584, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4510, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4702, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4811, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4584, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4743, grad_f

Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1645, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1657, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1601, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1554, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1704, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1757, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1263, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1442, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1225, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1485, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1557, grad_

Train student: Step  36 loss value is tensor(-0.4504, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4698, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4622, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4581, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4548, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4654

Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1640, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1965, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1473, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1467, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1572, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1243, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1662, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1526, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1847, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1840, grad_fn=<MulBackward0>)
Train Teacher: Step  46   Teacher loss this step is: tensor(-0.1270, g

Train student: Step  69 loss value is tensor(-0.4575, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4649, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4735, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4509, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4964, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4565, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  80 loss value is tensor(-0.4748, grad_fn=<MeanBackward0>)
Train student: Step  81 loss value is tensor(-0.4771

Train student: Step  3 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4764, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4554, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4630, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4559, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4738, grad_

Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1325, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1700, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1524, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1438, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1697, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1321, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1246, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.2227, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1701, grad_fn=<MulBackward0>)
Train Teacher: Step  19   Teacher loss this step is: tensor(-0.1238, grad_fn=<MulBackward0>)
Train Teacher: Step  20   Teacher loss this step is: tensor(-0.2051, g

Train student: Step  33 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4770, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4567, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4802, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4763

Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1685, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1822, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1203, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1497, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1797, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1850, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1414, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2215, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1630, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1506, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1435, g

Train student: Step  57 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4747, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4525, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4687, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4507, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4551, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4583, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4756

phi mean grad tensor(0.0088) V max grad tensor(0.0305)
student conv1 mean grad tensor(0.9381) student conv1 max grad tensor(3.0536)
test
Parameter containing:
tensor([[ 1.2890e+00, -3.9057e-01, -2.6513e-01, -4.8561e-02, -1.5435e-01,
         -1.2999e-01, -1.1263e-01, -3.6129e-01, -2.8251e-02, -1.4100e-01],
        [-6.2727e-01,  1.0762e+00, -1.4744e-01, -2.7276e-01, -2.1542e-01,
         -1.5750e-01, -1.5492e-01, -1.8952e-01, -2.2687e-01, -1.2455e-01],
        [-8.4329e-02,  6.0327e-02,  1.7150e+00, -3.0025e-02,  4.6239e-03,
          1.2141e-02, -1.7207e-02, -3.9195e-03, -6.1583e-02,  2.5325e-02],
        [ 1.1967e-01,  1.4784e-01,  1.2431e-01,  1.3693e+00,  1.0753e-01,
          9.6458e-02,  1.0333e-01,  8.7248e-02,  6.5499e-02,  9.6040e-02],
        [-6.0344e-02, -1.9485e-03, -7.3734e-03,  4.2311e-02,  1.7324e+00,
         -3.1693e-02,  1.5124e-02,  1.7490e-01,  1.0138e-02, -3.9396e-03],
        [ 3.3078e-01,  6.0376e-02,  2.7380e-02,  6.3508e-02,  3.1895e-02,
          1.6513e+00, 

Train student: Step  85 loss value is tensor(-0.4843, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4688, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4769, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4672, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4789, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4555, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4858, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4701, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4845, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4891, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4668, grad_fn=<MeanBackward0>)
Train student: Step  96 loss value is tensor(-0.4844, grad_fn=<MeanBackward0>)
Train student: Step  97 loss value is tensor(-0.4886

Train student: Step  7 loss value is tensor(-0.4490, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4713, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4633, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4640, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  17 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  18 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  19 loss value is tensor(-0.4821, g

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1785, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1575, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1686, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.2135, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1534, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1626, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1961, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1988, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1957, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1417, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1854, gra

Train student: Step  33 loss value is tensor(-0.4871, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4771, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4762, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4697, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4605, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4564, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4692, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4507

Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1255, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1717, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1083, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1674, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1871, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1537, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1574, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1872, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1619, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1658, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1196, g

Train student: Step  60 loss value is tensor(-0.4563, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4749, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4689, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4574, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4658, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4819, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4865

Train student: Step  2 loss value is tensor(-0.4533, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4630, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4606, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4771, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4663, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4787, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4650, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4606, grad_f

Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1313, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1402, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1186, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1736, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1287, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1569, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1982, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.2055, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1657, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1657, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1405, grad

Train student: Step  30 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4709, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4684, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4602, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4685, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4587, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4574, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4716, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4745, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4634

Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1667, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1000, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1361, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1593, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1633, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1668, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1704, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1593, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1564, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1874, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1325, g

Train student: Step  65 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4531, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4576, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4898, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4550, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4790, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4722

Train student: Step  2 loss value is tensor(-0.4618, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4659, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4556, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4636, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4608, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4680, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4588, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4543, grad_f

Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1628, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1521, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1802, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1635, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1405, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1499, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1378, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1570, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1753, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1681, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1474, grad_f

Train student: Step  29 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  30 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4737, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4655, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4678, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4547, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4609, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4667, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4725, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4633

Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1980, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1408, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1672, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1829, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1189, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1746, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1563, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1796, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1512, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.2079, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1465, g

Train student: Step  63 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4742, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4674, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4626, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4542, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4604, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4820, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4728

Train student: Step  4 loss value is tensor(-0.4637, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4464, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4648, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4666, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4620, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4761, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4695, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4754, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4665, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4723, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4673, grad

Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1264, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1625, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1249, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1536, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1829, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.2038, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.2172, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1166, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.2001, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1546, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1967, grad_f

Train student: Step  28 loss value is tensor(-0.4767, grad_fn=<MeanBackward0>)
Train student: Step  29 loss value is tensor(-0.4765, grad_fn=<MeanBackward0>)
Train student: Step  30 loss value is tensor(-0.4545, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4544, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4652, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4647, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4641, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4616, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4528, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4583

Train Teacher: Step  27   Teacher loss this step is: tensor(-0.2086, grad_fn=<MulBackward0>)
Train Teacher: Step  28   Teacher loss this step is: tensor(-0.1575, grad_fn=<MulBackward0>)
Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1153, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1360, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1409, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1658, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1221, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1613, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1603, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1043, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1510, g

Train student: Step  53 loss value is tensor(-0.4728, grad_fn=<MeanBackward0>)
Train student: Step  54 loss value is tensor(-0.4600, grad_fn=<MeanBackward0>)
Train student: Step  55 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  56 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  57 loss value is tensor(-0.4610, grad_fn=<MeanBackward0>)
Train student: Step  58 loss value is tensor(-0.4596, grad_fn=<MeanBackward0>)
Train student: Step  59 loss value is tensor(-0.4729, grad_fn=<MeanBackward0>)
Train student: Step  60 loss value is tensor(-0.4614, grad_fn=<MeanBackward0>)
Train student: Step  61 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4706, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4738, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4670, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4716

Train Teacher: Step  51   Teacher loss this step is: tensor(-0.1728, grad_fn=<MulBackward0>)
phi mean grad tensor(0.0078) V max grad tensor(0.0221)
student conv1 mean grad tensor(1.0293) student conv1 max grad tensor(3.3440)
test
Parameter containing:
tensor([[ 1.3164e+00, -3.9179e-01, -2.4783e-01, -4.9187e-02, -1.4069e-01,
         -1.2952e-01, -1.2221e-01, -3.7257e-01, -2.2724e-02, -1.4203e-01],
        [-6.3495e-01,  1.0960e+00, -1.4607e-01, -2.7556e-01, -2.1508e-01,
         -1.6546e-01, -1.5243e-01, -1.8874e-01, -2.2343e-01, -1.3480e-01],
        [-8.7935e-02,  6.1903e-02,  1.7527e+00, -3.8396e-02,  2.8871e-03,
          7.7100e-03, -2.2210e-02, -3.0766e-03, -4.8455e-02,  1.5097e-02],
        [ 1.1908e-01,  1.4175e-01,  1.4113e-01,  1.4250e+00,  1.0743e-01,
          9.0911e-02,  9.0101e-02,  8.8964e-02,  7.6263e-02,  9.0643e-02],
        [-6.2114e-02, -3.0879e-03,  1.7405e-03,  3.2888e-02,  1.7862e+00,
         -2.9580e-02,  7.1789e-03,  1.7533e-01,  1.7929e-02, -3.6353e-03],
   

Train student: Step  83 loss value is tensor(-0.4838, grad_fn=<MeanBackward0>)
Train student: Step  84 loss value is tensor(-0.4758, grad_fn=<MeanBackward0>)
Train student: Step  85 loss value is tensor(-0.4718, grad_fn=<MeanBackward0>)
Train student: Step  86 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  87 loss value is tensor(-0.4797, grad_fn=<MeanBackward0>)
Train student: Step  88 loss value is tensor(-0.4898, grad_fn=<MeanBackward0>)
Train student: Step  89 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  90 loss value is tensor(-0.4651, grad_fn=<MeanBackward0>)
Train student: Step  91 loss value is tensor(-0.4807, grad_fn=<MeanBackward0>)
Train student: Step  92 loss value is tensor(-0.4847, grad_fn=<MeanBackward0>)
Train student: Step  93 loss value is tensor(-0.4957, grad_fn=<MeanBackward0>)
Train student: Step  94 loss value is tensor(-0.4705, grad_fn=<MeanBackward0>)
Train student: Step  95 loss value is tensor(-0.4723

Train student: Step  4 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4597, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4662, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4538, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4784, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4804, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4724, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4584, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4720, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4586, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4710, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4624, grad

Train Teacher: Step  8   Teacher loss this step is: tensor(-0.2223, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1164, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1363, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1527, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1540, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1592, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1530, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1151, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1812, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1447, grad_fn=<MulBackward0>)
Train Teacher: Step  18   Teacher loss this step is: tensor(-0.1485, gra

Train student: Step  31 loss value is tensor(-0.4537, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4617, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4513, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4741, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4621, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4646, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4542, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4562, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4639, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4778, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4690, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4657

Train Teacher: Step  28   Teacher loss this step is: tensor(-0.1128, grad_fn=<MulBackward0>)
Train Teacher: Step  29   Teacher loss this step is: tensor(-0.1687, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1901, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1832, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1730, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.1585, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.2049, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1612, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1061, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1484, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1894, g

Train student: Step  61 loss value is tensor(-0.4807, grad_fn=<MeanBackward0>)
Train student: Step  62 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  63 loss value is tensor(-0.4760, grad_fn=<MeanBackward0>)
Train student: Step  64 loss value is tensor(-0.4819, grad_fn=<MeanBackward0>)
Train student: Step  65 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  66 loss value is tensor(-0.4772, grad_fn=<MeanBackward0>)
Train student: Step  67 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4744, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4644, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4699, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4694, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4760, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4761

Train student: Step  2 loss value is tensor(-0.4571, grad_fn=<MeanBackward0>)
Train student: Step  3 loss value is tensor(-0.4634, grad_fn=<MeanBackward0>)
Train student: Step  4 loss value is tensor(-0.4589, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4677, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4645, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4686, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4580, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4675, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4799, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4544, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4665, grad_f

Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1399, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1588, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1720, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1645, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1607, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1667, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.2149, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1502, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1373, grad_fn=<MulBackward0>)
Train Teacher: Step  16   Teacher loss this step is: tensor(-0.1698, grad_fn=<MulBackward0>)
Train Teacher: Step  17   Teacher loss this step is: tensor(-0.1973, grad

Train student: Step  39 loss value is tensor(-0.4714, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4733, grad_fn=<MeanBackward0>)
Train student: Step  41 loss value is tensor(-0.4524, grad_fn=<MeanBackward0>)
Train student: Step  42 loss value is tensor(-0.4552, grad_fn=<MeanBackward0>)
Train student: Step  43 loss value is tensor(-0.4673, grad_fn=<MeanBackward0>)
Train student: Step  44 loss value is tensor(-0.4721, grad_fn=<MeanBackward0>)
Train student: Step  45 loss value is tensor(-0.4708, grad_fn=<MeanBackward0>)
Train student: Step  46 loss value is tensor(-0.4623, grad_fn=<MeanBackward0>)
Train student: Step  47 loss value is tensor(-0.4781, grad_fn=<MeanBackward0>)
Train student: Step  48 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  49 loss value is tensor(-0.4780, grad_fn=<MeanBackward0>)
Train student: Step  50 loss value is tensor(-0.4622, grad_fn=<MeanBackward0>)
Train student: Step  51 loss value is tensor(-0.4562

Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1778, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.2008, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.2008, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1406, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1635, grad_fn=<MulBackward0>)
Train Teacher: Step  40   Teacher loss this step is: tensor(-0.1534, grad_fn=<MulBackward0>)
Train Teacher: Step  41   Teacher loss this step is: tensor(-0.1517, grad_fn=<MulBackward0>)
Train Teacher: Step  42   Teacher loss this step is: tensor(-0.1412, grad_fn=<MulBackward0>)
Train Teacher: Step  43   Teacher loss this step is: tensor(-0.1413, grad_fn=<MulBackward0>)
Train Teacher: Step  44   Teacher loss this step is: tensor(-0.1918, grad_fn=<MulBackward0>)
Train Teacher: Step  45   Teacher loss this step is: tensor(-0.1436, g

Train student: Step  67 loss value is tensor(-0.4553, grad_fn=<MeanBackward0>)
Train student: Step  68 loss value is tensor(-0.4619, grad_fn=<MeanBackward0>)
Train student: Step  69 loss value is tensor(-0.4711, grad_fn=<MeanBackward0>)
Train student: Step  70 loss value is tensor(-0.4855, grad_fn=<MeanBackward0>)
Train student: Step  71 loss value is tensor(-0.4751, grad_fn=<MeanBackward0>)
Train student: Step  72 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  73 loss value is tensor(-0.4739, grad_fn=<MeanBackward0>)
Train student: Step  74 loss value is tensor(-0.4654, grad_fn=<MeanBackward0>)
Train student: Step  75 loss value is tensor(-0.4801, grad_fn=<MeanBackward0>)
Train student: Step  76 loss value is tensor(-0.4681, grad_fn=<MeanBackward0>)
Train student: Step  77 loss value is tensor(-0.4554, grad_fn=<MeanBackward0>)
Train student: Step  78 loss value is tensor(-0.4679, grad_fn=<MeanBackward0>)
Train student: Step  79 loss value is tensor(-0.4759

Train student: Step  4 loss value is tensor(-0.4635, grad_fn=<MeanBackward0>)
Train student: Step  5 loss value is tensor(-0.4707, grad_fn=<MeanBackward0>)
Train student: Step  6 loss value is tensor(-0.4717, grad_fn=<MeanBackward0>)
Train student: Step  7 loss value is tensor(-0.4558, grad_fn=<MeanBackward0>)
Train student: Step  8 loss value is tensor(-0.4625, grad_fn=<MeanBackward0>)
Train student: Step  9 loss value is tensor(-0.4618, grad_fn=<MeanBackward0>)
Train student: Step  10 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  11 loss value is tensor(-0.4691, grad_fn=<MeanBackward0>)
Train student: Step  12 loss value is tensor(-0.4642, grad_fn=<MeanBackward0>)
Train student: Step  13 loss value is tensor(-0.4657, grad_fn=<MeanBackward0>)
Train student: Step  14 loss value is tensor(-0.4757, grad_fn=<MeanBackward0>)
Train student: Step  15 loss value is tensor(-0.4704, grad_fn=<MeanBackward0>)
Train student: Step  16 loss value is tensor(-0.4642, grad

Train Teacher: Step  5   Teacher loss this step is: tensor(-0.1754, grad_fn=<MulBackward0>)
Train Teacher: Step  6   Teacher loss this step is: tensor(-0.1923, grad_fn=<MulBackward0>)
Train Teacher: Step  7   Teacher loss this step is: tensor(-0.1687, grad_fn=<MulBackward0>)
Train Teacher: Step  8   Teacher loss this step is: tensor(-0.1162, grad_fn=<MulBackward0>)
Train Teacher: Step  9   Teacher loss this step is: tensor(-0.1724, grad_fn=<MulBackward0>)
Train Teacher: Step  10   Teacher loss this step is: tensor(-0.1645, grad_fn=<MulBackward0>)
Train Teacher: Step  11   Teacher loss this step is: tensor(-0.1528, grad_fn=<MulBackward0>)
Train Teacher: Step  12   Teacher loss this step is: tensor(-0.1662, grad_fn=<MulBackward0>)
Train Teacher: Step  13   Teacher loss this step is: tensor(-0.1893, grad_fn=<MulBackward0>)
Train Teacher: Step  14   Teacher loss this step is: tensor(-0.1404, grad_fn=<MulBackward0>)
Train Teacher: Step  15   Teacher loss this step is: tensor(-0.1118, grad_f

Train student: Step  28 loss value is tensor(-0.4595, grad_fn=<MeanBackward0>)
Train student: Step  29 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  30 loss value is tensor(-0.4532, grad_fn=<MeanBackward0>)
Train student: Step  31 loss value is tensor(-0.4572, grad_fn=<MeanBackward0>)
Train student: Step  32 loss value is tensor(-0.4506, grad_fn=<MeanBackward0>)
Train student: Step  33 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  34 loss value is tensor(-0.4703, grad_fn=<MeanBackward0>)
Train student: Step  35 loss value is tensor(-0.4700, grad_fn=<MeanBackward0>)
Train student: Step  36 loss value is tensor(-0.4661, grad_fn=<MeanBackward0>)
Train student: Step  37 loss value is tensor(-0.4734, grad_fn=<MeanBackward0>)
Train student: Step  38 loss value is tensor(-0.4669, grad_fn=<MeanBackward0>)
Train student: Step  39 loss value is tensor(-0.4770, grad_fn=<MeanBackward0>)
Train student: Step  40 loss value is tensor(-0.4593

Train Teacher: Step  29   Teacher loss this step is: tensor(-0.2019, grad_fn=<MulBackward0>)
Train Teacher: Step  30   Teacher loss this step is: tensor(-0.1883, grad_fn=<MulBackward0>)
Train Teacher: Step  31   Teacher loss this step is: tensor(-0.1658, grad_fn=<MulBackward0>)
Train Teacher: Step  32   Teacher loss this step is: tensor(-0.1640, grad_fn=<MulBackward0>)
Train Teacher: Step  33   Teacher loss this step is: tensor(-0.2127, grad_fn=<MulBackward0>)
Train Teacher: Step  34   Teacher loss this step is: tensor(-0.1557, grad_fn=<MulBackward0>)
Train Teacher: Step  35   Teacher loss this step is: tensor(-0.1727, grad_fn=<MulBackward0>)
Train Teacher: Step  36   Teacher loss this step is: tensor(-0.1678, grad_fn=<MulBackward0>)
Train Teacher: Step  37   Teacher loss this step is: tensor(-0.1834, grad_fn=<MulBackward0>)
Train Teacher: Step  38   Teacher loss this step is: tensor(-0.1726, grad_fn=<MulBackward0>)
Train Teacher: Step  39   Teacher loss this step is: tensor(-0.1413, g

RuntimeError: python_error

In [34]:
print(teacher.phi)

NameError: name 'teacher' is not defined