# 准备工作

## 导入模块

In [1]:
import os.path
import torch
import matplotlib.pyplot as plt

In [2]:
from nn import *
from vgg import VGG, VGG_BN
from load import load
from util import set_random_seeds, get_num_parameters, train, train_plus
from visual import loss_landscape, grad_pred, beta_smooth

## 载入数据

In [3]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Seed through the project helper before the loaders are built.
set_random_seeds(seed=0, device=device)

# Training loader first, then the test loader.
train_loader, test_loader = load(train=True), load(train=False)

Files already downloaded and verified
Files already downloaded and verified


# 神经网络

## 初始架构

In [4]:
# Baseline run: default NN, cross-entropy loss, Adam with lr 0.001.
model = NN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# All file paths handed to train() for this run live under one result directory.
root = '../Result/NN original 0.001'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# train() returns three values; the first is ignored here, the error curves are kept.
(_, train_errors_original, test_errors_original) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

Epoch:  1	Train Error: 0.46426	Test Error: 0.46610
Epoch:  2	Train Error: 0.39284	Test Error: 0.40720
Epoch:  3	Train Error: 0.34746	Test Error: 0.37080
Epoch:  4	Train Error: 0.30776	Test Error: 0.34930
Epoch:  5	Train Error: 0.29376	Test Error: 0.35000
Epoch:  6	Train Error: 0.26306	Test Error: 0.33230
Epoch:  7	Train Error: 0.22036	Test Error: 0.30660
Epoch:  8	Train Error: 0.20134	Test Error: 0.30160
Epoch:  9	Train Error: 0.19084	Test Error: 0.30770
Epoch: 10	Train Error: 0.16584	Test Error: 0.30770
Epoch: 11	Train Error: 0.15242	Test Error: 0.30270
Epoch: 12	Train Error: 0.13310	Test Error: 0.30720
Epoch: 13	Train Error: 0.12722	Test Error: 0.31080
Epoch: 14	Train Error: 0.10172	Test Error: 0.30940
Epoch: 15	Train Error: 0.09734	Test Error: 0.30760
Epoch: 16	Train Error: 0.09870	Test Error: 0.31590
Epoch: 17	Train Error: 0.06678	Test Error: 0.31460
Epoch: 18	Train Error: 0.05868	Test Error: 0.31480
Epoch: 19	Train Error: 0.05860	Test Error: 0.32420
Epoch: 20	Train Error: 0.06242	

In [None]:
# Report the parameter count of the current `model`.
num_parameters = get_num_parameters(model)
print("Number of parameters:", num_parameters)

In [None]:
# Reload the baseline run's saved losses and error curves from disk so the
# plots below can be produced without retraining.
#
# The baseline training cell writes its artifacts under
# '../Result/NN original 0.001' (the learning-rate-suffixed directory that the
# learning-rate sweep cells also use), so that is the directory that has to be
# read back here. Reading '../Result/NN original' would pick up stale or
# missing files after a fresh Restart & Run All.
root = '../Result/NN original 0.001'
losses_file = os.path.join(root, 'losses.pt')
train_errors_file = os.path.join(root, 'train_errors.pt')
test_errors_file = os.path.join(root, 'test_errors.pt')

losses_original = torch.load(losses_file)
train_errors_original = torch.load(train_errors_file)
test_errors_original = torch.load(test_errors_file)

In [None]:
# Training vs. test error curves of the baseline run.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(test_errors_original, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['train', 'test'])
ax.set(xlabel='epoch', ylabel='error', title='error on training and testing set')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

In [None]:
# Learning rates compared in the loss-landscape experiments. Index 0 is the
# rate already used by the baseline training cell, so it is not retrained here.
lr = [1e-3, 2e-3, 1e-4, 5e-4]

# Slot 0 is read back from the baseline run's saved losses rather than wrapping
# whatever `losses_original` currently holds. Wrapping the existing variable
# made this cell non-idempotent: re-running it (or running it after the reload
# cell that follows) nested the list one level deeper each time.
losses_original = [
    torch.load(os.path.join('../Result/NN original ' + str(lr[0]), 'losses.pt')),
    [], [], [],
]

# Train a fresh baseline network for each remaining learning rate.
for ind in range(1, 4):
    model = NN()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr[ind])
    criterion = nn.CrossEntropyLoss()

    # One result directory per learning rate.
    root = '../Result/NN original ' + str(lr[ind])
    best_model_file = os.path.join(root, 'model.pt')
    losses_file = os.path.join(root, 'losses.pt')
    train_errors_file = os.path.join(root, 'train_errors.pt')
    test_errors_file = os.path.join(root, 'test_errors.pt')

    print('lr =', lr[ind])
    # Only the first value returned by train() is kept for this sweep.
    losses_original[ind], _, _ = \
        train(model, optimizer, criterion, train_loader, test_loader, device = device,
              wrap_tqdms = False, print_errors = True,
              best_model_file = best_model_file, losses_file = losses_file,
              train_errors_file = train_errors_file, test_errors_file = test_errors_file)
    print()

In [None]:
# Rebuild the per-learning-rate loss lists from the files saved on disk.
losses_original = list()
for ind in range(4):
    root = '../Result/NN original ' + str(lr[ind])
    losses_file = os.path.join(root, 'losses.pt')

    loaded_losses = torch.load(losses_file)
    losses_original.append(loaded_losses)

## 神经元数量

In [None]:
# "Smaller" configuration: explicit channel and neuron counts passed to NN.
model = NN(hidden_channels=(4, 8), hidden_neurons=(32, 32))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN smaller'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_smaller, test_errors_smaller) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Report the parameter count of the current `model`.
num_parameters = get_num_parameters(model)
print("Number of parameters:", num_parameters)

In [None]:
# "Bigger" configuration: explicit channel and neuron counts passed to NN.
model = NN(hidden_channels=(64, 128), hidden_neurons=(512, 512))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN bigger'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_bigger, test_errors_bigger) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Report the parameter count of the current `model`.
num_parameters = get_num_parameters(model)
print("Number of parameters:", num_parameters)

In [None]:
# Training error of the three network sizes on one set of axes.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_smaller, '-+')
ax.plot(train_errors_bigger, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['original', 'smaller', 'bigger'])
ax.set(xlabel='epoch', ylabel='error', title='error with different network structures')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 损失函数

In [None]:
# Same setup as the baseline, but with the multi-class margin (hinge) loss.
model = NN()
criterion = nn.MultiMarginLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN hinge'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_hinge, test_errors_hinge) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Training error under the two loss functions.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_hinge, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['cross entropy', 'multi-class hinge'])
ax.set(xlabel='epoch', ylabel='error', title='error with different loss functions')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 正则化

In [None]:
# Weight-decay values swept in the regularization experiment; each one gets its
# own freshly initialised baseline network.
weight_decay = (0.05, 0.01, 0.005, 0.0005, 5e-05, 5e-06)
train_errors_regularize = [[], [], [], [], [], []]
test_errors_regularize = [[], [], [], [], [], []]
for ind in range(6):
    model = NN()
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.001, weight_decay = weight_decay[ind])
    # Cross-entropy, like the baseline run: the plot further down draws
    # `train_errors_original` (trained with cross-entropy) as the 'lambda = 0'
    # curve, so weight decay must be the only thing that differs. This cell
    # previously used nn.MultiMarginLoss(), which confounded that comparison.
    criterion = nn.CrossEntropyLoss()

    # One result directory per weight-decay value.
    root = '../Result/NN regularize ' + str(weight_decay[ind])
    best_model_file = os.path.join(root, 'model.pt')
    losses_file = os.path.join(root, 'losses.pt')
    train_errors_file = os.path.join(root, 'train_errors.pt')
    test_errors_file = os.path.join(root, 'test_errors.pt')

    print('lambda =', weight_decay[ind])
    # The first value returned by train() is ignored; the error curves are kept.
    _, train_errors_regularize[ind], test_errors_regularize[ind] = \
        train(model, optimizer, criterion, train_loader, test_loader, device = device,
              wrap_tqdms = False, print_errors = True,
              best_model_file = best_model_file, losses_file = losses_file,
              train_errors_file = train_errors_file, test_errors_file = test_errors_file)
    print()

In [None]:
# Overwrite each slot of the regularization results with the curves saved on disk.
for ind in range(6):
    root = '../Result/NN regularize ' + str(weight_decay[ind])
    train_errors_file, test_errors_file = [
        os.path.join(root, leaf) for leaf in ('train_errors.pt', 'test_errors.pt')
    ]

    train_errors_regularize[ind] = torch.load(train_errors_file)
    test_errors_regularize[ind] = torch.load(test_errors_file)

In [None]:
# Baseline curve first (labelled 'lambda = 0'), then one curve per weight-decay value.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
for ind in range(6):
    ax.plot(train_errors_regularize[ind], '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['lambda = 0', 'lambda = 0.05', 'lambda = 0.01', 'lambda = 0.005',
           'lambda = 0.0005', 'lambda = 5e-05', 'lambda = 5e-06'])
ax.set(xlabel='epoch', ylabel='error',
       title='error with different regularization parameters')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 激活函数

In [None]:
# Baseline training recipe applied to the NN_tanh model.
model = NN_tanh()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN tanh'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_tanh, test_errors_tanh) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Baseline training recipe applied to the NN_softplus model.
model = NN_softplus()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN softplus'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_softplus, test_errors_softplus) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Training error of the baseline network against the tanh and softplus variants.
plt.plot(train_errors_original, '-+')
plt.plot(train_errors_tanh, '-+')
plt.plot(train_errors_softplus, '-+')
plt.xticks(range(0, 21, 2))
# The third curve comes from NN_softplus, so it is labelled 'softplus'
# (it was previously mislabelled 'softmax').
plt.legend(['ReLU', 'tanh', 'softplus'])
plt.xlabel('epoch')
plt.ylabel('error')
plt.title('error with different activation functions')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
plt.savefig(fig_file)

plt.show()

## 优化器

In [None]:
# Baseline network trained with plain SGD at lr 0.001.
model = NN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN SGD'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_SGD, test_errors_SGD) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Baseline network trained with SGD at lr 0.001 and momentum 0.9.
model = NN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# File paths handed to train() for this run.
root = '../Result/NN momentum'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_momentum, test_errors_momentum) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Baseline network trained with Adagrad; note the lr here is 0.01, not 0.001.
model = NN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)

# File paths handed to train() for this run.
root = '../Result/NN Adagrad'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_Adagrad, test_errors_Adagrad) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Training error for each optimizer on one set of axes.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_SGD, '-+')
ax.plot(train_errors_momentum, '-+')
ax.plot(train_errors_Adagrad, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['Adam', 'SGD', 'SGD + momentum', 'Adagrad'])
ax.set(xlabel='epoch', ylabel='error', title='error with different optimizers')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 批归一化

In [None]:
# Baseline training recipe applied to the NN_BN model.
model = NN_BN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN BN 0.001'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# The first value returned by train() is ignored; the error curves are kept.
(_, train_errors_BN, test_errors_BN) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Reload the batch-norm run's saved losses and error curves from disk so the
# plots below can be produced without retraining.
#
# The NN_BN training cell writes its artifacts under '../Result/NN BN 0.001'
# (the learning-rate-suffixed directory that the sweep cells also use), so that
# is the directory that has to be read back here. Reading '../Result/NN BN'
# would pick up stale or missing files after a fresh Restart & Run All.
root = '../Result/NN BN 0.001'
losses_file = os.path.join(root, 'losses.pt')
train_errors_file = os.path.join(root, 'train_errors.pt')
test_errors_file = os.path.join(root, 'test_errors.pt')

losses_BN = torch.load(losses_file)
train_errors_BN = torch.load(train_errors_file)
test_errors_BN = torch.load(test_errors_file)

In [None]:
# Training error with and without batch normalization.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_BN, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['without BN', 'with BN'])
ax.set(xlabel='epoch', ylabel='error', title='error with or without batch normalization')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

In [None]:
# Training vs. test error of the batch-norm run; this figure is shown but not saved.
fig, ax = plt.subplots()
ax.plot(train_errors_BN, '-+')
ax.plot(test_errors_BN, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['train', 'test'])
ax.set(xlabel='epoch', ylabel='error', title='error on training and testing set')

plt.show()

In [None]:
# One slot per learning rate in `lr`; each slot is replaced by train()'s first return value.
losses_BN = [[] for _ in range(4)]
for ind in range(4):
    model = NN_BN()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr[ind])

    # One result directory per learning rate.
    root = '../Result/NN BN ' + str(lr[ind])
    best_model_file, losses_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
    ]

    print('lr =', lr[ind])
    (losses_BN[ind], _, _) = train(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=False, print_errors=True,
        best_model_file=best_model_file, losses_file=losses_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Rebuild the per-learning-rate loss lists of the NN_BN sweep from disk.
losses_BN = list()
for ind in range(4):
    root = '../Result/NN BN ' + str(lr[ind])
    losses_file = os.path.join(root, 'losses.pt')

    loaded_losses = torch.load(losses_file)
    losses_BN.append(loaded_losses)

In [None]:
# One shaded band per model: the region between the two curves returned by loss_landscape().
fig, ax = plt.subplots()

min_curve, max_curve = loss_landscape(losses_original)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.5)

min_curve, max_curve = loss_landscape(losses_BN)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.5)

ax.legend(['NN without BN', 'NN with BN'])
ax.set(xlabel='step', ylabel='loss', title='loss landscape')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 丢弃法

In [None]:
# Dropout probabilities to compare; one fresh NN_dropout model per value.
prob = (0.2, 0.5)
train_errors_dropout = [[] for _ in range(2)]
test_errors_dropout = [[] for _ in range(2)]
for ind in range(2):
    model = NN_dropout(prob=prob[ind])
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # One result directory per dropout probability.
    root = '../Result/NN dropout ' + str(prob[ind])
    best_model_file, losses_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
    ]

    print('prob =', prob[ind])
    (_, train_errors_dropout[ind], test_errors_dropout[ind]) = train(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=False, print_errors=True,
        best_model_file=best_model_file, losses_file=losses_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Baseline curve (labelled 'prob = 0') against the two dropout runs.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_dropout[0], '-+')
ax.plot(train_errors_dropout[1], '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['prob = 0', 'prob = 0.2', 'prob = 0.5'])
ax.set(xlabel='epoch', ylabel='error', title='error with or without dropout')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

## 最优设置

In [None]:
# Final configuration: NN_BN with the same sizes as the "bigger" run, Adam at lr 0.001.
model = NN_BN(hidden_channels=(64, 128), hidden_neurons=(512, 512))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# File paths handed to train() for this run.
root = '../Result/NN opt'
best_model_file, losses_file, train_errors_file, test_errors_file = [
    os.path.join(root, leaf)
    for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
]

# All three values returned by train() are kept for this run.
(losses, train_errors_opt, test_errors_opt) = train(
    model, optimizer, criterion, train_loader, test_loader,
    device=device, wrap_tqdms=False, print_errors=True,
    best_model_file=best_model_file, losses_file=losses_file,
    train_errors_file=train_errors_file, test_errors_file=test_errors_file,
)

In [None]:
# Training vs. test error of the final configuration.
fig, ax = plt.subplots()
ax.plot(train_errors_opt, '-+')
ax.plot(test_errors_opt, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['train', 'test'])
ax.set(xlabel='epoch', ylabel='error', title='error on training and testing set')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

# VGG网络

In [None]:
# From here on `losses_original` holds VGG results, replacing the NN ones.
# One slot per learning rate in `lr`.
losses_original = [[] for _ in range(4)]
for ind in range(4):
    model = VGG()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr[ind])

    # One result directory per learning rate.
    root = '../Result/VGG original ' + str(lr[ind])
    best_model_file, losses_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
    ]

    print('lr =', lr[ind])
    (losses_original[ind], _, _) = train(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=True, print_errors=True,
        best_model_file=best_model_file, losses_file=losses_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Rebuild the per-learning-rate loss lists of the VGG sweep from disk.
losses_original = list()
for ind in range(4):
    root = '../Result/VGG original ' + str(lr[ind])
    losses_file = os.path.join(root, 'losses.pt')

    loaded_losses = torch.load(losses_file)
    losses_original.append(loaded_losses)

In [None]:
# From here on `losses_BN` holds VGG_BN results, replacing the NN_BN ones.
# One slot per learning rate in `lr`.
losses_BN = [[] for _ in range(4)]
for ind in range(4):
    model = VGG_BN()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr[ind])

    # One result directory per learning rate.
    root = '../Result/VGG BN ' + str(lr[ind])
    best_model_file, losses_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'losses.pt', 'train_errors.pt', 'test_errors.pt')
    ]

    print('lr =', lr[ind])
    (losses_BN[ind], _, _) = train(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=True, print_errors=True,
        best_model_file=best_model_file, losses_file=losses_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Rebuild the per-learning-rate loss lists of the VGG_BN sweep from disk.
losses_BN = list()
for ind in range(4):
    root = '../Result/VGG BN ' + str(lr[ind])
    losses_file = os.path.join(root, 'losses.pt')

    loaded_losses = torch.load(losses_file)
    losses_BN.append(loaded_losses)

In [None]:
# One shaded band per model: the region between the two curves returned by loss_landscape().
fig, ax = plt.subplots()

min_curve, max_curve = loss_landscape(losses_original)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.75)

min_curve, max_curve = loss_landscape(losses_BN)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.75)

ax.legend(['VGG without BN', 'VGG with BN'])
ax.set(xlabel='step', ylabel='loss', title='loss landscape')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

In [None]:
# Training-error curve of the VGG run at lr 0.001, mapped onto the active device.
# This replaces the NN curve previously held in `train_errors_original`.
root = '../Result/VGG original 0.001'
train_errors_file = os.path.join(root, 'train_errors.pt')
train_errors_original = torch.load(train_errors_file, map_location=device)

# Same for the VGG_BN run; `root` is left pointing at this directory afterwards.
root = '../Result/VGG BN 0.001'
train_errors_file = os.path.join(root, 'train_errors.pt')
train_errors_BN = torch.load(train_errors_file, map_location=device)

In [None]:
# Training error with and without batch normalization, using the currently loaded curves.
fig, ax = plt.subplots()
ax.plot(train_errors_original, '-+')
ax.plot(train_errors_BN, '-+')
ax.set_xticks(range(0, 21, 2))
ax.legend(['without BN', 'with BN'])
ax.set(xlabel='epoch', ylabel='error', title='error with or without batch normalization')

# `root` is not set in this cell; it is whatever the last executed cell left behind.
fig_file = os.path.join(root, 'figure.png')
fig.savefig(fig_file)

plt.show()

In [None]:
# One slot per learning rate in `lr`; filled from the first two values returned by train_plus().
parameters_original = [[] for _ in range(4)]
grads_original = [[] for _ in range(4)]
for ind in range(4):
    model = VGG()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr[ind])

    # Same per-learning-rate directories as the earlier VGG sweep.
    root = '../Result/VGG original ' + str(lr[ind])
    best_model_file, parameters_file, grads_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'parameters.pt', 'grads.pt',
                     'train_errors.pt', 'test_errors.pt')
    ]

    print('lr =', lr[ind])
    # train_plus() returns four values; the last two are ignored here.
    (parameters_original[ind], grads_original[ind], _, _) = train_plus(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=True, print_errors=True,
        best_model_file=best_model_file,
        parameters_file=parameters_file, grads_file=grads_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Reload the saved parameter and gradient records of the VGG runs onto the active device.
parameters_original, grads_original = [], []
for ind in range(4):
    root = '../Result/VGG original ' + str(lr[ind])
    parameters_file, grads_file = [
        os.path.join(root, leaf) for leaf in ('parameters.pt', 'grads.pt')
    ]

    parameters_original.append(torch.load(parameters_file, map_location=device))
    grads_original.append(torch.load(grads_file, map_location=device))

In [None]:
# One slot per learning rate in `lr`; filled from the first two values returned by train_plus().
parameters_BN = [[] for _ in range(4)]
grads_BN = [[] for _ in range(4)]
for ind in range(4):
    model = VGG_BN()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr[ind])

    # Same per-learning-rate directories as the earlier VGG_BN sweep.
    root = '../Result/VGG BN ' + str(lr[ind])
    best_model_file, parameters_file, grads_file, train_errors_file, test_errors_file = [
        os.path.join(root, leaf)
        for leaf in ('model.pt', 'parameters.pt', 'grads.pt',
                     'train_errors.pt', 'test_errors.pt')
    ]

    print('lr =', lr[ind])
    # train_plus() returns four values; the last two are ignored here.
    (parameters_BN[ind], grads_BN[ind], _, _) = train_plus(
        model, optimizer, criterion, train_loader, test_loader,
        device=device, wrap_tqdms=True, print_errors=True,
        best_model_file=best_model_file,
        parameters_file=parameters_file, grads_file=grads_file,
        train_errors_file=train_errors_file, test_errors_file=test_errors_file,
    )
    print()

In [None]:
# Reload the saved parameter and gradient records of the VGG_BN runs onto the active device.
parameters_BN, grads_BN = [], []
for ind in range(4):
    root = '../Result/VGG BN ' + str(lr[ind])
    parameters_file, grads_file = [
        os.path.join(root, leaf) for leaf in ('parameters.pt', 'grads.pt')
    ]

    parameters_BN.append(torch.load(parameters_file, map_location=device))
    grads_BN.append(torch.load(grads_file, map_location=device))

In [None]:
# One shaded band per model: the region between the two curves returned by grad_pred().
fig, ax = plt.subplots()

min_curve, max_curve = grad_pred(grads_original)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.75)

min_curve, max_curve = grad_pred(grads_BN)
ax.fill_between(range(len(min_curve)), min_curve, max_curve, alpha=0.75)

# Clip the y-axis to [0, 5]; this figure is shown but not saved.
ax.set_ylim(0, 5)
ax.legend(['VGG without BN', 'VGG with BN'])
ax.set(xlabel='step', ylabel='gradient distance', title='gradient predictiveness')

plt.show()

In [None]:
# One line per model from beta_smooth(); this figure is shown but not saved.
fig, ax = plt.subplots()

max_curve1 = beta_smooth(parameters_original, grads_original)
ax.plot(max_curve1, alpha=0.5)

max_curve2 = beta_smooth(parameters_BN, grads_BN)
ax.plot(max_curve2, alpha=0.5)

ax.legend(['VGG without BN', 'VGG with BN'])
ax.set(xlabel='step', ylabel='beta', title='beta smoothness')

plt.show()