# Pytorch构建神经网络(三)(29(博客30)-33节)

## 4.1&4.2 使用tensorboard可视化CNN训练指标

* pytorch1.1.0以上的版本已经自动增加了tensorboard
* 在终端输入“tensorboard --version”可查看tensorboard的版本
* 在终端输入“tensorboard --logdir=runs”进入tensorboard(在写了tensorboard数据的路径下)

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)


<torch.autograd.grad_mode.set_grad_enabled at 0x240b6f98cd0>

In [16]:
print(torch.__version__)
print(torchvision.__version__)

1.7.1
0.2.2


In [17]:
def get_num_correct(preds,labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [18]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
    
    def forward(self, t):
        t = t 
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = t.reshape(-1, 12*4*4)  # t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        
        t = F.relu(self.fc2(t))
        
        t = self.out(t)
        return t
        

In [19]:
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
)
#train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

In [20]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

### Starting out with TensorBoard (Network Graph and Images)

In [None]:
tb = SummaryWriter()

network = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()
# 此时就可以看到图像在tensorboard中

In [18]:
# 循环查看
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images', grid)
tb.add_graph(network, images)

for epoch in range(10):

    total_loss = 0
    total_correct = 0

    for batch in train_loader: # Get Batch

        # Pass Batch
        # Calculate Loss
        # Calculate Gradient
        # Update Weights
        images, labels = batch
        
        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels)  # Calculate loss
        
        optimizer.zero_grad()    # 梯度清零，否则会累加
        loss.backward()     # Calculate Gradients
        optimizer.step()    # Update Weights
        
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    # 一组用于创建变量的
    tb.add_scalar('Loss', total_loss, epoch)
    tb.add_scalar('Number Correct', total_correct, epoch)
    tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
    
    # 一组用来创建直方图的值
    tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
    tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
    tb.add_histogram(
        'conv1.weight.grad'
        ,network.conv1.weight.grad
        ,epoch
    )

    print(
        "epoch", epoch, 
        "total_correct:", total_correct, 
        "loss:", total_loss
    )

tb.close()

epoch 0 total_correct: 46411 loss: 354.42062570154667
epoch 1 total_correct: 51218 loss: 237.75386726856232
epoch 2 total_correct: 51946 loss: 215.51920852065086
epoch 3 total_correct: 52331 loss: 206.8187402933836
epoch 4 total_correct: 52532 loss: 198.87907454371452
epoch 5 total_correct: 52580 loss: 196.97526659071445
epoch 6 total_correct: 52791 loss: 191.67158991098404
epoch 7 total_correct: 52885 loss: 189.4652373343706
epoch 8 total_correct: 53045 loss: 186.11422833800316
epoch 9 total_correct: 53124 loss: 183.70606738328934


In [6]:
# 对不同超参数的研究
#batch_size = 100
#lr =0.01
# 对不同的batchsize，lr的训练情况进行比较
# 方法1：但此方法需要多层for循环
batch_size_list = [100, 1000, 10000]
lr_list = [.01, .001, .0001, .00001]
for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)   # 创建能在tensorboard中查看的图像网格

        comment = f'batch_size={batch_size} lr ={lr}'
        tb = SummaryWriter(comment=comment)   # 在Summary Writer添加该注释，可帮助我们在tensorboard中唯一地识别该表示
        tb.add_image('images', grid)  # 将一批图像放在grid中进行显示
        tb.add_graph(network, images)   # 在tensorboard中看见网络结构的可视化图
        optimizer = optim.Adam(network.parameters(), lr=lr)

        for epoch in range(5):
    
            total_loss = 0
            total_correct = 0
    
            for batch in train_loader:    # Get Batch
                images, labels = batch
        
                preds = network(images) # Pass Batch
                loss = F.cross_entropy(preds, labels)  # Calculate loss
        
                optimizer.zero_grad()    # 梯度清零，否则会累加
                loss.backward()     # Calculate Gradients
                optimizer.step()    # Update Weights
        
                #total_loss += loss.item()
                total_loss += loss.item()*batch_size # 在对不同批次下的训练进行比较时，这样做可使结果更具有可比性
                total_correct += get_num_correct(preds, labels)
        
            tb.add_scalar("Loss", total_loss, epoch)
            tb.add_scalar("Number Correct", total_correct, epoch)
            tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)
            '''
            这种表达方式只能看单个层的偏置，权重，及其梯度的变化趋势，无法看到全部的
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
            '''
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                tb.add_histogram(f'{name}.grad', weight.grad, epoch)
            print("epoch:", epoch, "total_correct:", total_correct, "loss", total_loss)

tb.close()

#### 对多层的偏置，权重及其梯度进行访问的原理

In [19]:
for name,weight in network.named_parameters():
    print(name, weight.shape)

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [20]:
for name,weight in network.named_parameters():
    print(f'{name}.grad', weight.grad.shape)

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])


#### 更简单的方法对要更改的参数进行访问

In [21]:
from itertools import product

In [22]:
parameters = dict(
    lr = [.01, .001],
    batc_size = [10, 100, 1000],
    shuffle = [True, False]
)

In [23]:
param_values = [v for v in parameters.values()]
param_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [24]:
for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [None]:
#batch_size = 100
#lr =0.01
# 对不同的batchsize，lr的训练情况进行比较
# 方法2：只需一层循环
from itertools import product
parameters = dict(
    lr = [.01, .001],
    batch_size = [10, 100],
    shuffle = [True, False]
)
param_values = [v for v in parameters.values()]
print(param_values)
for lr, batch_size, shuffle in product(*param_values):
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)   # 创建能在tensorboard中查看的图像网格

    comment = f'batch_size={batch_size} lr ={lr} shuffle={shuffle}'
    tb = SummaryWriter(comment=comment)   # 在Summary Writer添加该注释，可帮助我们在tensorboard中唯一地识别该表示
    tb.add_image('images', grid)  # 将一批图像放在grid中进行显示
    tb.add_graph(network, images)   # 在tensorboard中看见网络结构的可视化图
    optimizer = optim.Adam(network.parameters(), lr=lr)

    for epoch in range(5):
    
        total_loss = 0
        total_correct = 0
    
        for batch in train_loader:    # Get Batch
            images, labels = batch
        
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels)  # Calculate loss
        
            optimizer.zero_grad()    # 梯度清零，否则会累加
            loss.backward()     # Calculate Gradients
            optimizer.step()    # Update Weights
        
            #total_loss += loss.item()
            total_loss += loss.item()*batch_size # 在对不同批次下的训练进行比较时，这样做可使结果更具有可比性
            total_correct += get_num_correct(preds, labels)
        
        tb.add_scalar("Loss", total_loss, epoch)
        tb.add_scalar("Number Correct", total_correct, epoch)
        tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)
        '''
            这种表达方式只能看单个层的偏置，权重，及其梯度的变化趋势，无法看到全部的
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
        '''
        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)
        print("epoch:", epoch, "total_correct:", total_correct, "loss", total_loss)

tb.close()

[[0.01, 0.001], [10, 100], [True, False]]


## 4.3 RunBuilder类的编写
* 该类的编写允许我们使用不同的参数值生成多个运行

In [21]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [22]:
class RunBuilder():
    @staticmethod
    def get_runs(params): # 通过指定类并指定方法来调用
        Run = namedtuple('Run', params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [23]:
params = OrderedDict(
    lr = [.01, .001],
    batch_size = [1000, 10000]
)

In [24]:
runs = RunBuilder.get_runs(params)
runs

[Run(lr=0.01, batch_size=1000),
 Run(lr=0.01, batch_size=10000),
 Run(lr=0.001, batch_size=1000),
 Run(lr=0.001, batch_size=10000)]

In [25]:
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=1000) 0.01 1000
Run(lr=0.01, batch_size=10000) 0.01 10000
Run(lr=0.001, batch_size=1000) 0.001 1000
Run(lr=0.001, batch_size=10000) 0.001 10000


In [26]:
# 创建RunBuilder类以后，comment表示为：
for run in RunBuilder.get_runs(params):
    comment = f'-{run}'
    print(comment)

-Run(lr=0.01, batch_size=1000)
-Run(lr=0.01, batch_size=10000)
-Run(lr=0.001, batch_size=1000)
-Run(lr=0.001, batch_size=10000)


In [27]:
# 整合
print(params.keys())
print(params.values())
Run = namedtuple('Run',params.keys())
runs = []
for v in product(*params.values()):
    runs.append(Run(*v)) # *告诉构造函数接受元组值作为与元祖本身相抵触的参数
runs

odict_keys(['lr', 'batch_size'])
odict_values([[0.01, 0.001], [1000, 10000]])


[Run(lr=0.01, batch_size=1000),
 Run(lr=0.01, batch_size=10000),
 Run(lr=0.001, batch_size=1000),
 Run(lr=0.001, batch_size=10000)]

# 4.4 如何试验大量的超参数
* 构建RunManager类可实现对大量超参数的试验

In [37]:
# begin_epoch 和 end_epoch允许我们在整个生命周期中管理这些值
class RunManager():
    # 构造函数
    def __init__(self):
        # 相同前缀的数据可以考虑用类来整合
        '''
        class Epoch():
            def __init__(self):
                self.count = 0
                self.loss = 0
                self.num_correct = 0
                self.start_time = None 
                
        e = Epoch()
        e.count
        '''
        self.epoch_count = 0 # 追踪周期的数量
        self.epoch_loss = 0 # 损失的运行周期
        self.epoch_num_correct = 0 # 正确预测数的周期
        self.epoch_start_time = None # 开始时间的周期
        
        self.run_params = None # 运行参数
        self.run_count = 0 # 运行计数
        self.run_data = [] # 运行数据 每个周期的参数值和结果
        self.run_start_time = None # 运行开始时间 计算运行时间
        
        self.network = None # 运行网络
        self.loader = None # 数据加载器
        self.tb = None # summary writer=》tensorboard
        
    # 提取开始运行所需的相关东西
    def begin_run(self, run, network, loader):
        self.run_start_time = time.time() # 获取运行的开始时间
        
        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}') # 作为通用参数传入
        
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        
        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)
        
    # 本次运行结束 关闭tensorboard 并 将周期数重新设置为0
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0
        
    # 每次周期开始
    def begin_epoch(self):
        self.epoch_start_time = time.time() # 本次周期开始时间
        
        # 本次周期内的变量
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
    
    # 周期结束
    def end_epoch(self):
        epoch_duration = time.time() - self.epoch_start_time # 周期市场
        run_duration = time.time() - self.run_start_time # 本次运行时长
        
        loss = self.epoch_loss/len(self.loader.dataset) # 计算损失
        accuracy = self.epoch_num_correct/len(self.loader.dataset) # 计算准确率
        
        # 将变量加入tensorboard
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)
        
        # 将直方图加入tensorboard
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
            
        # 周期内的相关结果记录
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        # 运行参数
        for k,v in self.run_params._asdict().items(): results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # 关于jupyter notebook的=》清除当前的输出并显示一个新的数据帧
        #clear_output(wait=True)
        display(df)
    
    def track_loss(self, loss):
        self.epoch_loss += loss.item()*self.loader.batch_size
    
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
    
    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    # 保存为json或者csv
    def save(self, fileName):
        pd.DataFrame.from_dict(
            self.run_data,
            orient='columns').to_csv(f'{fileName}.csv')
        with open(f'{fileName},json','w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [38]:
# 使用RunManager和RunBuilder类可以使得程序更简单、更易扩展、更容易推理
from  torch.utils.data import DataLoader
import time
import pandas as pd
from easydl import clear_output

params = OrderedDict(
    lr = [.01],
    batch_size =[1000, 2000],
    shuffle = [True, False]
)
m = RunManager()
for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader:
            #images, labels = batch
            images = batch[0]
            labels = batch[1]
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
            
        m.end_epoch()
    m.end_run()
m.save('resuls-0')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.051913,0.599717,21.242159,22.757104,0.01,1000,True
1,1,2,0.533126,0.791267,21.803655,44.840013,0.01,1000,True
2,1,3,0.429855,0.839567,23.115148,68.166595,0.01,1000,True
3,1,4,0.388675,0.855133,22.773599,91.314196,0.01,1000,True
4,1,5,0.358946,0.865517,21.33937,112.959273,0.01,1000,True
5,2,1,0.966711,0.6276,22.01831,23.514305,0.01,1000,False
6,2,2,0.544777,0.789667,24.739794,48.427362,0.01,1000,False
7,2,3,0.454927,0.832,24.0948,72.690711,0.01,1000,False
8,2,4,0.403349,0.85095,24.73671,97.555079,0.01,1000,False
9,2,5,0.372126,0.86325,23.275681,121.166895,0.01,1000,False


NameError: name 'json' is not defined

# 4.5 使用DataLoader的多进程功能加速神经网络训练

* 使用data loader类的num_workers可选属性可加速神经网络的训练
* num_workers属性告诉data loader实例有多少个单元处理器用于数据加载
* num_workers值的选择的最好方式是进行试验

在添加单个人工进程之后，看到20%的提速是有意义的，因为主进程的工作更少了

当主进程忙于执行向前和向后的传递时，工作进程正在加载一下批数据

当主进程准备好另一批数据时，工作进程已经在内存中排队了

因此，主进程不需要从磁盘读取数据，这些数据寂静在内存中准备好了

=》实际上是减少读取实际数据从磁盘中批量读取数据的时间

In [40]:
# 使用RunManager和RunBuilder类可以使得程序更易扩展
params = OrderedDict(
    lr = [.01],
    batch_size =[1000, 2000],
    shuffle = [True, False],
    num_workers = [0,1,2,4,8,16]
)
m = RunManager()
for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
            
            
        m.end_epoch()
    m.end_run()
m.save('resuls-1')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.066071,0.60665,24.488441,25.995935,0.01,1000,True,0
1,1,2,0.546109,0.787217,23.114761,49.31515,0.01,1000,True,0
2,1,3,0.453987,0.8294,24.207589,73.634439,0.01,1000,True,0
3,1,4,0.396741,0.855767,21.976259,95.727386,0.01,1000,True,0
4,1,5,0.367227,0.86675,22.188621,118.166341,0.01,1000,True,0
5,2,1,0.966626,0.626517,15.332173,20.523053,0.01,1000,True,1
6,2,2,0.531059,0.797817,15.298302,35.95702,0.01,1000,True,1
7,2,3,0.44645,0.834633,14.494056,50.693446,0.01,1000,True,1
8,2,4,0.39707,0.854267,15.850135,66.660269,0.01,1000,True,1
9,2,5,0.360293,0.868683,16.254102,83.217089,0.01,1000,True,1


RuntimeError: DataLoader worker (pid(s) 12092, 11864, 12624, 2352, 13708, 14476, 11096, 10180, 1224, 15844, 2504, 11660) exited unexpectedly