In [None]:
一个深度学习项目的目录结构：
    data_loader.py    以batch为单位可遍历的数据
    net.py            深度学习的网络模型、误差函数、测量函数
    train.py          训练一个epoch的训练数据
    evaluate.py       评估一个epoch的评估数据
    train_and_evaluate.py  训练并评估数据，记录日志，记录参数，记录效果，调用train和evaluate
    train_main.py     训练和评估的主程序，初始化模型和数据等，并调用train_and_evaluate
    utils.py          辅助函数
    params.json       模型的超参数
    /data             数据目录，所有数据在此
    /runs             运行目录，所有运行时记录在此

## 第一步，获得数据 Data Exploration

In [3]:
from data_loader import fetch_dataloader
import utils

In [9]:
# Hyperparameters are read from the JSON file via the project's Params helper.
json_path = 'params.json'
params = utils.Params(json_path)

# Which splits to fetch, and the directory passed to the loader factory.
data_dir = 'data/MNIST'
types = ['train', 'val']

# fetch_dataloader returns a mapping indexed by split name (see the lookups below).
dataloaders = fetch_dataloader(types, data_dir, params)
train_dl, val_dl = dataloaders['train'], dataloaders['val']


In [10]:
# Peek at the first mini-batch only: print the feature tensor's size and its labels.
# NOTE(review): the label text below lists four dimensions, but the recorded output
# of this cell shows a 2-D batch (torch.Size([64, 784])) — confirm the intended layout.
first_batch = next(iter(train_dl), None)
if first_batch is not None:
    data, target = first_batch
    print('训练数据一个mini－batch数据的维度：［mini－batch大小，inChannel，Height，Width',
          data.size(), '；和其分类：', target)

训练数据一个mini－batch数据的维度：［mini－batch大小，inChannel，Height，Width torch.Size([64, 784]) ；和其分类： tensor([ 6,  2,  9,  4,  2,  1,  9,  1,  9,  4,  7,  1,  1,  8,
         2,  3,  7,  2,  8,  4,  2,  4,  9,  9,  3,  9,  9,  7,
         7,  5,  3,  3,  1,  8,  3,  8,  7,  3,  8,  5,  7,  9,
         6,  3,  2,  2,  1,  2,  5,  0,  9,  1,  0,  9,  7,  5,
         0,  8,  9,  0,  8,  0,  4,  9])


In [25]:
# Size of the first element of the first dataset item (the recorded output is a
# torch.Size, so this is presumably the feature tensor of one sample — confirm).
first_sample = train_dl.dataset[0]
first_sample[0].size()

torch.Size([784])

## 第二步，建模 modeling of neural network

In [68]:
import torch
import torch.optim as optim

import net

# Train the model for a bounded number of mini-batches, recording periodic
# metric summaries in `summ` and a running loss average in `loss_avg`.

# use GPU if available; every tensor fed to the model must live on the same device
params.cuda = torch.cuda.is_available()
device = torch.device('cuda' if params.cuda else 'cpu')

model = net.Net(params).to(device)
optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = net.loss_fn
metrics = net.metrics

# set model to training mode
model.train()

# summary for current training loop and a running average object for loss
summ = []
loss_avg = utils.RunningAverage()

# upper bound on the number of mini-batches processed by this cell
max_batches = 500

for i, (train_batch, labels_batch) in enumerate(train_dl):
    if i == max_batches:
        break

    # Move the batch to the model's device. Without this the forward pass fails
    # with a device mismatch whenever CUDA is available.
    train_batch = train_batch.to(device)
    labels_batch = labels_batch.to(device)

    # compute model output and loss
    # (inputs do not need requires_grad: only the parameters are optimized)
    output_batch = model(train_batch)
    loss = loss_fn(output_batch, labels_batch)

    # clear previous gradients, compute gradients of all tensors wrt loss
    optimizer.zero_grad()
    loss.backward()

    # performs updates using calculated gradients
    optimizer.step()

    # plain Python float of the loss, reused by the summary and the running average
    loss_value = loss.item()

    # Evaluate summaries only once in a while
    if i % params.save_summary_steps == 0:
        # detach from the graph, move to cpu, convert to numpy arrays
        output_batch = output_batch.detach().cpu().numpy()
        labels_batch = labels_batch.detach().cpu().numpy()

        # compute all metrics on this batch
        summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                         for metric in metrics}
        summary_batch['loss'] = loss_value
        summ.append(summary_batch)

    # update the average loss
    loss_avg.update(loss_value)


In [83]:
# Print the mean gradient of every model parameter after the last backward pass.
# Parameters are visited in the same order as model.named_parameters().
for param in model.parameters():
    # .grad is None for parameters that received no gradient; skip them rather
    # than fail with an AttributeError.
    if param.grad is None:
        continue
    print(param.grad.mean())

tensor(1.00000e-06 *
       -6.3776)
tensor(1.00000e-04 *
       -4.1262)
tensor(1.00000e-03 *
       -3.8412)
tensor(1.00000e-03 *
       -1.7876)


In [79]:
# Sizes plugged into the ratio below.
numSamples = 60000
inFeatures, outFeatures = 28 * 28, 10

# Midpoint of the interval [2, 10], used as the scaling factor.
scale = (2 + 10) / 2

# NOTE(review): this looks like the rule of thumb
#   N_hidden ~= N_samples / (alpha * (N_in + N_out))
# for sizing a hidden layer — presumably that is the intent; confirm.
numSamples / (scale * (inFeatures + outFeatures))

12.594458438287154

<bound method Module.modules of ModuleList(
  (0): Linear(in_features=784, out_features=30, bias=True)
  (1): Linear(in_features=30, out_features=10, bias=True)
)>

False