In [None]:
一个深度学习项目的目录结构：
    data_loader.py    以batch为单位可遍历的数据
    net.py            深度学习的网络模型、误差函数、测量函数
    train.py          训练一个epoch的训练数据
    evaluate.py       评估一个epoch的评估数据
    train_and_evaluate.py  训练并评估数据，记录日志，记录参数，记录效果，调用train和evaluate
    train_main.py     训练和评估的主程序，初始化模型和数据等，并调用train_and_evaluate
    utils.py          辅助函数
    params.json       模型的超参数
    /data             数据目录，所有数据在此
    /runs             运行目录，所有运行时记录在此

## 第一步，获得数据 Data Exploration

In [7]:
from data_loader import fetch_dataloader
import utils

import pandas as pd

In [2]:
# Hyperparameters are read from the JSON file through the project's Params helper.
json_path = 'params.json'
params = utils.Params(json_path)

# Splits to load and the directory that holds the data.
data_dir = 'data/MNIST'
types = ['train', 'val']

# The returned container is indexed by split name; keep a handle on each loader.
dataloaders = fetch_dataloader(types, data_dir, params)
train_dl = dataloaders['train']
val_dl = dataloaders['val']


In [3]:
# Inspect only the first mini-batch: print its tensor size and its labels, then stop.
# NOTE(review): the printed label lists four dimensions, while the recorded
# output below shows a 2-D batch of size [64, 784].
for data, target in train_dl:
    batch_shape = data.size()
    print('训练数据一个mini－batch数据的维度：［mini－batch大小，inChannel，Height，Width',
          batch_shape, '；和其分类：', target)
    break

训练数据一个mini－batch数据的维度：［mini－batch大小，inChannel，Height，Width torch.Size([64, 784]) ；和其分类： tensor([ 5,  5,  7,  1,  6,  0,  6,  4,  0,  2,  3,  7,  0,  8,
         7,  4,  7,  8,  1,  5,  4,  7,  4,  7,  7,  3,  8,  0,
         8,  1,  7,  7,  8,  3,  8,  1,  5,  9,  5,  1,  0,  3,
         3,  5,  7,  9,  3,  8,  7,  1,  9,  1,  3,  7,  6,  0,
         1,  6,  9,  3,  7,  9,  8,  7])


In [25]:
# Size of a single training sample: element 0 of the first item in the dataset.
first_item = train_dl.dataset[0]
first_item[0].size()

torch.Size([784])

## 第二步，建模 Neural Network Modeling

In [4]:
import torch
import torch.optim as optim

import net

# Upper bound on the number of mini-batches processed by this exploratory run.
MAX_TRAIN_BATCHES = 500

# use GPU if available
params.cuda = torch.cuda.is_available()
device = torch.device('cuda' if params.cuda else 'cpu')

# The model lives on `device`, so every batch has to be moved there as well
# before the forward pass.
model = net.Net(params).to(device)
optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = net.loss_fn
accuracy_fn = net.accuracy_fn

# set model to training mode
model.train()

# running averages of the per-batch loss and accuracy
loss_avg = utils.RunningAverage()
accuracy_avg = utils.RunningAverage()

for i, (train_batch, labels_batch) in enumerate(train_dl):
    if i == MAX_TRAIN_BATCHES:
        break

    # Move inputs and labels to the same device as the model; without this a
    # CUDA model would be fed CPU tensors. On a CPU-only machine this is a no-op.
    train_batch = train_batch.to(device)
    labels_batch = labels_batch.to(device)

    # Also track gradients with respect to the inputs. The optimizer itself
    # only updates model.parameters().
    train_batch.requires_grad = True

    # compute model output and loss
    output_batch = model(train_batch)
    loss = loss_fn(output_batch, labels_batch)
    accuracy = accuracy_fn(output_batch, labels_batch)

    # clear previous gradients, compute gradients of all tensors wrt loss
    optimizer.zero_grad()
    loss.backward()

    # performs updates using calculated gradients
    optimizer.step()

    # update the running averages; .item() extracts the Python scalar directly
    loss_avg.update(loss.item())
    accuracy_avg.update(accuracy)



In [5]:
# Accuracy of the last mini-batch from the training loop: the fraction of
# samples whose highest-scoring class equals the label. The tensor's own
# .sum() is used instead of Python's built-in sum(), which would iterate over
# the tensor element by element.
predicted = output_batch.max(dim=1)[1]
(predicted == labels_batch).sum().item() / len(labels_batch)

0.09375

In [123]:
# IPython introspection: show the source code of accuracy_fn.
??accuracy_fn

In [83]:
# Print the mean gradient of every parameter tensor of the model.
for param in model.parameters():
    # A parameter that has not received a gradient has .grad set to None.
    if param.grad is None:
        continue
    print(param.grad.mean())

tensor(1.00000e-06 *
       -6.3776)
tensor(1.00000e-04 *
       -4.1262)
tensor(1.00000e-03 *
       -3.8412)
tensor(1.00000e-03 *
       -1.7876)


In [79]:
# Back-of-the-envelope ratio: numSamples / (scale * (inFeatures + outFeatures)).
# NOTE(review): looks like the common rule of thumb for sizing a hidden layer — confirm.
inFeatures = 28 * 28      # 784 input features
outFeatures = 10
numSamples = 60000
scale = (2 + 10) / 2      # midpoint of 2 and 10
denominator = scale * (inFeatures + outFeatures)
numSamples / denominator

12.594458438287154

In [85]:
# Print the module structure of the model (its layers and their sizes).
print(model)

Net(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=30, bias=True)
    (1): Linear(in_features=30, out_features=10, bias=True)
  )
)


In [88]:
# Switch the model to training mode; the call returns the module, which the
# notebook then echoes as the cell output.
model.train()

Net(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=30, bias=True)
    (1): Linear(in_features=30, out_features=10, bias=True)
  )
)

In [94]:
# Inspect the optimizer's parameter groups (hyperparameters plus the parameter tensors).
# NOTE(review): this echoes every weight tensor; consider displaying only the
# non-'params' keys to keep the output readable.
optimizer.param_groups

[{'amsgrad': False,
  'betas': (0.9, 0.999),
  'eps': 1e-08,
  'lr': 0.001,
  'params': [Parameter containing:
   tensor([[-2.4560e-02, -3.2280e-02,  9.6532e-03,  ...,  1.9748e-02,
             1.0295e-03,  2.1261e-02],
           [-1.3537e-03,  3.3322e-02, -1.4561e-02,  ...,  2.6652e-02,
            -2.7239e-02, -3.1088e-02],
           [-3.8912e-03, -3.2239e-02, -2.1400e-02,  ..., -1.1273e-03,
            -1.7310e-02, -1.8220e-03],
           ...,
           [-3.9339e-02, -4.5194e-02,  5.8162e-03,  ...,  1.6468e-02,
            -3.3111e-02, -1.1704e-02],
           [ 4.4177e-03, -4.2200e-02, -5.0954e-02,  ..., -6.0457e-02,
            -1.7481e-02, -4.1432e-02],
           [ 1.2397e-02, -2.1283e-02, -4.4556e-02,  ..., -4.2328e-03,
             1.3061e-02, -1.4537e-02]]), Parameter containing:
   tensor(1.00000e-02 *
          [ 1.4183, -2.9061,  1.7377,  2.8914, -2.5677,  1.8172,  2.1049,
           -2.7016, -0.4296,  3.0822, -0.3704, -0.2884,  5.1959,  0.2983,
           -0.6647,  0.

In [95]:
# Scratch value: a dash-separated layer-width string, the same format as the
# 'wide_of_layers' entry in params.
a = '784-30-10'

In [99]:
# Split the dash-separated spec and convert every piece to an int.
list(map(int, a.split('-')))

[784, 30, 10]

In [45]:

# Placeholder metric dictionaries used to experiment with DataFrame logging.
train_matrics = dict(accuracy=0.1, loss=0.1)
val_matrics = dict(accuracy=0.2, loss=0.2)

In [47]:
# Display the training metrics dict. Indexing it with an empty-string key
# would raise KeyError, since its only keys are 'accuracy' and 'loss'.
train_matrics

{'accuracy': 0.1, 'loss': 0.1}

In [67]:
# One-row frames: `a` holds the hyperparameters, `b` the training metrics
# tagged with the split they belong to.
a = pd.DataFrame(data=params.dict, index=range(1))
train_matrics.update(type='train')
b = pd.DataFrame(data=train_matrics, index=range(1))

In [13]:
# One-row frame of the hyperparameters, with the columns given explicitly as
# the dict's keys.
pd.DataFrame(data=params.dict, index=range(1), columns=list(params.dict))

Unnamed: 0,train_batch_size,test_batch_size,num_workers,learning_rate,num_epochs,save_summary_steps,wide_of_layers,act_fn_name,cuda
0,64,64,4,0.0001,100,100,784-30-10,tanh,False


In [73]:
# Display the one-row training-metrics frame.
b

Unnamed: 0,accuracy,loss,type
0,0.1,0.1,train


In [77]:
# Put the hyperparameter columns and the metric columns side by side in one row.
pd.concat((a, b), axis='columns')

Unnamed: 0,act_fn_name,cuda,learning_rate,num_epochs,num_workers,save_summary_steps,test_batch_size,train_batch_size,wide_of_layers,accuracy,loss,type
0,tanh,False,0.0001,100,4,100,64,64,784-30-10,0.1,0.1,train


In [79]:
# Display the hyperparameters as a plain dict.
params.dict

{'act_fn_name': 'tanh',
 'cuda': False,
 'learning_rate': 0.0001,
 'num_epochs': 100,
 'num_workers': 4,
 'save_summary_steps': 100,
 'test_batch_size': 64,
 'train_batch_size': 64,
 'wide_of_layers': '784-30-10'}

In [93]:
# Start from an empty frame and add one row of hyperparameters.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
a = pd.DataFrame()
b = pd.DataFrame(params.dict, index=range(1))
a = pd.concat([a, b], ignore_index=True)

In [97]:
# Add the row again, this time keeping b's own index label, so the result can
# contain a repeated label. pd.concat is used because DataFrame.append was
# removed in pandas 2.0.
# NOTE(review): this cell reassigns `a` from itself, so every re-run adds another row.
a = pd.concat([a, b], ignore_index=False)

In [98]:
# Display the accumulated frame of hyperparameter rows.
a

Unnamed: 0,act_fn_name,cuda,learning_rate,num_epochs,num_workers,save_summary_steps,test_batch_size,train_batch_size,wide_of_layers
0,tanh,False,0.0001,100,4,100,64,64,784-30-10
1,tanh,False,0.0001,100,4,100,64,64,784-30-10
0,tanh,False,0.0001,100,4,100,64,64,784-30-10


In [101]:
# Add an extra 'layer0.bias' entry directly to the dict exposed by params.dict.
params.dict['layer0.bias'] = 1

In [107]:
import copy

In [111]:
# Deep-copy the Params object; this rebinds `a`, which held a DataFrame before.
a = copy.deepcopy(params)

In [112]:
# Echo the copied Params object (shown via its default repr).
a

<utils.Params at 0x12948bac8>

In [113]:
# Inspect the copy's dict to check that the added 'layer0.bias' entry was copied too.
a.dict

{'act_fn_name': 'tanh',
 'cuda': False,
 'layer0.bias': 1,
 'learning_rate': 0.0001,
 'num_epochs': 100,
 'num_workers': 4,
 'save_summary_steps': 100,
 'test_batch_size': 64,
 'train_batch_size': 64,
 'wide_of_layers': '784-30-10'}