In [2]:
import torch
import torchvision
from torch.utils import data

# Pipeline: data -> model -> loss -> optimizer -> train

# 1 dataset

In [3]:
#mnist_train = torchvision.datasets.FashionMNIST?

In [4]:
# Training split of Fashion-MNIST rooted at ../data (download=True asks
# torchvision to fetch it); ToTensor is applied to each image on access.
mnist_train = torchvision.datasets.FashionMNIST(
    root='../data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [5]:
# Test split of Fashion-MNIST (train=False), same root directory and
# transform as the training split.
mnist_test = torchvision.datasets.FashionMNIST(
    root='../data',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [6]:
# Display the dataset's summary repr (size, split, root, transforms).
mnist_train

Dataset FashionMNIST
    Number of datapoints: 60000
    Split: train
    Root Location: ../data
    Transforms (if any): ToTensor()
    Target Transforms (if any): None

In [7]:
# Label of the first training example.
mnist_train.targets[0]

tensor(9)

In [8]:
# Shape of the first raw (untransformed) training image.
mnist_train.data[0].shape

torch.Size([28, 28])

## 1.2 dataloader

In [9]:
# Number of examples per mini-batch, shared by the train and test loaders.
batch_size = 256

In [10]:
# Mini-batch iterators: the training loader shuffles, the test loader
# does not.
train_loader = data.DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = data.DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=False,
)

# 2 model

In [11]:
# IPython help lookup (trailing `?`) for torch.nn.Sequential; not plain Python.
net = torch.nn.Sequential?

In [None]:
# Unexecuted scratch cell: binds `net` to the Sequential class itself,
# not to a model instance.
net = torch.nn.Sequential

In [12]:
# Linear classifier: flatten each input to a vector of 28*28 values, then
# map it to 10 output scores with a single Linear layer.
net = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=28 * 28, out_features=10),
)

In [13]:
# Print each direct sub-module of the model on its own line.
for layer in net.children():
    print(layer)

Flatten(start_dim=1, end_dim=-1)
Linear(in_features=784, out_features=10, bias=True)


In [14]:
# First 10 weights of output unit 0, before init_weights is applied.
net[1].weight.data[0][:10]

tensor([ 0.0016,  0.0189,  0.0170, -0.0158, -0.0355, -0.0066, -0.0192, -0.0069,
        -0.0137, -0.0092])

In [17]:
def init_weights(l):
    """Re-draw the weights of a Linear layer from a normal distribution.

    Intended as the callback for ``Module.apply``. Only ``torch.nn.Linear``
    modules are touched: their weight matrix is overwritten in place with
    samples of mean 0 and standard deviation 0.01. The bias is not
    modified, and modules of any other type are ignored.

    Args:
        l: the module to (possibly) initialise.

    Returns:
        None.
    """
    if not isinstance(l, torch.nn.Linear):
        return
    torch.nn.init.normal_(l.weight, mean=0, std=0.01)

In [18]:
# Call init_weights on every sub-module of net (Module.apply also visits
# net itself); the returned module is what the cell displays.
net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

In [19]:
# Same slice as inspected earlier, now showing the re-initialised weights.
net[1].weight.data[0][:10]

tensor([ 0.0053,  0.0089, -0.0027, -0.0297, -0.0095,  0.0017, -0.0176, -0.0093,
         0.0154,  0.0039])

## 2.2 loss

In [20]:
# IPython help lookup (trailing `?`) for CrossEntropyLoss; not plain Python.
torch.nn.CrossEntropyLoss?

In [21]:
# IPython help lookup (trailing `?`) for NLLLoss; not plain Python.
torch.nn.NLLLoss?

In [22]:
# IPython help lookup (trailing `?`) for LogSoftmax; not plain Python.
torch.nn.LogSoftmax?

In [23]:
# IPython help lookup (trailing `?`) for CrossEntropyLoss; not plain Python.
loss_func = torch.nn.CrossEntropyLoss?

In [None]:
# Unexecuted scratch cell: binds `loss_func` to the class itself,
# not to a loss instance.
loss_func = torch.nn.CrossEntropyLoss

In [24]:
# Cross-entropy criterion; 'mean' is the library default reduction, spelled
# out here so the per-batch averaging is visible.
loss_func = torch.nn.CrossEntropyLoss(reduction='mean')

## 2.3 optimizer

In [25]:
# IPython help lookup (trailing `?`) for torch.optim.Adam; not plain Python.
optimize = torch.optim.Adam?

In [None]:
# Unexecuted scratch cell: binds `optimize` to the Adam class itself,
# not to an optimizer instance.
optimize = torch.optim.Adam

In [26]:
# Adam over all parameters of net.
# NOTE(review): weight_decay=0.1 is a strong L2 penalty; the flat training
# curve recorded at the end of this notebook may be caused by it -- confirm
# that this value is intended.
optimize = torch.optim.Adam(
    params=net.parameters(),
    lr=1e-3,
    weight_decay=0.1,
)

# 3 train

In [27]:
class Accumulator:
    """Keep running sums of a fixed number of quantities.

    Each slot starts at 0.0. Slots are read by index, e.g. ``acc[0]``.
    """

    def __init__(self, n):
        """Create ``n`` slots, all initialised to 0.0."""
        # Remember the slot count so reset() can restore it later.
        self._size = n
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to each slot, in order.

        Every value is converted with ``float`` first. Values are paired
        with slots through ``zip``: surplus values are ignored, and passing
        fewer values than slots shortens the stored list.
        """
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every slot back to 0.0, restoring the original slot count."""
        # `n` is local to __init__, so the stored size is used here.
        self.data = [0.0] * self._size

    def __getitem__(self, idx):
        """Return the running sum stored at ``idx``."""
        return self.data[idx]

In [28]:
def accuracy(y_pred, y):
    """Count the predictions whose arg-max class equals the label.

    Args:
        y_pred: tensor of per-class scores; the class axis is the last one.
        y: tensor of class indices, compared element-wise with the arg-max
            of ``y_pred``.

    Returns:
        A 0-dim integer tensor holding the number of matches. This is a
        count, not a ratio; divide by the number of examples to get one.
    """
    predicted = torch.argmax(y_pred, -1)
    # Tensor.sum() reduces in a single vectorised call. The built-in sum()
    # would iterate over the tensor element by element in Python and would
    # return a plain int 0 (not a tensor) for an empty batch.
    return (predicted == y).sum()

In [29]:
# Sanity check for accuracy(): both rows have their largest score at index 2,
# so only the second label (2) is matched.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])

accuracy(y_hat, y)

tensor(1)

In [30]:
# Predicted class index for each row of y_hat.
torch.argmax(y_hat,-1)

tensor([2, 2])

In [31]:
def evaluate_acc(net,data_iter):
    """Return the fraction of examples in ``data_iter`` that ``net`` gets right.

    When ``net`` is a ``torch.nn.Module`` it is switched to evaluation mode
    and left in that mode. The forward passes run under ``torch.no_grad()``.

    Args:
        net: callable mapping a batch of inputs to per-class scores.
        data_iter: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Correct predictions divided by the number of labels seen, as a float.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no examples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # slot 0: correct predictions, slot 1: examples seen
    totals = Accumulator(2)
    with torch.no_grad():
        for inputs, labels in data_iter:
            scores = net(inputs)
            totals.add(accuracy(scores, labels), labels.numel())
    n_correct, n_seen = totals[0], totals[1]
    return n_correct / n_seen

In [32]:
# Scratch tensor; not referenced by any other cell shown here.
a = torch.Tensor([2,3,4])

In [33]:
# Scratch tensor; not referenced by any other cell shown here.
b = torch.Tensor([1,3,2])

In [34]:
# Test-set accuracy of the freshly initialised model, before any training.
evaluate_acc(net,test_loader)

0.2059

In [35]:
def train_epoch(train_iter,net,loss,optimize):
    """Run one optimisation pass over ``train_iter``.

    For every batch: forward pass, loss, metric update, then
    ``zero_grad`` / ``backward`` / ``step``.

    Args:
        train_iter: iterable yielding ``(inputs, labels)`` batches.
        net: model called on each input batch; switched to training mode
            when it is a ``torch.nn.Module``.
        loss: callable ``loss(predictions, labels)`` whose result supports
            ``backward()``.
        optimize: optimizer exposing ``zero_grad()`` and ``step()``.

    Returns:
        A pair ``(loss_per_example, accuracy)`` accumulated over the pass.
        The first entry is a true per-example mean only if ``loss`` returns
        a batch mean -- assumption, confirm against the loss in use.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # slots: [0] summed loss, [1] correct predictions, [2] examples seen
    stats = Accumulator(3)
    for inputs, labels in train_iter:
        outputs = net(inputs)
        batch_loss = loss(outputs, labels)
        # Scale the batch loss by the batch length so that dividing by the
        # total example count at the end weights every batch by its size.
        stats.add(batch_loss * len(labels), accuracy(outputs, labels), labels.numel())
        optimize.zero_grad()
        batch_loss.backward()
        optimize.step()
    loss_sum, n_correct, n_seen = stats[0], stats[1], stats[2]
    return loss_sum / n_seen, n_correct / n_seen

In [36]:
# One training pass; the displayed pair is (loss, accuracy) on the training data.
train_epoch(train_loader,net,loss_func,optimize)

(0.9627803639729817, 0.7232333333333333)

In [37]:
def train(train_iter,test_iter,net,loss,optimize,num_epochs):
    """Train ``net`` for ``num_epochs`` passes, printing metrics after each.

    After every pass over ``train_iter`` the model is evaluated on
    ``test_iter`` and one tab-separated line is printed with the epoch
    index, training loss, training accuracy and test accuracy.

    Args:
        train_iter: iterable of ``(inputs, labels)`` training batches.
        test_iter: iterable of ``(inputs, labels)`` evaluation batches.
        net: the model to train.
        loss: loss callable forwarded to ``train_epoch``.
        optimize: optimizer forwarded to ``train_epoch``.
        num_epochs: number of passes; 0 performs no work.

    Returns:
        None.
    """
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(train_iter, net, loss, optimize)
        # Evaluate on the iterator that was passed in rather than on the
        # notebook-level `test_loader` global.
        test_acc = evaluate_acc(net, test_iter)
        print('epoch:%d\ttrain_loss:%f\ttrain_acc:%f\ttest_acc:%f'%(epoch,train_loss,train_acc,test_acc))

In [38]:
# Ten more epochs, continuing from the model state left by the single
# training pass run earlier.
train(train_loader,test_loader,net,loss_func,optimize,num_epochs=10)

epoch:0	train_loss:0.823989	train_acc:0.770083	test_acc:0.757600
epoch:1	train_loss:0.820781	train_acc:0.770817	test_acc:0.749400
epoch:2	train_loss:0.821439	train_acc:0.770167	test_acc:0.758700
epoch:3	train_loss:0.821272	train_acc:0.770817	test_acc:0.761900
epoch:4	train_loss:0.819390	train_acc:0.772350	test_acc:0.754700
epoch:5	train_loss:0.820360	train_acc:0.770133	test_acc:0.749000
epoch:6	train_loss:0.820612	train_acc:0.770483	test_acc:0.763400
epoch:7	train_loss:0.820937	train_acc:0.770167	test_acc:0.758300
epoch:8	train_loss:0.820372	train_acc:0.769883	test_acc:0.753700
epoch:9	train_loss:0.819770	train_acc:0.771883	test_acc:0.764700
