In [1]:
import mindspore as ms
import numpy as np
from mindspore import nn
from mindspore.dataset import vision, transforms
from mindspore.dataset import MnistDataset
import mindspore.context as context
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV

In [2]:
from download import download

url = "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/" \
      "notebook/datasets/MNIST_Data.zip"
path = download(url, "./", kind="zip", replace=True)

Downloading data from https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/MNIST_Data.zip (10.3 MB)

file_sizes: 100%|███████████████████████████| 10.8M/10.8M [00:12<00:00, 837kB/s]
Extracting zip file...
Successfully downloaded / unzipped to ./


In [3]:
DATA_DIR_TRAIN = "MNIST_Data/train" # 训练集信息
DATA_DIR_TEST = "MNIST_Data/test" # 测试集信息
train_dataset = MnistDataset(DATA_DIR_TRAIN)
test_dataset = MnistDataset(DATA_DIR_TEST)

In [4]:
#显示数据集的相关特性
print('训练数据集列名：',train_dataset.get_col_names())
print('测试数据集列名：',test_dataset.get_col_names())
image=train_dataset.create_dict_iterator().__next__()
print('图像长/宽/通道数：',image['image'].shape)
print('一张图像的标签样式：',image['label']) #一共 10 类，用 0-9 的数字表达类别。


训练数据集列名： ['image', 'label']
测试数据集列名： ['image', 'label']
图像长/宽/通道数： (28, 28, 1)
一张图像的标签样式： 8


In [5]:
def datapipe(dataset, batch_size):
    # Exercise 2: complete the image transformation process.
    # Add your code here. 
    # 定义 Map 操作尺寸缩放，归一化和通道变换
    rescale_op = vision.Rescale(1.0 / 255.0, 0)
    normalize_op = vision.Normalize(mean=(0.1307,), std=(0.3081,))
    hwc2chw_op = vision.HWC2CHW()
    # 对数据集进行 map 操作
    dataset = dataset.map(input_columns="image", operations=[rescale_op,normalize_op, hwc2chw_op])
    dataset = dataset.map(input_columns="label", operations=transforms.TypeCast(ms.int32))
    #设定 batchsize 大小
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset

In [6]:
train_dataset = datapipe(train_dataset, 64)
test_dataset = datapipe(test_dataset, 64)

In [7]:
for image, label in test_dataset.create_tuple_iterator():
    # Exercise 3: print the shape and data type of image and label for the first batch.
    # Add your code here.
    print(image.shape)
    print('图像长/宽/通道数：',image.shape)
    print('图像的标签类型：',label.dtype) #一共 10 类，用 0-9 的数字表达类别。
    break

(64, 1, 28, 28)
图像长/宽/通道数： (64, 1, 28, 28)
图像的标签类型： Int32


In [8]:
# Define model
class Network(nn.Cell):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Exercise 4：complete the network
        # self.dense_relu_sequential = nn.SequentialCell(
        # Add your code here.
        # )
        self.dense_relu_sequential = nn.SequentialCell(
            nn.Dense(28*28, 512),
            nn.ReLU(),
            nn.Dense(512, 512),
            nn.ReLU(),
            nn.Dense(512, 10)
        )

    def construct(self, x):
        x = self.flatten(x)
        logits = self.dense_relu_sequential(x)
        return logits

model = Network()
print(model)

Network<
  (flatten): Flatten<>
  (dense_relu_sequential): SequentialCell<
    (0): Dense<input_channels=784, output_channels=512, has_bias=True>
    (1): ReLU<>
    (2): Dense<input_channels=512, output_channels=512, has_bias=True>
    (3): ReLU<>
    (4): Dense<input_channels=512, output_channels=10, has_bias=True>
    >
  >


In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = nn.SGD(model.trainable_params(), 1e-2)

In [10]:
# Define forward function
def forward_fn(data, label):
    # Exercise 5.1: implement the forward function and ensure that it returns both the loss and logits.
    # Add your code here.
    logits = model(data)
    loss = loss_fn(logits, label)
    
    return loss,logits


# Get gradient function
grad_fn = ms.value_and_grad(forward_fn,
                                   None,
                                   optimizer.parameters,
                                   has_aux=True)


# Define function of one-step training
def train_step(data, label):
    # Exercise 5.2: complete train step function to calculate gradients and optimize the model.
    # The function should return the loss for current step.
    # Add your code here.
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


def train(model, dataset):
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)

        if batch % 100 == 0:
            loss, current = loss.asnumpy(), batch
            print(f"loss: {loss:>7f}  [{current:>3d}/{size:>3d}]")

In [11]:
def test(model, dataset, loss_fn):
    num_batches = dataset.get_dataset_size()
    model.set_train(False)
    total, test_loss, correct = 0, 0, 0
    for data, label in dataset.create_tuple_iterator():
        pred = model(data)
        total += len(data)
        # Exercise 6: complete the test fucntion to calculate average loss and accuracy.
        # You need to define the following two variables.
        # test_loss: loss for current step.
        # correct: number of correct predictions for current step.
        # Add your code here.
        test_loss += loss_fn(pred, label).asnumpy()
        correct += (pred.argmax(1) == label).asnumpy().sum()

    test_loss /= num_batches
    correct /= total
    print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [12]:
epochs = 3
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(model, train_dataset)
    test(model, test_dataset, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.302857  [  0/937]
loss: 2.289016  [100/937]
loss: 2.262539  [200/937]
loss: 2.179817  [300/937]
loss: 1.991014  [400/937]
loss: 1.494566  [500/937]
loss: 0.909688  [600/937]
loss: 0.701747  [700/937]
loss: 0.553341  [800/937]
loss: 0.620125  [900/937]
Test: 
 Accuracy: 84.7%, Avg loss: 0.537542 

Epoch 2
-------------------------------
loss: 0.503181  [  0/937]
loss: 0.485737  [100/937]
loss: 0.428261  [200/937]
loss: 0.349538  [300/937]
loss: 0.451377  [400/937]
loss: 0.335934  [500/937]
loss: 0.345808  [600/937]
loss: 0.491195  [700/937]
loss: 0.244353  [800/937]
loss: 0.350746  [900/937]
Test: 
 Accuracy: 89.8%, Avg loss: 0.344236 

Epoch 3
-------------------------------
loss: 0.175925  [  0/937]
loss: 0.363152  [100/937]
loss: 0.527388  [200/937]
loss: 0.517931  [300/937]
loss: 0.334590  [400/937]
loss: 0.700319  [500/937]
loss: 0.243471  [600/937]
loss: 0.426479  [700/937]
loss: 0.253463  [800/937]
loss: 0.312751  [900/937]
Test: 
 

In [13]:
ms.save_checkpoint(model, "model.ckpt")
print("Saved Model to model.ckpt")

Saved Model to model.ckpt


In [None]:
# Instantiate a random initialized model
model = Network()
# Load checkpoint and load parameter to model
# Exercise 7.1: complete the process of loading checkpoint.
# Exercise 7.2: check if the checkpoint has been successfully loaded by printing the output of load_param_into_net.
# Add your code here.
param_dict = ms.load_checkpoint("model.ckpt")
param_not_load, _ = ms.load_param_into_net(model, param_dict)
print(param_not_load)

[]


In [None]:
model.set_train(False)
for data, label in test_dataset:
    pred = model(data)
    # Exercise 8: complete the model inference process.
    # Add your code here.
    predicted = np.argmax(pred.asnumpy(), axis=1)
    print(f'Predicted: "{predicted[:10]}", Actual: "{label[:10]}"')
    break

Predicted: "[1 3 1 8 3 2 0 8 1 2]", Actual: "[1 7 1 8 3 2 0 8 1 2]"
