In [1]:
import os
import sys
import time
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms



class GlobalAvgPool2d(nn.Module):
    """
    全局平均池化层
    可通过将普通的平均池化的窗口形状设置成输入的高和宽实现
    """

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=x.size()[2:])


class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):  # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)


class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        """
            use_1×1conv: 是否使用额外的1x1卷积层来修改通道数
            stride: 卷积层的步幅, resnet使用步长为2的卷积来替代pooling的作用，是个很赞的idea
        """
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)


def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    '''
    resnet block
    num_residuals: 当前block包含多少个残差块
    first_block: 是否为第一个block
    一个resnet block由num_residuals个残差块组成
    其中第一个残差块起到了通道数的转换和pooling的作用
    后面的若干残差块就是完成正常的特征提取
    '''
    if first_block:
        assert in_channels == out_channels  # 第一个模块的输出通道数同输入通道数一致
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels, use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)


# 定义resnet模型结构
net = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # TODO: 缩小感受野, 缩channel
    nn.BatchNorm2d(32),
    nn.ReLU())
# nn.ReLU(),
# nn.MaxPool2d(kernel_size=2, stride=2))   # TODO：去掉maxpool缩小感受野

# 然后是连续4个block
net.add_module("resnet_block1", resnet_block(32, 32, 2, first_block=True))  # TODO: channel统一减半
net.add_module("resnet_block2", resnet_block(32, 64, 2))
net.add_module("resnet_block3", resnet_block(64, 128, 2))
net.add_module("resnet_block4", resnet_block(128, 256, 2))
# global average pooling
net.add_module("global_avg_pool", GlobalAvgPool2d())
# fc layer
net.add_module("fc", nn.Sequential(FlattenLayer(), nn.Linear(256, 10)))


def load_data_fashion_mnist(batch_size, root='../data'):
    """Download the fashion mnist dataset and then load into memory."""

    normalize = transforms.Normalize(mean=[0.28], std=[0.35])
    train_augs = transforms.Compose([
        transforms.RandomCrop(28, padding=2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])

    test_augs = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=train_augs)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=test_augs)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

batch_size = 1
train_iter, test_iter = load_data_fashion_mnist(batch_size, root='/root/.pytorch/F_MNIST_data')
lr, num_epochs, lr_period, lr_decay = 0.01, 50, 5, 0.1
#optimizer = optim.Adam(net.parameters(), lr=lr)
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


print('加载最优模型')
net.load_state_dict(torch.load('model/best.pth'))
net = net.to(device)

print('inference测试集')
net.eval()
id = 0
preds_list = []
with torch.no_grad():
    for X, y in test_iter:
        batch_pred = list(net(X.to(device)).argmax(dim=1).cpu().numpy())
#         print(batch_pred)
        for y_pred in batch_pred:
#             print(y_pred)
            preds_list.append((id, y_pred))
            id += 1

print('生成测试集评估文件')
with open('result.csv', 'w') as f:
    f.write('ID,Prediction\n')
    for id, pred in preds_list:
        f.write('{},{}\n'.format(id, pred))


加载最优模型
inference测试集
生成测试集评估文件


In [None]:
print(test_iter)

In [31]:
m = 0
for X, y in test_iter:
    if m < 1:
        print(X,y.numpy())
        pred1 = net(X.to(device))
        m+=1
print(pred1.argmax(dim=1).cpu().numpy())        
        
print(y.numpy() == pred1.argmax(dim=1).cpu().numpy())
# print(y == pred1.argmax(dim=1).cpu())


tensor([[[[-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.800

[9]
[False]


In [30]:

import matplotlib.pyplot as plt
# print(X.numpy())

plt.imshow(X.numpy()[0][0]) # 显示图片
plt.axis('off') # 不显示坐标轴
plt.show()


AttributeError: 'list' object has no attribute 'numpy'

In [None]:
net

In [77]:
def predict(model, inputs):
    import numpy as np
    import torch
    import json
    
    inputs0 = json.loads(inputs[0])[0]
    inputs1 = np.array(inputs0)

    
    inputs2 = torch.Tensor(inputs1)
    print(inputs2,inputs2.dtype)
    print(torch.__version__)
    pred = model(inputs2)
    pred = pred.data.numpy()
    return [str(x) for x in pred]

In [5]:
# from clipper_admin import ClipperConnection, DockerContainerManager
# from clipper_admin.deployers.pytorch import deploy_pytorch_model
# clipper_conn = ClipperConnection(DockerContainerManager())
# clipper_conn.connect()


from clipper_admin import ClipperConnection, DockerContainerManager
# from clipper_admin.deployers.tensorflow import deploy_tensorflow_model
from clipper_admin.deployers.pytorch import deploy_pytorch_model
clipper_conn = ClipperConnection(DockerContainerManager())
clipper_conn.start_clipper()
clipper_conn.connect()

20-07-02:02:02:43 INFO     [docker_container_manager.py:184] [default-cluster] Starting managed Redis instance in Docker
20-07-02:02:02:47 INFO     [docker_container_manager.py:276] [default-cluster] Metric Configuration Saved at /tmp/tmp_7f5ah0y.yml
20-07-02:02:02:48 INFO     [clipper_admin.py:162] [default-cluster] Clipper is running
20-07-02:02:02:48 INFO     [clipper_admin.py:172] [default-cluster] Successfully connected to Clipper cluster at localhost:1337


In [78]:
clipper_conn.register_application(
    name="pytorch-app6", input_type="strings", default_output="-1.0", slo_micros=100000)
# deploy_tensorflow_model(clipper_conn,name="faster-mod4",version=1,input_type="strings",func=predict4,
# tf_sess_or_saved_model_path=sess,pkgs_to_install=['numpy'])

deploy_pytorch_model(clipper_conn,name="pytorch-mod6",version=1,input_type="strings",func=predict,
pytorch_model=net,pkgs_to_install=['numpy'])

clipper_conn.link_model_to_app(app_name="pytorch-app6",model_name="pytorch-mod6")

query_address = clipper_conn.get_query_addr()

20-07-02:03:38:30 INFO     [clipper_admin.py:236] [default-cluster] Application pytorch-app6 was successfully registered
20-07-02:03:38:31 INFO     [deployer_utils.py:41] Saving function to /tmp/tmp9gpwxqmnclipper
20-07-02:03:38:31 INFO     [deployer_utils.py:51] Serialized and supplied predict function
20-07-02:03:38:31 INFO     [pytorch.py:204] Torch model saved
20-07-02:03:38:31 INFO     [pytorch.py:218] Using Python 3.6 base image
20-07-02:03:38:31 INFO     [clipper_admin.py:534] [default-cluster] Building model Docker image with model data from /tmp/tmp9gpwxqmnclipper
20-07-02:03:38:32 INFO     [clipper_admin.py:539] [default-cluster] Step 1/3 : FROM clipper/pytorch36-container:0.4.1
20-07-02:03:38:32 INFO     [clipper_admin.py:539] [default-cluster]  ---> e3c73c7ad6b9
20-07-02:03:38:32 INFO     [clipper_admin.py:539] [default-cluster] Step 2/3 : RUN apt-get -y install build-essential && pip install numpy
20-07-02:03:38:32 INFO     [clipper_admin.py:539] [default-cluster]  ---> Us

['pytorch-app6']

In [50]:
X = X[0][0][0]

In [79]:
import matplotlib.pyplot as plt
import base64
import json
import requests
from PIL import Image
from datetime import datetime
import numpy as np
import time

class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, time):
            return obj.__str__()
        else:
            return super(NpEncoder, self).default(obj)





url = "http://%s/pytorch-app6/predict" %  query_address

X0 = X.numpy()
print(X0,X0.dtype)
X1 = [X0]


#clipper要求输入必须为 字符串 故必须要序列化下
# list1 = {'image':[[1,2,3]]}
tva = json.dumps(X1,cls=MyEncoder)
# tva = json.dumps([list1],cls=MyEncoder)
# tva = [train_data]

# print(tva)
tva_j = json.dumps({"input": tva})
# print(tva_j)
#此处可看出 输入的最终数据为  [{"image": [[1, 2, 3]]}] 
#clipper 接收到的数据为 [b'[{"image": [[1, 2, 3]]}]' b'[{"image": [[1, 2, 3]]}]']
headers = {'Content-type': 'application/json'}
start = datetime.now()
r = requests.post(url, headers=headers, data=tva_j)
end = datetime.now()
latency = (end - start).total_seconds() * 1000.0
re = r.text

print(re)

[[[[-0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8       ]
   [-0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8       ]
   [-0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8        -0.8        -0.8
    -0.8        -0.8        -0.8       ]
   [-0.8        -0.8  

In [61]:


clipper_conn.get_all_apps()

['pytorch-app6']

In [62]:
clipper_conn.get_all_models()


['pytorch-mod6:1']

In [74]:
clipper_conn.unlink_model_from_app(model_name="pytorch-mod6", app_name="pytorch-app6")

20-07-02:03:38:02 INFO     [clipper_admin.py:344] Model pytorch-mod6 is now removed to application pytorch-app6


In [75]:
clipper_conn.delete_application('pytorch-app6')

20-07-02:03:38:05 INFO     [clipper_admin.py:260] [default-cluster] Application pytorch-app6 was successfully unregistered


In [76]:
clipper_conn.stop_models('pytorch-mod6')

20-07-02:03:38:07 INFO     [clipper_admin.py:1285] Model pytorch-mod6:1 was successfully deleted
20-07-02:03:38:07 INFO     [clipper_admin.py:1319] [default-cluster] Stopped all containers for these models and versions:
{'pytorch-mod6': ['1']}


In [80]:
clipper_conn.stop_all()

20-07-02:03:58:00 INFO     [clipper_admin.py:1424] [default-cluster] Stopped all Clipper cluster and all model containers


In [54]:
X

tensor([[[[-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000],
          [-0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000, -0.8000,
           -0.8000, -0.8000, -0.800