In [None]:
import PIL.Image as Image   # 用于直接无损读取单通道图片 / cv不太方便
from skimage.io import imread, imshow # 用于读取TIFF图形文件
from matplotlib import pyplot as plt  # 用于展示绘图
import cv2


# 调整为解压后的数据中的图片即可
tif_img = cv2.imread('data_sets/raw_data/000009.tif', cv2.IMREAD_UNCHANGED) #cv2.IMREAD_GRAYSCALE
png_img = cv2.imread('./test.png', cv2.IMREAD_GRAYSCALE)
png_img_lable = cv2.imread('data_sets/raw_data/000009.png', cv2.IMREAD_GRAYSCALE)

imshow(tif_img[:, :, :3])
plt.show()
print('TIFF Data Shape: ', tif_img.shape)

imshow(png_img)
plt.show()
print('PNG Data Shape: ', png_img.shape)

imshow(png_img_lable)
plt.show()
print('PNG Data Shape: ', png_img_lable.shape)

## 学习率调整

In [None]:
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import AlexNet
import matplotlib.pyplot as plt


model = AlexNet(num_classes=2)
optimizer = optim.SGD(params=model.parameters(), lr=0.0001)


scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.90)
epo = 15
plt.figure()
x = list(range(epo))
y = []
for epoch in range(epo):
    scheduler.step()
    lr = scheduler.get_last_lr()
    # print(epoch, scheduler.get_lr()[0])
    y.append(lr[0])
plt.xlabel("epoch")
plt.ylabel("learning rate")
plt.plot(x, y)
print('after {} epochs:'.format(epo), y[epo-1])

## 转换成onnx

In [None]:
import torchvision
import torch
from torch.autograd import Variable
import onnx
from utils.deeplearning import seg_model

input_name = ['input']
output_name = ['output']
input = Variable(torch.randn(1, 4, 256, 256)).cuda()

model=seg_model().cuda()
model= torch.nn.DataParallel(model)
checkpoints=torch.load('save/model/timm-efficientnet-b7/checkpoint-best.pth')
model.load_state_dict(checkpoints['state_dict'])
model.eval()

torch.onnx.export(model.module, input, 'timm-efficientnet-b7.onnx', input_names=input_name, output_names=output_name, verbose=True, opset_version=11)

In [None]:
test = onnx.load('lite.onnx')
onnx.checker.check_model(test)
print("==> Passed")

## 直接转成TRT

In [None]:
import torch
from torch2trt import torch2trt
from utils.deeplearning import seg_model

# create some regular pytorch model...
model=seg_model().cuda()
model= torch.nn.DataParallel(model)
checkpoints=torch.load('save/model/timm-efficientnet-b7/checkpoint-best.pth')
model.load_state_dict(checkpoints['state_dict'])
model.eval()

# create example data
x = torch.ones((1, 4, 256, 256)).cuda()

# convert to TensorRT feeding sampl
# e data as input
model_trt = torch2trt(model, [x])

In [None]:
from dataset import val_transform
import cv2
import PIL.Image as Image
import numpy as np

imdir = 'data_sets/raw_data/000007.tif'

image = cv2.imread(imdir, cv2.IMREAD_UNCHANGED)
img = val_transform(image=image)['image']
img=img.unsqueeze(0)
#print(img.shape)
with torch.no_grad():
    img=img.cuda()
    output = model_trt(img)
pred = output.squeeze().cpu().data.numpy()
pred = np.argmax(pred,axis=0)

img=Image.fromarray(np.uint8(pred))
img=img.convert('L')
#print(out_path)
out_path=os.path.join('test.png')
img.save(out_path)

In [None]:
import PIL.Image as Image   # 用于直接无损读取单通道图片 / cv不太方便
from skimage.io import imread, imshow # 用于读取TIFF图形文件
from matplotlib import pyplot as plt  # 用于展示绘图


# 调整为解压后的数据中的图片即可
tif_img = cv2.imread(imdir, cv2.IMREAD_UNCHANGED) #cv2.IMREAD_GRAYSCALE
png_img = cv2.imread('test.png', cv2.IMREAD_GRAYSCALE)

imshow(tif_img[:, :, :3])
plt.show()
print('TIFF Data Shape: ', tif_img.shape)

imshow(png_img)
plt.show()
print('PNG Data Shape: ', png_img.shape)

## ONNX运行时加速推理

In [4]:
import onnxruntime
print('device:', onnxruntime.get_device())
session = onnxruntime.InferenceSession('timm-efficientnet-b7_con.onnx')

device: GPU


In [131]:
import cv2
import torchvision.transforms as transforms
import torch
import numpy as np
from PIL import Image
import time


img_dir = 'data_sets/val/000236.tif'
img = cv2.imread(img_dir, cv2.IMREAD_UNCHANGED)
start = time.time()

img = img.transpose(2, 0, 1)


img = np.expand_dims(img, axis=0)

# tensor = transforms.ToTensor()(img).half()
# tensor = tensor.unsqueeze(0)
# print(tensor.shape)

result = session.run([], {"input": img})#tensor.cpu().numpy()

result = np.array(result)
pred = result.squeeze()
pred = np.argmax(pred,axis=0) + 1
print('with ', onnxruntime.get_device(), 'spend(s):', time.time() - start)

result = pred
img=Image.fromarray(np.uint8(result))
img=img.convert('L')
img.save('./test.png')

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Unexpected input data type. Actual: (N11onnxruntime17PrimitiveDataTypeIhEE) , expected: (N11onnxruntime17PrimitiveDataTypeINS_9MLFloat16EEE)

## CUDA

In [None]:
import numpy as np
import torch
import torchvision.transforms as transforms
import cv2
import time
from PIL import Image
import segmentation_models_pytorch as smp
from albumentations.pytorch import ToTensorV2
# from utils import colorEncode
import torch.nn as nn
from torch.cuda.amp import autocast
from utils.deeplearning import seg_model
from dataset import val_transform


def inference(img_dir):
    transform=val_transform

    image = cv2.imread(img_dir, cv2.IMREAD_UNCHANGED)
    img = transform(image=image)['image']
    img=img.unsqueeze(0)
    #print(img.shape)
    with torch.no_grad():
        img=img.cuda()
        start = time.time()
        output = model(img)
        print('with CUDA', 'spend(s):', time.time() - start)
    
    pred = output.squeeze().cpu().data.numpy()
    pred = np.argmax(pred,axis=0)
    return pred+1

model=seg_model().cuda()
model= torch.nn.DataParallel(model)
checkpoints=torch.load('save/model/timm-efficientnet-b7/checkpoint-best.pth')
model.load_state_dict(checkpoints['state_dict'])
model.eval()

In [None]:
result=inference(img_dir)
img=Image.fromarray(np.uint8(result))
img=img.convert('L')
img.save('./test.png')

## TensorRT加速

In [None]:
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import time
import tensorrt as trt
import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common

class ModelData(object):
    MODEL_PATH = "timm-efficientnet-b7_con.onnx"
    INPUT_SHAPE = (4, 256, 256)
    # We can convert TensorRT data types to numpy types with trt.nptype()
    DTYPE = trt.float32

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Allocate host and device buffers, and create a stream.
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream
    stream.synchronize()

# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        returnresult=builder.build_cuda_engine(network)
        return returnresult

def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):

        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS))
        image_arr = np.reshape(image_arr, image_arr.shape + (1,))
        image_arr=image_arr.transpose([2, 0, 1])
        image_arr=image_arr.astype(trt.nptype(ModelData.DTYPE))
        image_arr=image_arr.ravel()
        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

In [None]:
onnx_model_file='timm-efficientnet-b7_con.onnx'
# Build a TensorRT engine.
with build_engine_onnx(onnx_model_file) as engine:
    # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
    # Allocate buffers and create a CUDA stream.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # Load a normalized test case into the host input page-locked buffer.
        starttime=time.time()
        for i in range(100):
            test_image =img_dir
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            #print('ok')
        endtime=time.time()
        pertime=(endtime-starttime)/100
        print('perimg cost'+str(pertime))