https://github.com/marvis/pytorch-caffe-darknet-convert/blob/master/pytorch2caffe.py

### Imports

In [0]:
# Make the user's Google Drive visible to this Colab runtime.
from google.colab import drive

gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
!pip install onnx

help(torch.onnx.export)


In [0]:
#!pip install --upgrade --force-reinstall torch


In [0]:
#!pip uninstall torchvision
#!pip install -c pytorch pytorch-nightly torchvision cudatoolkit=10.0
# Only the plain install below runs; the two commands above are disabled.
# NOTE(review): no version is pinned here — consider pinning for reproducibility.
!pip install torchvision

In [0]:
# NOTE(review): this command fails — the recorded output reports that pip finds
# no distribution named `caffe`, so the later `import caffe` needs a different
# installation route (TODO confirm which one applies to this runtime).
!pip install caffe

Collecting caffe
[31m  ERROR: Could not find a version that satisfies the requirement caffe (from versions: none)[0m
[31mERROR: No matching distribution found for caffe[0m


In [0]:
# Some standard imports
import io
import numpy as np

from torch import nn
import torch.utils.model_zoo as model_zoo
import torch.onnx
import torchvision

# Super Resolution model definition in PyTorch
import torch.nn.init as init

In [0]:
import sys
#sys.path.append('/data/temp/caffe/python')
import caffe
from collections import OrderedDict
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
from torch.autograd import Variable
from prototxt import *

In [0]:
# Report the library versions in use: torch on the first line, torchvision on the second.
print(torch.__version__, torchvision.__version__, sep="\n")

1.2.0
0.4.0


### Define the paths

In [0]:
# Folder (relative to the working directory) that the checkpoint is read from
# and under which the ONNX file is written.
base_path = './gdrive/My Drive/Colab Notebooks/Fer-dataset/'
# File name of the saved weights, appended to base_path when loading.
checkpoint_name = 'akash-mobilenet_v2-FER1-60perc.pt'
# Destination of the exported ONNX graph.
onnx_export_path = '{}{}'.format(base_path, "ONNX/akash_mobilenet_60_bc2.onnx")

To export a model, you call the torch.onnx._export() function.
This will execute the model, recording a trace of the operators that are used to compute the outputs.
Because _export runs the model, we need to provide an input tensor x (called `dummy_input` below).
The values in this tensor are not important; it can be an image or a
random tensor, as long as it has the right size.

In [0]:

# Build the architecture the checkpoint weights are loaded into: a torchvision
# MobileNetV2 whose last classifier layer is replaced by a 1280 -> 7 linear layer.
torch_model = torchvision.models.mobilenet_v2(pretrained=False)
torch_model.classifier[1] = torch.nn.Linear(1280, 7)

#print(model)

# Always deserialize onto the CPU. The model is not moved to a GPU in the cells
# shown here, so tensors saved from a CUDA device are remapped on load.
map_location = torch.device('cpu')

# Load the weights from a file (.pth usually)
state_dict = torch.load(base_path + checkpoint_name, map_location=map_location)

# Load the weights now into the network built above
torch_model.load_state_dict(state_dict)

# Switch to inference mode since we will only run the forward pass.
torch_model.eval()



In [0]:
# Create the right input shape (e.g. for an image)
dummy_input = torch.randn(1, 3, 256, 256)

# Export the model exactly once: this call writes the ONNX file and its return
# value is kept as torch_out.
# NOTE(review): `_export` is a private torch.onnx entry point — confirm it is
# still available before upgrading torch.
torch_out = torch.onnx._export(torch_model,             # model being run
                               dummy_input,             # model input (or a tuple for multiple inputs)
                               onnx_export_path,        # where to save the model (can be a file or file-like object)
                               export_params=True)      # store the trained parameter weights inside the model file

torch_out is the output after executing the model. Normally you can ignore this output, but here we will use it to verify that the model we exported computes the same values when run in Caffe2.

Now let’s take the ONNX representation and use it in Caffe2. This part can normally be done in a separate process or on another machine, but we will continue in the same process so that we can verify that Caffe2 and PyTorch are computing the same value for the network:

In [0]:


# Maps the class name of an autograd graph node to the Caffe layer type that is
# written into the generated prototxt for that node.
layer_dict = dict([
    ('ConvNdBackward', 'Convolution'),
    ('ThresholdBackward', 'ReLU'),
    ('MaxPool2dBackward', 'Pooling'),
    ('AvgPool2dBackward', 'Pooling'),
    ('DropoutBackward', 'Dropout'),
    ('AddmmBackward', 'InnerProduct'),
    ('BatchNormBackward', 'BatchNorm'),
    ('AddBackward', 'Eltwise'),
    ('SoftmaxBackward', 'Softmax'),
    ('ViewBackward', 'Reshape'),
])

# Module-level counter used to build layer names; the converter functions
# reset it to 1 before each graph traversal.
layer_id = 0
def pytorch2caffe(input_var, output_var, protofile, caffemodel):
    """Write a Caffe prototxt and caffemodel for the graph behind ``output_var``.

    The network description comes from ``pytorch2prototxt``; it is printed,
    saved to ``protofile`` and loaded with ``caffe.Net`` in TEST phase. The
    autograd graph reachable from ``output_var.grad_fn`` is then walked and the
    parameters of convolution, batch-norm and fully connected nodes are copied
    into the Caffe net, which is finally saved to ``caffemodel``.

    Args:
        input_var: network input, forwarded to ``pytorch2prototxt``.
        output_var: network output whose ``grad_fn`` is the traversal root.
        protofile: path the prototxt is written to and loaded from.
        caffemodel: path the filled Caffe model is saved to.

    Side effects:
        Resets and advances the module-level ``layer_id`` counter, prints the
        prototxt and one progress line per visited node, and writes both files.
    """
    global layer_id
    net_info = pytorch2prototxt(input_var, output_var)
    print_prototxt(net_info)
    save_prototxt(net_info, protofile)

    net = caffe.Net(protofile, caffe.TEST)
    params = net.params

    layer_id = 1
    seen = set()

    def convert_layer(func):
        """Visit ``func`` after its inputs and copy its parameters, if any."""
        global layer_id
        parent_type = str(type(func).__name__)

        if hasattr(func, 'next_functions'):
            for u in func.next_functions:
                child = u[0]
                if child is None:
                    continue
                child_type = str(type(child).__name__)
                # Parameter leaves (AccumulateGrad) and the transpose feeding a
                # fully connected node are not layers of their own.
                if child_type != 'AccumulateGrad' and (parent_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                    if child not in seen:
                        convert_layer(child)
                        seen.add(child)
                    # The counter must advance exactly as in pytorch2prototxt,
                    # because the names built from it are looked up in params.
                    if child_type != 'ViewBackward':
                        layer_id = layer_id + 1

        parent_name = parent_type + str(layer_id)
        print('converting %s' % parent_name)
        if parent_type == 'ConvNdBackward':
            weights = func.next_functions[1][0].variable.data
            # The bias slot holds no node when the convolution has no bias.
            if func.next_functions[2][0] is not None:
                biases = func.next_functions[2][0].variable.data
            else:
                biases = None
            save_conv2caffe(weights, biases, params[parent_name])
        elif parent_type == 'BatchNormBackward':
            running_mean = func.running_mean
            running_var = func.running_var
            scale_weights = func.next_functions[1][0].variable.data
            scale_biases = func.next_functions[2][0].variable.data
            # A batch-norm node is stored as two Caffe layers: "<name>_bn"
            # takes the running statistics, "<name>_scale" the affine part.
            bn_name = parent_name + "_bn"
            scale_name = parent_name + "_scale"
            save_bn2caffe(running_mean, running_var, params[bn_name])
            save_scale2caffe(scale_weights, scale_biases, params[scale_name])
        elif parent_type == 'AddmmBackward':
            biases = func.next_functions[0][0].variable.data
            # The weight leaf sits one node further down (presumably behind the
            # skipped transpose node — confirm against the traced graph).
            weights = func.next_functions[2][0].next_functions[0][0].variable.data
            save_fc2caffe(weights, biases, params[parent_name])

    convert_layer(output_var.grad_fn)
    print('save caffemodel to %s' % caffemodel)
    net.save(caffemodel)

def save_conv2caffe(weights, biases, conv_param):
    """Copy convolution weights (and biases, if present) into a layer's blobs.

    Args:
        weights: tensor written to ``conv_param[0].data``.
        biases: tensor written to ``conv_param[1].data``, or ``None`` when the
            layer has no bias; in that case blob 1 is not touched.
        conv_param: indexable collection of blobs with a writable ``data`` array.
    """
    # detach().cpu() makes the conversion work for tensors that track gradients
    # or live on a GPU; a bare .numpy() raises for both.
    if biases is not None:
        conv_param[1].data[...] = biases.detach().cpu().numpy()
    conv_param[0].data[...] = weights.detach().cpu().numpy()

def save_fc2caffe(weights, biases, fc_param):
    """Copy fully connected weights and biases into a layer's blobs.

    Args:
        weights: tensor written to ``fc_param[0].data``.
        biases: tensor written to ``fc_param[1].data``.
        fc_param: indexable collection of blobs with a writable ``data`` array.
    """
    # detach().cpu() makes the conversion work for tensors that track gradients
    # or live on a GPU; a bare .numpy() raises for both.
    fc_param[1].data[...] = biases.detach().cpu().numpy()
    fc_param[0].data[...] = weights.detach().cpu().numpy()

def save_bn2caffe(running_mean, running_var, bn_param):
    """Copy batch-norm running statistics into a layer's three blobs.

    Args:
        running_mean: tensor written to ``bn_param[0].data``.
        running_var: tensor written to ``bn_param[1].data``.
        bn_param: indexable collection of blobs with a writable ``data`` array;
            blob 2 is set to the constant 1.0.
    """
    # detach().cpu() makes the conversion work for tensors that track gradients
    # or live on a GPU; a bare .numpy() raises for both.
    bn_param[0].data[...] = running_mean.detach().cpu().numpy()
    bn_param[1].data[...] = running_var.detach().cpu().numpy()
    bn_param[2].data[...] = np.array([1.0])

def save_scale2caffe(weights, biases, scale_param):
    """Copy scale-layer weights and biases into a layer's blobs.

    Args:
        weights: tensor written to ``scale_param[0].data``.
        biases: tensor written to ``scale_param[1].data``.
        scale_param: indexable collection of blobs with a writable ``data`` array.
    """
    # detach().cpu() makes the conversion work for tensors that track gradients
    # or live on a GPU; a bare .numpy() raises for both.
    scale_param[1].data[...] = biases.detach().cpu().numpy()
    scale_param[0].data[...] = weights.detach().cpu().numpy()

#def pytorch2prototxt(model, x, var):
def pytorch2prototxt(input_var, output_var):
    """Describe the graph that produced ``output_var`` as a prototxt structure.

    Walks the autograd graph from ``output_var.grad_fn`` so that the inputs of
    a node are handled before the node itself, and emits one layer entry per
    node. Batch-norm nodes become a BatchNorm + Scale pair; view nodes emit no
    layer and simply forward their input name.

    Args:
        input_var: network input; only ``input_var.size()`` is used, for the
            ``input_dim`` property.
        output_var: network output whose ``grad_fn`` is the traversal root.

    Returns:
        OrderedDict with ``'props'`` (name, input, input_dim) and ``'layers'``
        (list of per-layer OrderedDicts in emission order).

    Raises:
        KeyError: if a visited node type has no entry in ``layer_dict``.

    Side effects:
        Resets and advances the module-level ``layer_id`` counter.
    """
    global layer_id
    net_info = OrderedDict()
    props = OrderedDict()
    props['name'] = 'pytorch'
    props['input'] = 'data'
    props['input_dim'] = input_var.size()

    layers = []

    layer_id = 1
    seen = set()
    # Maps an already visited node to the name of the blob it produces, so a
    # node consumed by several parents is emitted only once.
    top_names = dict()

    def add_layer(func):
        """Emit the layers feeding ``func``, then ``func``; return its top name."""
        global layer_id
        parent_type = str(type(func).__name__)
        parent_bottoms = []

        if hasattr(func, 'next_functions'):
            for u in func.next_functions:
                if u[0] is not None:
                    child_type = str(type(u[0]).__name__)
                    # Parameter leaves (AccumulateGrad) and the transpose
                    # feeding a fully connected node are not separate layers.
                    if child_type != 'AccumulateGrad' and (parent_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                        if u[0] not in seen:
                            top_name = add_layer(u[0])
                            parent_bottoms.append(top_name)
                            seen.add(u[0])
                        else:
                            top_name = top_names[u[0]]
                            parent_bottoms.append(top_name)
                        # View nodes emit no layer, so they consume no id.
                        if child_type != 'ViewBackward':
                            layer_id = layer_id + 1

        if parent_type not in layer_dict:
            # Same exception type as the bare lookup, with a usable message.
            raise KeyError('unsupported autograd node type: %s' % parent_type)

        parent_name = parent_type + str(layer_id)
        layer = OrderedDict()
        layer['name'] = parent_name
        layer['type'] = layer_dict[parent_type]
        parent_top = parent_name
        if len(parent_bottoms) > 0:
            layer['bottom'] = parent_bottoms
        else:
            # A node without graph inputs reads the network input blob.
            layer['bottom'] = ['data']
        layer['top'] = parent_top
        if parent_type == 'ConvNdBackward':
            weights = func.next_functions[1][0].variable
            conv_param = OrderedDict()
            conv_param['num_output'] = weights.size(0)
            # Only the first element of padding/stride and one kernel dimension
            # are written, i.e. square kernels with equal padding and stride.
            # NOTE(review): no group or dilation field is emitted; grouped
            # (e.g. depthwise) convolutions would need one — confirm.
            conv_param['pad'] = func.padding[0]
            conv_param['kernel_size'] = weights.size(2)
            conv_param['stride'] = func.stride[0]
            if func.next_functions[2][0] is None:
                conv_param['bias_term'] = 'false'
            layer['convolution_param'] = conv_param
        elif parent_type == 'BatchNormBackward':
            # The BatchNorm layer carries the statistics; the Scale layer that
            # follows works in place on the same blob.
            bn_layer = OrderedDict()
            bn_layer['name'] = parent_name + "_bn"
            bn_layer['type'] = 'BatchNorm'
            bn_layer['bottom'] = parent_bottoms
            bn_layer['top'] = parent_top
            batch_norm_param = OrderedDict()
            batch_norm_param['use_global_stats'] = 'true'
            bn_layer['batch_norm_param'] = batch_norm_param

            scale_layer = OrderedDict()
            scale_layer['name'] = parent_name + "_scale"
            scale_layer['type'] = 'Scale'
            scale_layer['bottom'] = parent_top
            scale_layer['top'] = parent_top
            scale_param = OrderedDict()
            scale_param['bias_term'] = 'true'
            scale_layer['scale_param'] = scale_param
        elif parent_type == 'ThresholdBackward':
            # In-place layer: its top is its own bottom.
            parent_top = parent_bottoms[0]
        elif parent_type == 'SoftmaxBackward':
            parent_top = parent_bottoms[0]
        elif parent_type == 'MaxPool2dBackward':
            pooling_param = OrderedDict()
            pooling_param['pool'] = 'MAX'
            pooling_param['kernel_size'] = func.kernel_size[0]
            pooling_param['stride'] = func.stride[0]
            pooling_param['pad'] = func.padding[0]
            layer['pooling_param'] = pooling_param
        elif parent_type == 'AvgPool2dBackward':
            pooling_param = OrderedDict()
            pooling_param['pool'] = 'AVE'
            pooling_param['kernel_size'] = func.kernel_size[0]
            pooling_param['stride'] = func.stride[0]
            layer['pooling_param'] = pooling_param
        elif parent_type == 'DropoutBackward':
            parent_top = parent_bottoms[0]
            dropout_param = OrderedDict()
            dropout_param['dropout_ratio'] = func.p
            layer['dropout_param'] = dropout_param
        elif parent_type == 'AddmmBackward':
            inner_product_param = OrderedDict()
            # The output count is read from the length of the bias leaf.
            inner_product_param['num_output'] = func.next_functions[0][0].variable.size(0)
            layer['inner_product_param'] = inner_product_param
        elif parent_type == 'ViewBackward':
            parent_top = parent_bottoms[0]
        elif parent_type == 'AddBackward':
            eltwise_param = OrderedDict()
            eltwise_param['operation'] = 'SUM'
            layer['eltwise_param'] = eltwise_param

        layer['top'] = parent_top # reset layer['top'] as parent_top may change
        if parent_type != 'ViewBackward':
            if parent_type == "BatchNormBackward":
                layers.append(bn_layer)
                layers.append(scale_layer)
            else:
                layers.append(layer)
        top_names[func] = parent_top
        return parent_top

    add_layer(output_var.grad_fn)
    net_info['props'] = props
    net_info['layers'] = layers
    return net_info

# Example driver: run a torchvision model once, dump its autograd graph to
# out.dot, then convert it to a Caffe prototxt/caffemodel pair.
if __name__ == '__main__':
    import torchvision
    from visualize import make_dot

    model_name = 'resnet50'

    if model_name == 'resnet50':
        m = torchvision.models.resnet50(pretrained=True)
    elif model_name == 'vgg16':
        m = torchvision.models.vgg16()
        m.classifier.add_module('softmax', torch.nn.Softmax())
    m.eval() # very important here, otherwise batchnorm running_mean, running_var will be incorrect
    input_var = Variable(torch.rand(1, 3, 224, 224))

    print(m)
    output_var = m(input_var)
    dot = make_dot(output_var)
    # Write the graph followed by a newline. `print >> fp, dot` is Python 2
    # only and raises TypeError under Python 3; fp.write works on both, and the
    # with-block closes the file even if writing fails.
    with open("out.dot", "w") as fp:
        fp.write(str(dot) + "\n")
    #exit(0)

    if model_name == 'resnet50':
        pytorch2caffe(input_var, output_var, 'resnet50-pytorch2caffe.prototxt', 'resnet50-pytorch2caffe.caffemodel')
    elif model_name == 'vgg16':
        pytorch2caffe(input_var, output_var, 'vgg16-pytorch2caffe.prototxt', 'vgg16-pytorch2caffe.caffemodel')

<google.protobuf.pyext._message.MessageDescriptor object at 0x7f10c1c60db0>
