ValueError: optimizer got an empty parameter list #277

@OpenBanboo

Hi PyTorch Friends,

I'm trying to build a customized layer by following the Extending PyTorch tutorial and to use the customized layers to replace the nn.Conv2d and nn.Linear layers in the official MNIST example (main.py, lines 55-59).
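
For reference, the layers being replaced in the MNIST example look roughly like this (paraphrased, so the exact line numbers may not match; my decomposed layers keep the same shapes):

import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # The five layers I swap out for my decomposed versions:
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)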

However, after swapping in my own customized layers, the testing step (forward pass) works without error, but training the new model fails with "ValueError: optimizer got an empty parameter list". Also, new_model.parameters() does not yield any items.
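
Concretely, the failure happens when the training script builds the optimizer; a rough sketch of that part (the optimizer settings follow the MNIST example and are not important here):

import torch.optim as optim

new_model = Decomp_Net()
print(list(new_model.parameters()))  # prints [] -- nothing was registered

# This line raises "ValueError: optimizer got an empty parameter list":
optimizer = optim.SGD(new_model.parameters(), lr=0.01, momentum=0.5)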

The following is my modified Net (nn.Module):

class Decomp_Net(nn.Module):
    def __init__(self, path_pretrained_model="mymodel.pth"):
        super(Decomp_Net, self).__init__()
        # Load the pretrained model
        # Load the saved weights
        self.path_pretrained_model = path_pretrained_model
        try:
            params = torch.load(self.path_pretrained_model)
            print("Loaded pretrained model.")
        except:
            raise RuntimeError("No pretrained model saved.")

        # Conv Layer 1
        self.W_conv1 = params.items()[0]
        self.B_conv1 = params.items()[1][1]
        self.W_conv1 = self.W_conv1[1].view(10, 25)
        self.W_conv1 = self.W_conv1.t()
        self.D_conv1, self.X_a_conv1 = create_dic_fuc.create_dic(A=self.W_conv1, M=25, N=10, Lmax=9, Epsilon=0.7, mode=1)

        # Conv Layer 2
        self.W_conv2 = params.items()[2]
        self.B_conv2 = params.items()[3][1]
        self.W_conv2 = self.W_conv2[1].view(200, 25)
        self.W_conv2 = self.W_conv2.t()
        self.D_conv2, self.X_a_conv2 = create_dic_fuc.create_dic(A=self.W_conv2, M=25, N=200, Lmax=199, Epsilon=0.7, mode=1)

        # Layer FC1
        self.W_fc1 = params.items()[4]
        self.B_fc1 = params.items()[5][1]
        self.D_fc1, self.X_a_fc1 = create_dic_fuc.create_dic(A=self.W_fc1[1], M=50, N=320, Lmax=319, Epsilon=0.8, mode=1)

        # Layer FC2
        self.W_fc2 = params.items()[6] # Fetching the last fully connected layer of the original model
        self.B_fc2 = params.items()[7][1] 
        self.D_fc2, self.X_a_fc2 = create_dic_fuc.create_dic(A=self.W_fc2[1], M=10, N=50, Lmax=49, Epsilon=0.5, mode=1)

        self.conv1 = ConvDecomp2d(coefs=self.X_a_conv1, dictionary=self.D_conv1, bias_val=self.B_conv1, input_channels=1, output_channels=10, kernel_size=5, bias=True)
        self.conv2 = ConvDecomp2d(coefs=self.X_a_conv2, dictionary=self.D_conv2, bias_val=self.B_conv2, input_channels=10, output_channels=20, kernel_size=5, bias=True)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = FCDecomp(coefs=self.X_a_fc1, dictionary=self.D_fc1, bias_val=self.B_fc1, input_features=320, output_features=50)
        self.fc2 = FCDecomp(coefs=self.X_a_fc2, dictionary=self.D_fc2, bias_val=self.B_fc2, input_features=50, output_features=10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)
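
For context, mymodel.pth holds the state_dict of the original MNIST Net, so I am assuming params is an OrderedDict whose items come in the following order (this is what the params.items()[...] indexing above relies on):

import torch

# Assumed layout of the pretrained state_dict; the indices match the
# params.items()[...] calls in Decomp_Net.__init__:
#   0: conv1.weight (10, 1, 5, 5)    1: conv1.bias (10,)
#   2: conv2.weight (20, 10, 5, 5)   3: conv2.bias (20,)
#   4: fc1.weight   (50, 320)        5: fc1.bias   (50,)
#   6: fc2.weight   (10, 50)         7: fc2.bias   (10,)
params = torch.load("mymodel.pth")
for i, (name, tensor) in enumerate(params.items()):
    print(i, name, tensor.size())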

I defined the customized function as follows:

class LinearDecomp(Function):
    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, coefs, dictionary, bias=None):
        weight = torch.mm(dictionary, coefs).cuda() # reconstruct the weight
        ctx.save_for_backward(input, weight, dictionary, coefs, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        input, weight, dictionary, coefs, bias = ctx.saved_variables
        grad_input = grad_coefs = grad_dictionary = grad_bias = None
        # Gradient w.r.t. the reconstructed weight; used below but not returned
        grad_weight = grad_output.t().mm(input)

        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)

        if ctx.needs_input_grad[1]:
            grad_coefs = dictionary.t().mm(grad_weight)

        if ctx.needs_input_grad[2]:
            grad_dictionary = grad_weight.mm(coefs.t())

        if bias is not None and ctx.needs_input_grad[3]:
            grad_bias = grad_output.sum(0).squeeze(0)

        return grad_input, grad_coefs, grad_dictionary, grad_bias
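
A quick way to sanity-check the Function is gradcheck, roughly as the Extending PyTorch tutorial suggests; this is only a sketch with arbitrary small sizes, and everything is on the GPU because forward() hard-codes .cuda():

import torch
from torch.autograd import Variable, gradcheck

# input (4, 6), dictionary (3, 5), coefs (5, 6), bias (3,)
# -> weight = dictionary.mm(coefs) is (3, 6), output = input.mm(weight.t()) is (4, 3)
input = Variable(torch.randn(4, 6).double().cuda(), requires_grad=True)
coefs = Variable(torch.randn(5, 6).double().cuda(), requires_grad=True)
dictionary = Variable(torch.randn(3, 5).double().cuda(), requires_grad=True)
bias = Variable(torch.randn(3).double().cuda(), requires_grad=True)

print(gradcheck(LinearDecomp.apply, (input, coefs, dictionary, bias), eps=1e-6, atol=1e-4))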

The customized layer is defined as:

class FCDecomp(nn.Module):
    def __init__(self, coefs, dictionary, bias_val, input_features, output_features, bias=True):
        super(FCDecomp, self).__init__()
        self.dictionary = nn.Parameter(dictionary, requires_grad=False).cuda()
        self.coefs = nn.Parameter(coefs, requires_grad=True).cuda()
        if bias:
            self.bias = nn.Parameter(bias_val, requires_grad=True).cuda()
        else:
            self.register_parameter('bias', None)

    def forward(self, input):
        return LinearDecomp.apply(input, self.coefs, self.dictionary, self.bias)
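
To make the symptom concrete, even a single FCDecomp built from random tensors (illustrative shapes; in the real model these come from create_dic) reports no parameters:

import torch

layer = FCDecomp(coefs=torch.randn(5, 320),
                 dictionary=torch.randn(50, 5),
                 bias_val=torch.randn(50),
                 input_features=320, output_features=50)
print(list(layer.parameters()))  # I expected coefs and bias here, but the list is empty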

Could anyone provide me with some suggestions or hints for this issue? Thank you very much!
