In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import OrderedDict

class NetworkModel(nn.Module):

    def __init__(self, inputDim):

        # Initialize the network layers.

        super(NetworkModel, self).__init__()

        self.lin1 = nn.Linear(inputDim, inputDim, bias=False)
        self.lin2 = nn.Linear(inputDim, inputDim, bias=False)

    def forward(self, x):

        # A forward function
        # Linear function without activation

        x = self.lin1(x)
        x = self.lin2(x)
        
#         Vin -> V1 -> Vout
#         V1 = Vin * W1'
#         Vout = V1 * W2' 
#             = Vin * W1' * W2'

        return x

class TrainModel():

    def __init__(self, model, device, learningRate, inputDim, batchSize, numberOfSteps):

        self.device = device
        self.net = model.to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=learningRate)
        self.inputDim = inputDim
        self.batchSize = batchSize
        self.numberOfSteps = numberOfSteps

    def train(self,):

        for step in range(self.numberOfSteps):

            self.optimizer.zero_grad()
            input = torch.randn((self.batchSize, self.inputDim)).to(self.device)
            output = self.net(input)
            loss = F.mse_loss(input, output)
            loss.backward()
            self.optimizer.step()

            weight_dict = OrderedDict(self.net.named_parameters())
            weightFunction = weight_dict['lin1.weight'].T @ weight_dict['lin2.weight'].T
            identityMatrix = torch.eye(self.inputDim).to(self.device)
            error = F.mse_loss(weightFunction, identityMatrix)
            if((step+1)%1000==0):
                print("Step: " + str(step) + " ||WeightFunction-I|| Error: " + str(error))
                print(weightFunction, 6)
#                 print(torch.round(weightFunction), 6)
                print(weight_dict['lin1.weight'])
                print(weight_dict['lin2.weight'])
        

    def test(self,n):

        test_samples = torch.randn(n, self.inputDim).to(self.device)
        preds = self.net(test_samples)
        print(test_samples)
        print(preds)


def main():

    # Please change the inputs here
    inputDim = 4

    # Additional inputs may not be required to change.
    learningRate = 0.0005
    batchSize = 64
    numberOfSteps = 10000

    # Additional Initializer
    device = torch.device('cpu')
    model =  NetworkModel(inputDim)
    trainer = TrainModel(model, device, learningRate, inputDim, batchSize, numberOfSteps)
    trainer.train()
    trainer.test(1)

if __name__ == '__main__':
    main()

Step: 999 ||WeightFunction-I|| Error: tensor(0.0200, grad_fn=<MseLossBackward0>)
tensor([[ 0.7684,  0.0387,  0.1464,  0.0242],
        [ 0.0629,  0.8018, -0.0838, -0.0204],
        [ 0.2391, -0.1139,  0.6665,  0.0148],
        [-0.0608, -0.0063,  0.0724,  0.9605]], grad_fn=<MmBackward0>) 6
Parameter containing:
tensor([[ 0.7484, -0.1304,  0.4822, -0.0352],
        [ 0.0666,  0.9112,  0.0936,  0.2674],
        [ 0.0968,  0.3443, -0.4541, -0.6413],
        [ 0.4027,  0.1218, -0.1884,  0.6211]], requires_grad=True)
Parameter containing:
tensor([[ 0.8573,  0.0483,  0.2960,  0.2356],
        [-0.0630,  0.7437,  0.3331,  0.0100],
        [ 0.5548,  0.2841, -0.5817, -0.5748],
        [-0.3140,  0.0829, -0.6780,  0.7930]], requires_grad=True)
Step: 1999 ||WeightFunction-I|| Error: tensor(9.3683e-07, grad_fn=<MseLossBackward0>)
tensor([[ 9.9967e-01,  3.0348e-04, -8.9198e-05, -2.0683e-05],
        [ 3.9765e-05,  9.9737e-01, -3.6532e-04, -4.3188e-04],
        [ 8.2767e-04, -2.4938e-03,  9.9929e-0

In [4]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import OrderedDict

class NetworkModel(nn.Module):

    def __init__(self, inputDim):

        # Initialize the network layers.

        super(NetworkModel, self).__init__()

        self.lin1 = nn.Linear(inputDim, inputDim, bias=False)
        self.lin2 = nn.Linear(inputDim, inputDim, bias=False)
        self.relu = torch.nn.ReLU()

    def forward(self, x):

        # A forward function
        # Linear function with activation

        x = self.lin1(x)
        x = self.relu(x)
        x = self.lin2(x)

        return x

class TrainModel():

    def __init__(self, model, device, learningRate, inputDim, batchSize, numberOfSteps):

        self.device = device
        self.net = model.to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=learningRate)
        self.inputDim = inputDim
        self.batchSize = batchSize
        self.numberOfSteps = numberOfSteps

    def train(self,):

        for step in range(self.numberOfSteps):

            self.optimizer.zero_grad()
            input = torch.randn((self.batchSize, self.inputDim)).to(self.device)
            output = self.net(input)
            loss = F.mse_loss(input, output)
            loss.backward()
            self.optimizer.step()

            weight_dict = OrderedDict(self.net.named_parameters())
            weightFunction = weight_dict['lin1.weight'].T @ weight_dict['lin2.weight'].T
            identityMatrix = torch.eye(self.inputDim).to(self.device)
            error = F.mse_loss(weightFunction, identityMatrix)
            if(step%5000==0):
                print("Step: " + str(step) + " ||WeightFunction-I|| Error: " + str(error))
                print(weightFunction, 6)

#         print(torch.round(weightFunction), 6)

    def test(self,n):

        test_samples = torch.randn(n, self.inputDim).to(self.device)
        preds = self.net(test_samples)
        print(test_samples)
        print(preds)


def main():

    # Please change the inputs here
    inputDim = 4

    # Additional inputs may not be required to change.
    learningRate = 0.0005
    batchSize = 64
    numberOfSteps = 100000

    # Additional Initializer
    device = torch.device('cpu')
    model =  NetworkModel(inputDim)
    trainer = TrainModel(model, device, learningRate, inputDim, batchSize, numberOfSteps)
    trainer.train()
    trainer.test(1)

if __name__ == '__main__':
    main()

Step: 0 ||WeightFunction-I|| Error: tensor(0.3352, grad_fn=<MseLossBackward0>)
tensor([[-0.1423,  0.1059,  0.0789, -0.0142],
        [-0.0827,  0.0473, -0.2936,  0.2561],
        [-0.4200,  0.1677, -0.1813,  0.1069],
        [ 0.0613, -0.0089,  0.1973, -0.1490]], grad_fn=<MmBackward0>) 6
Step: 5000 ||WeightFunction-I|| Error: tensor(0.0570, grad_fn=<MseLossBackward0>)
tensor([[ 1.4308, -0.0270,  0.1435, -0.0122],
        [-0.0226,  1.4455,  0.1530, -0.0431],
        [ 0.1463,  0.1410,  0.5654,  0.1368],
        [-0.0223, -0.0154,  0.1563,  1.4539]], grad_fn=<MmBackward0>) 6
Step: 10000 ||WeightFunction-I|| Error: tensor(0.0555, grad_fn=<MseLossBackward0>)
tensor([[ 1.4452, -0.0192,  0.1339, -0.0374],
        [-0.0303,  1.4520,  0.1286, -0.0294],
        [ 0.1397,  0.1213,  0.5807,  0.1681],
        [-0.0275, -0.0143,  0.1676,  1.4251]], grad_fn=<MmBackward0>) 6
Step: 15000 ||WeightFunction-I|| Error: tensor(0.0585, grad_fn=<MseLossBackward0>)
tensor([[ 1.4429, -0.0169,  0.1698, -0.0302

Learning with single layer neural network:

In [15]:
import torch.nn as nn

class NetworkModule(nn.Module):
    
    def __init__(self, inputDim):
        
        super(NetworkModel, self).__init__()
        self.lin = nn.Linear(inputDim, inputDim, bias=False)
        
    def forward(self, x):
        
        x = self.lin(x)
        return x
    
class TrainModel():
    
    def __init__(self, model, device, learningRate, inputDim, batchSize, numberOfSteps):
        
        self.device = device
        self.net = model.to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=learningRate)
        self.inputDim = inputDim
        self.batchSize = batchSize
        self.numberOfSteps = numberOfSteps
        
    def train(self):
        
        for step in range(self.numberOfSteps):

            self.optimizer.zero_grad()
            input = torch.randn((self.batchSize, self.inputDim)).to(self.device)
            output = self.net(input)
            loss = F.mse_loss(input, output)
            loss.backward()
            self.optimizer.step()

            weight_dict = OrderedDict(self.net.named_parameters())
            weightFunction = weight_dict['lin1.weight'].T
            identityMatrix = torch.eye(self.inputDim).to(self.device)
            error = F.mse_loss(weightFunction, identityMatrix)
            if(step%1000==0):
                print("Step: " + str(step) + " ||WeightFunction-I|| Error: " + str(error))
                print(weightFunction)
        
    def test(self,n):

        test_samples = torch.randn(n, self.inputDim).to(self.device)
        preds = self.net(test_samples)
        print(test_samples)
        print(preds)
        
def main():
        
    inputDim = 4

    learningRate = 0.000005
    batchSize = 64
    numberOfSteps = 10000

    # Additional Initializer
    device = torch.device('cpu')
    model =  NetworkModel(inputDim)
    trainer = TrainModel(model, device, learningRate, inputDim, batchSize, numberOfSteps)
    trainer.train()
    trainer.test(1)

if __name__ == '__main__':
    main() 
    
    

Step: 0 ||WeightFunction-I|| Error: tensor(0.3683, grad_fn=<MseLossBackward0>)
tensor([[-0.0088, -0.0940, -0.0361, -0.4316],
        [-0.3889, -0.3898, -0.3222, -0.3349],
        [ 0.1227,  0.3530, -0.2808, -0.1061],
        [-0.1524, -0.2852, -0.0176,  0.3043]], grad_fn=<PermuteBackward0>)
Step: 1000 ||WeightFunction-I|| Error: tensor(0.3697, grad_fn=<MseLossBackward0>)
tensor([[-0.0115, -0.0900, -0.0317, -0.4270],
        [-0.3937, -0.3935, -0.3175, -0.3302],
        [ 0.1188,  0.3482, -0.2851, -0.1106],
        [-0.1571, -0.2805, -0.0222,  0.2997]], grad_fn=<PermuteBackward0>)
Step: 2000 ||WeightFunction-I|| Error: tensor(0.3711, grad_fn=<MseLossBackward0>)
tensor([[-0.0141, -0.0861, -0.0274, -0.4226],
        [-0.3985, -0.3972, -0.3129, -0.3257],
        [ 0.1150,  0.3435, -0.2895, -0.1151],
        [-0.1619, -0.2758, -0.0267,  0.2951]], grad_fn=<PermuteBackward0>)
Step: 3000 ||WeightFunction-I|| Error: tensor(0.3725, grad_fn=<MseLossBackward0>)
tensor([[-0.0169, -0.0821, -0.0229, 