In [79]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import math

In [175]:
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
dataset1 = datasets.MNIST('../data', train=True, download=True,
                   transform=transform,target_transform=transforms.Compose([
                                 lambda x:torch.tensor([x]), # or just torch.tensor
                                 lambda x:F.one_hot(x,10)]),)
dataset2 = datasets.MNIST('../data', train=False,
                   transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1,batch_size=32)
test_loader = torch.utils.data.DataLoader(dataset2, )

In [166]:
# class KDRightLayer(nn.Module):
#     """ Custom KDRightLayer"""
#     def __init__(self, n, q, m, activation_string='Identity'):
#         super().__init__()
#         self.n = n
#         self.activation_string = activation_string
#         self.activation = getattr(nn, self.activation_string)()  

        
#         self.q = q #input_shape[1]
#         self.m = m #input_shape[2]
        
#         weights = torch.Tensor(m, self.n)
#         self.weights = nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
#         bias = torch.Tensor(self.q, self.n )
#         self.bias = nn.Parameter(bias)

#         # initialize weights and biases
#         nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
#         fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weights)
#         bound = 1 / math.sqrt(fan_in)
#         nn.init.uniform_(self.bias, -bound, bound)  # bias init

#     def forward(self, input):
#         return torch.matmul(input, self.weights) + self.bias

class KDLeftLayer(nn.Module):
    """ Custom KDRightLayer"""
    def __init__(self, input_dim, weight_dim, activation_string='Identity'):
        super().__init__()
        self.m = input_dim[0]
        self.n = input_dim[1]
        self.activation_string = activation_string
        self.activation =  getattr(nn, self.activation_string)()  

        
        weights = torch.Tensor(weight_dim[0], weight_dim[1])
        self.weights = nn.Parameter(weights)
        
        bias = torch.Tensor(input_dim[0], weight_dim[1])
        self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
        
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weights)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)  # bias init

    def forward(self, input):
        return torch.matmul(input, self.weights) + self.bias
    

In [165]:
class KDRightLayer(nn.Module):
    """ Custom KDLeftLayer"""
    def __init__(self, input_dim, weight_dim, activation_string='Identity'):
        super().__init__()
        
        self.activation_string = activation_string
        self.activation =  getattr(nn, self.activation_string)()  

        
        weights = torch.Tensor(weight_dim[0], weight_dim[1])
        self.weights = nn.Parameter(weights)
        
        bias = torch.Tensor(weight_dim[0], input_dim[1])
        self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
        
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weights)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)  # bias init

    def forward(self, input):
#         print("custom_left")
        return torch.permute(torch.matmul(torch.permute(input,dims=[0, 2, 1]),
                                                  self.weights.t()),dims=[0, 2, 1]) + self.bias
    

In [167]:
class BasicModel(nn.Module):
    def __init__(self):
        super().__init__()

        self.KDleft1 = KDLeftLayer(input_dim=(28, 28), weight_dim=(28, 64), activation_string='ReLU')
        self.KDright1 = KDRightLayer(input_dim=(28, 64), weight_dim=(64, 28), activation_string='ReLU')
        
        self.KDleft2 = KDLeftLayer(input_dim=(64, 64), weight_dim=(64, 64), activation_string='ReLU')
        self.KDright2 = KDRightLayer(input_dim=(64, 64), weight_dim=(64, 64), activation_string='ReLU')
        
        
        self.KDleft3 = KDLeftLayer(input_dim=(64, 64), weight_dim=(64, 64), activation_string='ReLU')
        self.KDright3 = KDRightLayer(input_dim=(64, 64), weight_dim=(64, 64), activation_string='ReLU')
        
        
        self.KDleft4 = KDLeftLayer(input_dim=(64, 64), weight_dim=(64, 5), activation_string='ReLU')
        self.KDright4 = KDRightLayer(input_dim=(64, 5), weight_dim=(2, 64), activation_string='ReLU')
        self.flat = nn.Flatten()
        self.Softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.KDleft1(x)
        x = self.KDright1(x)
        
        x = self.KDleft2(x)
        x = self.KDright2(x)
        
        x = self.KDleft3(x)
        x = self.KDright3(x)
        
        x = self.KDleft4(x)
        x = self.KDright4(x)
        x = self.flat(x)
        x = self.Softmax(x)
        return x

model = BasicModel()
summary(model)

Layer (type:depth-idx)                   Param #
├─KDLeftLayer: 1-1                       --
|    └─ReLU: 2-1                         --
├─KDRightLayer: 1-2                      --
|    └─ReLU: 2-2                         --
├─KDLeftLayer: 1-3                       --
|    └─ReLU: 2-3                         --
├─KDRightLayer: 1-4                      --
|    └─ReLU: 2-4                         --
├─KDLeftLayer: 1-5                       --
|    └─ReLU: 2-5                         --
├─KDRightLayer: 1-6                      --
|    └─ReLU: 2-6                         --
├─KDLeftLayer: 1-7                       --
|    └─ReLU: 2-7                         --
├─KDRightLayer: 1-8                      --
|    └─ReLU: 2-8                         --
├─Flatten: 1-9                           --
├─LogSoftmax: 1-10                       --
Total params: 0
Trainable params: 0
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─KDLeftLayer: 1-1                       --
|    └─ReLU: 2-1                         --
├─KDRightLayer: 1-2                      --
|    └─ReLU: 2-2                         --
├─KDLeftLayer: 1-3                       --
|    └─ReLU: 2-3                         --
├─KDRightLayer: 1-4                      --
|    └─ReLU: 2-4                         --
├─KDLeftLayer: 1-5                       --
|    └─ReLU: 2-5                         --
├─KDRightLayer: 1-6                      --
|    └─ReLU: 2-6                         --
├─KDLeftLayer: 1-7                       --
|    └─ReLU: 2-7                         --
├─KDRightLayer: 1-8                      --
|    └─ReLU: 2-8                         --
├─Flatten: 1-9                           --
├─LogSoftmax: 1-10                       --
Total params: 0
Trainable params: 0
Non-trainable params: 0

In [180]:
import torch.nn.functional as F
import matplotlib.pyplot as plt
bce_loss = torch.nn.BCEWithLogitsLoss()
opt = torch.optim.Adam(lr=1e-4,params=model.parameters())
loss_array = []
# model = model.cuda()
for e in range(100):
    l = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.reshape(-1,28,28)
        opt.zero_grad()
        

        x = model(data)
        x = x.reshape(-1,10)
        target = target.reshape(-1,10)
        print(x.shape)
#         print(target.shape)
#         loss = F.nll_loss(x,target).mean()
        loss = bce_loss(x.float(),target.float()).sum()
        l = l+loss
        loss.backward()
        opt.step()
    loss_array.append(l.detach().numpy())
plt.plot(loss_array)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_mm)

In [61]:
def _report_model_parameters(model):
        """ Reports the model size """

        all_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        size = all_params * (32 / 8)  # Bytes
        print(all_params, trainable_params, size)
#         print("Model has %.1f M parameters (%.1f M trainable) with an estimated size of %all_params, trainable_params / 1.0e6, size / 1.0e6)


In [75]:
_report_model_parameters(model)

346 346 1384.0


In [76]:
model.parameters()

<generator object Module.parameters at 0x7f1834871040>

In [77]:
from torchsummary import summary

In [78]:
summary(model)

Layer (type:depth-idx)                   Param #
├─KDRightLayer: 1-1                      --
|    └─ReLU: 2-1                         --
├─KDLeftLayer: 1-2                       --
|    └─Identity: 2-2                     --
├─Flatten: 1-3                           --
├─LogSoftmax: 1-4                        --
Total params: 0
Trainable params: 0
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─KDRightLayer: 1-1                      --
|    └─ReLU: 2-1                         --
├─KDLeftLayer: 1-2                       --
|    └─Identity: 2-2                     --
├─Flatten: 1-3                           --
├─LogSoftmax: 1-4                        --
Total params: 0
Trainable params: 0
Non-trainable params: 0

In [63]:
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
modelPoly = Polynomial3()

In [64]:
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(modelPoly.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = modelPoly(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {modelPoly.string()}')

99 2156.09521484375
199 1431.120849609375
299 951.0269775390625
399 633.064453125
499 422.4577331542969
599 282.9432678222656
699 190.5121612548828
799 129.2667694091797
899 88.6793212890625
999 61.778175354003906
1099 43.94529724121094
1199 32.121734619140625
1299 24.281173706054688
1399 19.080839157104492
1499 15.630996704101562
1599 13.341946601867676
1699 11.822671890258789
1799 10.814119338989258
1899 10.144418716430664
1999 9.699589729309082
Result: y = 0.009044463746249676 + 0.8290848731994629 x + -0.0015603243373334408 x^2 + -0.08939655870199203 x^3


In [65]:
class BasicModel(nn.Module):
    def __init__(self):
        super().__init__()
        
        # self.linear = nn.Linear(256, 2)
        self.p1 = Polynomial3()
        self.p2 = Polynomial3()

    def forward(self, x):
        x = self.p1(x)
        x = self.p2(x)

        
        return x

model = BasicModel()
summary(model)

Layer (type:depth-idx)                   Param #
├─Polynomial3: 1-1                       4
├─Polynomial3: 1-2                       4
Total params: 8
Trainable params: 8
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─Polynomial3: 1-1                       4
├─Polynomial3: 1-2                       4
Total params: 8
Trainable params: 8
Non-trainable params: 0

In [68]:
summary(model)

Layer (type:depth-idx)                   Param #
├─Polynomial3: 1-1                       4
├─Polynomial3: 1-2                       4
Total params: 8
Trainable params: 8
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─Polynomial3: 1-1                       4
├─Polynomial3: 1-2                       4
Total params: 8
Trainable params: 8
Non-trainable params: 0

In [67]:
_report_model_parameters(modelPoly)

4 4 16.0
