In [None]:
import torch
import numpy as np
import numpy.random as rd

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.proj3d import proj_transform
from mpl_toolkits.mplot3d.art3d import Line3DCollection
from matplotlib.text import Annotation
from matplotlib import animation, rc
import matplotlib.pyplot as plt
plt.rcParams['animation.convert_path'] = 'C:/Program Files/ImageMagick-7.0.8-Q16/magick.exe'

In [None]:
class dataset:
    def __init__(self, n, batch_size = 1):
        self.batch_size = batch_size
        self.x1 = torch.randn(n,1)
        self.x2 = torch.randn(n,1)
        self.y = 0.6*self.x1**2 - 0.3*self.x2**3 - 0.3*self.x1*self.x2 + torch.randn(n,1)
        self.data = torch.cat((self.x1,self.x2,self.y),1).cuda()
        self.data_iter = rd.choice(range(n),size=n,replace=False)
    
    def __len__(self):
        return self.data.shape[0] // self.batch_size
    
    def __getitem__(self,ind):
        if ind < len(self):
            return self.data[self.data_iter[self.batch_size*ind:self.batch_size*(ind+1)],:]
    
    def mix(self):
        self.data_iter = rd.choice(range(n),size=n,replace=False)
        
    
data = dataset(5000, 100)

In [None]:
fig = plt.figure(dpi=40,figsize=(20,15))
ax = fig.gca(projection='3d')
ax.scatter(data.x1,data.x2,data.y, marker='o', s = 20, alpha=0.3)
ax.view_init(30, 100)
plt.tight_layout()
plt.show()

In [6]:
class net:
    def __init__(self):
        self.cal_grad = True
        self.grads = {}
        if self.modules:
            self.grads = {}
            for layers in self.modules.keys():
                self.grads[layers] = {}
                for params in self.modules[layers].keys():
                    self.grads[layers][params] = torch.zeros(self.modules[layers][params].shape)
        self.act = self.ReLU
    
    def ReLU(self, x):
        return torch.max(x,1)
    
    def Sigmoid(self, x):
        return torch.exp(x)/(torch.exp(x)+1)
    
    def deriv(self, x, fun, eps = 1e-4):
        return (fun(x + eps) - fun(x - eps))/(2*eps)
    
    def train(self):
        self.cal_grad = True
    
    def test(self):
        self.cal_grad = False
        
    def forward(self,x,eps=1e-4):
        x = torch.matmul(x[:,0:2],self.input)
        for layer in self.modules:
            x = self.act(torch.matmul(x,self.modules[layer]['w']) + torch.cat([self.modules[layer]['b']]*x.shape[0],0))
            if self.cal_grad:
                self.grads[layer]['w'] -= self.lr * (self.act(torch.matmul(x,self.modules[layer]['w'] + eps) + self.modules[layer]['b']) + self.act(torch.matmul(x,self.modules[layer]['w'] - eps) + self.modules[layer]['b']))/(2*eps)
                self.grads[layer]['b'] -= self.lr * (self.act(torch.matmul(x,self.modules[layer]['w']) + self.modules[layer]['b'] + eps) + self.act(torch.matmul(x,self.modules[layer]['w']) + self.modules[layer]['b'] - eps))/(2*eps)
        return torch.matmul(x,self.output)
    
    def loss(self,x,y):
        return torch.mean((x-y)**2)
        
class mlp(net):
    def __init__(self,depth,width,lr = 1e-4, cuda=True):
        if cuda:
            self.input = torch.randn(2,width).cuda()
            self.modules = {}
            for layers in range(depth):
                self.modules["linear" + "%d" %(layers+1)] = {
                    "w" : torch.randn(width,width).cuda()*((width/2)**(1/2)),
                    "b" : torch.zeros(1,width).cuda()
                }
            self.output = torch.randn(1,width).cuda()
            self.lr = lr
            super().__init__()
        else:
            self.input = torch.randn(2,width)
            self.modules = {}
            for layers in range(depth):
                self.modules["linear" + "%d" %(layers+1)] = {
                    "w" : torch.randn(width,width)*((width/2)**(1/2)),
                    "b" : torch.zeros(1,width)
                }
            self.output = torch.randn(1,width)
            self.lr = lr
            super().__init__()
        

In [7]:
model = mlp(3,10)

In [8]:
for ind in range(len(data)):
    model.forward(data[ind])

TypeError: matmul(): argument 'input' (position 1) must be Tensor, not tuple

In [129]:
model.grads['linear1']['w']

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])