In [1]:
import numpy as np

In [2]:
# Base class for every layer in the network
class Layer:
    """Identity layer that defines the interface shared by all layers.

    Subclasses override ``forward`` to compute their output and ``backward``
    to propagate the loss gradient (and update any parameters they own).
    """

    def __init__(self):
        """The base layer has no parameters to initialise."""
        pass

    def forward(self, input):
        """Return ``input`` unchanged."""
        return input

    def backward(self, input, grad_output):
        """Return the gradient of the loss with respect to ``input``.

        ``forward`` is the identity, so the incoming gradient is passed
        through untouched. Returning it (rather than ``None``) keeps the
        backward chain intact when a bare ``Layer`` sits inside a network.

        Args:
            input: The value that was given to ``forward``.
            grad_output: Gradient of the loss with respect to this layer's
                output.

        Returns:
            ``grad_output`` itself.
        """
        return grad_output

In [3]:
# ReLU activation layer
class ReLU(Layer):
    """Element-wise rectified linear unit: ``max(0, x)``."""

    def __init__(self):
        """ReLU has no parameters to initialise."""

    def forward(self, input):
        """Return ``input`` with every negative entry replaced by 0."""
        return np.maximum(0, input)

    def backward(self, input, grad_output):
        """Propagate ``grad_output`` through the ReLU.

        The derivative of ReLU is 1 where ``input > 0`` and 0 elsewhere, so
        the upstream gradient is kept only at strictly positive inputs.
        """
        positive_mask = input > 0
        return grad_output * positive_mask

In [4]:
class Sigmoid(Layer):
    """Element-wise logistic sigmoid activation: ``1 / (1 + exp(-x))``."""

    def __init__(self):
        """Sigmoid has no parameters to initialise."""
        pass

    def _sigmoid(self, x):
        """Evaluate the logistic function without overflowing ``exp``.

        ``1 / (1 + exp(-x))`` is rewritten as ``exp(-log(1 + exp(-x)))``.
        ``np.logaddexp(0, -x)`` computes ``log(exp(0) + exp(-x))`` without
        forming ``exp(-x)`` directly, so large negative ``x`` no longer
        triggers an overflow warning.
        """
        return np.exp(-np.logaddexp(0.0, np.negative(x)))

    def forward(self, input):
        """Return the sigmoid of ``input``, element-wise."""
        return self._sigmoid(input)

    def backward(self, input, grad_output):
        """Propagate ``grad_output`` through the sigmoid.

        Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
        The activation is evaluated once and reused for both factors.
        """
        activated = self._sigmoid(input)
        sigmoid_grad = activated * (1 - activated)
        return grad_output * sigmoid_grad

In [5]:
class Tanh(Layer):
    """Element-wise hyperbolic tangent activation."""

    def __init__(self):
        """Tanh has no parameters to initialise."""

    def _tanh(self, x):
        """Thin wrapper around ``np.tanh``."""
        return np.tanh(x)

    def forward(self, input):
        """Return ``tanh(input)``, element-wise."""
        return self._tanh(input)

    def backward(self, input, grad_output):
        """Propagate ``grad_output`` through the tanh.

        Uses the identity ``tanh'(x) = 1 - tanh(x)^2``.
        """
        activated = self._tanh(input)
        local_grad = 1 - activated ** 2
        return grad_output * local_grad

In [6]:
class Dense(Layer):
    """Fully connected layer computing ``input . W + b``.

    The layer owns its parameters and applies a plain gradient-descent step
    to them inside ``backward``.
    """

    def __init__(self, input_units, output_units, learning_rate=0.01):
        """Create the layer with small random weights and zero biases.

        Args:
            input_units: Number of input features (rows of the weight matrix).
            output_units: Number of output features (columns of the weight
                matrix, and length of the bias vector).
            learning_rate: Step size used for the parameter update.
        """
        self.learning_rate = learning_rate
        # Standard-normal draws scaled by 0.01 keep the initial weights small.
        self.weights = np.random.randn(input_units, output_units) * 0.01
        self.biases = np.zeros(output_units)

    def forward(self, input):
        """Return the affine transform ``input . weights + biases``."""
        return np.dot(input, self.weights) + self.biases

    def backward(self, input, grad_output):
        """Back-propagate through the layer and update its parameters.

        Args:
            input: The array that was given to ``forward``; its first axis
                length is used as the divisor for the weight gradient.
            grad_output: Gradient of the loss with respect to this layer's
                output.

        Returns:
            Gradient of the loss with respect to ``input``.
        """
        # Must be computed with the weights as they were during forward,
        # i.e. before the update below replaces them.
        grad_input = np.dot(grad_output, self.weights.T)

        # Parameter gradients are averaged over the first axis.
        n_rows = input.shape[0]
        weight_grad = np.dot(input.T, grad_output) / n_rows
        bias_grad = np.mean(grad_output, axis=0)

        # Rebind (not in-place) so earlier references to the old arrays
        # are left untouched.
        self.weights = self.weights - self.learning_rate * weight_grad
        self.biases = self.biases - self.learning_rate * bias_grad
        return grad_input

In [7]:
# 1 -> 50 -> 1 network with a tanh non-linearity between the two dense layers.
network = [
    Dense(1, 50),
    Tanh(),
    Dense(50, 1),
]

In [8]:
def forward(network, X):
    """Run ``X`` through every layer and collect each layer's output.

    Args:
        network: Sequence of layer objects exposing ``forward(input)``.
        X: Input fed to the first layer.

    Returns:
        A list with one entry per layer; entry ``i`` is the output of
        ``network[i]``, so the last entry is the network's final output.
    """
    outputs = []
    current = X
    for layer in network:
        current = layer.forward(current)
        outputs.append(current)

    assert len(outputs) == len(network)
    return outputs

In [9]:
def predict(network, X):
    """Return the output of the last layer of ``network`` for input ``X``."""
    return forward(network, X)[-1]

In [10]:
def train(network, X, y):
    """Run one forward/backward pass over ``(X, y)`` and update the layers.

    Args:
        network: Sequence of layers exposing ``forward`` and ``backward``.
        X: Input batch fed to the first layer.
        y: Target values, compared against the network's final output.

    Returns:
        The summed squared error between the network output and ``y``,
        measured before this step's parameter updates.
    """
    layer_activations = forward(network, X)
    # layer_inputs[i] is what network[i] received during the forward pass.
    layer_inputs = [X] + layer_activations
    logits = layer_activations[-1]

    # The loss is user-defined; here it is the sum of squared errors,
    # whose gradient with respect to the output is 2 * (output - y).
    residual = logits - y
    loss = np.square(residual).sum()
    loss_grad = 2.0 * residual

    # Walk the layers from last to first. Each backward call returns the
    # gradient with respect to that layer's input and may also update the
    # layer's own parameters.
    for layer, layer_input in zip(reversed(network), reversed(layer_inputs[:-1])):
        loss_grad = layer.backward(layer_input, loss_grad)

    # loss is already a scalar, so the mean leaves its value unchanged.
    return np.mean(loss)

In [11]:
# Training inputs: 140 evenly spaced points on [-pi, 0.7*pi], as a column vector.
x_train = np.linspace(-np.pi, 0.7 * np.pi, 140).reshape(-1, 1)
y_train = np.sin(x_train)

# Test inputs: 60 evenly spaced points on [0.7*pi, pi], as a column vector.
x_test = np.linspace(np.pi * 0.7, np.pi, 60).reshape(-1, 1)
y_test = np.sin(x_test)

In [12]:
# Sweep the hidden-layer width h over 3..99. For each width, train a fresh
# 1 -> h -> 1 tanh network for 100000 full-batch steps and record the mean
# training loss over its final 1000 steps.
# NOTE(review): no random seed is set in the visible cells, so the printed
# values will presumably differ from run to run.
losses = []
for h in range(3, 100):
    network = [Dense(1, h), Tanh(), Dense(h, 1)]
    ll = []  # training loss after each step, for this width only
    for e in range(100000):
        loss = train(network, x_train, y_train)
        ll.append(loss)
    tail_mean = np.mean(ll[-1000:])
    print(tail_mean)
    losses.append(tail_mean)

0.13300017792605673
0.060643261307100134
0.14289152959980828
0.02596492110720839
0.13910498756551443
0.028394363443679346
0.07321920044557413
0.09922604785750398
0.025939559408027286
0.11062305407549483
0.016619036653404154
0.009675160658931951
0.020503951095894146
0.0146014980133216
0.01293759401080886
0.01910760231614981
0.02010049162619612
0.08633884139233361
0.01339240633122121
0.01623087870244985
0.016217043976487374
0.02064293118101773
0.019814075304301775
0.014204722955660798
0.010764661537895707
0.006062868917051043
0.01250198044052161
0.018967087732555378
0.08612818216529654
0.01340410023779842
0.021393983049641404
0.013007108081950781
0.10993709598786418
0.106624819450012
0.012573410770463983
0.021909814398699167
0.012935389160872649
0.012222021671755609
0.01610958747297214
0.020110961131035096
0.012807117672627879
0.010448241932397262
0.019764131837171562
0.009014299290214104
0.012957589899780068
0.01790746309293229
0.02015180937832748
0.012009783372657587
0.0115256096249159