In [1]:
import numpy as np
from dataset.mnist import load_mnist

# import torch


def overlay_y_on_x(x, y):
    """
    Replace the first 10 pixels of data [x] with one-hot-encoded label [y]
    """
    x_ = x.copy()
    x_[:, :10] *= 0.0
    for i in range(x.shape[0]):
        x_[i, y[i]] = x[i, :].max()
    return x_


def relu(x):
    return np.maximum(0, x)

class AdamOptimizer:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, params, grads):
        if self.m is None:
            self.m = [0] * len(params)
            self.v = [0] * len(params)

        self.t += 1
        for i, grad in enumerate(grads):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * grad**2
            m_hat = self.m[i] / (1 - self.beta1**self.t)
            v_hat = self.v[i] / (1 - self.beta2**self.t)
            params[i] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)


# 定义神经网络类
class Layer:
    def __init__(self, input_size, output_size, thresh=4.0):
        # 初始化权重和偏置
        self.W = np.random.randn(input_size, output_size)
        self.B = np.zeros((1, output_size))
        # self.accu = []
        self.threshold = thresh


    def forward(self, X):
        X = X  / X.max() #np.linalg.norm(X, ord=2)
        Y = np.dot(X, self.W) + self.B
        return relu(Y)


    def compute_loss(self, Y_pos, Y_neg):
        # 计算正样本和负样本的损失
        g_pos = np.mean(Y_pos**2, axis=1)
        g_neg = np.mean(Y_neg**2, axis=1)
        loss = ((np.log(1 + np.exp(-g_pos + self.threshold)) + np.log(1 + np.exp(g_neg - self.threshold))) / 2).mean()
        return loss

    def compute_loss_gradients(self, Y_pos, Y_neg):
        # 损失函数梯度计算
        N_pos = Y_pos.shape[0]
        N_neg = Y_neg.shape[0]

        g_pos = np.mean(Y_pos**2, axis=1)
        g_neg = np.mean(Y_neg**2, axis=1)

        dloss_dgpos = -0.5 * np.exp(-g_pos + self.threshold) / (1 + np.exp(-g_pos + self.threshold)) 
        dloss_dgneg = 0.5 * np.exp(g_neg - self.threshold) / (1 + np.exp(g_neg - self.threshold)) 

        dloss_dY_pos = (2 / N_pos) * dloss_dgpos[:, np.newaxis] * Y_pos
        dloss_dY_neg = (2 / N_neg) * dloss_dgneg[:, np.newaxis] * Y_neg

        return dloss_dY_pos, dloss_dY_neg
    

    def backward(self, X_pos, X_neg, Y_pos, Y_neg):
        X_pos = X_pos / X_pos.max() #np.linalg.norm(X_pos, ord=2)
        X_neg = X_neg / X_neg.max() #np.linalg.norm(X_neg, ord=2)
        # 计算损失梯度
        dloss_dY_pos, dloss_dY_neg = self.compute_loss_gradients(Y_pos, Y_neg)
        
        # 计算ReLU激活后的梯度
        relu_grad_pos = (Y_pos > 0).astype(np.float32)
        relu_grad_neg = (Y_neg > 0).astype(np.float32)
        dloss_dY_pos *= relu_grad_pos
        dloss_dY_neg *= relu_grad_neg
        
        # 权重和偏置的梯度
        dW_pos = np.dot(X_pos.T, dloss_dY_pos) / X_pos.shape[0]
        dW_neg = np.dot(X_neg.T, dloss_dY_neg) / X_neg.shape[0]
        dB_pos = np.sum(dloss_dY_pos, axis=0, keepdims=True) / X_pos.shape[0]
        dB_neg = np.sum(dloss_dY_neg, axis=0, keepdims=True) / X_neg.shape[0]
        
        # 合并正样本和负样本的梯度
        dW = dW_pos + dW_neg
        dB = dB_pos + dB_neg
        return dW, dB


    def train(self, X_train_pos, X_train_neg, num_epochs=2000, lr = 6e-3, batch_size=10):
        optimizer = AdamOptimizer(learning_rate=lr)

        for epoch in range(num_epochs):
            dW_accum = 0
            dB_accum = 0
            for i in range(0, X_train_pos.shape[0], batch_size):
                # 获取 mini-batch
                batch_mask = np.random.choice(X_train_pos.shape[0], batch_size)
                # print(batch_mask)
                X_train_pos = X_train_pos[batch_mask]
                X_train_neg = X_train_neg[batch_mask]

                # 正样本并进行前向传播
                Y_pos = self.forward(X_train_pos)
        
                # 负样本并进行前向传播
                Y_neg = self.forward(X_train_neg)
                loss = self.compute_loss(Y_pos, Y_neg)

                dW, dB = self.backward(X_train_pos, X_train_neg, Y_pos, Y_neg)
                dW_accum += dW
                dB_accum += dB
            # 打印损失
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss:.12f}')
            # 使用优化器更新参数
            optimizer.update([self.W, self.B], [dW_accum, dB_accum])

            # 打印损失
            # if (epoch+1) % 10 == 0:
            # print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss:.12f}')

        return self.forward(X_train_pos), self.forward(X_train_neg)


class Net:
    def __init__(self, dims):
        super().__init__()
        # self.accu = []
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1])]

    def predict(self, x): 
        # 这个predict方法是理解ff方法的关键，它不是像普通的predict方法一样，输入一个样本，输出一个长度为num_cls的softmax预测向量
        # 而是一个样本反复输入这个网络num_cls次，把每种带标签的可能都计算一个goodness，也就是这个数据是好数据的可能性，找出最高goodness的就是预测类别
        goodness_per_label = []
        for label in range(10): # 对每一个标签进行预测
            label = label * np.ones(x.shape[0], dtype=np.int64)
            h = overlay_y_on_x(x, label) # h是输入x和标签label的叠加
            goodness = [] # goodness是一个列表，里面存放了每一层的结果向量的均方
            for layer in self.layers: # 对每一层进行前传
                h = layer.forward(h) # h是每一层的输出
                goodness += [(h**2).mean(1)] # goodness是每一层的结果向量的均方。h.pow(2)是h的每一个元素的平方，mean(1)是对每一行求均值
            # goodness_per_label += [sum(goodness).unsqueeze(1)] # goodness_per_label是每一层的结果向量的均方的和
            goodness_per_label.append((h**2).mean(1))
        # goodness_per_label = torch.cat(goodness_per_label, 1) # goodness_per_label是每一层的结果向量的均方的和的列表
        return np.array(goodness_per_label).argmax(0)#goodness_per_label.argmax(1) # 返回的是goodness_per_label中每一行最大值的索引，也就是说，返回的是每一行最大值的列索引

    def eval(self, y_pred, y_true):
        acc = np.mean(y_pred == y_true)
        # self.accu.append(acc)
        print(y_pred, y_true)
        print('\n', y_pred[:30], '\n', y_true[:30])
        print(f'Accuracy: {acc:.12f}')
        return acc
    
    def train(self, x, y, li_epochs, li_lrs, li_bzs): # 这个train方法是对整个网络进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        mask = np.random.choice(x.shape[0], 100)
        x = x[mask]
        y = y[mask]

        x_pos = overlay_y_on_x(x, y)


        # rnd = torch.randperm(x.shape[0]) # 生成一个从0到n-1的随机整数序列。
        y_rnd = y.copy()
        for i, y_i in enumerate(y):
            li = list(range(10))
            li.remove(y_i)
            j = np.random.choice(li)
            y_rnd[i] = j

        x_neg = overlay_y_on_x(x, y_rnd)


        h_pos, h_neg = x_pos, x_neg # h_pos和h_neg是正样本和负样本的输入
        for i, layer in enumerate(self.layers): # 对每一层进行训练
            print('training layer', i, '...') # 这里的i是层数
            h_pos, h_neg = layer.train(h_pos, h_neg, num_epochs=li_epochs[i], lr=li_lrs[i], batch_size=li_bzs[i]) # 对每一层进行训练，得到了正样本和负样本的结果向量，这个结果向量是该层的输出，也是下一层的输入
            # 也就是说，这个训练的过程中，正样本在前传过程中得到的每一层输出都被认为是正的，负样本在前传过程中得到的每一层输出都被认为是负的，也就是说，出身决定一切

        y_pred =  self.predict(x_te)
        self.eval(y_pred, y_te)


np.random.seed(0)


(x, y), (x_te, y_te) = load_mnist(normalize=True, one_hot_label=False)
def trasform(x):
    return (x - 0.1307) / 0.3081
x, x_te = trasform(x), trasform(x_te) 

# x_pos = overlay_y_on_x(x, y)


# # rnd = torch.randperm(x.shape[0]) # 生成一个从0到n-1的随机整数序列。
# y_rnd = y.copy()
# for i, y_i in enumerate(y):
#     li = list(range(10))
#     li.remove(y_i)
#     j = np.random.choice(li)
#     y_rnd[i] = j

# x_neg = overlay_y_on_x(x, y_rnd)

net = Net([784, 500, 500])
net.train(x, y, li_epochs=[10000, 1000], li_lrs=[3e-2, 5e-2], li_bzs=[128, 128])


training layer 0 ...
Epoch 1/10000, Loss: 23.744129189085
Epoch 2/10000, Loss: 10.234234593180
Epoch 3/10000, Loss: 3.581423406304
Epoch 4/10000, Loss: 1.056314821012
Epoch 5/10000, Loss: 1.307851072025
Epoch 6/10000, Loss: 1.679179389790
Epoch 7/10000, Loss: 1.865923734402
Epoch 8/10000, Loss: 1.939402489907
Epoch 9/10000, Loss: 1.952444806520
Epoch 10/10000, Loss: 1.955983423770
Epoch 11/10000, Loss: 1.936744149649
Epoch 12/10000, Loss: 1.890152517847
Epoch 13/10000, Loss: 1.762753066144
Epoch 14/10000, Loss: 1.692218033985
Epoch 15/10000, Loss: 1.607383636604
Epoch 16/10000, Loss: 1.543542982471
Epoch 17/10000, Loss: 1.598063597501
Epoch 18/10000, Loss: 1.589234798513
Epoch 19/10000, Loss: 1.480185132120
Epoch 20/10000, Loss: 1.337844739927
Epoch 21/10000, Loss: 1.243264441692
Epoch 22/10000, Loss: 1.285377193986
Epoch 23/10000, Loss: 1.337781471220
Epoch 24/10000, Loss: 1.348445034758
Epoch 25/10000, Loss: 1.288928324310
Epoch 26/10000, Loss: 1.236513992449
Epoch 27/10000, Loss: 1.

KeyboardInterrupt: 

In [27]:
net.train(x, y, li_epochs=[100, 10000], li_lrs=[1e-4, 0.004e-2], li_bzs=[128, 128])

training layer 0 ...
Epoch 1/100, Loss: 1.868860150434
Epoch 2/100, Loss: 1.981393527618
Epoch 3/100, Loss: 1.950303753046
Epoch 4/100, Loss: 2.074326320523
Epoch 5/100, Loss: 1.855226768768
Epoch 6/100, Loss: 1.816353685436
Epoch 7/100, Loss: 1.754365993893
Epoch 8/100, Loss: 1.770587236228
Epoch 9/100, Loss: 1.744645660713
Epoch 10/100, Loss: 1.459918753410
Epoch 11/100, Loss: 1.339904679145
Epoch 12/100, Loss: 1.275031856463
Epoch 13/100, Loss: 1.236855235130
Epoch 14/100, Loss: 1.179940928580
Epoch 15/100, Loss: 1.209513739211
Epoch 16/100, Loss: 1.250500363370
Epoch 17/100, Loss: 1.130472070414
Epoch 18/100, Loss: 1.089623565589
Epoch 19/100, Loss: 1.092661879946
Epoch 20/100, Loss: 1.057235169567
Epoch 21/100, Loss: 0.942183447674
Epoch 22/100, Loss: 0.885010784018
Epoch 23/100, Loss: 0.856627823501
Epoch 24/100, Loss: 0.890555094042
Epoch 25/100, Loss: 0.855144308077
Epoch 26/100, Loss: 0.835646506224
Epoch 27/100, Loss: 0.798044309450
Epoch 28/100, Loss: 0.759572918764
Epoch 29

In [21]:
from dataset.mnist import load_mnist
load_mnist(normalize=True, one_hot_label=False)

((array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)),
 (array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)))

In [74]:
import numpy as np
from dataset.mnist import load_mnist

import torch


def overlay_y_on_x(x, y):
    """
    Replace the first 10 pixels of data [x] with one-hot-encoded label [y]
    """
    x_ = x.copy()
    x_[:, :10] *= 0.0
    for i in range(x.shape[0]):
        x_[i, y[i]] = x[i, :].max()
    return x_


def relu(x):
    return np.maximum(0, x)

class AdamOptimizer:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, params, grads):
        if self.m is None:
            self.m = [0] * len(params)
            self.v = [0] * len(params)

        self.t += 1
        for i, grad in enumerate(grads):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * grad**2
            m_hat = self.m[i] / (1 - self.beta1**self.t)
            v_hat = self.v[i] / (1 - self.beta2**self.t)
            params[i] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)


# 定义神经网络类
class Layer:
    def __init__(self, input_size, output_size, thresh=4.0):
        # 初始化权重和偏置
        self.W = np.random.randn(input_size, output_size)
        self.B = np.zeros((1, output_size))
        # self.accu = []
        self.threshold = thresh


    def forward(self, X):
        X = X  / X.max() #np.linalg.norm(X, ord=2)
        Y = np.dot(X, self.W) + self.B
        return relu(Y)


    def compute_loss(self, Y_pos, Y_neg):
        # 计算正样本和负样本的损失
        g_pos = np.mean(Y_pos**2, axis=1)
        g_neg = np.mean(Y_neg**2, axis=1)
        loss = ((np.log(1 + np.exp(-g_pos + self.threshold)) + np.log(1 + np.exp(g_neg - self.threshold))) / 2).mean()
        return loss

    def compute_loss_gradients(self, Y_pos, Y_neg):
        # 损失函数梯度计算
        N_pos = Y_pos.shape[0]
        N_neg = Y_neg.shape[0]

        g_pos = np.mean(Y_pos**2, axis=1)
        g_neg = np.mean(Y_neg**2, axis=1)

        dloss_dgpos = -0.5 * np.exp(-g_pos + self.threshold) / (1 + np.exp(-g_pos + self.threshold)) 
        dloss_dgneg = 0.5 * np.exp(g_neg - self.threshold) / (1 + np.exp(g_neg - self.threshold)) 

        dloss_dY_pos = (2 / N_pos) * dloss_dgpos[:, np.newaxis] * Y_pos
        dloss_dY_neg = (2 / N_neg) * dloss_dgneg[:, np.newaxis] * Y_neg

        return dloss_dY_pos, dloss_dY_neg
    

    def backward(self, X_pos, X_neg, Y_pos, Y_neg):
        X_pos = X_pos / X_pos.max() #np.linalg.norm(X_pos, ord=2)
        X_neg = X_neg / X_neg.max() #np.linalg.norm(X_neg, ord=2)
        # 计算损失梯度
        dloss_dY_pos, dloss_dY_neg = self.compute_loss_gradients(Y_pos, Y_neg)
        
        # 计算ReLU激活后的梯度
        relu_grad_pos = (Y_pos > 0).astype(np.float32)
        relu_grad_neg = (Y_neg > 0).astype(np.float32)
        dloss_dY_pos *= relu_grad_pos
        dloss_dY_neg *= relu_grad_neg
        
        # 权重和偏置的梯度
        dW_pos = np.dot(X_pos.T, dloss_dY_pos) / X_pos.shape[0]
        dW_neg = np.dot(X_neg.T, dloss_dY_neg) / X_neg.shape[0]
        dB_pos = np.sum(dloss_dY_pos, axis=0, keepdims=True) / X_pos.shape[0]
        dB_neg = np.sum(dloss_dY_neg, axis=0, keepdims=True) / X_neg.shape[0]
        
        # 合并正样本和负样本的梯度
        dW = dW_pos + dW_neg
        dB = dB_pos + dB_neg
        return dW, dB


    def train(self, X_train_pos, X_train_neg, num_epochs=2000, lr = 6e-3, batch_size=10):
        optimizer = AdamOptimizer(learning_rate=lr)

        for epoch in range(num_epochs):
            dW_accum = 0
            dB_accum = 0
            for i in range(0, X_train_pos.shape[0], batch_size):
                # 获取 mini-batch
                batch_mask = np.random.choice(X_train_pos.shape[0], batch_size)
                # print(batch_mask)
                X_train_pos = X_train_pos[batch_mask]
                X_train_neg = X_train_neg[batch_mask]

                # 正样本并进行前向传播
                Y_pos = self.forward(X_train_pos)
        
                # 负样本并进行前向传播
                Y_neg = self.forward(X_train_neg)
                loss = self.compute_loss(Y_pos, Y_neg)

                dW, dB = self.backward(X_train_pos, X_train_neg, Y_pos, Y_neg)
                dW_accum += dW
                dB_accum += dB
            # 打印损失
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss:.12f}')
            # 使用优化器更新参数
            optimizer.update([self.W, self.B], [dW_accum, dB_accum])

            # 打印损失
            # if (epoch+1) % 10 == 0:
            # print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss:.12f}')

        return self.forward(X_train_pos), self.forward(X_train_neg)


class Net(torch.nn.Module):
    def __init__(self, dims):
        super().__init__()
        # self.accu = []
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1])]

    def predict(self, x): 
        # 这个predict方法是理解ff方法的关键，它不是像普通的predict方法一样，输入一个样本，输出一个长度为num_cls的softmax预测向量
        # 而是一个样本反复输入这个网络num_cls次，把每种带标签的可能都计算一个goodness，也就是这个数据是好数据的可能性，找出最高goodness的就是预测类别
        goodness_per_label = []
        for label in range(10): # 对每一个标签进行预测
            label = label * np.ones(x.shape[0], dtype=np.int64)
            h = overlay_y_on_x(x, label) # h是输入x和标签label的叠加
            goodness = [] # goodness是一个列表，里面存放了每一层的结果向量的均方
            for layer in self.layers: # 对每一层进行前传
                h = layer.forward(h) # h是每一层的输出
                goodness += [(h**2).mean(1)] # goodness是每一层的结果向量的均方。h.pow(2)是h的每一个元素的平方，mean(1)是对每一行求均值
            # goodness_per_label += [sum(goodness).unsqueeze(1)] # goodness_per_label是每一层的结果向量的均方的和
            goodness_per_label.append((h**2).mean(1))
        # goodness_per_label = torch.cat(goodness_per_label, 1) # goodness_per_label是每一层的结果向量的均方的和的列表
        return np.array(goodness_per_label).argmax(0)#goodness_per_label.argmax(1) # 返回的是goodness_per_label中每一行最大值的索引，也就是说，返回的是每一行最大值的列索引

    def eval(self, y_pred, y_true):
        acc = np.mean(y_pred == y_true)
        # self.accu.append(acc)
        print(y_pred, y_true)
        print('\n', y_pred[:30], '\n', y_true[:30])
        print(f'Accuracy: {acc:.12f}')
        return acc
    
    def train(self, x_pos, x_neg, li_epochs, li_lrs, li_bzs): # 这个train方法是对整个网络进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        h_pos, h_neg = x_pos, x_neg # h_pos和h_neg是正样本和负样本的输入
        for i, layer in enumerate(self.layers): # 对每一层进行训练
            print('training layer', i, '...') # 这里的i是层数
            h_pos, h_neg = layer.train(h_pos, h_neg, num_epochs=li_epochs[i], lr=li_lrs[i], batch_size=li_bzs[i]) # 对每一层进行训练，得到了正样本和负样本的结果向量，这个结果向量是该层的输出，也是下一层的输入
            # 也就是说，这个训练的过程中，正样本在前传过程中得到的每一层输出都被认为是正的，负样本在前传过程中得到的每一层输出都被认为是负的，也就是说，出身决定一切

        y_pred =  self.predict(x_te)
        self.eval(y_pred, y_te)


np.random.seed(0)


(x, y), (x_te, y_te) = load_mnist(normalize=True, one_hot_label=False)
def trasform(x):
    return (x - 0.1307) / 0.3081
x, x_te = trasform(x), trasform(x_te) 

x_pos = overlay_y_on_x(x, y)


# rnd = torch.randperm(x.shape[0]) # 生成一个从0到n-1的随机整数序列。
y_rnd = y.copy()
for i, y_i in enumerate(y):
    li = list(range(10))
    li.remove(y_i)
    j = np.random.choice(li)
    y_rnd[i] = j

x_neg = overlay_y_on_x(x, y_rnd)

net = Net([784, 500, 500])
net.train(x_pos, x_neg, li_epochs=[2000, 4000], li_lrs=[5e-3, 3e-3], li_bzs=[128, 128])


training layer 0 ...
Epoch 1/2000, Loss: 29.820793667614
Epoch 2/2000, Loss: 25.092604833371
Epoch 3/2000, Loss: 22.221727725057
Epoch 4/2000, Loss: 20.157951816450
Epoch 5/2000, Loss: 18.563774244033
Epoch 6/2000, Loss: 17.280762471863
Epoch 7/2000, Loss: 16.220863874155
Epoch 8/2000, Loss: 15.328976078235
Epoch 9/2000, Loss: 14.568105149774
Epoch 10/2000, Loss: 13.912080693109
Epoch 11/2000, Loss: 13.341347226886
Epoch 12/2000, Loss: 12.840982340957
Epoch 13/2000, Loss: 12.399620276541
Epoch 14/2000, Loss: 12.008227184745
Epoch 15/2000, Loss: 11.659582985234
Epoch 16/2000, Loss: 11.347786078448
Epoch 17/2000, Loss: 11.067949096774
Epoch 18/2000, Loss: 10.815961938112
Epoch 19/2000, Loss: 10.588366780677
Epoch 20/2000, Loss: 10.382226934672
Epoch 21/2000, Loss: 10.195011273731
Epoch 22/2000, Loss: 10.024541526878
Epoch 23/2000, Loss: 9.868930366638
Epoch 24/2000, Loss: 9.726549546837
Epoch 25/2000, Loss: 9.595984088193
Epoch 26/2000, Loss: 9.475988291858
Epoch 27/2000, Loss: 9.3654660

In [97]:
print(y[:30])
y_rnd = y.copy()
for i, y_i in enumerate(y):
    li = list(range(10))
    li.remove(y_i)
    j = np.random.choice(li)
    y_rnd[i] = j
print((y == y_rnd).sum())

[5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9 4 0 9 1 1 2 4 3 2 7]
0


In [92]:
y_i

5

In [96]:
li = list(range(10))
li.remove(5)
li


[0, 1, 2, 3, 4, 6, 7, 8, 9]

In [76]:
np.random.choice(list(range(10)).remove(3))

ValueError: a must be 1-dimensional or an integer

In [77]:
print(list(range(10)).remove(3))

None


In [79]:
a = list(range(10)).remove(3)

In [84]:
print(list(range(10)).remove(3))


None


In [86]:
# 生成0～9的列表
a = list(range(10))

In [87]:
a.remove(3)

In [88]:
a

[0, 1, 2, 4, 5, 6, 7, 8, 9]

In [64]:
# 写一个函数从[1,2,3]中删去1
def remove_one(lst):
    lst.remove(1)
    return lst


SyntaxError: invalid syntax (745752343.py, line 1)

In [67]:
print(list(range(10)).remove(1))

None


In [69]:
a = list(range(10))


In [71]:
a.remove(3)

In [72]:
a

[0, 2, 4, 5, 6, 7, 8, 9]

In [62]:
(y == y[rnd]).sum()

5928

In [60]:
rnd

tensor([22022, 35561, 51289,  ..., 42092, 48651, 54661])

In [40]:
net.train(x_pos, x_neg, li_epochs=[0, 6000], li_lrs=[1e-3, 3e-3], li_bzs=[100, 100])

training layer 0 ...
training layer 1 ...
Epoch 1/6000, Loss: 1.355259192981
Epoch 2/6000, Loss: 1.393258281226
Epoch 3/6000, Loss: 1.127134552624
Epoch 4/6000, Loss: 1.167433679858
Epoch 5/6000, Loss: 1.082523969534
Epoch 6/6000, Loss: 1.010809251338
Epoch 7/6000, Loss: 1.112767309203
Epoch 8/6000, Loss: 1.278783615847
Epoch 9/6000, Loss: 1.218445670482
Epoch 10/6000, Loss: 1.234114443443
Epoch 11/6000, Loss: 1.264480367400
Epoch 12/6000, Loss: 1.397398927983
Epoch 13/6000, Loss: 1.334384126393
Epoch 14/6000, Loss: 1.261944157363
Epoch 15/6000, Loss: 1.224992686980
Epoch 16/6000, Loss: 1.521551342540
Epoch 17/6000, Loss: 1.577472675085
Epoch 18/6000, Loss: 2.214028877733
Epoch 19/6000, Loss: 1.863354157870
Epoch 20/6000, Loss: 1.368112766760
Epoch 21/6000, Loss: 1.209215873094
Epoch 22/6000, Loss: 1.707415383189
Epoch 23/6000, Loss: 1.228701890338
Epoch 24/6000, Loss: 2.187578076128
Epoch 25/6000, Loss: 2.577156186383
Epoch 26/6000, Loss: 2.629832663170
Epoch 27/6000, Loss: 2.13254477

In [22]:
net.train(x_pos, x_neg, li_epochs=[0, 0], li_lrs=[1e-3, 1e-3], li_bzs=[100, 100])

training layer 0 ...
training layer 1 ...
[9 9 2 ... 9 9 2] [7 2 1 ... 4 5 6]

 [9 9 2 9 9 2 9 9 9 9 9 9 9 9 2 9 9 9 9 9 9 9 9 9 9 9 9 9 9 2] 
 [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1]
Accuracy: 0.104200000000
