In [3]:
import numpy as np
import os
import pickle

def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR batch file.

    Args:
        filename: Path to a pickle file holding a dict with the byte-string
            keys ``b'data'`` (array-like with 3 * 32 * 32 values per row)
            and ``b'labels'`` (sequence of labels).

    Returns:
        Tuple ``(X, Y)``: ``X`` is a float array of shape ``(N, 3, 32, 32)``
        where ``N`` is the number of rows stored in the file, and ``Y`` is a
        NumPy array built from the labels.

    Raises:
        ValueError: If the stored data cannot be reshaped to
            ``(N, 3, 32, 32)``.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this on batch files obtained from a trusted source.
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='bytes')

    # The file is no longer needed once the dict is in memory, so the
    # remaining work happens outside the `with` block.
    X = np.asarray(datadict[b'data'])
    Y = np.array(datadict[b'labels'])

    # -1 infers the number of images from the data instead of assuming
    # exactly 10000 rows per file.
    X = X.reshape(-1, 3, 32, 32).astype("float")
    return X, Y

def load_cifar(data_dir='cifar-10-batches-py', num_training=49000, seed=None):
    """Load the five training batches and the test batch, then split off a
    validation set.

    Args:
        data_dir: Directory containing ``data_batch_1`` .. ``data_batch_5``
            and ``test_batch``. Defaults to ``'cifar-10-batches-py'``.
        num_training: Number of shuffled training samples kept for training;
            the remaining samples become the validation set.
        seed: Optional seed for the shuffle. When ``None`` (default) the
            global NumPy random state is used, so a prior
            ``np.random.seed(...)`` call still controls the split.

    Returns:
        Dict with keys ``'train_images'``, ``'train_labels'``,
        ``'validation_images'``, ``'validation_labels'``, ``'test_images'``
        and ``'test_labels'``.

    Raises:
        ValueError: If ``num_training`` is negative or larger than the number
            of loaded training samples.
    """
    image_batches, label_batches = [], []
    for batch in range(1, 6):
        X_batch, y_batch = load_CIFAR_batch(
            os.path.join(data_dir, 'data_batch_%d' % batch))
        image_batches.append(X_batch)
        label_batches.append(y_batch)
    X_all = np.concatenate(image_batches)
    y_all = np.concatenate(label_batches)
    X_test, y_test = load_CIFAR_batch(os.path.join(data_dir, 'test_batch'))

    total = X_all.shape[0]
    if not 0 <= num_training <= total:
        raise ValueError(
            "num_training must be between 0 and %d, got %r" % (total, num_training))

    # Use the module-level generator unless a seed is given, so the default
    # call behaves exactly as it did before the seed parameter existed.
    rng = np.random if seed is None else np.random.RandomState(seed)
    indices = rng.permutation(total)
    training_idx, validation_idx = indices[:num_training], indices[num_training:]

    return {
        'train_images': X_all[training_idx],
        'train_labels': y_all[training_idx],
        'validation_images': X_all[validation_idx],
        'validation_labels': y_all[validation_idx],
        'test_images': X_test,
        'test_labels': y_test
    }

# Load the CIFAR dataset and print the shape of every split.
cifar_data = load_cifar()
print("Data loaded successfully.")
for split_label, split_key in (
    ("Train images shape:", 'train_images'),
    ("Train labels shape:", 'train_labels'),
    ("Validation images shape:", 'validation_images'),
    ("Validation labels shape:", 'validation_labels'),
    ("Test images shape:", 'test_images'),
    ("Test labels shape:", 'test_labels'),
):
    print(split_label, cifar_data[split_key].shape)


Data loaded successfully.
Train images shape: (49000, 3, 32, 32)
Train labels shape: (49000,)
Validation images shape: (1000, 3, 32, 32)
Validation labels shape: (1000,)
Test images shape: (10000, 3, 32, 32)
Test labels shape: (10000,)


In [4]:
import numpy as np

def softmax(x, axis=0):
    """Numerically stable softmax along one axis.

    Args:
        x: Array of scores.
        axis: Axis along which the outputs sum to 1. Defaults to 0, which
            matches the previous fixed behavior. Pass ``axis=1`` (or ``-1``)
            to normalize each row of a ``(samples, classes)`` array.

    Returns:
        Array with the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    # Subtracting the per-slice maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

def cross_entropy(x):
    """Return the element-wise negative natural log of ``x``.

    A constant of 1e-15 is added to ``x`` before the logarithm so that an
    entry equal to 0 does not evaluate log(0).
    """
    epsilon = 1e-15  # small offset that keeps the logarithm finite at 0
    log_values = np.log(x + epsilon)
    return np.negative(log_values)

def regularized_cross_entropy(layers, lam, x):
    """Mean of ``cross_entropy(x)`` plus a squared-weight penalty per layer.

    Args:
        layers: Iterable of objects exposing ``get_weights()``; the returned
            value must support ``** 2``.
        lam: Coefficient multiplied with each layer's sum of squared weights.
        x: Values passed to ``cross_entropy``; the mean is taken over all
            of its entries.

    Returns:
        ``np.mean(cross_entropy(x))`` plus ``lam * sum(weights ** 2)`` summed
        over every layer.
    """
    total = np.mean(cross_entropy(x))
    # Penalties are added one layer at a time, in iteration order.
    for layer in layers:
        squared_norm = np.sum(layer.get_weights() ** 2)
        total = total + lam * squared_norm
    return total

def leakyReLU(x, alpha=0.01):
    """Leaky ReLU: keep entries where ``x > 0`` and scale the rest by ``alpha``."""
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)

def leakyReLU_derivative(x, alpha=0.01):
    """Leaky ReLU slope: 1 where ``x > 0`` and ``alpha`` everywhere else
    (including at ``x == 0``)."""
    is_positive = x > 0
    return np.where(is_positive, 1, alpha)

def lr_schedule(learning_rate, iteration):
    """Step-wise learning-rate schedule.

    Returns ``learning_rate`` unchanged while ``iteration <= 10000``,
    ``learning_rate * 0.1`` while ``iteration <= 30000``, and
    ``learning_rate * 0.01`` otherwise.
    """
    if iteration <= 10000:
        # First stage: hand back the base rate untouched.
        return learning_rate
    decay = 0.1 if iteration <= 30000 else 0.01
    return learning_rate * decay

# Example usage
class DummyLayer:
    """Minimal stand-in layer that exposes only ``get_weights``."""

    def get_weights(self):
        """Return a fresh example weight vector ``[1.0, 2.0, 3.0]``."""
        example_weights = (1.0, 2.0, 3.0)
        return np.array(example_weights)

# Smoke-test inputs: five dummy layers, a regularization coefficient and a
# three-element score vector.
layers = [DummyLayer() for _ in range(5)]
lam = 0.01
x = np.array([0.1, 0.5, 0.4])

# Softmax test
output = softmax(x)
print("Softmax output:", output)

# Cross-entropy test (applied element-wise to every softmax entry; no target
# labels are involved here)
loss = cross_entropy(output)
print("Cross entropy loss:", loss)

# Regularized cross-entropy test
reg_loss = regularized_cross_entropy(layers, lam, output)
print("Regularized cross entropy loss:", reg_loss)

# Leaky ReLU test
relu_output = leakyReLU(x)
print("Leaky ReLU output:", relu_output)

# Leaky ReLU derivative test
relu_derivative = leakyReLU_derivative(x)
print("Leaky ReLU derivative:", relu_derivative)

# Learning-rate schedule test: one iteration count from each stage plus the
# first boundary value (10000)
for i in [0, 10000, 20000, 40000]:
    lr = lr_schedule(0.1, i)
    print(f"Learning rate at iteration {i}:", lr)


Softmax output: [0.26030255 0.38832577 0.35137169]
Cross entropy loss: [1.34591068 0.94591068 1.04591068]
Regularized cross entropy loss: 1.8125773500084779
Leaky ReLU output: [0.1 0.5 0.4]
Leaky ReLU derivative: [1. 1. 1.]
Learning rate at iteration 0: 0.1
Learning rate at iteration 10000: 0.1
Learning rate at iteration 20000: 0.010000000000000002
Learning rate at iteration 40000: 0.001
