###  MicroGrad demo

In [1]:
# !pip install numpy
import random
import numpy as np
# !pip install matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from micrograd.engine import Tensor
from micrograd.nn import Module
from micrograd.nn import Linear

In [3]:
np.random.seed(1337)
random.seed(1337)

In [4]:
class MyModel(Module):
    def __init__(self):
        super().__init__()
        
        # 定义子模块
        # 因为 Linear 继承自 Module，所以 self.fc1 会被自动添加到 _modules 字典中
        self.fc1 = Linear(20, 64)
        self.fc2 = Linear(64, 10)
        
        # 定义一个普通属性，它不会被添加到 _parameters 或 _modules 中
        self.dropout_rate = 0.5

    def forward(self, x: Tensor) -> Tensor:
        # 调用子模块，就像调用函数一样
        x = self.fc1(x)
        # 假设你的 Tensor 类实现了 relu 方法
        # x = x.relu()
        x = self.fc2(x)
        return x

In [5]:
# 1. 创建一个模型实例
model = MyModel()

# 2. 打印模型结构，看看 __repr__ 方法是否生效
print("--- 模型结构 ---")
print(model)
# 输出会类似:
# MyModel(fc1=Linear(weight=Tensor(shape=(20, 64)), bias=Tensor(shape=(64,))), fc2=Linear(weight=Tensor(shape=(64, 10)), bias=Tensor(shape=(10,))))

# 3. 测试 parameters() 方法是否能正确收集所有参数
print("\n--- 模型所有参数 ---")
for i, param in enumerate(model.parameters()):
    print(f"参数 {i+1}: {param.data.shape}")
# 输出会是:
# 参数 1: (20, 64)  (来自 fc1.weight)
# 参数 2: (64,)     (来自 fc1.bias)
# 参数 3: (64, 10)  (来自 fc2.weight)
# 参数 4: (10,)     (来自 fc2.bias)

# 4. 测试前向传播
print("\n--- 测试前向传播 ---")
dummy_input = Tensor(np.random.randn(32, 20)) # 32个样本，每个样本20个特征
output = model(dummy_input)
print(f"输入形状: {dummy_input.data.shape}")
print(f"输出形状: {output.data.shape}") # 应该是 (32, 10)

--- 模型结构 ---
MyModel(fc1=ReLULinear(20), fc2=ReLULinear(64))

--- 模型所有参数 ---
参数 1: (20, 64)
参数 2: (1, 64)
参数 3: (64, 10)
参数 4: (1, 10)

--- 测试前向传播 ---
输入形状: (32, 20)
输出形状: (32, 10)


In [6]:
# 设置随机种子保证可复现
np.random.seed(42)

# 样本数与特征数
n_samples = 1000
n_features = 20

# 生成特征矩阵 X (1000x20)
X = Tensor(np.random.randn(n_samples, n_features))

# 真实参数 w 和 b
true_w = Tensor(np.random.randn(n_features, 1))
true_b = Tensor(2.5)

# 添加噪声项 ε
noise = Tensor(np.random.randn(n_samples, 1) * 0.5)

# 标签 y = Xw + b + ε
y = Tensor(X @ true_w + true_b + noise)

# print("X shape:", X.shape)
# print("y shape:", y.shape)
print(X)
print(y)

Tensor(data=[[ 0.49671415 -0.1382643   0.64768854 ...  0.31424733 -0.90802408
  -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  ... -1.95967012 -1.32818605
   0.19686124]
 [ 0.73846658  0.17136828 -0.11564828 ... -0.30921238  0.33126343
   0.97554513]
 ...
 [-0.69193084  0.48996231  0.96006087 ... -2.61546896  1.74723733
   0.55573132]
 [-0.11676412 -0.66722324 -2.87603228 ... -1.03302885  0.71373522
   0.32797039]
 [ 0.96259198  0.51259951 -0.75311158 ...  0.37835397  1.71352973
  -1.6199198 ]], grad=[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]])
Tensor(data=array([[ 3.43822562e+00],
       [ 1.15957394e+01],
       [ 7.52557050e+00],
       [-4.85227526e+00],
       [ 1.76458271e+00],
       [-3.50668716e+00],
       [-4.15816958e+00],
       [ 3.99047139e+00],
       [ 4.91277362e-01],
       [-6.67785153e-01],
       [ 1.43524534e+01],
       [ 7.47107449e+00],
       [ 

In [7]:
from micrograd.optim import MSELoss, SGD

# --- 3. 初始化组件 (学习率可以适当增大) ---
# model = MyModel()
criterion = MSELoss()
optimizer = SGD(model.parameters(), lr=0.01) # 因为数据归一化了，学习率可以变大

# 超参数
lr = 0.001        # 学习率
batch_size = 1  # 批次大小
n_epochs = 50    # 训练轮数

# 保存每轮损失以便可视化
losses = []

# 训练过程
for epoch in range(n_epochs):
    # 打乱数据顺序
    indices = np.random.permutation(n_samples)
    X_shuffled = X[indices]
    y_shuffled = y[indices]

    # 按批训练
    for i in range(0, n_samples, batch_size):
        X_batch = X_shuffled[i:i+batch_size]
        y_batch = y_shuffled[i:i+batch_size]

        # 前向传播
        y_pred = model(X_batch)
        
        # 计算损失
        loss = criterion(y_pred, y_batch)
        
        # 反向传播 (计算梯度)
        loss.backward()
        
        # 更新参数
        optimizer.step()
        
        # 清空梯度，为下一次迭代做准备
        optimizer.zero_grad()

    # 记录每轮损失
    # loss = compute_loss(X, y, w, b)
    losses.append(loss)
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:02d} | Loss: {loss.data:.4f}")

Epoch 10 | Loss: array([[18.49460901]])
Epoch 20 | Loss: array([[11.93508171]])
Epoch 30 | Loss: array([[11.10907085]])
Epoch 40 | Loss: array([[88.0663019]])
Epoch 50 | Loss: array([[780.22261502]])
