In [17]:
import numpy as np

def compute_W_prime(W, X, Y, B):
    """
    计算 W' = W - [W(Y - X) - B] (Y^T Y)^{-1} Y^T

    参数:
        W: (m, n) numpy 数组
        X: (n, k) numpy 数组
        Y: (p, k) numpy 数组
        B: (m, k) numpy 数组

    返回:
        W_prime: (m, p) numpy 数组
    """
    # 计算 Y - X
    Y_minus_X = Y - X  # (p, k) - (n, k)
    # 计算 W @ (Y - X)
    WY_X = W @ Y_minus_X  # (m, n) @ (n, k) = (m, k)
    # 计算 [W(Y - X) - B]
    diff = WY_X - B  # (m, k) - (m, k)
    # 计算 (Y^T Y)^{-1}
    try:
        YTY_inv = np.linalg.inv(Y @ Y.T)  # (p, k) @ (k, p) = (p, p)
    except:
        YTY_inv = np.linalg.pinv(Y @ Y.T)  # (p, k) @ (k, p) = (p, p)
    # 计算 (Y^T)
    YT = Y.T  # (k, p)
    # 计算 [W(Y - X) - B] (Y^T Y)^{-1} Y^T
    middle = diff @ YTY_inv @ YT  # (m, k) @ (p, p) @ (k, p) -> (m, p)
    # 计算 W'
    W_prime = W - middle  # (m, p)
    return W_prime

In [16]:
import torch

# 假设你已经加载了 X, Y, B
X = torch.load('/home/ls/Github/ChatGLM3-main/QF/parameters/layer23_u.pt')
Y = torch.load('/home/ls/Github/ChatGLM3-main/QF/parameters/layer23_up.pt') 
v = torch.load('/home/ls/Github/ChatGLM3-main/QF/parameters/layer23_v.pt')
vp = torch.load('/home/ls/Github/ChatGLM3-main/QF/parameters/layer23_vp.pt')
W = torch.load('/home/ls/Github/ChatGLM3-main/QF/parameters/layer23_weight.pt')

B = v - vp

# 去掉第一维度
X = X.squeeze(0)  # [1, 13, 8960] -> [13, 8960]
Y = Y.squeeze(0)  # [1, 13, 8960] -> [13, 8960]  
B = B.squeeze(0)  # [1, 13, 1536] -> [13, 1536]

print(f"X shape: {X.shape}")  # torch.Size([13, 8960])
print(f"Y shape: {Y.shape}")  # torch.Size([13, 8960])
print(f"B shape: {B.shape}")  # torch.Size([13, 1536])
print(f"W shape: {W.shape}")  # torch.Size([1536, 8960])

# 转换为numpy数组
X_np = X.detach().cpu().numpy()
Y_np = Y.detach().cpu().numpy()
B_np = B.detach().cpu().numpy()
W_np = W.detach().cpu().numpy()


# 调用compute_W_prime函数
W_prime = compute_W_prime(W_np, X_np, Y_np, B_np)

print(f"W_prime shape: {W_prime.shape}")
print(f"W_prime 前几个元素: {W_prime.flatten()[:10]}")

X shape: torch.Size([13, 8960])
Y shape: torch.Size([13, 8960])
B shape: torch.Size([13, 1536])
W shape: torch.Size([1536, 8960])


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 13 is different from 8960)

In [13]:
W.shape

torch.Size([1536, 8960])

In [None]:
W.shape

torch.Size([1536, 8960])

In [None]:
W.shape

torch.Size([1536, 8960])

In [6]:
# 测试用例
W = np.array([[1, 2],
              [3, 4]])
X = np.array([[1, 0],
              [0, 1]])
B = np.array([[1, 2],
              [3, 4]])
Y = np.array([[2, 0],
              [0, 1]])

W_prime = compute_W_prime(W, X, Y, B)


print("W':\n", W_prime)

W':
 [[1. 4.]
 [3. 8.]]


In [None]:
if os.path.exists(u_path):
    print(f"正在读取文件: {u_path}")
    u_data = torch.load(u_path, map_location='cpu')
    
    print(f"数据类型: {type(u_data)}")
    print(f"数据形状: {u_data.shape}")
    print(f"数据类型: {u_data.dtype}")
    print(f"数据设备: {u_data.device}")
    
    # 显示前几个元素
    print(f"\n前10个元素:")
    print(u_data.flatten()[:10])
    
    # 显示统计信息
    print(f"\n统计信息:")
    print(f"最小值: {u_data.min().item()}")
    print(f"最大值: {u_data.max().item()}")
    print(f"均值: {u_data.mean().item()}")
    print(f"标准差: {u_data.std().item()}")
    
else:
    print(f"文件不存在: {u_path}") 