In [3]:
import numpy as np

def build_transform_matrix_replace_ind0_with_market(X, F, K, L):
    """
    We have K=3 industries, L=2 styles => total dim=5.
    We'll REPLACE old factor #0 (Industry0) with 'Market' = average of [Ind0, Ind1, Ind2],
    and then partial out Ind1, Ind2 w.r.t. Market. 
    The style factors remain as is.
    
    X is Nx5, F is 5x5.
    We return T, also 5x5, full-rank if 'Market' is truly a sum of old Ind0..Ind2.
    """
    dim = K + L   # 5 in this example
    T = np.zeros((dim, dim))  # we'll fill columns carefully
    
    # 1) Define Market factor from old Ind0..Ind2
    t_M = np.zeros(dim)
    # let's do "equal weighting" across the 3 industry columns => each = 1/3
    for i in range(K):
        t_M[i] = 1.0 / K  # K=3
    
    # Put that as the FIRST column => new factor #0
    T[:, 0] = t_M
    
    # 2) For old factor #1..(K-1), partial out Market
    #    i.e. if K=3, we do i=1..2 => partial out Ind1, Ind2
    for old_ind in range(1, K):
        e_i = np.zeros(dim)
        e_i[old_ind] = 1.0
        alpha = (e_i @ F @ t_M) / (t_M @ F @ t_M)
        T[:, old_ind] = e_i - alpha * t_M
        # That is the "pure industry residual" for old_ind
    
    # 3) Keep the style columns as identity => columns K..(K+L-1)
    #    e.g. columns 3..4 remain e_3, e_4
    for style_col in range(K, K+L):
        e_style = np.zeros(dim)
        e_style[style_col] = 1.0
        T[:, style_col] = e_style
    
    return T


# === Example usage ===
if __name__ == "__main__":
    np.set_printoptions(precision=4, suppress=True)
    
    # Suppose we have N=10 stocks, K=3 industries, L=2 styles => total 5 factors
    N = 10
    K = 3
    L = 2
    dim = K+L
    
    np.random.seed(123)
    
    # Fake X (N x dim)
    X = np.random.randn(N, dim)
    
    # Fake F (dim x dim), must be pos. semidef for real usage
    R = np.random.randn(dim, dim)
    F_approx = R @ R.T   # random SPD
    
    # Build the corrected T
    T = build_transform_matrix_replace_ind0_with_market(X, F_approx, K, L)
    
    # New exposures
    X_new = X @ T
    
    # New factor covariance
    F_new = T.T @ F_approx @ T
    
    print("Transformation matrix T:\n", T)
    print("Old F:\n", F_approx)
    print("New F:\n", F_new)
    
    # Test for random portfolio
    wtest = np.random.randn(N)
    old_var = wtest @ (X @ F_approx) @ (X.T) @ wtest
    new_var = wtest @ (X_new @ F_new) @ (X_new.T) @ wtest
    
    print("Old portfolio variance:", old_var)
    print("New portfolio variance:", new_var)


Transformation matrix T:
 [[ 0.3333 -0.3506 -0.3035  0.      0.    ]
 [ 0.3333  0.6494 -0.3035  0.      0.    ]
 [ 0.3333 -0.3506  0.6965  0.      0.    ]
 [ 0.      0.      0.      1.      0.    ]
 [ 0.      0.      0.      0.      1.    ]]
Old F:
 [[ 6.4321 -0.4109  1.8228 -0.5361  3.0964]
 [-0.4109  8.397  -0.0347 -1.0842 -2.1113]
 [ 1.8228 -0.0347  5.0954  1.2112 -2.2113]
 [-0.5361 -1.0842  1.2112  2.1838  0.9606]
 [ 3.0964 -2.1113 -2.2113  0.9606  7.9724]]
New F:
 [[ 2.5199  0.     -0.     -0.1364 -0.4087]
 [ 0.      5.6092 -2.4481 -0.9408 -1.6814]
 [-0.     -2.4481  3.0061  1.3354 -1.8391]
 [-0.1364 -0.9408  1.3354  2.1838  0.9606]
 [-0.4087 -1.6814 -1.8391  0.9606  7.9724]]
Old portfolio variance: 151.6080983554587
New portfolio variance: 66.2821010868502
