In [16]:
import numpy as np

def build_rotation_matrix(X: np.ndarray, F: np.ndarray, K: float, L: float):
    """
    We have K=3 industries, L=2 styles => total dim=5.
    We'll REPLACE old factor #0 (Industry0) with 'Market' = average of [Ind0, Ind1, Ind2],
    and then partial out Ind1, Ind2 w.r.t. Market. 
    The style factors remain as is.
    
    X is Nx5, F is 5x5.
    We return T, also 5x5, full-rank if 'Market' is truly a sum of old Ind0..Ind2.
    """

    T = np.zeros(shape=(X.shape[1], X.shape[1]))  # we'll fill columns carefully
    
    # 1) Define Market factor from old Ind0..Ind2
    # Put that as the FIRST column => new factor #0
    T[:, 0] = np.ones(X.shape[1])
    
    # 2) For old factor #1..(K-1), partial out Market
    #    i.e. if K=3, we do i=1..2 => partial out Ind1, Ind2
    for old_ind in range(1, K+L):
    
        e_i = np.zeros(X.shape[1])
        e_i[old_ind] = 1.0
        T[:, old_ind] = e_i

        # Execute Gram-Schmidt
        for j in range(old_ind):        
            alpha = (T[:, old_ind].T @ T[:, j]) / (T[:, j].T @ T[:, j])
            T[:, old_ind] -= alpha * T[:, j]
        
        # Make this unit norm
        T[:, old_ind] /= np.linalg.norm(T[:, old_ind])
        # That is the "pure industry residual" for old_ind
        
    print(T)
    return T


# === Example usage ===
if __name__ == "__main__":
    np.set_printoptions(precision=4, suppress=True)
    
    # Suppose we have N=10 stocks, K=3 industries, L=2 styles => total 5 factors
    N = 10
    K = 3
    L = 2
    dim = K+L
    
    np.random.seed(123)
    
    # Fake X (N x dim)
    X = np.random.randn(N, dim)
    
    # Fake F (dim x dim), must be pos. semidef for real usage
    R = np.random.randn(dim, dim)
    F_approx = R @ R.T   # random SPD
    
    # Build the corrected T
    T = build_rotation_matrix(X, F_approx, K, L)
    
    # New exposures
    X_new = X @ T
    
    # New factor covariance
    F_new = T.T @ F_approx @ T
    
    print("Transformation matrix T:\n", T)
    print("Old F:\n", F_approx)
    print("New F:\n", F_new)
    
    # Test for random portfolio
    wtest = np.random.randn(N)
    old_var = wtest @ (X @ F_approx) @ (X.T) @ wtest
    new_var = wtest @ (X_new @ F_new) @ (X_new.T) @ wtest
    
    print("Old portfolio variance:", old_var)
    print("New portfolio variance:", new_var)


[[ 1.     -0.2236 -0.2887 -0.4082 -0.7071]
 [ 1.      0.8944  0.     -0.     -0.    ]
 [ 1.     -0.2236  0.866   0.      0.    ]
 [ 1.     -0.2236 -0.2887  0.8165 -0.    ]
 [ 1.     -0.2236 -0.2887 -0.4082  0.7071]]
Transformation matrix T:
 [[ 1.     -0.2236 -0.2887 -0.4082 -0.7071]
 [ 1.      0.8944  0.     -0.     -0.    ]
 [ 1.     -0.2236  0.866   0.      0.    ]
 [ 1.     -0.2236 -0.2887  0.8165 -0.    ]
 [ 1.     -0.2236 -0.2887 -0.4082  0.7071]]
Old F:
 [[ 6.4321 -0.4109  1.8228 -0.5361  3.0964]
 [-0.4109  8.397  -0.0347 -1.0842 -2.1113]
 [ 1.8228 -0.0347  5.0954  1.2112 -2.2113]
 [-0.5361 -1.0842  1.2112  2.1838  0.9606]
 [ 3.0964 -2.1113 -2.2113  0.9606  7.9724]]
New F:
 [[31.4854 -1.7232 -0.9227 -5.1604 -1.9074]
 [-1.7232  9.6926  1.3366  1.3154 -0.9178]
 [-0.9227  1.3366  5.3793  2.8564 -3.0903]
 [-5.1604  1.3154  2.8564  4.6057  0.4195]
 [-1.9074 -0.9178 -3.0903  0.4195  4.1059]]
Old portfolio variance: 151.6080983554587
New portfolio variance: 3858.646691549563


In [8]:
X.sum(axis=1)

array([-1.8902, -0.805 , -0.3651,  5.3486,  1.2142, -2.1612, -4.5982,
       -0.0785, -2.0115,  6.0103])