In [9]:
import numpy as np
import pandas as pd

In [2]:
def get_hat_matrix(X):
    """
    Computes the Hat matrix (projection matrix) for a given design matrix X.
    
    Args:
        X (numpy.ndarray): The design matrix of shape (n_samples, n_features).
    
    Returns:
        numpy.ndarray: The Hat matrix of shape (n_samples, n_samples).
    """
    # Calculate the matrix product X'X
    XtX = np.dot(X.T, X)
    
    # Calculate the inverse of (X'X)
    XtX_inv = np.linalg.pinv(XtX)
    
    # Calculate the Hat matrix
    H = np.dot(X, np.dot(XtX_inv, X.T))
    
    return H

In [15]:
# Example usage
# Generate some sample data
data = pd.read_excel('DA1_hat_data.xlsx')
data

Unnamed: 0,Color,Quality,Price
0,7,5,65
1,3,7,38
2,5,8,51
3,8,1,38
4,9,3,55
5,5,4,43
6,4,0,25
7,2,6,33
8,8,7,71
9,6,4,51


In [8]:

# Add a column of ones for the intercept term
X = np.column_stack((np.ones(n_samples), X))

# Calculate the Hat matrix
H = get_hat_matrix(X)
#print(H)  # Output: (100, 100)
H

array([[ 0.21654154,  0.08150552,  0.01891755, -0.07607394, -0.04469079,
         0.1288346 ,  0.30392766,  0.09969713,  0.17592992,  0.03265668,
         0.06275412],
       [ 0.08150552,  0.20759447,  0.19843417,  0.0851218 ,  0.00180134,
        -0.0026508 ,  0.08774337,  0.06961814,  0.07277517,  0.29108138,
        -0.09302456],
       [ 0.01891755,  0.19843417,  0.27691664,  0.15383796, -0.01254176,
         0.15966905, -0.11978858,  0.02187698,  0.07154106,  0.29518477,
        -0.06404783],
       [-0.07607394,  0.0851218 ,  0.15383796,  0.32079662,  0.31527535,
         0.00303029, -0.16388559,  0.09640663, -0.03230829,  0.13793109,
         0.15986807],
       [-0.04469079,  0.00180134, -0.01254176,  0.31527535,  0.48255153,
        -0.16380648,  0.02089352,  0.17746164, -0.05871481, -0.01115545,
         0.29292591],
       [ 0.1288346 , -0.0026508 ,  0.15966905,  0.00303029, -0.16380648,
         0.7029837 , -0.16528567, -0.0327558 ,  0.24924689, -0.08463565,
         0.205