In [1]:
# Dataset: [road_width, traffic_volume, avg_speed]
data = [
    [10, 500, 60],
    [8, 1000, 40],
    [6, 1500, 20],
    [12, 400, 70],
    [7, 1200, 30],
    [5, 1800, 15]
]

In [2]:
def calculate_mean(data):
    return [sum(column) / len(column) for column in zip(*data)]

mean = calculate_mean(data)
print(f"Feature Means: {mean}")


Feature Means: [8.0, 1066.6666666666667, 39.166666666666664]


In [3]:
def center_data(data, mean):
    return [[value - mean[i] for i, value in enumerate(row)] for row in data]

# Center the data
centered_data = center_data(data, mean)
print(f"Centered Data: {centered_data}")

Centered Data: [[2.0, -566.6666666666667, 20.833333333333336], [0.0, -66.66666666666674, 0.8333333333333357], [-2.0, 433.33333333333326, -19.166666666666664], [4.0, -666.6666666666667, 30.833333333333336], [-1.0, 133.33333333333326, -9.166666666666664], [-3.0, 733.3333333333333, -24.166666666666664]]


In [4]:
def covariance_matrix(data):
    n = len(data)
    d = len(data[0])
    cov_matrix = [[0.0] * d for _ in range(d)]

    for i in range(d):
        for j in range(d):
            cov_matrix[i][j] = sum(centered_data[k][i] * centered_data[k][j] for k in range(n)) / (n - 1)
    
    return cov_matrix

# Calculate the covariance matrix
cov_matrix = covariance_matrix(centered_data)
print(f"Covariance Matrix: {cov_matrix}")

Covariance Matrix: [[6.8, -1400.0, 57.0], [-1400.0, 302666.6666666666, -11933.333333333332], [57.0, -11933.333333333332, 484.1666666666667]]


In [5]:
def power_iteration(matrix, num_simulations=1000):
    """Compute the dominant eigenvector of a matrix."""
    b_k = [1] * len(matrix)  # Initialize a random vector
    for _ in range(num_simulations):
        # Multiply by the matrix
        b_k1 = [sum(matrix[i][j] * b_k[j] for j in range(len(matrix))) for i in range(len(matrix))]
        # Normalize the vector
        norm = sum(x**2 for x in b_k1) ** 0.5
        b_k = [x / norm for x in b_k1]
    return b_k

def eigen_decomposition(matrix):
    eigenvalues = []
    eigenvectors = []

    for _ in range(len(matrix)):
        eigenvector = power_iteration(matrix)
        eigenvalue = sum(matrix[i][i] * eigenvector[i] for i in range(len(matrix)))
        eigenvalues.append(eigenvalue)
        eigenvectors.append(eigenvector)
    
    return eigenvalues, eigenvectors

# Perform eigen decomposition
eigenvalues, eigenvectors = eigen_decomposition(cov_matrix)
print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors: {eigenvectors}")

Eigenvalues: [302409.3321595308, 302409.3321595308, 302409.3321595308]
Eigenvectors: [[-0.004622149016372531, 0.9992129036196306, -0.039398083436854], [-0.004622149016372531, 0.9992129036196306, -0.039398083436854], [-0.004622149016372531, 0.9992129036196306, -0.039398083436854]]


In [6]:
def select_top_components(eigenvalues, eigenvectors, k):
    eigen_pairs = sorted(zip(eigenvalues, eigenvectors), key=lambda x: x[0], reverse=True)
    top_eigenvectors = [pair[1] for pair in eigen_pairs[:k]]
    return top_eigenvectors

# Select the top 2 components
k = 2
top_components = select_top_components(eigenvalues, eigenvectors, k)
print(f"Top {k} Components: {top_components}")

Top 2 Components: [[-0.004622149016372531, 0.9992129036196306, -0.039398083436854], [-0.004622149016372531, 0.9992129036196306, -0.039398083436854]]


In [7]:
def project_data(data, components):
    return [[sum(row[i] * components[j][i] for i in range(len(row))) for j in range(len(components))] for row in data]

projected_data = project_data(centered_data, top_components)
print(f"Projected Data: {projected_data}")

Projected Data: [[-567.0506830874245, -567.0506830874245], [-66.6470253108395, -66.6470253108395], [433.7566324657456, 433.7566324657456], [-667.3751985817889, -667.3751985817889], [133.59415839647153, 133.59415839647153], [733.7221161178354, 733.7221161178354]]
