In [6]:
import pandas as pd
import numpy as np
import math
import random

In [8]:
df = pd.read_csv('../data/Asgmnt1_data.csv',header=None)

In [9]:
df.shape

(16000, 128)

In [10]:
normalized_rows = []
number_rows = df.shape[0]

for i in range(number_rows):
    normalized_row = []
    row_values = df.iloc[i, :]
    
    mean = row_values.mean()
    std = row_values.std(ddof=1)
    
    for val in row_values:
        normalized_val = (val - mean) / std
        normalized_row.append(normalized_val)
        
    normalized_rows.append(pd.Series(normalized_row, index=df.columns, name=df.index[i]))
    
normalized_data = pd.concat(normalized_rows, axis=1).T

In [11]:
normalized_data.shape

(16000, 128)

In [12]:
def calculate_euclidean_distance(row1, row2):
    sum_square = 0
    for (val1, val2) in zip(row1, row2):
        sum_square += (val1 - val2) ** 2
    
    return math.sqrt(sum_square)

In [13]:
def construct_euclidean_distance_matrix(dataframe):
    n = dataframe.shape[0]
    distance_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            distance = calculate_euclidean_distance(dataframe.iloc[i], dataframe.iloc[j])
            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance
    
    return distance_matrix


In [14]:
normalized_distance_matrix = construct_euclidean_distance_matrix(normalized_data)
np.savetxt("distances.csv", normalized_distance_matrix, delimiter=',', fmt='%.6f')

In [15]:
normalized_distance_matrix.shape

(16000, 16000)

In [16]:
def haar_non_square_matrix(m, n):
    if n < m:
        raise ValueError("Number of columns must be greater than or equal to the number of rows")
    
    H = np.zeros((m, n))

    # First row is all ones
    H[0, :] = 1 / np.sqrt(n)
    
    # Build the rest of the matrix
    for level in range(1, m):
        step = n // (2 ** level)
        if step == 0:
            break  # Stop if the step becomes zero (more rows than meaningful steps)
        
        for i in range(0, n, 2 * step):
            H[level, i:i + step] = 1 / np.sqrt(step)
            H[level, i + step:i + 2 * step] = -1 / np.sqrt(step)
    
    return H

In [17]:
h = haar_non_square_matrix(128, 16000).T

In [18]:
wavelet_transformed_normalized_data = normalized_data * h

In [19]:
wavelet_transformed_normalized_data.shape

(16000, 128)

In [20]:
wavelet_transformed_distance_matrix = construct_euclidean_distance_matrix(wavelet_transformed_normalized_data.iloc[:, :4])

In [None]:
np.savetxt('wavelet_transformed_distance_matrix.csv', wavelet_transformed_distance_matrix, delimiter=',', fmt='%.6f')

In [21]:
def PCA(data_before_pca, n_principal_components):
    covariance_matrix = np.dot(data_before_pca.T, data_before_pca)
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
    indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[indices]
    eigenvectors = eigenvectors[indices]
    components = eigenvectors[:, :n_principal_components]
    
    data_after_pca = np.dot(data_before_pca, components)
    
    return data_after_pca, eigenvectors[:n_principal_components] 

In [22]:
pca_transformed_data, top_eigenvalues = PCA(normalized_data, 4)

In [23]:
pca_transformed_data

array([[-1.73864934, -0.37510514, -0.36458072, -0.25849118],
       [ 8.12407994,  1.14390919, -1.90952603,  0.33827447],
       [ 1.38638276,  0.31307967,  0.12591414,  0.13767397],
       ...,
       [-0.12002671,  0.30739413,  3.01892774, -2.5399854 ],
       [-0.4864626 , -1.59798679,  4.54223475, -2.97407417],
       [ 0.37858034,  0.83722512, -4.89169555,  3.29943755]])

In [24]:
top_eigenvalues

array([[ 6.94158769e-02, -1.23599362e-01, -4.29133480e-02,
         1.12102030e-01,  1.26372375e-01, -3.31753769e-02,
        -1.03016572e-01, -2.60132001e-02, -8.83883476e-02,
         1.66272181e-01, -1.63837046e-01,  1.12720684e-03,
        -1.36631389e-01, -5.02509035e-02, -7.59787013e-02,
        -1.26286259e-01,  1.54312139e-01, -9.61187849e-02,
        -5.80218624e-02, -5.37720069e-02,  2.98135795e-03,
        -1.21780296e-01,  6.67449116e-02, -2.09694833e-03,
        -3.94752561e-02, -1.73554981e-02, -7.35801648e-02,
        -7.23668310e-02,  2.91160484e-01,  5.55655040e-03,
         1.19675456e-01,  5.19197838e-02, -2.69449648e-02,
         5.46720778e-02,  1.04869584e-01, -8.01282252e-02,
        -4.24880096e-02, -8.54283736e-02, -5.28886005e-02,
        -1.38515321e-01, -1.34876471e-01, -7.58007635e-02,
        -1.07406053e-01,  1.09252945e-02,  5.74430748e-02,
        -3.59853772e-02, -5.61597395e-02, -5.59656849e-02,
         7.68358944e-02, -1.63092063e-01, -1.06789994e-0

In [25]:
pca_transformed_distance_matrix = construct_euclidean_distance_matrix(pd.DataFrame(pca_transformed_data))

In [None]:
np.savetxt('pca_transformed_distance_matrix.csv', pca_transformed_distance_matrix, delimiter=',', fmt='%.6f')
np.savetxt('wavelet_transformed_distance_matrix.csv', wavelet_transformed_distance_matrix, delimiter=',', fmt='%.6f')

In [40]:
len(pca_transformed_distance_matrix)

TypeError: object of type 'NoneType' has no len()

In [7]:
def generate_random_pairs(matrix_size):
    row1 = -1, col1 = -2, row2 = -1, col2 = -2
    
    while (row1 == row2 and col1 == col2) or (row1 == col1 and row2 == col1):
        row1, col1 = random.randint(0, matrix_size-1), random.randint(0, matrix_size-1)
        row2, col2 = random.randint(0, matrix_size-1), random.randint(0, matrix_size-1)
    return (row1, col1), (row2, col2)

def compare_random_cells(matrix_list, n):
    matrix_size = len(matrices[0])
    for _ in range(n):
        pair1, pair2 = generate_random_pairs(matrix_size)
        
        for idx, matrix in enumerate(matrices):
            cell1 = matrix[pair1[0]][pair1[1]]
            cell2 = matrix[pair2[0]][pair2[1]]
            print(f"Matrix {idx+1} Comparison: Cell {pair1} = {cell1}, Cell {pair2} = {cell2}")

SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (2222891630.py, line 2)

In [15]:
matrices = [normalized_distance_matrix, wavelet_transformed_distance_matrix, pca_transformed_distance_matrix]

NameError: name 'normalized_distance_matrix' is not defined