In [2]:
#IMPORT STATEMENTS
import selenium
import numpy as np
import warnings
warnings.filterwarnings('ignore')  # supress scikit 'future warnings'
import pandas as pd
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from scipy.optimize import linear_sum_assignment
import matplotlib         
from matplotlib import pyplot as plt
from scipy.stats import norm
from scipy.stats import norm, kurtosis
from sklearn import linear_model
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn import svm
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from scipy import stats
import math
import pickle
import random
import copy
import itertools
from dataclasses import dataclass
# Seed every random source the notebook draws from so that Restart & Run All is
# reproducible. random.seed() only affects Python's built-in `random` module; calls
# such as np.random.randn use NumPy's own global generator, which is seeded separately.
random.seed(42)
np.random.seed(42)


In [103]:
# HEAVILY USED FUNCTIONS THROUGHOUT PROJECT

def rank_approx(F: np.ndarray, rank_n: int):
    """Factor the best rank-`rank_n` approximation of `F` from its SVD.

    Args:
        F: 2-D array (the convolutional filter) to approximate.
        rank_n: Number of leading singular triplets to keep.

    Returns:
        Tuple ``(f, g)``. ``f`` has shape ``(M, rank_n)`` and holds the leading
        left singular vectors scaled by their singular values; ``g`` has shape
        ``(N, rank_n)`` and holds the leading right singular vectors as columns.
        ``f @ g.T`` is the rank-`rank_n` truncated SVD of ``F``.
    """
    n_rows, n_cols = np.shape(F)
    # np.linalg.svd hands back the right singular vectors already transposed,
    # i.e. one singular vector per ROW of the third return value.
    left_vecs, sing_vals, right_vecs_t = np.linalg.svd(F)

    # For a valid rank these reshapes are no-ops; when rank_n asks for more
    # singular triplets than the decomposition provides they raise ValueError.
    left_k = left_vecs[:, :rank_n].reshape(n_rows, rank_n)
    sigma_k = np.diag(sing_vals[:rank_n]).reshape(rank_n, rank_n)
    right_k_t = right_vecs_t[:rank_n].reshape(rank_n, n_cols)

    scaled_left = left_k @ sigma_k
    return scaled_left, right_k_t.T

def conv_reg(F: np.ndarray, A: np.ndarray):
    """Apply filter `F` to `A` directly, at every position where it fits entirely.

    The filter is NOT flipped: each output entry is the sum of the element-wise
    product between `F` and the window of `A` whose top-left corner sits at that
    output index.

    Args:
        F: 2-D filter of shape ``(M_f, N_f)``.
        A: 2-D input of shape ``(M_a, N_a)``.

    Returns:
        Array of shape ``(M_a - M_f + 1, N_a - N_f + 1)``.
    """
    in_rows, in_cols = np.shape(A)
    filt_rows, filt_cols = np.shape(F)
    out_shape = (in_rows - filt_rows + 1, in_cols - filt_cols + 1)

    out = np.zeros(out_shape)
    # Visit every valid top-left corner of the sliding window.
    for top, left in np.ndindex(*out_shape):
        window = A[top:top + filt_rows, left:left + filt_cols]
        out[top, left] = (window * F).sum()

    return out

def conv_approx(F: np.ndarray, A: np.ndarray, rank_n: int):
    """Apply filter `F` to `A` through a rank-`rank_n` factorisation of `F`.

    `F` is approximated as ``sum_i f[:, i] g[:, i]^T`` (see `rank_approx`). Each
    rank-1 term is separable, so its contribution is computed as
    ``left_matrix @ A @ right_matrix``, where `left_matrix` slides the column
    factor ``f[:, i]`` down the rows of `A` and `right_matrix` slides the row
    factor ``g[:, i]`` across its columns. When `rank_n` equals the rank of `F`
    the result matches `conv_reg(F, A)` up to floating-point round-off.

    Args:
        F: 2-D filter of shape ``(M_f, N_f)``.
        A: 2-D input of shape ``(M_a, N_a)``.
        rank_n: Number of rank-1 terms to use.

    Returns:
        Array of shape ``(M_a - M_f + 1, N_a - N_f + 1)``.
    """
    M_a, N_a = np.shape(A)
    M_f, N_f = np.shape(F)
    M_res = M_a - M_f + 1
    N_res = N_a - N_f + 1

    final_res = np.zeros((M_res, N_res))
    # f: (M_f, rank_n), g: (N_f, rank_n) -- one rank-1 component per COLUMN.
    f, g = rank_approx(F, rank_n)

    for i in range(rank_n):
        col_factor = f[:, i]
        # Component i of g is its i-th column. Indexing g[i] (row i) is only
        # correct by coincidence for symmetric filters; for any other filter it
        # either broadcasts the wrong values or fails with a shape error.
        row_factor = g[:, i]

        left_matrix = np.zeros((M_res, M_a))
        right_matrix = np.zeros((N_a, N_res))

        # Banded matrices: row r / column c holds the factor shifted to offset r / c.
        for r in range(M_res):
            left_matrix[r, r:r+M_f] = col_factor

        for c in range(N_res):
            right_matrix[c:c+N_f, c] = row_factor

        add_matrix = np.matmul(left_matrix, np.matmul(A, right_matrix))
        final_res = final_res + add_matrix

    return final_res

In [104]:
# CELL TO VALIDATE THE RANK_APPROX FUNCTION WITH A FEW EXAMPLES.
# NOTE: FLOATING-POINT ROUND-OFF LEAVES EXTREMELY SMALL RESIDUALS, SO "EXACT"
# RECONSTRUCTIONS ARE CHECKED WITH diff_norm <= 1e-12 RATHER THAN diff_norm == 0.

# Case 1: a rank-1 matrix must be reproduced by its rank-1 approximation.
test = np.array([[2.0,1.0], [2.0,1.0]])
f_test_res, g_test_res = rank_approx(test, 1)
print("F_test")
print(test)
print("Approx F_test with rank: 1")
f_approx = np.matmul(f_test_res, g_test_res.T)
print(f_approx)
diff_norm = np.linalg.norm(test - f_approx)
print("FROB NORM OF DIFF: " + str(diff_norm))
assert diff_norm <= 1e-12

# Case 2: perturb one entry (in place) so the matrix becomes rank 2; the rank-1
# approximation can no longer be exact.
test[1, 1] += 0.1
f_test_res_2, g_test_res_2 = rank_approx(test, 1)
print("\nF_test_2")
print(test)
print("Approx F_test_2 with rank: 1")
f_approx = np.matmul(f_test_res_2, g_test_res_2.T)
print(f_approx)
diff_norm = np.linalg.norm(f_approx - test)
print("FROB NORM OF DIFF: " + str(diff_norm))
# Eckart-Young: the Frobenius error of the BEST rank-1 approximation of a 2x2 matrix
# equals its second singular value. Checking equality (not just an upper bound)
# verifies that the approximation is optimal, not merely close.
second_singular_value = np.linalg.svd(test, compute_uv=False)[1]
assert np.isclose(diff_norm, second_singular_value)

# Case 3: the rank-2 approximation of a 2x2 matrix is the matrix itself.
f_test_res_2, g_test_res_2 = rank_approx(test, 2)
print("Approx F_test_2 with rank: 2")
f_approx = np.matmul(f_test_res_2, g_test_res_2.T)
print(f_approx)
diff_norm = np.linalg.norm(test - f_approx)
print("FROB NORM OF DIFF: " + str(diff_norm))
assert diff_norm <= 1e-12


F_test
[[2. 1.]
 [2. 1.]]
Approx F_test with rank: 1
[[2. 1.]
 [2. 1.]]
FROB NORM OF DIFF: 1.961044852356119e-15

F_test_2
[[2.  1. ]
 [2.  1.1]]
Approx F_test_2 with rank: 1
[[1.97920426 1.03959153]
 [2.020372   1.0612152 ]]
FROB NORM OF DIFF: 0.0626037711536505
Approx F_test_2 with rank: 2
[[2.  1. ]
 [2.  1.1]]
FROB NORM OF DIFF: 1.6910413304902302e-15


In [110]:
# CELL TO VALIDATE CONV_REG AND CONV_APPROX FUNCTIONS WITH AN EXAMPLE
# THIS CELL CHECKS WHEN RANK OF F IS 1
# NOTE(review): test_f is symmetric, so this check cannot detect a mix-up between
# the rows and columns of the factor g; a non-square filter would be a stronger test.

# Use a locally seeded generator so A (and therefore the printed norm) is the same
# on every run without touching NumPy's global random state.
A = np.random.default_rng(42).standard_normal((10, 10))
test_f = np.array([[1.0,1.0], [1.0,1.0]])

conv_reg_A = conv_reg(test_f, A)

# Check ensuring conv_reg function is working properly: with an all-ones 2x2 filter
# every output entry is the sum of the 2x2 window below/right of it. Compared with a
# tolerance because exact float equality depends on the order of the additions.
expected = A[:-1, :-1] + A[:-1, 1:] + A[1:, :-1] + A[1:, 1:]
np.testing.assert_allclose(conv_reg_A, expected, rtol=1e-12, atol=1e-12)

conv_approx_A_1 = conv_approx(test_f, A, 1)
# The factors are not used by the checks below; kept available for inspection.
f, g = rank_approx(test_f, 1)
diff_norm = np.linalg.norm(conv_approx_A_1 - conv_reg_A)
print("FROB NORM OF DIFF BETWEEN REG AND APPROX CONV WHERE EXACT APPROX IS POSSIBLE: " + str(diff_norm))

# NOTE THAT DUE TO FLOATING POINT ERROR THERE MAY BE SLIGHT
# DEVIATIONS FROM THE EXACT RESULT, BUT WITH SUCH A SMALL ERROR WE CAN
# CONSIDER THE MATRICES EQUAL AND THE FUNCTIONS VALID
assert diff_norm <= 1e-10

FROB NORM OF DIFF BETWEEN REG AND APPROX CONV WHERE EXACT APPROX IS POSSIBLE: 4.979322416962324e-15


In [111]:
# CELL TO VALIDATE CONV_REG AND CONV_APPROX FUNCTIONS WITH AN EXAMPLE
# THIS CELL CHECKS WHEN RANK OF F IS 2 (SO A RANK-2 APPROXIMATION IS EXACT)
# NOTE(review): test_f is symmetric, so this check cannot detect a mix-up between
# the rows and columns of the factor g; a non-square filter would be a stronger test.

# Use a locally seeded generator so A (and therefore the printed norm) is the same
# on every run without touching NumPy's global random state.
A = np.random.default_rng(42).standard_normal((10, 10))
test_f = np.array([[1.0,1.0], [1.0,1.1]])

conv_reg_A = conv_reg(test_f, A)

# Check ensuring conv_reg function is working properly: the filter weights are all 1
# except the bottom-right entry, which is 1.1. Compared with a tolerance because
# exact float equality depends on the order of the additions.
expected = A[:-1, :-1] + A[:-1, 1:] + A[1:, :-1] + A[1:, 1:] * 1.1
np.testing.assert_allclose(conv_reg_A, expected, rtol=1e-12, atol=1e-12)

# Despite the "_1" suffix (name kept from the rank-1 cell), this is the rank-2 result.
conv_approx_A_1 = conv_approx(test_f, A, 2)
# The factors are not used by the checks below; kept available for inspection.
f, g = rank_approx(test_f, 2)
diff_norm = np.linalg.norm(conv_approx_A_1 - conv_reg_A)
print("FROB NORM OF DIFF BETWEEN REG AND APPROX CONV WHERE EXACT APPROX IS POSSIBLE: " + str(diff_norm))

# NOTE THAT DUE TO FLOATING POINT ERROR THERE MAY BE SLIGHT
# DEVIATIONS FROM THE EXACT RESULT, BUT WITH SUCH A SMALL ERROR WE CAN
# CONSIDER THE MATRICES EQUAL AND THE FUNCTIONS VALID
assert diff_norm <= 1e-10

FROB NORM OF DIFF BETWEEN REG AND APPROX CONV WHERE EXACT APPROX IS POSSIBLE: 1.3260591617493812e-14
