In [147]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

In [148]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can run code embedded in the file. Only load these .npy
# files if they come from a trusted source.
d_test = np.load('MNIST_test_set.npy', allow_pickle = True)
d_train = np.load('MNIST_training_set.npy', allow_pickle = True)

In [149]:
# Each loaded array appears to wrap a single dict (it is unwrapped with
# .item() and then read by key), so unwrap each file once instead of once
# per key.
train_set = d_train.item()
test_set = d_test.item()

# Index with [] rather than .get(): a missing key then fails right here with
# a KeyError instead of silently producing None and failing later on .shape.
X_train = train_set['features']
Y_train = train_set['labels']

print(X_train.shape)
print(Y_train.shape)

X_test = test_set['features']
Y_test = test_set['labels']

print(X_test.shape)
print(Y_test.shape)

(2000, 256)
(2000,)
(500, 256)
(500,)


# Plot a sample of the training set

def plot_digits(XX, N):
    """Draw the first N**2 rows of XX as an N-by-N grid of 16x16 images.

    Parameters
    ----------
    XX : array indexable as XX[k, :]
        One flattened image per row. Each plotted row is reshaped to
        (16, 16), so it must hold 256 values.
    N : int
        Side length of the grid. Rows 0 .. N**2 - 1 are drawn in row-major
        order (row i, column j of the grid shows XX[N*i + j]).

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    # squeeze=False keeps `ax` two-dimensional even for N == 1; with the
    # default, plt.subplots(1, 1) returns a bare Axes and ax[i, j] fails.
    fig, ax = plt.subplots(N, N, figsize=(8, 8), squeeze=False)
    for i in range(N):
        for j in range(N):
            ax[i, j].imshow(XX[N * i + j, :].reshape((16, 16)), cmap="Greys")
            ax[i, j].axis("off")

    #fig.savefig('sample_256.png')

# Show the first 64 training digits as an 8x8 grid.
plot_digits(X_train, 8)

# Labels of training samples 22..27 (six of the digits drawn above).
print(Y_train[22:28])

In [151]:
# Fit a PCA that keeps 16 components of the training features.
pca = PCA(n_components = 16)
pca.fit(X_train)

PCA(n_components=16)

In [152]:
# Fit a PCA with n_components left at its default, so the singular-value
# spectrum is not truncated to 16 entries like `pca` above.
pca_full = PCA()
pca_full.fit(X_train)

PCA()

# Log singular-value spectra side by side: the untruncated PCA on the left,
# the 16-component PCA on the right.
fig2, ax2 = plt.subplots(1, 2, figsize=(15, 5))

for panel, model, title in zip(
    ax2,
    (pca_full, pca),
    ('256 Principal Components', '16 Principal Components'),
):
    panel.plot(np.log(model.singular_values_))
    panel.set_xlabel('index $j$')
    # Raw string: '\l' and '\s' are not valid escape sequences, so the
    # non-raw literal triggers an invalid-escape warning in current Pythons.
    panel.set_ylabel(r'$\log(\sigma_j)$')
    panel.set_title(title, fontsize=18)


# Draw the 16 principal directions of `pca` as 16x16 images on a 4x4 grid.
fig3, ax3 = plt.subplots(4, 4, figsize=(20, 20))

for k in range(4):
    for j in range(4):
        mode = k * 4 + j  # row-major position in the grid == component index
        im1 = np.reshape(pca.components_[mode, :], (16, 16))

        ax3[k, j].imshow(im1, cmap='Greys')
        ax3[k, j].set_xlabel('x')
        ax3[k, j].set_title('PC ' + str(mode + 1))

# The bare name `plt.show` only references the function; it has to be
# called to have any effect.
plt.show()

In [155]:
def Frob(comp, data):
    """Fraction of the data's Frobenius norm kept by a PCA with `comp` modes.

    The returned value is

        sqrt(sum of the kept sigma_j**2) / sqrt(sum of all sigma_j**2)

    where sigma_j are the PCA singular values of `data`.

    Parameters
    ----------
    comp : int, float or str
        Forwarded unchanged to PCA as ``n_components``.
    data : array-like of shape (n_samples, n_features)
        Data the PCA is fitted on.

    Returns
    -------
    float
        Ratio of the truncated to the full Frobenius norm (0 when nothing is
        kept, 1 when every component is kept).
    """
    # A single fit is enough: explained_variance_ratio_[j] is the share of
    # the total variance carried by component j, i.e. sigma_j**2 divided by
    # the sum over ALL sigma**2, so no second untruncated fit is needed.
    # svd_solver="full" keeps the result exact and repeatable; the default
    # solver choice can fall back to a randomized, unseeded approximation
    # for small `comp`.
    pca_new = PCA(n_components=comp, svd_solver="full")
    pca_new.fit(data)

    frob_norm = np.sqrt(np.sum(pca_new.explained_variance_ratio_))
    return frob_norm

In [222]:
# Fraction of the training data's Frobenius norm kept by 14 PCA modes.
Frob(14, X_train)

0.9052289284016043

In [198]:
Frob_x = np.arange(1, 257)

# The whole curve Frob(k, X_train), k = 1..256, follows from one spectrum:
# the kept fraction at k is sqrt(cumsum(sigma**2)[k-1] / sum(sigma**2)).
# Calling Frob() once per k instead refits PCA on the same data every time.
# NOTE(review): assumes X_train yields 256 singular values (256 features,
# at least 256 samples), matching the length of Frob_x.
sigma_sq = np.square(PCA().fit(X_train).singular_values_)
Frob_y = np.sqrt(np.cumsum(sigma_sq) / np.sum(sigma_sq))

fig6, ax6 = plt.subplots()
ax6.plot(Frob_x, Frob_y, label='PC Modes')
ax6.set_xlabel('PC Modes')
ax6.set_ylabel('Frob Norm')

# Highlight 3, 7 and 14 modes (array positions 2, 6 and 13).
marked = [2, 6, 13]
ax6.scatter(Frob_x[marked], Frob_y[marked])
for idx, text in zip(marked, ('(3, .67)', '(7, .81)', '(14, .91)')):
    ax6.annotate(text, (Frob_x[idx], Frob_y[idx]), fontsize=12)

# Second x-axis: the same curve against log(number of modes).
ax6_2 = ax6.twiny()
ax6_2.plot(np.log(Frob_x), Frob_y, color='darkorange', label='log(PC Modes)')
ax6_2.set_xlabel('log(PC Modes)')
ax6_2.set_ylabel('Frobenius Norm')
fig6.legend(fontsize=10, bbox_to_anchor=(.9, .7))

In [227]:
#fig6.savefig('Frob_norm.png')

In [157]:
# Project the training features onto 16 principal components, then map the
# scores back to the original feature space to get rank-16 reconstructions.
pca1 = PCA(n_components=16)
trans1 = pca1.fit_transform(X_train)
inv1 = pca1.inverse_transform(trans1)

def plot_digits_2(XX, N):
    """Draw the first N**2 rows of XX as an N-by-N grid of 16x16 images.

    Parameters
    ----------
    XX : array indexable as XX[k, :]
        One flattened image per row. Each plotted row is reshaped to
        (16, 16), so it must hold 256 values.
    N : int
        Side length of the grid. Rows 0 .. N**2 - 1 are drawn in row-major
        order (row i, column j of the grid shows XX[N*i + j]).

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    # squeeze=False keeps `ax4` two-dimensional even for N == 1; with the
    # default, plt.subplots(1, 1) returns a bare Axes and ax4[i, j] fails.
    fig4, ax4 = plt.subplots(N, N, figsize=(8, 8), squeeze=False)
    for i in range(N):
        for j in range(N):
            ax4[i, j].imshow(XX[N * i + j, :].reshape((16, 16)), cmap="Greys")
            ax4[i, j].axis("off")

    #fig4.savefig('sample_16.png')
# Show the 16-component reconstructions of the first 64 training digits.
plot_digits_2(inv1, 8)

# Labels of those same 64 training samples.
print(Y_train[0:8**2])

# Train classifier 

In [159]:
def extract_feat_lab(a, b, X, Y):
    """Select the samples labelled `a` or `b` and relabel them as -1 / +1.

    Parameters
    ----------
    a, b : scalar
        Label values to keep. Samples labelled `a` become -1 and samples
        labelled `b` become +1.
    X : array-like of shape (n_samples, n_features)
        Feature rows aligned with `Y`.
    Y : array-like of shape (n_samples,)
        Label of each row of `X`.

    Returns
    -------
    feature_a_b : ndarray
        All class-`a` rows of `X` followed by all class-`b` rows, each group
        in its original order.
    label_a_b : ndarray of shape (n_a + n_b,)
        -1 for every class-`a` row, then +1 for every class-`b` row.
    """
    # Accept plain lists too: `Y == a` on a list is a single bool, which
    # would silently select nothing.
    X = np.asarray(X)
    Y = np.asarray(Y)

    rows_a = np.where(Y == a)[0]
    rows_b = np.where(Y == b)[0]

    feature_a_b = np.concatenate((X[rows_a, :], X[rows_b, :]), axis=0)

    # Write the +/-1 targets directly. Filling in the raw a/b values first
    # and then substituting -1 and +1 in two passes breaks whenever those
    # values collide with the targets (a=1, b=-1 came out as all +1).
    label_a_b = np.concatenate((np.full(len(rows_a), -1), np.full(len(rows_b), 1)))

    return feature_a_b, label_a_b

In [160]:
def PCA_MSE(comp, lam, a, b, feature, label):
    """MSE of a PCA + ridge-regression classifier for the label pair (a, b).

    Training always uses the notebook-level ``X_train`` / ``Y_train``: the
    PCA is fitted on all of ``X_train`` and the ridge model on the projected
    (a, b) subset with targets -1 / +1. The fitted model is then scored on
    the (a, b) subset of `feature` / `label`.

    Parameters
    ----------
    comp : int
        Number of PCA components (passed to ``PCA``).
    lam : float
        Largest ridge penalty; RidgeCV selects among ``lam``, ``0.1 * lam``
        and ``0.01 * lam``.
    a, b : scalar
        The two label values to discriminate.
    feature : ndarray of shape (n_samples, n_features)
        Features of the evaluation set.
    label : ndarray of shape (n_samples,)
        Labels of the evaluation set.

    Returns
    -------
    float
        Mean squared error between the -1 / +1 targets and the ridge
        predictions on the evaluation subset.
    """
    X_train_a_b, Y_train_a_b = extract_feat_lab(a, b, X_train, Y_train)

    # The PCA basis comes from every training sample, not only classes a, b.
    pca_comp = PCA(comp)
    pca_comp.fit(X_train)

    trans_train = pca_comp.transform(X_train_a_b)

    # store_cv_values=True is dropped: the stored values were never read
    # here, and newer scikit-learn releases deprecate the argument in favour
    # of store_results. cv=None is the default, so it is omitted as well.
    clf = linear_model.RidgeCV(alphas=(lam, lam * .1, lam * .01),
                               fit_intercept=False)
    clf.fit(trans_train, Y_train_a_b)

    feature_a_b, label_a_b = extract_feat_lab(a, b, feature, label)
    feature_trans = pca_comp.transform(feature_a_b)

    pred = clf.predict(feature_trans)
    MSE = mean_squared_error(label_a_b, pred)

    return MSE

In [161]:
# Train/test MSE with 16 PCA modes and lam = 10 for three digit pairs.

# Digits 1 vs 8
MSE_1_8_16_train = PCA_MSE(16, 10, 1, 8, X_train, Y_train)
print(f'MSE (1,8) Train: {MSE_1_8_16_train}')
MSE_1_8_16_test = PCA_MSE(16, 10, 1, 8, X_test, Y_test)
print(f'MSE (1,8) Test: {MSE_1_8_16_test}')

# Digits 3 vs 8
MSE_3_8_16_train = PCA_MSE(16, 10, 3, 8, X_train, Y_train)
print(f'MSE (3,8) Train: {MSE_3_8_16_train}')
MSE_3_8_16_test = PCA_MSE(16, 10, 3, 8, X_test, Y_test)
print(f'MSE (3,8) Test: {MSE_3_8_16_test}')

# Digits 2 vs 7
MSE_2_7_16_train = PCA_MSE(16, 10, 2, 7, X_train, Y_train)
print(f'MSE (2,7) Train: {MSE_2_7_16_train}')
MSE_2_7_16_test = PCA_MSE(16, 10, 2, 7, X_test, Y_test)
print(f'MSE (2,7) Test: {MSE_2_7_16_test}')



MSE (1,8) Train: 0.08813624979441376
MSE (1,8) Test: 0.08555498332204307
MSE (3,8) Train: 0.19029422404662888
MSE (3,8) Test: 0.2295486555829837
MSE (2,7) Train: 0.09812199304498453
MSE (2,7) Test: 0.12923048256642095


In [162]:
def euc_distance(a, b, comp, data, label):
    """Euclidean distance between the two class means in PCA space.

    The PCA basis is fitted on the notebook-level ``X_train`` (not on
    `data`). The samples of `data` labelled `a` or `b` are projected onto
    that basis, each class is averaged, and the L2 norm of the difference of
    the two mean vectors is returned.

    Parameters
    ----------
    a, b : scalar
        The two label values to compare.
    comp : int
        Number of PCA components (passed to ``PCA``).
    data : ndarray of shape (n_samples, n_features)
        Features to project.
    label : ndarray of shape (n_samples,)
        Labels aligned with `data`.

    Returns
    -------
    float
        Distance between the class-`a` and class-`b` means after projection.
    """
    projector = PCA(comp).fit(X_train)

    # extract_feat_lab returns class a tagged -1 and class b tagged +1.
    pair_feats, pair_signs = extract_feat_lab(a, b, data, label)
    scores = projector.transform(pair_feats)

    centroid_a = scores[pair_signs == -1].mean(axis=0)
    centroid_b = scores[pair_signs == 1].mean(axis=0)

    return np.linalg.norm(centroid_a - centroid_b)

In [163]:
# For every unordered digit pair (i, j) with i < j, record the distance
# between the class means and the test MSE, both with 16 PCA modes.
ave_dist_test_error = []
pair_snippets = []

# Nested range loops replace the hand-maintained while counter.
for i in range(9):
    for j in range(i + 1, 10):
        dist = euc_distance(i, j, 16, X_test, Y_test)
        error = PCA_MSE(16, .1, i, j, X_test, Y_test)
        ave_dist_test_error.append([dist, error, i, j])
        pair_snippets.append('"(' + str(i) + ',' + str(j) + ')", ')

# One row per pair; columns are [distance, test MSE, i, j].
dist_error = np.asarray(ave_dist_test_error)

# Same text as before (Python source for a list of "(i,j)" labels), built
# with a single join instead of repeated string concatenation.
annotations = "annotations = [" + "".join(pair_snippets) + "]"

In [164]:
# Degree-1 least-squares fit of column 1 of dist_error against column 0;
# m is the slope and b the intercept.
m, b = np.polyfit(dist_error[:, 0], dist_error[:, 1], 1)
print(m)
print(b)

-0.04739769625722877
0.31892859115211447


In [165]:
# Build one "(i,j)" label per row from the digit-pair columns of dist_error
# (columns 2 and 3) instead of a hand-typed list, so the labels cannot drift
# out of step with the rows they annotate.
annotations = [f"({int(i)},{int(j)})" for i, j in dist_error[:, 2:4]]

pair_dist = dist_error[:, 0]  # column 0: distance between class means
pair_mse = dist_error[:, 1]   # column 1: test MSE

fig5, ax5 = plt.subplots(figsize=(30, 15))
ax5.scatter(pair_dist, pair_mse)
ax5.set_xlabel("Ave L2 Distance Between Two Points After Projection Onto 16 PC Modes",
               fontsize=30)
ax5.set_ylabel("Test MSE", fontsize=30)
#ax5.set_title("L2 Norm vs Test MSE",fontsize=40)
ax5.tick_params(axis='x')
ax5.tick_params(axis='y')

# Straight line from the degree-1 fit coefficients m (slope), b (intercept).
ax5.plot(pair_dist, m * pair_dist + b,
         label='Least Squares Fit (Degree 1)')

for i, label in enumerate(annotations):
    ax5.annotate(label, (pair_dist[i], pair_mse[i]), fontsize=20)

ax5.legend(fontsize=20, bbox_to_anchor=(.9, .8))
plt.show()

In [167]:
#fig.savefig('sample_256.png')
#fig2.savefig('log_sing.png')
#fig3.savefig('PC_modes.png')
#fig4.savefig('sample_16.png')
#fig5.savefig('L2_norm_MSE.png')