# **Task 1**: Linear Algebra and Optimization for ML -  MO431A
University of Campinas (UNICAMP), Institute of Computing (IC)

Prof. Jacques Wainer, 2021s1


## Imports needed for this project

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import PCA

## Database reading

In [None]:
# Load the dataset from a NumPy .npy file; the path is relative to the
# current working directory.
# Later cells reshape each row of X to 28x28, so every row is presumably one
# flattened 28x28 image -- confirm against the dataset description.
X = np.load('db/dados.npy')

### Printing the first three images from the database

In [None]:
# Show the first three samples of X side by side as grayscale images.
fig = plt.figure(figsize=(10, 7))

for i in range(3):
    # Restore the 28x28 pixel grid of the i-th flattened sample.
    img = np.reshape(X[i], (28, 28))

    # Place the image in slot i+1 of a 1x3 grid and keep the Axes handle so
    # that every drawing call below targets this subplot explicitly.
    ax = fig.add_subplot(1, 3, i + 1)

    # Draw the image a single time, directly in grayscale; drawing it first
    # with the default colormap would only add a hidden, redundant layer.
    ax.imshow(img, cmap=cm.gray)
    ax.axis('off')
    ax.set_title("Sample #{0}".format(i))
    

### Dataset Normalization

In [None]:
# Rescale the data with scikit-learn's Normalizer using its default settings
# and rebind X to the result, so every later cell works on the normalized data.
# Per the scikit-learn documentation, Normalizer scales each sample (row)
# independently to unit norm (L2 by default) and needs no prior fit().
X = Normalizer().transform(X)

## Matrix Factorization

In [None]:
## Full SVD of X: U and Vt are returned as square matrices
U, D, Vt = np.linalg.svd(X, full_matrices=True)
# Shapes of the full factors
print(f'U  full matriz shape = {U.shape}')  # left singular vectors (columns of U)
print(f'D  full matriz shape = {D.shape}')  # singular values, returned as a 1-D array
print(f'Vt full matriz shape = {Vt.shape}') # right singular vectors (rows of Vt)


## Compact (reduced) SVD of X: only the first min(rows, cols) singular vectors are kept
Uc, Dc, Vtc = np.linalg.svd(X, full_matrices=False)
# Shapes of the compact factors
print(f'U  compact matriz shape = {Uc.shape}')  # left singular vectors (columns of Uc)
print(f'D  compact matriz shape = {Dc.shape}')  # singular values, returned as a 1-D array
print(f'Vt compact matriz shape = {Vtc.shape}') # right singular vectors (rows of Vtc)

## Dimensionality Reduction

### Reduction to 100 dimensions

In [None]:
# Fit a 100-component PCA on X and project X onto those components.
# `pca` is kept because the reconstruction cell calls pca.inverse_transform().
pca = PCA(n_components=100)
reducedMatrix = pca.fit_transform(X)

# Shape of the reduced matrix
print(f'Reduced Matrix shape: {reducedMatrix.shape}')


### Reconstructed Matrix

In [None]:
# Map the reduced representation back to the original feature space.
# NOTE: this relies on `pca` still being the model that produced
# reducedMatrix; later cells rebind `pca`, so run this cell before them.
reconstruct_matrix = pca.inverse_transform(reducedMatrix)

print(f'Reconstruct matrix shape: {reconstruct_matrix.shape}')

## Printing of the first three reconstructed images

In [None]:
# Show the first three reconstructed samples side by side as grayscale images.
fig = plt.figure(figsize=(10, 7))

for i in range(3):
    # Restore the 28x28 pixel grid of the i-th reconstructed sample.
    img = np.reshape(reconstruct_matrix[i], (28, 28))

    # Place the image in slot i+1 of a 1x3 grid and keep the Axes handle so
    # that every drawing call below targets this subplot explicitly.
    ax = fig.add_subplot(1, 3, i + 1)

    # Draw the image a single time, directly in grayscale; drawing it first
    # with the default colormap would only add a hidden, redundant layer.
    ax.imshow(img, cmap=cm.gray)
    ax.axis('off')
    ax.set_title("Sample #{0}".format(i))
    

## Printing the first three eigen-digits

In [None]:
# Compute U, D and Vt for the PCA-reduced matrix (compact SVD)
U_reduced, D_reduced, Vt_reduced = np.linalg.svd(reducedMatrix, full_matrices=False)

# Print the first three rows of Vt_reduced, i.e. the first three right
# singular vectors of reducedMatrix.
# NOTE(review): these vectors are expressed in the PCA coordinates of
# reducedMatrix, not in pixel space -- confirm this is the intended notion
# of "eigen-digit".
print(f'Digit #0: \n{Vt_reduced[0]} \n')
print(f'Digit #1: \n {Vt_reduced[1]} \n')
print(f'Digit #2: \n{Vt_reduced[2]}')



In [None]:
## Example of the eigen-digits taken from the reduced Vt

fig = plt.figure(figsize=(10, 10))
# The rows of Vt_reduced (right singular vectors, not eigenvalues) form the
# basis of the reduced subspace, so the first three rows are displayed.
for i in range(3):
    # Lay out the i-th row of Vt_reduced on a 10x10 grid for display.
    img = np.reshape(Vt_reduced[i], (10, 10))

    # Place the image in slot i+1 of a 1x3 grid and keep the Axes handle so
    # that every drawing call below targets this subplot explicitly.
    ax = fig.add_subplot(1, 3, i + 1)

    # Draw the image a single time, directly in grayscale; drawing it first
    # with the default colormap would only add a hidden, redundant layer.
    ax.imshow(img, cmap=cm.gray)
    ax.axis('off')
    ax.set_title("Sample #{0}".format(i))

In [None]:
## Example of the eigen-digits

fig = plt.figure(figsize=(10, 10))
# The rows of Vt (right singular vectors of X, not eigenvalues) form the
# basis of the feature space, so the first three rows are displayed.
for i in range(3):
    # Lay out the i-th row of Vt on the 28x28 pixel grid.
    img = np.reshape(Vt[i], (28, 28))

    # Place the image in slot i+1 of a 1x3 grid and keep the Axes handle so
    # that every drawing call below targets this subplot explicitly.
    ax = fig.add_subplot(1, 3, i + 1)

    # Draw the image a single time, directly in grayscale; drawing it first
    # with the default colormap would only add a hidden, redundant layer.
    ax.imshow(img, cmap=cm.gray)
    ax.axis('off')
    ax.set_title("Sample #{0}".format(i))
    

## Choosing the number of dimensions

In [None]:
### How many dimensions to keep, following the "singular values > 1" rule

# For each candidate dimensionality around the expected cut-off, project X
# with PCA and print the singular values of the projected data, so the point
# where they drop below 1 can be read from the output.
for n_dims in (271, 272, 273):
    pca = PCA(n_components=n_dims).fit(X)
    reducedMatrix_SV = pca.transform(X)
    _, d_sv, _ = np.linalg.svd(reducedMatrix_SV, full_matrices=False)

    print(f'Dimension = {n_dims}: \n{d_sv} \n')


### Conclusion
Following the singular values > 1 rule, we must keep 272 dimensions

In [None]:
### Capture 80% of the variance

# Threshold: 80% of the sum of the singular values of the compact SVD of X.
# NOTE(review): the plain sum of singular values is used as the "variance"
# measure here; explained variance is usually derived from squared singular
# values of centered data (e.g. PCA's explained_variance_ratio_) -- confirm
# that this is the intended criterion.
sd_value = Dc.sum() * (8/10) # 80% of the total
print(f'80% of value: {sd_value}')

# Candidate numbers of components to compare against the threshold
r_list = [50, 75, 125, 150, 200, 250, 275, 276, 300]

for r in r_list:
    # Project X onto r principal components ...
    pca = PCA(n_components=r)
    pca.fit(X)
    reducedMatrix_SD = pca.transform(X)
    # ... and report the sum of the singular values of the projected data.
    _, d_sd, _ = np.linalg.svd(reducedMatrix_SD, full_matrices=False)
    print(f'Sum of D = {d_sd.sum()} \t for r = [{r}]')

# NOTE(review): this value is hard-coded, presumably read off the sums
# printed above; it is not computed by the code.
print('Best value of r = 276')

### Conclusion
Following the rule of capturing 80% of the variance (measured here as 80% of the sum of the singular values), we must keep 276 dimensions

In [None]:
### Capture 95% of the variance

# Threshold: 95% of the sum of the singular values of the compact SVD of X.
# NOTE(review): the plain sum of singular values is used as the "variance"
# measure here; explained variance is usually derived from squared singular
# values of centered data (e.g. PCA's explained_variance_ratio_) -- confirm
# that this is the intended criterion.
sd_value = Dc.sum() * (95/100) # 95% of the total
print(f'95% of value: {sd_value}')

# Candidate numbers of components to compare against the threshold
r_list = [500, 525, 550, 575, 600, 625, 650, 675, 700, 725, 750]

for r in r_list:
    # Project X onto r principal components ...
    pca = PCA(n_components=r)
    pca.fit(X)
    reducedMatrix_SD = pca.transform(X)
    # ... and report the sum of the singular values of the projected data.
    _, d_sd, _ = np.linalg.svd(reducedMatrix_SD, full_matrices=False)
    print(f'Sum of D = {d_sd.sum()} \t for r = [{r}]')

# NOTE(review): this value is hard-coded, presumably read off the sums
# printed above; it is not computed by the code.
print('Best value of r = 675')


### Conclusion
Following the rule of capturing 95% of the variance, we must keep 675 dimensions