# Minimos Quadrados

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

def least_squares_classification(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

def calculate_error(X, y, theta):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Realiza as predições
    predictions = X @ theta

    # Converte as predições em rótulos
    predicted_labels = np.argmax(predictions, axis=1)

    # Calcula o erro (taxa de erro)
    error = np.mean(predicted_labels != y)

    return error

# Carrega o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normaliza os dados dividindo por 255.0
X_train = X_train / 255.0
X_test = X_test / 255.0

# Converte os rótulos para uma representação one-hot
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a classificação usando mínimos quadrados com regularização de Ridge
theta = least_squares_classification(X_train, y_train_onehot, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Converte as predições em rótulos
predicted_labels = np.argmax(predictions, axis=1)

# # Calcula a acurácia
# accuracy = np.mean(predicted_labels == y_test) * 100
# print("Acurácia da classificação: {:.2f}%".format(accuracy))

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)


2023-08-20 15:51:09.828776: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-20 15:51:10.347100: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-20 15:51:10.349509: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Accuracy: 0.8603


# Perceptron Logístico

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist


In [46]:
class LogisticPerceptron:
    def __init__(self, num_features, num_classes, learning_rate=0.01, num_epochs=100):
        self.num_features = num_features
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = np.zeros((num_features + 1, num_classes))  # +1 for the bias term

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, x):
        activations = np.dot(np.insert(x, 0, 1), self.weights)
        probabilities = self.sigmoid(activations)
        return np.argmax(probabilities)

    def train(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # Inserting bias term
        y = np.eye(self.num_classes)[y]  # One-hot encoding
        for _ in range(self.num_epochs):
            for i in range(len(X)):
                x = X[i]
                target = y[i]
                activations = np.dot(x, self.weights)
                probabilities = self.sigmoid(activations)
                error = target - probabilities
                delta = self.learning_rate * np.outer(x, error)
                self.weights += delta

# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

# Criar o objeto do perceptron logístico
num_features = X_train.shape[1]
num_classes = len(np.unique(y_train))
perceptron = LogisticPerceptron(num_features=num_features, num_classes=num_classes)

# Treinar o perceptron
perceptron.train(X_train, y_train)

# Realizar previsões no conjunto de teste
predictions = []
for sample in X_test:
    prediction = perceptron.predict(sample)
    predictions.append(prediction)

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.91


# Minimos Quadrados + PCA

In [14]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

def least_squares_classification(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 200

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados nas componentes principais
X_test_pca = np.dot(X_test, principal_components)

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

# Converte os rótulos para uma representação one-hot
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a classificação usando mínimos quadrados com regularização de Ridge
theta = least_squares_classification(X_train_pca, y_train_onehot, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((X_test_pca.shape[0], 1)), X_test_pca), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Converte as predições em rótulos
predicted_labels = np.argmax(predictions, axis=1)

# # Calcula a acurácia
# accuracy = np.mean(predicted_labels == y_test) * 100
# print("Acurácia da classificação: {:.2f}%".format(accuracy))

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8618


# Perceptron Logistico + PCA

In [47]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from tensorflow.keras.datasets import mnist

class LogisticPerceptron:
    def __init__(self, num_features, num_classes, learning_rate=0.01, num_epochs=100):
        self.num_features = num_features
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = np.zeros((num_features + 1, num_classes))  # +1 for the bias term

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, x):
        activations = np.dot(np.insert(x, 0, 1), self.weights)
        probabilities = self.sigmoid(activations)
        return np.argmax(probabilities)

    def train(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # Inserting bias term
        y = np.eye(self.num_classes)[y]  # One-hot encoding
        for _ in range(self.num_epochs):
            for i in range(len(X)):
                x = X[i]
                target = y[i]
                activations = np.dot(x, self.weights)
                probabilities = self.sigmoid(activations)
                error = target - probabilities
                delta = self.learning_rate * np.outer(x, error)
                self.weights += delta


# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 200

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados nas componentes principais
X_test_pca = np.dot(X_test, principal_components)
# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

# Criar o objeto do perceptron logístico
num_features = X_train_pca.shape[1]
num_classes = len(np.unique(y_train))
perceptron = LogisticPerceptron(num_features=num_features, num_classes=num_classes)

# Treinar o perceptron
perceptron.train(X_train_pca, y_train)

# Realizar previsões no conjunto de teste
predictions = []
for sample in X_test_pca:
    prediction = perceptron.predict(sample)
    predictions.append(prediction)

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.909


# Multi Layer Perceptron

In [33]:
# Regras heurísticas para determinação do número de neurônios ocultos (q)
# fonte referência: https://repositorio.ufc.br/bitstream/riufc/52214/3/2020_dis_juponte.pdf

import numpy as np

p = 28*28
m = 10

# Regra do valor médio
q1 = (p + m) / 2
q1 = int(q1)
print('Regra do valor médio: ', q1)

# Regra da raiz quadrada
q2 = np.sqrt(p * m)
q2 = int(q2)
print('Regra da raiz quadrada: ', q2)

# Regra de Kolmogorov
q3 = 2*p + 1
q3 = int(q3)
print('Regra de kolmogorov: ', q3)

Regra do valor médio:  397
Regra da raiz quadrada:  88
Regra de kolmogorov:  1569


In [34]:
import numpy as np
import requests, gzip, os, hashlib
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
%pylab inline

(X, Y), (X_test, y_test) = mnist.load_data()

#Validation split
rand=np.arange(60000)
np.random.shuffle(rand)
train_no=rand[:50000]

val_no=np.setdiff1d(rand,train_no)

X_train,X_val=X[train_no,:,:],X[val_no,:,:]
y_train,y_val=Y[train_no],Y[val_no]

def init(x, y):
    layer = np.random.uniform(-1, 1., size=(x,y)) / np.sqrt(x*y)
    return layer.astype(np.float32)

# sigmoid function
def sigmoid(x):
    return 1 / (np.exp(-x)+1)

# derivative of sigmoid
def d_sigmoid(x):
    return 1 / (np.exp(-x)+1) * (1 - 1 / (np.exp(-x)+1))
# def d_sigmoid(x):
#     return (np.exp(-x)) / ((np.exp(-x) + 1)**2)

# sofmax function
def softmax(x):
    exp_element = np.exp(x-np.max(x))
    return exp_element / np.sum(exp_element, axis=0)

# derivative of softmax
def d_softmax(x):
    exp_element = np.exp(x-x.max())
    return exp_element / np.sum(exp_element, axis=0) * (1-exp_element / np.sum(exp_element, axis=0))


# foward and backward pass
def forward_backward_pass(x, y):
    targets = np.zeros((len(y), 10), np.float32)
    targets[range(targets.shape[0]), y] = 1    
    
    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = softmax(x_l2)

    error = 2 * (out - targets) / out.shape[0] * d_softmax(x_l2)
    update_l2 = x_sigmoid.T @ error

    error = ((l2).dot(error.T)).T * d_sigmoid(x_l1)
    update_l1 = x.T @ error

    return out, update_l1, update_l2


%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


## Regra 1: Valor médio

In [35]:
epochs = 10000
lr = 0.001
batch = 128

np.random.seed(42)
l1 = init(28*28, q1)
l2 = init(q1, 10)

accuracies, losses, val_accuracies, val_losses, test_accuracies, test_losses = [], [], [], [], [], []

for i in range(epochs):
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample].reshape((-1, 28*28))
    y = y_train[sample]

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    category = np.argmax(out, axis=1)
    accuracy = (category == y).mean()
    accuracies.append(accuracy)

    loss = ((category - y)**2).mean()
    losses.append(loss.item())

    l1 = l1 - lr*update_l1
    l2 = l2 - lr*update_l2

    if(i%20 == 0):
        X_val = X_val.reshape((-1, 28*28))
        val_out = np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)), axis=1)
        val_acc = (val_out == y_val).mean()
        val_accuracies.append(val_acc.item())
        val_loss = ((val_out - y_val)**2).mean()
        val_losses.append(val_loss.item())
    if(i%500 == 0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')

For 0th epoch: train accuracy: 0.062 | validation accuracy:0.078
For 500th epoch: train accuracy: 0.727 | validation accuracy:0.701
For 1000th epoch: train accuracy: 0.711 | validation accuracy:0.730
For 1500th epoch: train accuracy: 0.773 | validation accuracy:0.745
For 2000th epoch: train accuracy: 0.773 | validation accuracy:0.755
For 2500th epoch: train accuracy: 0.844 | validation accuracy:0.763
For 3000th epoch: train accuracy: 0.859 | validation accuracy:0.769
For 3500th epoch: train accuracy: 0.820 | validation accuracy:0.772
For 4000th epoch: train accuracy: 0.805 | validation accuracy:0.774
For 4500th epoch: train accuracy: 0.703 | validation accuracy:0.776
For 5000th epoch: train accuracy: 0.797 | validation accuracy:0.776
For 5500th epoch: train accuracy: 0.773 | validation accuracy:0.778
For 6000th epoch: train accuracy: 0.781 | validation accuracy:0.776
For 6500th epoch: train accuracy: 0.789 | validation accuracy:0.776
For 7000th epoch: train accuracy: 0.820 | validation

In [36]:
X_test=X_test.reshape((-1,28*28))
test_out=np.argmax(softmax(sigmoid(X_test.dot(l1)).dot(l2)),axis=1)
test_acc=(test_out==y_test).mean().item()
print(f'Test accuracy = {test_acc*100:.2f}%')

Test accuracy = 76.84%


## Regra 2: Raiz quadrada

In [37]:
epochs = 10000
lr = 0.001
batch = 128

np.random.seed(42)
l1 = init(28*28, q2)
l2 = init(q2, 10)

accuracies, losses, val_accuracies, val_losses, test_accuracies, test_losses = [], [], [], [], [], []

for i in range(epochs):
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample].reshape((-1, 28*28))
    y = y_train[sample]

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    category = np.argmax(out, axis=1)
    accuracy = (category == y).mean()
    accuracies.append(accuracy)

    loss = ((category - y)**2).mean()
    losses.append(loss.item())

    l1 = l1 - lr*update_l1
    l2 = l2 - lr*update_l2

    if(i%20 == 0):
        X_val = X_val.reshape((-1, 28*28))
        val_out = np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)), axis=1)
        val_acc = (val_out == y_val).mean()
        val_accuracies.append(val_acc.item())
        val_loss = ((val_out - y_val)**2).mean()
        val_losses.append(val_loss.item())
    if(i%500 == 0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')

For 0th epoch: train accuracy: 0.062 | validation accuracy:0.077
For 500th epoch: train accuracy: 0.445 | validation accuracy:0.498
For 1000th epoch: train accuracy: 0.711 | validation accuracy:0.604
For 1500th epoch: train accuracy: 0.641 | validation accuracy:0.657
For 2000th epoch: train accuracy: 0.656 | validation accuracy:0.684
For 2500th epoch: train accuracy: 0.727 | validation accuracy:0.703
For 3000th epoch: train accuracy: 0.734 | validation accuracy:0.720
For 3500th epoch: train accuracy: 0.836 | validation accuracy:0.732
For 4000th epoch: train accuracy: 0.812 | validation accuracy:0.743
For 4500th epoch: train accuracy: 0.758 | validation accuracy:0.749
For 5000th epoch: train accuracy: 0.719 | validation accuracy:0.756
For 5500th epoch: train accuracy: 0.734 | validation accuracy:0.764
For 6000th epoch: train accuracy: 0.750 | validation accuracy:0.768
For 6500th epoch: train accuracy: 0.820 | validation accuracy:0.773
For 7000th epoch: train accuracy: 0.727 | validation

In [38]:
X_test=X_test.reshape((-1,28*28))
test_out=np.argmax(softmax(sigmoid(X_test.dot(l1)).dot(l2)),axis=1)
test_acc=(test_out==y_test).mean().item()
print(f'Test accuracy = {test_acc*100:.2f}%')

Test accuracy = 81.15%


## Regra 3: kolmogorov

In [39]:
epochs = 10000
lr = 0.001
batch = 128

np.random.seed(42)
l1 = init(28*28, q3)
l2 = init(q3, 10)

accuracies, losses, val_accuracies, val_losses, test_accuracies, test_losses = [], [], [], [], [], []

for i in range(epochs):
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample].reshape((-1, 28*28))
    y = y_train[sample]

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    category = np.argmax(out, axis=1)
    accuracy = (category == y).mean()
    accuracies.append(accuracy)

    loss = ((category - y)**2).mean()
    losses.append(loss.item())

    l1 = l1 - lr*update_l1
    l2 = l2 - lr*update_l2

    if(i%20 == 0):
        X_val = X_val.reshape((-1, 28*28))
        val_out = np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)), axis=1)
        val_acc = (val_out == y_val).mean()
        val_accuracies.append(val_acc.item())
        val_loss = ((val_out - y_val)**2).mean()
        val_losses.append(val_loss.item())
    if(i%500 == 0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')

For 0th epoch: train accuracy: 0.070 | validation accuracy:0.102
For 500th epoch: train accuracy: 0.750 | validation accuracy:0.725
For 1000th epoch: train accuracy: 0.672 | validation accuracy:0.733
For 1500th epoch: train accuracy: 0.742 | validation accuracy:0.740
For 2000th epoch: train accuracy: 0.727 | validation accuracy:0.741
For 2500th epoch: train accuracy: 0.781 | validation accuracy:0.737
For 3000th epoch: train accuracy: 0.664 | validation accuracy:0.734
For 3500th epoch: train accuracy: 0.758 | validation accuracy:0.720
For 4000th epoch: train accuracy: 0.773 | validation accuracy:0.709
For 4500th epoch: train accuracy: 0.727 | validation accuracy:0.695
For 5000th epoch: train accuracy: 0.680 | validation accuracy:0.680
For 5500th epoch: train accuracy: 0.672 | validation accuracy:0.668
For 6000th epoch: train accuracy: 0.633 | validation accuracy:0.654
For 6500th epoch: train accuracy: 0.719 | validation accuracy:0.643
For 7000th epoch: train accuracy: 0.594 | validation

In [40]:
X_test=X_test.reshape((-1,28*28))
test_out=np.argmax(softmax(sigmoid(X_test.dot(l1)).dot(l2)),axis=1)
test_acc=(test_out==y_test).mean().item()
print(f'Test accuracy = {test_acc*100:.2f}%')

Test accuracy = 57.05%


# Multi Layer Perceptron + PCA

In [41]:
import numpy as np
# Regra da raiz quadrada
q2 = np.sqrt(p * m)
q2 = int(q2)
print('Regra da raiz quadrada: ', q2)

Regra da raiz quadrada:  88


In [43]:
import numpy as np
import requests, gzip, os, hashlib
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
%pylab inline

(X, Y), (X_test, y_test) = mnist.load_data()

#Validation split
rand=np.arange(60000)
np.random.shuffle(rand)
train_no=rand[:50000]

val_no=np.setdiff1d(rand,train_no)

X_train,X_val=X[train_no,:,:],X[val_no,:,:]
y_train,y_val=Y[train_no],Y[val_no]

X_train = X_train.reshape((-1, 28*28))
X_val = X_val.reshape((-1, 28*28))
X_test = X_test.reshape((-1, 28*28))

def init(x, y):
    layer = np.random.uniform(-1, 1., size=(x,y)) / np.sqrt(x*y)
    return layer.astype(np.float32)

# sigmoid function
def sigmoid(x):
    return 1 / (np.exp(-x)+1)

# derivative of sigmoid
def d_sigmoid(x):
    return 1 / (np.exp(-x)+1) * (1 - (1 / (np.exp(-x)+1)))

# sofmax function
def softmax(x):
    exp_element = np.exp(x-x.max())
    return exp_element / np.sum(exp_element, axis=0)

# derivative of softmax
def d_softmax(x):
    exp_element = np.exp(x-x.max())
    return exp_element / np.sum(exp_element, axis=0) * (1-exp_element / np.sum(exp_element, axis=0))

# foward and backward pass
def forward_backward_pass(x, y):
    targets = np.zeros((len(y), 10), np.float32)
    targets[range(targets.shape[0]), y] = 1

    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = softmax(x_l2)


    error = 2 * (out - targets) / out.shape[0] * d_softmax(x_l2)
    update_l2 = x_sigmoid.T @ error


    error = ((l2).dot(error.T)).T * d_sigmoid(x_l1)
    update_l1 = x.T @ error

    return out, update_l1, update_l2

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [44]:
# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 155

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados nas componentes principais
X_val_pca = np.dot(X_val, principal_components)

# Projetar os dados nas componentes principais
X_test_pca = np.dot(X_test, principal_components)

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

epochs = 10000
lr = 0.001
batch = 128

np.random.seed(42)
l1 = init(num_components, q2)
l2 = init(q2, 10)

accuracies, losses, val_accuracies, val_losses, test_accuracies, test_losses = [], [], [], [], [], []

for i in range(epochs):
    sample = np.random.randint(0, X_train_pca.shape[0], size=(batch))
    x = X_train_pca[sample]
    y = y_train[sample]

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    category = np.argmax(out, axis=1)
    accuracy = (category == y).mean()
    accuracies.append(accuracy)

    loss = ((category - y)**2).mean()
    losses.append(loss.item())

    l1 = l1 - lr*update_l1
    l2 = l2 - lr*update_l2

    if(i%20 == 0):
        val_out = np.argmax(softmax(sigmoid(X_val_pca.dot(l1)).dot(l2)), axis=1)
        val_acc = (val_out == y_val).mean()
        val_accuracies.append(val_acc.item())
        val_loss = ((val_out - y_val)**2).mean()
        val_losses.append(val_loss.item())
    if(i%500 == 0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')

For 0th epoch: train accuracy: 0.109 | validation accuracy:0.096
For 500th epoch: train accuracy: 0.172 | validation accuracy:0.196
For 1000th epoch: train accuracy: 0.281 | validation accuracy:0.317
For 1500th epoch: train accuracy: 0.422 | validation accuracy:0.410
For 2000th epoch: train accuracy: 0.461 | validation accuracy:0.473
For 2500th epoch: train accuracy: 0.562 | validation accuracy:0.521
For 3000th epoch: train accuracy: 0.484 | validation accuracy:0.554
For 3500th epoch: train accuracy: 0.672 | validation accuracy:0.580
For 4000th epoch: train accuracy: 0.664 | validation accuracy:0.603
For 4500th epoch: train accuracy: 0.586 | validation accuracy:0.618
For 5000th epoch: train accuracy: 0.680 | validation accuracy:0.630
For 5500th epoch: train accuracy: 0.594 | validation accuracy:0.642
For 6000th epoch: train accuracy: 0.719 | validation accuracy:0.650
For 6500th epoch: train accuracy: 0.648 | validation accuracy:0.659
For 7000th epoch: train accuracy: 0.656 | validation

In [45]:
test_out=np.argmax(softmax(sigmoid(X_test_pca.dot(l1)).dot(l2)),axis=1)
test_acc=(test_out==y_test).mean().item()
print(f'Test accuracy = {test_acc*100:.2f}%')

Test accuracy = 70.77%


# CNN

In [49]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

In [50]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Load the data and split it between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [51]:
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 1600)              0         
                                                                 
 dropout (Dropout)           (None, 1600)              0

In [52]:
batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

2023-08-20 19:58:28.934116: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 169344000 exceeds 10% of free system memory.


Epoch 1/15
  3/422 [..............................] - ETA: 22s - loss: 2.2786 - accuracy: 0.1224

2023-08-20 19:58:30.132893: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 17981568 exceeds 10% of free system memory.
2023-08-20 19:58:30.133451: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 17981568 exceeds 10% of free system memory.
2023-08-20 19:58:30.185855: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 17981568 exceeds 10% of free system memory.
2023-08-20 19:58:30.186086: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 17981568 exceeds 10% of free system memory.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7f1dd8f31d50>

In [53]:
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.025683002546429634
Test accuracy: 0.9911999702453613
