# Minimos Quadrados

In [34]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

def least_squares_classification(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

def calculate_error(X, y, theta):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Realiza as predições
    predictions = X @ theta

    # Converte as predições em rótulos
    predicted_labels = np.argmax(predictions, axis=1)

    # Calcula o erro (taxa de erro)
    error = np.mean(predicted_labels != y)

    return error

# Carrega o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normaliza os dados dividindo por 255.0
X_train = X_train / 255.0
X_test = X_test / 255.0

# Converte os rótulos para uma representação one-hot
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a classificação usando mínimos quadrados com regularização de Ridge
theta = least_squares_classification(X_train, y_train_onehot, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Converte as predições em rótulos
predicted_labels = np.argmax(predictions, axis=1)

# # Calcula a acurácia
# accuracy = np.mean(predicted_labels == y_test) * 100
# print("Acurácia da classificação: {:.2f}%".format(accuracy))

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)


Accuracy: 0.8603


# Perceptron Logístico

## Using Sklearn

In [59]:
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

# Carrega o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normaliza os dados dividindo por 255.0
X_train = X_train / 255.0
X_test = X_test / 255.0

# Cria uma instância do perceptron logístico
model = Perceptron()

# Treina o modelo
model.fit(X_train, y_train)

# Realiza as predições no conjunto de teste
y_pred = model.predict(X_test)

# Calcula a acurácia
accuracy = accuracy_score(y_test, y_pred)

print("Acurácia da classificação: {:.2f}%".format(accuracy * 100))

Acurácia da classificação: 88.05%


## A mão

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist


2023-08-14 21:10:33.742885: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-14 21:10:34.338036: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-14 21:10:34.340889: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [101]:
class LogisticPerceptron:
    def __init__(self, num_features, num_classes, learning_rate=0.01, num_epochs=5):
        self.num_features = num_features
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = np.zeros((num_features + 1, num_classes))  # +1 for the bias term

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, x):
        activations = np.dot(np.insert(x, 0, 1), self.weights)
        probabilities = self.sigmoid(activations)
        return np.argmax(probabilities)

    def train(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # Inserting bias term
        y = np.eye(self.num_classes)[y]  # One-hot encoding
        for _ in range(self.num_epochs):
            for i in range(len(X)):
                x = X[i]                
                target = y[i]                
                activations = np.dot(x, self.weights)
                probabilities = self.sigmoid(activations)              
                error = target - activations                
                delta = self.learning_rate * np.outer(x, error)
                self.weights += delta

In [94]:
# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [95]:
# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [96]:
# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

In [97]:
# Criar o objeto do perceptron logístico
num_features = X_train.shape[1]
num_classes = len(np.unique(y_train))
perceptron = LogisticPerceptron(num_features=num_features, num_classes=num_classes)

In [98]:
# Treinar o perceptron
perceptron.train(X_train, y_train)

In [99]:
# Realizar previsões no conjunto de teste
predictions = []
for sample in X_test:
    prediction = perceptron.predict(sample)
    predictions.append(prediction)

In [100]:
# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.7043


# Minimos Quadrados + PCA

In [52]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

def least_squares_classification(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 100

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados nas componentes principais
X_test_pca = np.dot(X_test, principal_components)

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

# Converte os rótulos para uma representação one-hot
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a classificação usando mínimos quadrados com regularização de Ridge
theta = least_squares_classification(X_train_pca, y_train_onehot, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((X_test_pca.shape[0], 1)), X_test_pca), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Converte as predições em rótulos
predicted_labels = np.argmax(predictions, axis=1)

# # Calcula a acurácia
# accuracy = np.mean(predicted_labels == y_test) * 100
# print("Acurácia da classificação: {:.2f}%".format(accuracy))

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8611


In [40]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

def least_squares_classification(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

# Carrega o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normaliza os dados dividindo por 255.0
X_train = X_train / 255.0
X_test = X_test / 255.0

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 155

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados de treino nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados de teste nas componentes principais
X_test_pca = np.dot(y_test, principal_components)
# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

# Converte os rótulos para uma representação one-hot
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a classificação usando mínimos quadrados com regularização de Ridge
theta = least_squares_classification(X_train_pca, y_train_onehot, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test_pca), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Converte as predições em rótulos
predicted_labels = np.argmax(predictions, axis=1)

# # Calcula a acurácia
# accuracy = np.mean(predicted_labels == y_test) * 100
# print("Acurácia da classificação: {:.2f}%".format(accuracy))

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)

ValueError: shapes (10000,) and (784,155) not aligned: 10000 (dim 0) != 784 (dim 0)

# Perceptron Logistico + PCA

In [39]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from tensorflow.keras.datasets import mnist

class LogisticPerceptron:
    def __init__(self, num_features, num_classes, learning_rate=0.01, num_epochs=100):
        self.num_features = num_features
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = np.zeros((num_features + 1, num_classes))  # +1 for the bias term

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, x):
        activations = np.dot(np.insert(x, 0, 1), self.weights)
        probabilities = self.sigmoid(activations)
        return np.argmax(probabilities)

    def train(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # Inserting bias term
        y = np.eye(self.num_classes)[y]  # One-hot encoding
        for _ in range(self.num_epochs):
            for i in range(len(X)):
                x = X[i]
                target = y[i]
                activations = np.dot(x, self.weights)
                probabilities = self.sigmoid(activations)
                error = target - probabilities
                delta = self.learning_rate * np.outer(x, error)
                self.weights += delta


# Carregando o conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Ajusta a forma dos dados
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalizar os dados
X_train = X_train / 255.0
X_test = X_test / 255.0

# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #
# Calcular a matriz de covariância
cov_matrix = np.cov(X_train.T)

# Calcular os autovetores e autovalores
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Ordenar os autovetores em ordem decrescente dos autovalores
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Escolher o número de componentes principais
num_components = 155

# Selecionar as componentes principais
principal_components = sorted_eigenvectors[:, :num_components]

# Projetar os dados nas componentes principais
X_train_pca = np.dot(X_train, principal_components)

# Projetar os dados nas componentes principais
X_test_pca = np.dot(X_test, principal_components)


# -------------------------- PCA -------------------------- #
# --------------------------------------------------------- #

# Criar o objeto do perceptron logístico
num_features = X_train_pca.shape[1]
num_classes = len(np.unique(y_train))
perceptron = LogisticPerceptron(num_features=num_features, num_classes=num_classes)

# Treinar o perceptron
perceptron.train(X_train_pca, y_train)

# Realizar previsões no conjunto de teste
predictions = []
for sample in X_test_pca:
    prediction = perceptron.predict(sample)
    predictions.append(prediction)

# Calcular a precisão das previsões
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.9058


# MLP com Q neurônios ocultos

In [9]:
import numpy as np
from keras.datasets import mnist

class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Inicialização dos pesos
        self.W1 = np.random.randn(self.input_size, self.hidden_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size)
        self.b2 = np.zeros((1, self.output_size))
    
    def forward(self, X):
        # Propagação direta
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2
    
    def backward(self, X, y, learning_rate):
        # Retropropagação
        m = X.shape[0]
        self.dz2 = self.a2 - y
        self.dW2 = (1/m) * np.dot(self.a1.T, self.dz2)
        self.db2 = (1/m) * np.sum(self.dz2, axis=0, keepdims=True)
        self.dz1 = np.dot(self.dz2, self.W2.T) * self.sigmoid_derivative(self.z1)
        self.dW1 = (1/m) * np.dot(X.T, self.dz1)
        self.db1 = (1/m) * np.sum(self.dz1, axis=0, keepdims=True)
        
        # Atualização dos pesos e viés
        self.W1 -= learning_rate * self.dW1
        self.b1 -= learning_rate * self.db1
        self.W2 -= learning_rate * self.dW2
        self.b2 -= learning_rate * self.db2
    
    def train(self, X, y, num_epochs, learning_rate):
        for epoch in range(num_epochs):
            # Propagação direta e retropropagação
            y_pred = self.forward(X)
            self.backward(X, y, learning_rate)
            
            # Cálculo da função de custo (entropia cruzada categórica)
            # loss = self.categorical_crossentropy(y_pred, y)
            
            # # Print do custo a cada 10 épocas
            # if (epoch+1) % 10 == 0:
            #     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss:.4f}")
            
            # Cálculo do MSE (Mean Squared Error)
            mse_loss = self.mse(y_pred, y)

            # Print do custo a cada 10 épocas
            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch + 1}/{num_epochs}, MSE Loss: {mse_loss:.4f}")
    

    def mse(self, y_pred, y_true):
        return np.mean(np.square(y_pred - y_true))
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_derivative(self, x):
        return self.sigmoid(x) * (1 - self.sigmoid(x))
    
    def softmax(self, x):
        exp_scores = np.exp(x)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    def categorical_crossentropy(self, y_pred, y_true):
        epsilon = 1e-10
        y_pred = np.clip(y_pred, epsilon, 1.0 - epsilon)
        return -np.sum(y_true * np.log(y_pred)) / y_pred.shape[0]

# Carregamento do conjunto de dados MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Pré-processamento dos dados
X_train = X_train.reshape(-1, 28*28) / 255.0
X_test = X_test.reshape(-1, 28*28) / 255.0
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Criação do MLP
mlp = MLP(input_size=784, hidden_size=512, output_size=10)

# Treinamento do MLP
mlp.train(X_train, y_train, num_epochs=100, learning_rate=0.1)

# Avaliação do MLP nos dados de teste
predictions = mlp.forward(X_test)
# accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
# print("\nAcurácia nos dados de teste:", accuracy)
mse = mlp.mse(predictions, y_test)
print("\nMSE nos dados de teste:", mse)


Epoch 10/100, MSE Loss: 0.1527
Epoch 20/100, MSE Loss: 0.1388
Epoch 30/100, MSE Loss: 0.1242
Epoch 40/100, MSE Loss: 0.1116
Epoch 50/100, MSE Loss: 0.1012
Epoch 60/100, MSE Loss: 0.0926
Epoch 70/100, MSE Loss: 0.0854
Epoch 80/100, MSE Loss: 0.0796
Epoch 90/100, MSE Loss: 0.0748
Epoch 100/100, MSE Loss: 0.0708

MSE nos dados de teste: 0.06976132299603907
