# Questão 02)

## Mínimos quadrados

In [43]:
import pandas as pd
import numpy as np

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert in numpy ndarray
data = np.array(ws)

X = data[:, :-1]
Y = data[:, -1]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

def ridge_regression(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

def calculate_error(X, y, theta):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Realiza as predições
    predictions = X @ theta

    # Converte as predições em rótulos
    predicted_labels = np.argmax(predictions, axis=1)

    # Calcula o erro (taxa de erro)
    error = np.mean(predicted_labels != y)

    return error


# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a regressão usando mínimos quadrados com regularização de Ridge
theta = ridge_regression(x_train, y_train, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((x_test.shape[0], 1)), x_test), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Calcular o erro médio quadrado (MSE) nas previsões
mse = mean_squared_error(y_test, predictions)
print("Erro médio quadrado (MSE):", mse)
mae = mean_absolute_error(y_test, predictions)
print("Erro médio absoluto (MAE):", mae)
r2 = r2_score(y_test, predictions)
print("R2:", r2)
rmse = np.sqrt(mse)
print("Raiz do erro médio quadrado (RMSE):", rmse)

Erro médio quadrado (MSE): 54.60475048894781
Erro médio absoluto (MAE): 5.326757449309436
R2: 0.6745062372078383
Raiz do erro médio quadrado (RMSE): 7.389502722710629


## ELM

In [11]:
import pandas as pd
import numpy as np

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

class ELMRegressor:
    def __init__(self, num_hidden_neurons):
        self.num_hidden_neurons = num_hidden_neurons
        self.weights_input_hidden = None
        self.weights_hidden_output = None
        self.bias_hidden = None

    def _sigmoid(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def _relu(self, x):
        return np.maximum(0, x)

    def _tanh(self, x):
        return np.tanh(x)

    def fit(self, X, y):
        num_samples, num_features = X.shape
        
        # Inicialização aleatória dos pesos e vieses
        self.weights_input_hidden = np.random.rand(num_features, self.num_hidden_neurons)
        self.bias_hidden = np.random.rand(1, self.num_hidden_neurons)

        
        # Calcula as saídas da camada oculta
        hidden_output = self._relu(np.dot(X, self.weights_input_hidden) + self.bias_hidden)

        # Calcula os pesos da camada de saída usando a pseudo-inversa
        self.weights_hidden_output = np.dot(np.linalg.pinv(hidden_output), y)

    def predict(self, X):
        hidden_output = self._relu(np.dot(X, self.weights_input_hidden) + self.bias_hidden)
        predictions = np.dot(hidden_output, self.weights_hidden_output)
        return predictions

# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert in numpy ndarray
data = np.array(ws)

X = data[:, :-1]
Y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Crie uma instância do regressor ELM e ajuste-o aos dados de treinamento
elm_regressor = ELMRegressor(num_hidden_neurons=16)
elm_regressor.fit(X_train, y_train)

# Faça previsões nos dados de teste
predictions = elm_regressor.predict(X_test)
#print("predictions: ", predictions)

# Calcule o Mean Squared Error (MSE)
mse = np.mean((y_test - predictions)**2)
print("Mean Squared Error:", mse)

# Calculando o MAE para o conjunto de testes
mae_test = mean_absolute_error(y_test, predictions)
print("Test MAE:", mae_test)
r2 = r2_score(y_test, predictions)
print("R2:", r2)
rmse = np.sqrt(mse)
print("Raiz do erro médio quadrado (RMSE):", rmse)

Mean Squared Error: 66.71673352060317
Test MAE: 6.324687683775253
R2: 0.6023078497681544
Raiz do erro médio quadrado (RMSE): 8.168031189987166


## MLP

In [29]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')


# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert to numpy ndarray
data = np.array(ws)

X = data[:, :-1]
Y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

def init(x, y):
    layer = np.random.uniform(-1, 1., size=(x, y)) / np.sqrt(x * y)
    return layer.astype(np.float32)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    return x * (1 - x)

def forward_backward_pass(x, y):
    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = x_l2
    
    error = 2 * (out - y) / out.shape[0]
    update_l2 = x_sigmoid.T @ error
    
    error = (error @ l2.T) * d_sigmoid(x_sigmoid)
    update_l1 = x.T @ error
    
    return out, update_l1, update_l2

epochs = 20000
lr = 0.001
batch = 32
num_hidden_neurons = 20

np.random.seed(42)
l1 = init(X_train.shape[1], num_hidden_neurons)
l2 = init(num_hidden_neurons, 1)

losses = []

#y = y.reshape((-1, 1))

for i in range(epochs):
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample]
    y = y_train[sample].reshape((-1, 1))

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    loss = mean_squared_error(y, out)  # Calculate MSE
    losses.append(loss)

    l1 -= lr * update_l1
    l2 -= lr * update_l2

    if i % 100 == 0:
        print(f'Epoch {i}, Loss: {loss}')


X_test = X_test.reshape((-1, X_train.shape[1]))
y_test = y_test.reshape((-1, 1))

# Teste
def test(x, y, l1, l2):
    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = x_l2
    
    return out

# Calculando as previsões para o conjunto de testes
y_pred_test = test(X_test, y_test, l1, l2)

# Calculando o MSE para o conjunto de testes
mse_test = mean_squared_error(y_test, y_pred_test)
print("Test MSE:", mse_test)

# Calculando o MAE para o conjunto de testes
mae_test = mean_absolute_error(y_test, y_pred_test)
print("Test MAE:", mae_test)

r2 = r2_score(y_test, y_pred_test)
print("R2:", r2)

rmse = np.sqrt(mse_test)
print("Raiz do erro médio quadrado (RMSE):", rmse)

Epoch 0, Loss: 1389.579348449164
Epoch 100, Loss: 156.37180298715873
Epoch 200, Loss: 285.4814588420636
Epoch 300, Loss: 171.20383242004408
Epoch 400, Loss: 94.50702702753408
Epoch 500, Loss: 63.17298549792008
Epoch 600, Loss: 107.94596233887168
Epoch 700, Loss: 66.0014340836988
Epoch 800, Loss: 92.68832780689397
Epoch 900, Loss: 108.047838820616
Epoch 1000, Loss: 93.92130956117094
Epoch 1100, Loss: 50.605318658464704
Epoch 1200, Loss: 64.22754526229127
Epoch 1300, Loss: 97.25518668763925
Epoch 1400, Loss: 74.19741776817122
Epoch 1500, Loss: 45.17845496438835
Epoch 1600, Loss: 48.48217341483631
Epoch 1700, Loss: 204.25730517338718
Epoch 1800, Loss: 71.20631976123488
Epoch 1900, Loss: 35.65754939103971
Epoch 2000, Loss: 68.41198639555947
Epoch 2100, Loss: 75.4238776995586
Epoch 2200, Loss: 67.4919752637213
Epoch 2300, Loss: 131.2276928324167
Epoch 2400, Loss: 85.7449322476584
Epoch 2500, Loss: 427.7625645990054
Epoch 2600, Loss: 93.42926277185893
Epoch 2700, Loss: 42.82319532967115
Epoc