# Questão 02)

## Mínimos quadrados

In [54]:
import pandas as pd
import numpy as np

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [55]:
# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert in numpy ndarray
data = np.array(ws)

In [56]:
ws.head()

Unnamed: 0,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,2012.916667,32.0,84.87882,10,24.98298,121.54024,37.9
1,2012.916667,19.5,306.5947,9,24.98034,121.53951,42.2
2,2013.583333,13.3,561.9845,5,24.98746,121.54391,47.3
3,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,2012.833333,5.0,390.5684,5,24.97937,121.54245,43.1


In [57]:
X = data[:, :-1]
Y = data[:, -1]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

print("x_train:", x_train.shape)
print("y_train:", y_train.shape)
print("x_test:", x_test.shape)
print("y_test:", y_test.shape)

x_train: (331, 6)
y_train: (331,)
x_test: (83, 6)
y_test: (83,)


In [58]:
def ridge_regression(X, y, alpha):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calcula os coeficientes usando a fórmula dos mínimos quadrados com regularização de Ridge
    theta = np.linalg.inv(X.T @ X + alpha * np.eye(X.shape[1])) @ X.T @ y

    return theta

def calculate_error(X, y, theta):
    # Adiciona uma coluna de 1s para representar o termo de viés
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Realiza as predições
    predictions = X @ theta

    # Converte as predições em rótulos
    predicted_labels = np.argmax(predictions, axis=1)

    # Calcula o erro (taxa de erro)
    error = np.mean(predicted_labels != y)

    return error


In [59]:

# Define o valor de regularização (alpha)
alpha = 0.01

# Realiza a regressão usando mínimos quadrados com regularização de Ridge
theta = ridge_regression(x_train, y_train, alpha)

# Adiciona uma coluna de 1s aos dados de teste
X_test = np.concatenate((np.ones((x_test.shape[0], 1)), x_test), axis=1)

# Realiza as predições
predictions = X_test @ theta

# Calcular o erro médio quadrado (MSE) nas previsões
mse = mean_squared_error(y_test, predictions)
print("Erro médio quadrado (MSE):", mse)
mae = mean_absolute_error(y_test, predictions)
print("Erro médio absoluto (MAE):", mae)

Erro médio quadrado (MSE): 54.60475048894781
Erro médio absoluto (MAE): 5.326757449309436


## ELM

In [34]:
import pandas as pd
import numpy as np

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

class ELMRegressor:
    def __init__(self, num_hidden_neurons):
        self.num_hidden_neurons = num_hidden_neurons
        self.weights_input_hidden = None
        self.weights_hidden_output = None
        self.bias_hidden = None

    def _sigmoid(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def _relu(self, x):
        return np.maximum(0, x)

    def _tanh(self, x):
        return np.tanh(x)

    def fit(self, X, y):
        num_samples, num_features = X.shape
        
        # Inicialização aleatória dos pesos e vieses
        self.weights_input_hidden = np.random.rand(num_features, self.num_hidden_neurons)
        self.bias_hidden = np.random.rand(1, self.num_hidden_neurons)

        
        # Calcula as saídas da camada oculta
        hidden_output = self._sigmoid(np.dot(X, self.weights_input_hidden) + self.bias_hidden)

        # Calcula os pesos da camada de saída usando a pseudo-inversa
        self.weights_hidden_output = np.dot(np.linalg.pinv(hidden_output), y)

    def predict(self, X):
        hidden_output = self._sigmoid(np.dot(X, self.weights_input_hidden) + self.bias_hidden)
        predictions = np.dot(hidden_output, self.weights_hidden_output)
        return predictions

# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert in numpy ndarray
data = np.array(ws)

X = data[:, :-1]
Y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Crie uma instância do regressor ELM e ajuste-o aos dados de treinamento
elm_regressor = ELMRegressor(num_hidden_neurons=16)
elm_regressor.fit(X_train, y_train)

# Faça previsões nos dados de teste
predictions = elm_regressor.predict(X_test)
#print("predictions: ", predictions)

# Calcule o Mean Squared Error (MSE)
mse = np.mean((y_test - predictions)**2)
print("Mean Squared Error:", mse)

# Calculando o MAE para o conjunto de testes
mae_test = mean_absolute_error(y_test, predictions)
print("Test MAE:", mae_test)

Mean Squared Error: 63.80386815780189
Test MAE: 5.9639761213878995


In [6]:
X_test.shape

(83, 6)

In [7]:
predictions.shape

(83,)

In [31]:
def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

In [33]:
sigmoid(160.3)

1.0

## MLP

In [45]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')


# Read xlsx file
ws = pd.read_excel("Real_estate_valuation_dataset.xlsx", engine='openpyxl')
# Drop useless column
ws = ws.drop('No', axis=1)
# Convert to numpy ndarray
data = np.array(ws)

X = data[:, :-1]
Y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

def init(x, y):
    layer = np.random.uniform(-1, 1., size=(x, y)) / np.sqrt(x * y)
    return layer.astype(np.float32)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    return x * (1 - x)

def forward_backward_pass(x, y):
    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = x_l2
    
    error = 2 * (out - y) / out.shape[0]
    update_l2 = x_sigmoid.T @ error
    
    error = (error @ l2.T) * d_sigmoid(x_sigmoid)
    update_l1 = x.T @ error
    
    return out, update_l1, update_l2

epochs = 10000
lr = 0.001
batch = 16
num_hidden_neurons = 20

np.random.seed(42)
l1 = init(X_train.shape[1], num_hidden_neurons)
l2 = init(num_hidden_neurons, 1)

losses = []

#y = y.reshape((-1, 1))

for i in range(epochs):
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample]
    y = y_train[sample].reshape((-1, 1))

    out, update_l1, update_l2 = forward_backward_pass(x, y)

    loss = mean_squared_error(y, out)  # Calculate MSE
    losses.append(loss)

    l1 -= lr * update_l1
    l2 -= lr * update_l2

    if i % 100 == 0:
        print(f'Epoch {i}, Loss: {loss}')


X_test = X_test.reshape((-1, X_train.shape[1]))
y_test = y_test.reshape((-1, 1))

# Teste
def test(x, y, l1, l2):
    x_l1 = x.dot(l1)
    x_sigmoid = sigmoid(x_l1)

    x_l2 = x_sigmoid.dot(l2)
    out = x_l2
    
    return out

# Calculando as previsões para o conjunto de testes
y_pred_test = test(X_test, y_test, l1, l2)

# Calculando o MSE para o conjunto de testes
mse_test = mean_squared_error(y_test, y_pred_test)
print("Test MSE:", mse_test)

# Calculando o MAE para o conjunto de testes
mae_test = mean_absolute_error(y_test, y_pred_test)
print("Test MAE:", mae_test)

Epoch 0, Loss: 1233.0773878263674
Epoch 100, Loss: 155.36832558914145
Epoch 200, Loss: 188.3879252833759
Epoch 300, Loss: 164.57718445456777
Epoch 400, Loss: 163.91996020032371
Epoch 500, Loss: 103.09379810511678
Epoch 600, Loss: 339.63747011354536
Epoch 700, Loss: 174.7317902097364
Epoch 800, Loss: 220.7116176469917
Epoch 900, Loss: 87.95089893501762
Epoch 1000, Loss: 157.78005153375386
Epoch 1100, Loss: 192.82265978674147
Epoch 1200, Loss: 199.72882306377713
Epoch 1300, Loss: 92.49243333536926
Epoch 1400, Loss: 167.70055074343855
Epoch 1500, Loss: 280.0281252810136
Epoch 1600, Loss: 124.69116027943502
Epoch 1700, Loss: 103.36067918634188
Epoch 1800, Loss: 201.77519953846465
Epoch 1900, Loss: 348.0638274899434
Epoch 2000, Loss: 122.50572807609495
Epoch 2100, Loss: 118.8652214412685
Epoch 2200, Loss: 96.5217352397188
Epoch 2300, Loss: 130.09039360641583
Epoch 2400, Loss: 150.7865790792987
Epoch 2500, Loss: 175.75464920784523
Epoch 2600, Loss: 331.3757866265173
Epoch 2700, Loss: 134.200