In [1]:
import numpy as np

from linear_regression import SimpleLinearRegressionAM, SimpleLinearRegressionGD, MultipleLinearRegressionAM, MultipleLinearRegressionGD, MultipleLinearRegressionSGD, RegularizedLinearRegressionGD
from regression_metrics import MSE, R2

## 1. Implemente os seguintes métodos

### a. Regressão Linear univariada - método analítico

In [2]:
X_train, y_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array([
        3, 5, 7, 9, 11, 13, 15, 17, 19, 21])
X_test, y_test = np.array([11, 12, 13, 14, 15]), np.array([23, 25, 27, 29, 31])

model = SimpleLinearRegressionAM()
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([23., 25., 27., 29., 31.])

### b. Regressão Linear univariada - gradiente descendente

In [3]:
X_train, y_train = np.array([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]), np.array([
        3., 5., 7., 9., 11., 13., 15., 17., 19., 21.])
X_test, y_test = np.array([11., 12., 13., 14., 15.]), np.array([23., 25., 27., 29., 31.])

model = SimpleLinearRegressionGD(learning_rate=0.01, ages=8918)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([23., 25., 27., 29., 31.])

### c. Regressão Linear multivariada – método analítico (não esquecer de adicionar termo de bias)

In [4]:
X_train, y_train = np.array([[1, 2, 3], [4, 5, 6],
                        [7, 8, 9], [10, 11, 12]]), np.array([15, 33, 51, 69])
X_test, y_test = np.array([[11, 12, 13], [14, 15, 16]]), np.array([75, 93])

model = MultipleLinearRegressionAM()
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([75., 93.])

### d. Regressão Linear multivariada – gradiente descendente

In [5]:
X_train, y_train = np.array([[1, 2, 3], [4, 5, 6],
                        [7, 8, 9], [10, 11, 12]]), np.array([15, 33, 51, 69])
X_test, y_test = np.array([[11, 12, 13], [14, 15, 16]]), np.array([75, 93])

model = MultipleLinearRegressionGD(ages=3165)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([75., 93.])

### e. Regressão Linear multivariada – gradiente descendente estocástico

In [6]:
X_train, y_train = np.array([[1, 2, 3], [4, 5, 6],
                        [7, 8, 9], [10, 11, 12]]), np.array([15, 33, 51, 69])
X_test, y_test = np.array([[11, 12, 13], [14, 15, 16]]), np.array([75, 93])

model = MultipleLinearRegressionSGD(ages=5522)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([75., 93.])

### f. Regressão quadrática usando regressão múltipla

In [7]:
class SquareRegression(MultipleLinearRegressionAM):
    def fit(self, X, y):
        square_X = np.power(X, 2)
        _X = np.c_[X, square_X]

        super().fit(_X, y)

    def predict(self, x):
        square_x = np.power(x, 2)
        _x = np.c_[x, square_x]

        return super().predict(_x)


X_train, y_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array([
        1, 4, 9, 16, 25, 36, 49, 64, 81, 100])
X_test, y_test = np.array([11, 12, 13, 14, 15]), np.array([121, 144, 169, 196, 225])

model = SquareRegression()
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([121., 144., 169., 196., 225.])

### g. Regressão cúbica usando regressão múltipla

In [8]:
class CubicRegression(MultipleLinearRegressionAM):
    def fit(self, X, y):
        square_X = np.power(X, 2)
        cubic_X = np.power(X, 3)
        _X = np.c_[X, square_X, cubic_X]

        super().fit(_X, y)

    def predict(self, x):
        square_x = np.power(x, 2)
        cubic_x = np.power(x, 3)
        _x = np.c_[x, square_x, cubic_x]

        return super().predict(_x)


X_train, y_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array(
        [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000])
X_test, y_test = np.array([11, 12, 13, 14, 15]), np.array(
    [1331, 1728, 2197, 2744, 3375])

model = CubicRegression()
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([1331.        , 1728.        , 2197.        , 2744.        ,
       3375.00000001])

### h. Regressão Linear Regularizada multivariada – gradiente descendente

In [64]:
X_train, y_train = np.array([[1, 2, 3], [4, 5, 6],
                        [7, 8, 9], [10, 11, 12]]), np.array([15, 33, 51, 69])
X_test, y_test = np.array([[11, 12, 13], [14, 15, 16]]), np.array([75, 93])

model = RegularizedLinearRegressionGD(learning_rate=0.001, ages=23796, regularization_rate=0)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)

y_predict

array([75., 93.])

## 2. Implemente as funções

### a. MSE(y_true, y_predict)

In [10]:
y_true, y_predict = np.array([23., 25., 26., 30., 31.]), np.array([23., 25., 27., 29., 31.])

MSE(y_true, y_predict)

0.4

### b. R2(y_true, y_predict)

In [11]:
y_true, y_predict = np.array([23., 25., 26., 30., 31.]), np.array([23., 25., 27., 29., 31.])

R2(y_true, y_predict)

0.9565217391304348

## 3. Carregue o conjunto de dados Boston House Price Dataset (https://archive.ics.uci.edu/ml/machine-learning-databases/housing/). Nesse link também contém a descrição dos atributos

In [83]:
dataset = np.loadtxt('./datasets/housing.data')

dataset

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 3.9690e+02, 4.9800e+00,
        2.4000e+01],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 3.9690e+02, 9.1400e+00,
        2.1600e+01],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 3.9283e+02, 4.0300e+00,
        3.4700e+01],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 5.6400e+00,
        2.3900e+01],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 3.9345e+02, 6.4800e+00,
        2.2000e+01],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 7.8800e+00,
        1.1900e+01]])

### 4. Vamos analisar apenas a variável LSTAT como atributo preditor e a variável MEDV como atributo alvo

In [85]:
dataset = dataset[:, 12:]

dataset

array([[ 4.98, 24.  ],
       [ 9.14, 21.6 ],
       [ 4.03, 34.7 ],
       ...,
       [ 5.64, 23.9 ],
       [ 6.48, 22.  ],
       [ 7.88, 11.9 ]])

### 5. Embaralhe as amostras com seus valores alvo. Divida o conjunto de dados em 80% para treino e 20% para teste.

In [88]:
np.random.shuffle(dataset)

X = dataset[0]
y = dataset[1]


def my_train_test_split(X, y, test_size):
    n_train = int(np.shape(y)[0] * (1 - test_size))
    
    return X[:n_train], X[n_train:], y[:n_train], y[n_test:]

                  
X_train, X_test, y_train, y_test = my_train_test_split(X, y, test_size)

SyntaxError: invalid syntax (<ipython-input-88-41b6b281aed8>, line 10)