In [1]:
# Load Libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

In [2]:
# Load the Boston housing data and keep only the first two feature columns.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# environment pins an older release before re-running this cell.
boston = load_boston()
features, target = boston.data[:, :2], boston.target

In [3]:
# Instantiate a linear regression estimator with its default settings;
# it is not trained until fit() is called in a later cell.
regression = LinearRegression()

In [4]:
# Train the estimator on the selected features; the value returned by fit()
# is kept as `model` and used for inspection and prediction below.
model = regression.fit(X=features, y=target)

In [5]:
# Fitted model: y = w0 + w1*x1 + w2*x2
#   w0        -> model.intercept_
#   w1 and w2 -> model.coef_
for label, value in (
    ("model.coef_: ", model.coef_),
    ("model.intercept_: ", model.intercept_),
):
    print(label, value)

model.coef_:  [-0.35207832  0.11610909]
model.intercept_:  22.485628113468223


In [7]:
# Value predicted by the linear regression model for the first observation,
# scaled by 1000 (presumably because the target is in thousands — confirm).
1000 * model.predict(features)[0]

24573.366631705547

In [8]:
# True target value of the first observation, scaled by 1000 so it can be
# compared directly with the scaled prediction.
1000 * target[0]

24000.0

## Interaction Effects
#### Problema:
Temos características cujo efeito na variável alvo depende de outra feature.
E.g.
Imagine que vamos fazer um café e temos duas features binárias:
(1) a presença de açúcar (sugar); (2) se o café foi ou não mexido (stirred).
E queremos prever se o café está doce.

Note que apenas colocar açúcar (sugar = 1 | stirred = 0) não vai deixar o café doce.
Mexer o café sem colocar açúcar (sugar = 0 | stirred = 1) também não.
Ou seja, é a interação entre colocar açúcar e mexer que faz o café ficar doce: o efeito de uma feature depende do valor da outra.

y = w0 + w1 x1 + w2 x2 + w3 x1 x2

onde x1 e x2 são os valores de sugar e stirred

In [11]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures

# Load data with only two features.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# environment pins an older release before re-running this cell.
boston = load_boston()
features = boston.data[:, 0:2]
target = boston.target

# Create the interaction term x1*x2 next to the original columns x1 and x2.
# degree=2 is the order of that pairwise product. With interaction_only=True
# and only two input features a higher degree cannot add any column, so the
# previous degree=3 produced exactly the same three columns while suggesting
# three-way interactions (and would silently add them if a third feature were
# ever included). include_bias=False omits the constant column.
interaction = PolynomialFeatures(degree=2,
                                 include_bias=False,
                                 interaction_only=True)
features_interaction = interaction.fit_transform(features)

# Create linear regression
regression = LinearRegression()

# Fit y = w0 + w1*x1 + w2*x2 + w3*x1*x2 on the expanded feature matrix
model = regression.fit(features_interaction, target)

In [None]:
# Show the coefficients and the intercept of the most recently fitted model
coefficients, intercept = model.coef_, model.intercept_
print("model.coef_: ", coefficients)
print("model.intercept_: ", intercept)

Nesse modelo, usamos um dataset contendo apenas 2 features. Aqui estão os valores dessas features para a primeira observação:

In [10]:
# Feature values of the first observation (row 0, every column)
features[0, :]

array([6.32e-03, 1.80e+01])

Para criar um termo de interação, simplesmente multiplicamos esses dois valores entre si, para cada observação:

In [13]:
# Import library
import numpy as np

# Element-wise product of the first and second feature columns, giving one
# interaction value per observation
first_feature, second_feature = features[:, 0], features[:, 1]
interaction_term = first_feature * second_feature

# Interaction value of the first observation
interaction_term[0]

0.11376

In [14]:
# First row of the transformed matrix: the original feature values followed
# by the interaction column
features_interaction[0, :]

array([6.3200e-03, 1.8000e+01, 1.1376e-01])

### Fitting a Nonlinear Relationship

In [15]:
# Load library
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures

# Keep only the first column of the Boston data as the single predictor x.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# environment pins an older release before re-running this cell.
boston = load_boston()
features, target = boston.data[:, :1], boston.target

# Expand x into the columns [x, x^2, x^3]; the constant (bias) column is left out
polynomial = PolynomialFeatures(include_bias=False, degree=3)
features_polynomial = polynomial.fit_transform(features)

# Fit a linear model on the expanded columns: y = w0 + w1*x + w2*x^2 + w3*x^3
regression = LinearRegression()
model = regression.fit(X=features_polynomial, y=target)

In [17]:
# Show the coefficients and the intercept of the most recently fitted model
for label, value in (
    ("model.coef_: ", model.coef_),
    ("model.intercept_: ", model.intercept_),
):
    print(label, value)

model.coef_:  [-1.13640072e+00  2.37848254e-02 -1.48872090e-04]
model.intercept_:  25.19047936932673


In [16]:
# Value of x for the first observation (row 0 of the single-column matrix)
features[0, :]

array([0.00632])

In [18]:
# x^2 for the first observation, computed by hand for comparison with the
# second polynomial column
np.square(features[0])

array([3.99424e-05])

In [19]:
# x^3 for the first observation, computed by hand for comparison with the
# third polynomial column
np.power(features[0], 3)

array([2.52435968e-07])

In [20]:
# First row of the polynomial feature matrix: x, x^2 and x^3
features_polynomial[0, :]

array([6.32000000e-03, 3.99424000e-05, 2.52435968e-07])