## Data Prep

In [1]:
import numpy as np

In [6]:
# Seed NumPy's global random generator so the noise drawn later in the notebook is reproducible
np.random.seed(100)

# Inputs: n_samples evenly spaced points on [0, 1], both endpoints included
n_samples = 20
X = np.linspace(start=0, stop=1, num=n_samples)
print(X)

[0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ]


In [7]:
def f1(x):
    """True underlying function: sin(1.5 * pi * x), a piece of a sine curve."""
    return np.sin(1.5 * np.pi * x)


# Evaluate the true function on the inputs to get the noise-free output values
y = f1(X)
print(y)

[ 0.          0.24548549  0.47594739  0.67728157  0.83716648  0.94581724
  0.99658449  0.9863613   0.91577333  0.78914051  0.61421271  0.40169542
  0.16459459 -0.08257935 -0.32469947 -0.54694816 -0.73572391 -0.87947375
 -0.96940027 -1.        ]


In [8]:
# Add some noise to the output data: one standard-normal draw per sample, scaled by 0.2.
# The draw consumes NumPy's global random state and `+=` modifies y in place, so
# re-running this cell stacks a further round of noise on top of the current y.
y += np.random.randn(n_samples) * 0.2
print(y)

[-0.34995309  0.31402157  0.70655455  0.62679436  1.03343064  1.04866101
  1.04082043  0.77235264  0.87787416  0.8401408   0.52260732  0.48872812
  0.04787558  0.08079007 -0.19015531 -0.56783039 -0.84197999 -0.67352721
 -1.05702739 -1.22366365]


In [10]:
# Convert the 1-D inputs into an (n_samples, 1) feature matrix, the 2-D layout
# that the scikit-learn estimators below are fitted on. reshape(-1, 1) produces
# the same column as np.transpose([X]) on a 1-D array, but it is idempotent:
# re-running this cell leaves an already 2-D X unchanged instead of adding
# another axis to it.
X = X.reshape(-1, 1)
print(X)

[[0.        ]
 [0.05263158]
 [0.10526316]
 [0.15789474]
 [0.21052632]
 [0.26315789]
 [0.31578947]
 [0.36842105]
 [0.42105263]
 [0.47368421]
 [0.52631579]
 [0.57894737]
 [0.63157895]
 [0.68421053]
 [0.73684211]
 [0.78947368]
 [0.84210526]
 [0.89473684]
 [0.94736842]
 [1.        ]]


## Linear Regression
Reference: [scikit-learn `LinearRegression` documentation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html)

In [11]:
from sklearn.linear_model import LinearRegression

In [16]:
# With Intercept: ordinary least squares with a fitted constant term.
# fit() returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression(fit_intercept=True).fit(X, y)
print(lr.intercept_)
print(lr.coef_)
print('Estimated function: y = {:.2f} + {:.2f}x'.format(lr.intercept_, lr.coef_[0]))

1.034071520568776
[-1.71849162]
Estimated function: y = 1.03 + -1.72x


In [17]:
# Without Intercept: the fitted line is forced through the origin.
# fit() returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression(fit_intercept=False).fit(X, y)
print(lr.intercept_)
print(lr.coef_)
print('Estimated function: y = {:.2f} + {:.2f}x'.format(lr.intercept_, lr.coef_[0]))

0.0
[-0.20715632]
Estimated function: y = 0.00 + -0.21x


## Polynomial Regression

In [18]:
from sklearn.preprocessing import PolynomialFeatures

In [19]:
# Generate more features: expand the single input column x into the columns [x, x^2].
# include_bias=False omits the constant column of ones from the expansion.
num_features = 2  # used as the polynomial degree
pf = PolynomialFeatures(include_bias=False, degree=num_features)
X2 = pf.fit(X).transform(X)
print(X2)

[[0.         0.        ]
 [0.05263158 0.00277008]
 [0.10526316 0.01108033]
 [0.15789474 0.02493075]
 [0.21052632 0.04432133]
 [0.26315789 0.06925208]
 [0.31578947 0.09972299]
 [0.36842105 0.13573407]
 [0.42105263 0.17728532]
 [0.47368421 0.22437673]
 [0.52631579 0.27700831]
 [0.57894737 0.33518006]
 [0.63157895 0.39889197]
 [0.68421053 0.46814404]
 [0.73684211 0.54293629]
 [0.78947368 0.6232687 ]
 [0.84210526 0.70914127]
 [0.89473684 0.80055402]
 [0.94736842 0.89750693]
 [1.         1.        ]]


In [21]:
# Fit ordinary least squares on the expanded features X2 (columns x and x^2).
lr = LinearRegression(fit_intercept=True)
lr.fit(X2, y)
print(lr.intercept_)
print(lr.coef_)
# coef_[0] multiplies the x column and coef_[1] the squared column of X2,
# so the second term is labelled x^2 (it was previously mislabelled "x1").
print('Estimated function: y = {:.2f} + {:.2f}x + {:.2f}x^2'.format(lr.intercept_, lr.coef_[0], lr.coef_[1]))

0.17224314735095428
[ 3.73975474 -5.45824636]
Estimated function: y = 0.17 + 3.74x + -5.46x1


## Predict

In [23]:
# Test inputs: 100 evenly spaced points on [0, 1], both endpoints included
X_test = np.linspace(start=0, stop=1, num=100)
print(X_test)

[0.         0.01010101 0.02020202 0.03030303 0.04040404 0.05050505
 0.06060606 0.07070707 0.08080808 0.09090909 0.1010101  0.11111111
 0.12121212 0.13131313 0.14141414 0.15151515 0.16161616 0.17171717
 0.18181818 0.19191919 0.2020202  0.21212121 0.22222222 0.23232323
 0.24242424 0.25252525 0.26262626 0.27272727 0.28282828 0.29292929
 0.3030303  0.31313131 0.32323232 0.33333333 0.34343434 0.35353535
 0.36363636 0.37373737 0.38383838 0.39393939 0.4040404  0.41414141
 0.42424242 0.43434343 0.44444444 0.45454545 0.46464646 0.47474747
 0.48484848 0.49494949 0.50505051 0.51515152 0.52525253 0.53535354
 0.54545455 0.55555556 0.56565657 0.57575758 0.58585859 0.5959596
 0.60606061 0.61616162 0.62626263 0.63636364 0.64646465 0.65656566
 0.66666667 0.67676768 0.68686869 0.6969697  0.70707071 0.71717172
 0.72727273 0.73737374 0.74747475 0.75757576 0.76767677 0.77777778
 0.78787879 0.7979798  0.80808081 0.81818182 0.82828283 0.83838384
 0.84848485 0.85858586 0.86868687 0.87878788 0.88888889 0.89898

In [26]:
# Expand the test inputs with the transformer already fitted on the training
# inputs (`pf`: degree 2, no bias column) instead of building and fitting a
# second copy on the test data. Train and test features then share a single
# configuration, and the test set is only transformed, never used for fitting.
# X_test[:, np.newaxis] turns the 1-D grid into the 2-D column that pf expects.
X2_test = pf.transform(X_test[:, np.newaxis])

In [29]:
# Predict on the expanded test inputs with the most recently fitted model `lr`
Y_test = lr.predict(X=X2_test)
print(Y_test)

[ 0.17224315  0.20946154  0.24556612  0.28055688  0.31443384  0.34719697
  0.37884629  0.4093818   0.4388035   0.46711138  0.49430544  0.52038569
  0.54535213  0.56920476  0.59194356  0.61356856  0.63407974  0.65347711
  0.67176066  0.6889304   0.70498632  0.71992843  0.73375673  0.74647121
  0.75807188  0.76855873  0.77793177  0.78619099  0.7933364   0.799368
  0.80428578  0.80808975  0.8107799   0.81235624  0.81281877  0.81216748
  0.81040238  0.80752346  0.80353073  0.79842418  0.79220383  0.78486965
  0.77642166  0.76685986  0.75618425  0.74439482  0.73149157  0.71747451
  0.70234364  0.68609895  0.66874045  0.65026814  0.63068201  0.60998206
  0.5881683   0.56524073  0.54119935  0.51604415  0.48977513  0.4623923
  0.43389566  0.4042852   0.37356093  0.34172285  0.30877095  0.27470523
  0.2395257   0.20323236  0.16582521  0.12730423  0.08766945  0.04692085
  0.00505844 -0.03791779 -0.08200783 -0.12721169 -0.17352936 -0.22096084
 -0.26950614 -0.31916525 -0.36993818 -0.42182492 -0.47