## Generating Data

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Fix the seed of NumPy's global random generator so that every run of the
# notebook draws the same sequence of random numbers
SEED = 13
np.random.seed(SEED)

In [7]:
# Generate evenly spaced sample points over a closed interval
n_samples = 20
range_start = 0
range_end = 1
# Original (misspelled) name for the lower bound; kept as an alias in case
# other cells still reference it.
range_state = range_start

# np.linspace includes both endpoints, so X runs from range_start to range_end
X = np.linspace(range_start, range_end, n_samples)

# Show the values, the container type and the (1-D) shape
print(X)
print(type(X))
print(X.shape)

[0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ]
<class 'numpy.ndarray'>
(20,)


In [4]:
# Draw n_samples values from the standard normal distribution
# (uses the global generator seeded earlier in the notebook)
np.random.standard_normal(size=n_samples)

array([-0.71239066,  0.75376638, -0.04450308,  0.45181234,  1.34510171,
        0.53233789,  1.3501879 ,  0.86121137,  1.47868574, -1.04537713,
       -0.78898902, -1.26160595,  0.56284679, -0.24332625,  0.9137407 ,
        0.31735092,  0.12730328,  2.15038297,  0.60628866, -0.02677165])

In [8]:
# Turn the 1-D array into a column vector with one row per sample and a single
# column (sklearn estimators expect a 2-D feature matrix as input).
# The ndim guard keeps this cell safe to re-run: without it, every extra run
# would append another axis, e.g. (20, 1) -> (20, 1, 1).
if X.ndim == 1:
    X = X[:, np.newaxis]
# For a 1-D X, np.transpose([X]) gives the same result. Make sure the array is
# passed wrapped in a list ([X]), not as the bare array.
print(X)

[[0.        ]
 [0.05263158]
 [0.10526316]
 [0.15789474]
 [0.21052632]
 [0.26315789]
 [0.31578947]
 [0.36842105]
 [0.42105263]
 [0.47368421]
 [0.52631579]
 [0.57894737]
 [0.63157895]
 [0.68421053]
 [0.73684211]
 [0.78947368]
 [0.84210526]
 [0.89473684]
 [0.94736842]
 [1.        ]]


## Pre-processing
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

In [9]:
from sklearn.preprocessing import PolynomialFeatures

In [11]:
# Expand the single feature x into the polynomial terms x, x^2, x^3.
# num_features is passed as the polynomial degree; include_bias=False leaves
# out the constant column of ones.
num_features = 3
pf = PolynomialFeatures(include_bias=False, degree=num_features)
X2 = pf.fit_transform(X)
print(X2)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [5.26315789e-02 2.77008310e-03 1.45793847e-04]
 [1.05263158e-01 1.10803324e-02 1.16635078e-03]
 [1.57894737e-01 2.49307479e-02 3.93643388e-03]
 [2.10526316e-01 4.43213296e-02 9.33080624e-03]
 [2.63157895e-01 6.92520776e-02 1.82242309e-02]
 [3.15789474e-01 9.97229917e-02 3.14914711e-02]
 [3.68421053e-01 1.35734072e-01 5.00072897e-02]
 [4.21052632e-01 1.77285319e-01 7.46464499e-02]
 [4.73684211e-01 2.24376731e-01 1.06283715e-01]
 [5.26315789e-01 2.77008310e-01 1.45793847e-01]
 [5.78947368e-01 3.35180055e-01 1.94051611e-01]
 [6.31578947e-01 3.98891967e-01 2.51931768e-01]
 [6.84210526e-01 4.68144044e-01 3.20309083e-01]
 [7.36842105e-01 5.42936288e-01 4.00058318e-01]
 [7.89473684e-01 6.23268698e-01 4.92054235e-01]
 [8.42105263e-01 7.09141274e-01 5.97171599e-01]
 [8.94736842e-01 8.00554017e-01 7.16285173e-01]
 [9.47368421e-01 8.97506925e-01 8.50269719e-01]
 [1.00000000e+00 1.00000000e+00 1.00000000e+00]]


## Pipelines
https://scikit-learn.org/stable/modules/compose.html#combining-estimators

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

In [None]:
# Chain the feature expansion and the regressor into one estimator, so that a
# single fit/predict call runs both stages in order
polynomial_features = PolynomialFeatures(include_bias=False, degree=2)
linear_regression = LinearRegression()

# Each step is a (name, estimator) pair; steps are applied in list order
steps = [
    ("polynomial_features", polynomial_features),
    ("linear_regression", linear_regression),
]
pipeline = Pipeline(steps=steps)