# Regression

We won't go over every model; in fact, I will stick to as few models as possible and focus on how they are used and what they have in common.

We will start by loading some toy data.

In [1]:
import sklearn.datasets as datasets

# return_X_y=True makes the loader return a pair, unpacked here into X and y.
# NOTE(review): load_boston is no longer shipped in recent scikit-learn releases
# (believed removed in 1.2) — confirm the installed version before re-running.
X, y = datasets.load_boston(return_X_y=True)

In [3]:
# Peek at the first entry of y.
print(y[0])

24.0


Next we will do the training. Models have two states:

1. Instantiated
2. Fit

When we instantiate the model, we specify its hyperparameters and nothing else.

In [4]:
from sklearn import linear_model

# A trailing `?` is IPython syntax: it displays the object's help
# (signature and docstring) instead of evaluating an expression.
linear_model.ElasticNet?

Init signature:
linear_model.ElasticNet(
    alpha=1.0,
    *,
    l1_ratio=0.5,
    fit_intercept=True,
    normalize=False,
    precompute=False,
    max_iter=1000,
    copy_X=True,
    tol=0.0001,
    warm_start=False,
    positive=False,
    rando

In [5]:
# Instantiation only: the hyperparameters are set here, no data is involved yet.
m = linear_model.ElasticNet(
    alpha=0.1,
    l1_ratio=0.9,
)

The next step is fitting the model to the data.

In [6]:
# Fit the instantiated model on X and y (no train/test split is made in this notebook).
m.fit(X, y)

ElasticNet(alpha=0.1, l1_ratio=0.9)

In [7]:
# Coefficients estimated by the fit.
m.coef_

array([-0.09834252,  0.04971836, -0.03880229,  0.9660197 , -0.        ,
        3.59953739, -0.00941143, -1.16532753,  0.27777374, -0.01465562,
       -0.77562437,  0.01018345, -0.57555667])

In [8]:
# Intercept term estimated by the fit.
m.intercept_

26.429919460545246

In [9]:
# predict takes a 2-D batch of samples, so slice out a one-row batch
# holding the first sample rather than passing the bare row.
m.predict(X[:1])

array([30.76993389])

In [10]:
# Actual value of y for the first sample, to compare against a prediction for that sample.
y[0]

24.0

In [11]:
# Scored on the same X, y the model was fit on, so this is a training-set
# score rather than an estimate on held-out data.
m.score(X, y)

0.7267832164923811

In [15]:
# Display the help for score to see which metric it reports.
m.score?

Signature: m.score(X, y, sample_weight=None)
Docstring:
Return the coefficient of determination R^2 of the prediction.

The coefficient R^2 is defined as (1 - u/v), where u is the residual
sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
sum of squares ((y_true - y_true.mean()) ** 2).sum().
The best possible score is 1.0 and it can be negative (because the
model can be arbitrarily worse). A constant model that always
predicts the expected value of y, disregarding the input features,
would get a R^2 score of 0.0.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Test samples. For some estimators this may be a
    precomputed kernel matrix or a list of generic objects instead,
    shape = (n_samples, n_samples_fitted),
    where n_samples_fitted is the number of
    samples used in the

## CV models

Some of these models come with a CV (cross-validated) variant, which searches over hyperparameter values while it is being fit.

In [16]:
# Display the help (constructor signature and docstring) for the CV variant.
linear_model.ElasticNetCV?

Init signature:
linear_model.ElasticNetCV(
    *,
    l1_ratio=0.5,
    eps=0.001,
    n_alphas=100,
    alphas=None,
    fit_intercept=True,
    normalize=False,
    precompute='auto',
    max_iter=1000,
    tol=0.0001,
    cv=None,
    copy_X

In [17]:
# Rebinds m to a cross-validated model. l1_ratio is given as a list of
# candidate values, and n_alphas is lowered from its default of 100 to 20.
m = linear_model.ElasticNetCV(
    n_alphas=20,
    l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
)

In [18]:
# Fitting the CV model runs the hyperparameter search; the selected values
# are then available on the fitted model (see alpha_ and l1_ratio_).
m.fit(X, y)

ElasticNetCV(l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], n_alphas=20)

In [19]:
# Alphas that were tried during the search
# (appears to be one row of n_alphas values per l1_ratio candidate — TODO confirm).
m.alphas_

array([[7.24820428e+03, 5.03889940e+03, 3.50300657e+03, 2.43526493e+03,
        1.69297864e+03, 1.17694655e+03, 8.18204764e+02, 5.68810058e+02,
        3.95432655e+02, 2.74901933e+02, 1.91109843e+02, 1.32858186e+02,
        9.23620541e+01, 6.42094347e+01, 4.46379364e+01, 3.10319718e+01,
        2.15732033e+01, 1.49975355e+01, 1.04261786e+01, 7.24820428e+00],
       [1.44964086e+03, 1.00777988e+03, 7.00601313e+02, 4.87052986e+02,
        3.38595727e+02, 2.35389310e+02, 1.63640953e+02, 1.13762012e+02,
        7.90865309e+01, 5.49803866e+01, 3.82219687e+01, 2.65716373e+01,
        1.84724108e+01, 1.28418869e+01, 8.92758728e+00, 6.20639437e+00,
        4.31464065e+00, 2.99950710e+00, 2.08523573e+00, 1.44964086e+00],
       [1.03545775e+03, 7.19842772e+02, 5.00429509e+02, 3.47894990e+02,
        2.41854091e+02, 1.68135222e+02, 1.16886395e+02, 8.12585797e+01,
        5.64903792e+01, 3.92717047e+01, 2.73014062e+01, 1.89797409e+01,
        1.31945792e+01, 9.17277638e+00, 6.37684806e+00, 4.4331

In [21]:
# Errors recorded during the search
# (presumably mean squared error per cross-validation fold — TODO confirm against the docs).
m.mse_path_

array([[[ 34.46313316,  94.87999031, 160.3822969 ,  96.85105338,
          91.18228117],
        [ 36.01725641,  90.87020824, 152.2615469 ,  88.61576106,
          91.18228117],
        [ 38.56862106,  88.31600559, 139.33746263,  85.52517035,
          75.61821262],
        [ 41.23655024,  86.69710072, 130.43823286,  85.11520663,
          55.12988262],
        [ 43.58589628,  85.66581333, 124.32650653,  86.05236042,
          43.2795886 ],
        [ 45.2193056 ,  84.49171601, 120.16909351,  87.20101215,
          36.53104146],
        [ 46.47915464,  83.47037543, 117.35676499,  88.10582451,
          32.69544214],
        [ 47.41711903,  82.80037365, 115.44292525,  88.78642686,
          30.47620643],
        [ 47.28174303,  82.35767049, 113.87788592,  89.00232134,
          28.89573631],
        [ 45.45436715,  80.25621958, 110.00848191,  89.35387088,
          25.30706969],
        [ 42.81244452,  75.02476883, 105.30196523,  87.23744914,
          21.01633294],
        [ 40.13526732

In [22]:
# The alpha value selected by the search.
m.alpha_

1.0354577548246566

In [23]:
# The l1_ratio selected by the search (one of the candidates passed to the constructor).
m.l1_ratio_

0.7

In [24]:
# predict takes a 2-D batch of samples, so slice out a one-row batch
# holding the first sample rather than passing the bare row.
m.predict(X[:1])

array([30.98076851])

In [25]:
# Scored on the same X, y the model was fit on, so this is a training-set
# score rather than an estimate on held-out data.
m.score(X, y)

0.6831524128217572

# Classification

Okay, this one is quick, since it works very much the same way as the regression example above. To cut to the chase, I'll train a cross-validated logistic regression.

In [26]:
# Rebinds X and y: from here on they hold the iris data,
# not the regression data used earlier in the notebook.
X, y = datasets.load_iris(return_X_y=True)

In [28]:
# Without return_X_y the loader returns a container object; its DESCR
# attribute holds the dataset description.
d = datasets.load_iris()

# print (rather than a bare expression) so the multi-line text renders with real line breaks.
print(d.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [29]:
# Display the help (constructor signature and docstring) for the CV classifier.
linear_model.LogisticRegressionCV?

Init signature:
linear_model.LogisticRegressionCV(
    *,
    Cs=10,
    fit_intercept=True,
    cv=None,
    dual=False,
    penalty='l2',
    scoring=None,
    solver='lbfgs',
    tol=0.0001,
    max_iter=100,
    class_weight=None,
    n_jobs

In [33]:
# Rebinds m to a cross-validated classifier. Cs=10 restates the default from
# the constructor signature.
# NOTE(review): n_jobs=20 was presumably chosen for the author's machine —
# consider n_jobs=-1 for portability; confirm.
m = linear_model.LogisticRegressionCV(
    Cs=10,
    n_jobs=20,
)

In [34]:
# Fit the classifier on the iris X and y.
m.fit(X, y)

LogisticRegressionCV(n_jobs=20)

In [35]:
# Coefficients estimated by the fit
# (the 3x4 shape suggests one row per class and one column per feature — TODO confirm).
m.coef_

array([[-0.35346644,  2.46957158, -4.9489028 , -2.4931657 ],
       [ 1.16087133,  0.09707947, -0.50859771, -2.91951647],
       [-0.8074049 , -2.56665105,  5.45750051,  5.41268218]])

In [36]:
# predict takes a 2-D batch of samples, so slice out a one-row batch
# holding the first sample; the result is the predicted class label.
m.predict(X[:1])

array([0])

In [37]:
# Actual class label of the first sample, to compare against a prediction for that sample.
y[0]

0

In [38]:
# Per-class probabilities for the first sample, passed as a one-row batch.
m.predict_proba(X[:1])

array([[9.98599548e-01, 1.40045247e-03, 9.40943979e-18]])

In [39]:
# Per-class log-probabilities for the first sample, passed as a one-row batch.
m.predict_log_proba(X[:1])

array([[-1.40143402e-03, -6.57095990e+00, -3.92048183e+01]])

In [40]:
# Scored on the same X, y the model was fit on, so this is a training-set
# score rather than an estimate on held-out data.
m.score(X, y)

0.98

In [41]:
# Display the help for score; for this classifier it differs from the regression one.
m.score?

Signature: m.score(X, y, sample_weight=None)
Docstring:
Returns the score using the `scoring` option on the given
test data and labels.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Test samples.

y : array-like of shape (n_samples,)
    True labels for X.

sample_weight : array-like of shape (n_samples,), default=None
    Sample weights.

Returns
-------
score : float
    Score of self.predict(X) wrt. y.
File:      ~/opt/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py
Type:      method
