In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
np.set_printoptions(precision=5)

import pandas as pd
pd.set_option('display.precision', 5)

In [2]:
import mglearn
X, y = mglearn.datasets.make_wave(n_samples=60)

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [4]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(X_train, y_train)

In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
y_train_hat = reg.predict(X_train)
print('train MAE: %.5f'%mean_absolute_error(y_train,y_train_hat))
print('train RMSE: %.5f'%mean_squared_error(y_train,y_train_hat)**0.5)
print('train R_square: %.5f'%r2_score(y_train,y_train_hat))

y_test_hat = reg.predict(X_test)
print('test MAE: %.5f'%mean_absolute_error(y_test,y_test_hat))
print('test RMSE: %.5f'%mean_squared_error(y_test,y_test_hat)**0.5)
print('test R_square: %.5f'%r2_score(y_test,y_test_hat))

train MAE: 0.41817
train RMSE: 0.50589
train R_square: 0.67009
test MAE: 0.49453
test RMSE: 0.62826
test R_square: 0.65934


In [6]:
pd.DataFrame({'X': X.ravel(),
              'Y': y}
            )

Unnamed: 0,X,Y
0,-0.75276,-1.18073
1,2.70429,0.50016
2,1.39196,0.13773
3,0.59195,1.17396
4,-2.06389,-1.32036
5,-2.06403,-2.37365
6,-2.6515,-0.70117
7,2.19706,1.2032
8,0.60669,0.29263
9,1.24844,0.44972


In [7]:
print('w0: %.5f'%reg.intercept_)
print('w1: %.5f'%reg.coef_)

w0: -0.03180
w1: 0.39391


In [8]:
import mglearn
X, y = mglearn.datasets.load_extended_boston()
print(X.shape, y.shape)

(506, 104) (506,)


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [10]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(X_train, y_train)

In [11]:
y_train_hat = reg.predict(X_train)
print('train MAE: %.5f'%mean_absolute_error(y_train,y_train_hat))
print('train RMSE: %.5f'%mean_squared_error(y_train,y_train_hat)**0.5)
print('train R_square: %.5f'%r2_score(y_train,y_train_hat))

y_test_hat = reg.predict(X_test)
print('test MAE: %.5f'%mean_absolute_error(y_test,y_test_hat))
print('test RMSE: %.5f'%mean_squared_error(y_test,y_test_hat)**0.5)
print('test R_square: %.5f'%r2_score(y_test,y_test_hat))

train MAE: 1.56741
train RMSE: 2.02246
train R_square: 0.95205
test MAE: 3.22590
test RMSE: 5.66296
test R_square: 0.60747


In [12]:
import mglearn
X, y = mglearn.datasets.load_extended_boston()
print(X.shape, y.shape)

(506, 104) (506,)


In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [14]:
from sklearn.linear_model import Ridge
reg = Ridge(alpha=1)
reg.fit(X_train, y_train)

In [15]:
y_train_hat = reg.predict(X_train)
print('train MAE: %.5f'%mean_absolute_error(y_train,y_train_hat))
print('train RMSE: %.5f'%mean_squared_error(y_train,y_train_hat)**0.5)
print('train R_square: %.5f'%r2_score(y_train,y_train_hat))

y_test_hat = reg.predict(X_test)
print('test MAE: %.5f'%mean_absolute_error(y_test,y_test_hat))
print('test RMSE: %.5f'%mean_squared_error(y_test,y_test_hat)**0.5)
print('test R_square: %.5f'%r2_score(y_test,y_test_hat))

train MAE: 2.16564
train RMSE: 3.12130
train R_square: 0.88580
test MAE: 2.96269
test RMSE: 4.49428
test R_square: 0.75277


In [16]:
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [17]:
training_r2 = []
test_r2 = []

alpha_settings = [0, 0.1, 1, 10]
for alpha in alpha_settings:
    # build the model
    reg = Ridge(alpha=alpha)
    reg.fit(X_train, y_train)
    
    # r2 on the training set
    y_train_hat = reg.predict(X_train)
    training_r2.append(r2_score(y_train, y_train_hat))
    
    # r2 on the test set (generalization)
    y_test_hat = reg.predict(X_test)
    test_r2.append(r2_score(y_test, y_test_hat))

In [18]:
pd.DataFrame({'alpha': alpha_settings,
              'training R_square': training_r2,
              'test R_square': test_r2}
            )

Unnamed: 0,alpha,training R_square,test R_square
0,0.0,0.95201,0.60296
1,0.1,0.92823,0.77221
2,1.0,0.8858,0.75277
3,10.0,0.78828,0.63594


In [19]:
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score

X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [20]:
num_vars = []
training_r2 = []
test_r2 = []

alpha_settings = [0.0001, 0.001, 0.01, 0.1, 1]
for alpha in alpha_settings:
    # build the model
    reg = Lasso(alpha=alpha, max_iter=1000)
    reg.fit(X_train, y_train)
    
    # no. features used
    num_vars.append(sum(reg.coef_ != 0))
    
    # r2 on the training set
    y_train_hat = reg.predict(X_train)
    training_r2.append(r2_score(y_train, y_train_hat))
    
    # r2 on the test set (generalization)
    y_test_hat = reg.predict(X_test)
    test_r2.append(r2_score(y_test, y_test_hat))

In [21]:
pd.DataFrame({'alpha': alpha_settings,
              'no. features used': num_vars,
              'training R_square': training_r2,
              'test R_square': test_r2}
            )

Unnamed: 0,alpha,no. features used,training R_square,test R_square
0,0.0001,100,0.94209,0.69765
1,0.001,76,0.93546,0.7548
2,0.01,32,0.89611,0.7678
3,0.1,8,0.771,0.6302
4,1.0,4,0.29324,0.20938


In [22]:
import mglearn
X, y = mglearn.datasets.make_forge()

In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [24]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, y_train)

In [25]:
y_test_hat = clf.predict(X_test)
print(y_test)
print(y_test_hat)

[1 0 1 0 1 1 0]
[1 0 1 0 1 0 0]


In [26]:
from sklearn.metrics import accuracy_score
print('test accuracy: %.5f'%accuracy_score(y_test, y_test_hat))

test accuracy: 0.85714


In [27]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42)

In [28]:
training_accuracy = []
test_accuracy = []

C_settings = [0.01, 0.1, 1, 10, 100, 1000, 10000]
for C in C_settings:
    # build the model
    clf = LogisticRegression(C=C)
    clf.fit(X_train, y_train)
    
    # accuracy on the training set
    y_train_hat = clf.predict(X_train)
    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    
    # accuracy on the test set (generalization)
    y_test_hat = clf.predict(X_test)
    test_accuracy.append(accuracy_score(y_test, y_test_hat))

In [29]:
pd.DataFrame({'C': C_settings,
              'training accuracy': training_accuracy,
              'test accuracy': test_accuracy}
            )

Unnamed: 0,C,training accuracy,test accuracy
0,0.01,0.93427,0.93007
1,0.1,0.94131,0.95105
2,1.0,0.94366,0.96503
3,10.0,0.95775,0.95804
4,100.0,0.94601,0.95804
5,1000.0,0.94601,0.95804
6,10000.0,0.94601,0.96503


In [30]:
from sklearn.datasets import make_blobs
X, y = make_blobs(random_state=42)

In [31]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [32]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(multi_class='ovr')
clf.fit(X_train, y_train)

In [33]:
y_test_hat = clf.predict(X_test)
print(y_test_hat)

[1 0 0 2 2 1 2 0 2 0 2 0 1 0 1 2 2 0 2 1 0 2 1 2 1]
