In [1]:
from sklearn.datasets import load_iris, make_moons, load_wine
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
"""
Problem 9 - Train a LinearSVC on a linearly separable dataset. Then train an SVC and a9.
SGDClassifier on the same dataset. See if you can get them to produce roughly
the same model.
"""
iris = load_iris(as_frame=True)
X = iris.data[["petal length (cm)", "petal width (cm)"]].values
y = (iris.target == 2) # Iris Virginica

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X,y, 
                                                    test_size=0.3, random_state=42)

In [4]:
lin_svc = make_pipeline(StandardScaler(), LinearSVC(C=1, random_state=42))

In [5]:
lin_svc.fit(X_train,y_train)

0,1,2
,steps,"[('standardscaler', ...), ('linearsvc', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0


In [6]:
y_predict = lin_svc.predict(X_test)

In [7]:
print(np.mean(y_predict == y_test)) 
lin_svc.score(X_test, y_test)

1.0


1.0

In [8]:
svc = make_pipeline(StandardScaler(), SVC(C=1, kernel='linear', random_state=42))

In [9]:
svc.fit(X_train, y_train)

0,1,2
,steps,"[('standardscaler', ...), ('svc', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,1
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [10]:
svc.score(X_test, y_test)

1.0

In [11]:
sgd_class = make_pipeline(StandardScaler(), SGDClassifier(alpha=0.01))

In [12]:
sgd_class.fit(X_train, y_train)

0,1,2
,steps,"[('standardscaler', ...), ('sgdclassifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,loss,'hinge'
,penalty,'l2'
,alpha,0.01
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1


In [13]:
sgd_class.score(X_test, y_test)

1.0

In [14]:
"""
Train an SVM classifier on the wine dataset, which you can load using10.
sklearn.datasets.load_wine(). This dataset contains the chemical analyses
of 178 wine samples produced by 3 different cultivators: the goal is to train
a classification model capable of predicting the cultivator based on the wine’s
chemical analysis. Since SVM classifiers are binary classifiers, you will need to
use one-versus-all to classify all three classes. What accuracy can you reach?
"""
wine_df = load_wine(as_frame=True)

In [15]:
X = wine_df.data.values
y = wine_df.target.values

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)

In [16]:
svc_wine = make_pipeline(StandardScaler(), SVC(decision_function_shape='ovr', kernel='poly',
                                               degree=1, C=0.9, coef0=1, random_state=42))

In [17]:
svc_wine.fit(X_train, y_train)

0,1,2
,steps,"[('standardscaler', ...), ('svc', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,0.9
,kernel,'poly'
,degree,1
,gamma,'scale'
,coef0,1
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [18]:
svc_wine.score(X_test, y_test)

1.0

In [19]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(svc_wine, X, y, cv=5)
print(f"CV Accuracy: {scores.mean():.3f} (+/- {scores.std():.3f})")

CV Accuracy: 0.972 (+/- 0.025)


In [20]:
"""
Train and fine-tune an SVM regressor on the California housing dataset. You can11.
use the original dataset rather than the tweaked version we used in Chapter 2,
which you can load using sklearn.datasets.fetch_california_housing().
The targets represent hundreds of thousands of dollars. Since there are over
20,000 instances, SVMs can be slow, so for hyperparameter tuning you should
use far fewer instances (e.g., 2,000) to test many more hyperparameter combina‐
tions. What is your best model’s RMSE?
"""
from sklearn.datasets import fetch_california_housing

In [21]:
housing = fetch_california_housing(as_frame=True)

In [22]:
X = housing.data.values 
y = housing.target.values

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42,shuffle=True)

In [33]:
rng = np.random.default_rng() 
indexes = rng.integers(low = 0, high = len(X_train), size = 2000)
X_params, y_params = X_train[indexes], y_train[indexes]

In [23]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import LinearSVR, SVR

In [49]:
lin_svr = make_pipeline(StandardScaler(),
                        LinearSVR(max_iter=5000, dual=True, random_state=42))
lin_svr.fit(X_train, y_train)

0,1,2
,steps,"[('standardscaler', ...), ('linearsvr', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,epsilon,0.0
,tol,0.0001
,C,1.0
,loss,'epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,True
,verbose,0
,random_state,42
,max_iter,5000


In [50]:
try:
    from sklearn.metrics import root_mean_squared_error
except ImportError:
    from sklearn.metrics import mean_squared_error

    def root_mean_squared_error(labels, predictions):
        return mean_squared_error(labels, predictions, squared=False)

In [51]:
y_pred = lin_svr.predict(X_train)
rmse = root_mean_squared_error(y_train, y_pred)
rmse

0.8674069403544904

In [52]:
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform

svm_reg = make_pipeline(StandardScaler(), SVR())

param_distrib = {
    "svr__gamma": loguniform(0.001, 0.1),
    "svr__C": uniform(1, 10)
}
rnd_search_cv = RandomizedSearchCV(svm_reg, param_distrib,
                                   n_iter=100, cv=3, random_state=42)
rnd_search_cv.fit(X_train[:2000], y_train[:2000])

0,1,2
,estimator,"Pipeline(step...svr', SVR())])"
,param_distributions,"{'svr__C': <scipy.stats....x7ff2703e7110>, 'svr__gamma': <scipy.stats....x7ff2722941a0>}"
,n_iter,100
,scoring,
,n_jobs,
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,'rbf'
,degree,3
,gamma,np.float64(0....1408196485979)
,coef0,0.0
,tol,0.001
,C,np.float64(4.63629602379294)
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [53]:
rnd_search_cv.best_estimator_

0,1,2
,steps,"[('standardscaler', ...), ('svr', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,'rbf'
,degree,3
,gamma,np.float64(0....1408196485979)
,coef0,0.0
,tol,0.001
,C,np.float64(4.63629602379294)
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [54]:
-cross_val_score(rnd_search_cv.best_estimator_, X_train, y_train,
                 scoring="neg_root_mean_squared_error")

array([0.60628288, 0.58521665, 0.5760779 , 0.57309699, 0.60537574])

In [55]:
y_pred = rnd_search_cv.best_estimator_.predict(X_test)
rmse = root_mean_squared_error(y_test, y_pred)
rmse

0.5991587512286991