In [163]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, mean_squared_error
from sklearn.datasets import fetch_openml
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.svm import LinearSVC, SVC, SVR, LinearSVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from pandas.plotting import scatter_matrix
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression

Exercise 9

In [2]:
mnist = fetch_openml('mnist_784', version=1)

  warn(


In [12]:
list(mnist.keys())

['data',
 'target',
 'frame',
 'categories',
 'feature_names',
 'target_names',
 'DESCR',
 'details',
 'url']

In [3]:
X, y = mnist['data'], mnist['target']
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]

In [20]:
svm_linear_clf = LinearSVC(C=1, loss='hinge', random_state=42)

In [21]:
svm_linear_clf.fit(X_train, y_train)



In [22]:
y_pred = svm_linear_clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.8839

In [24]:
cross_val_score(svm_linear_clf, X_train, y_train, cv=3, scoring='accuracy')



array([0.8727 , 0.86635, 0.8695 ])

In [4]:
svc_clf = SVC(kernel='poly', degree=3, coef0=1, C=5)

poly_kernel_svc = Pipeline([
    ('std_scaler', StandardScaler()),
    ('svm', svc_clf)
])

In [27]:
poly_kernel_svc.fit(X_train, y_train)

In [29]:
y_pred = poly_kernel_svc.predict(X_test)

In [30]:
accuracy_score(y_test, y_pred)

0.9769

In [5]:
cross_val_score(poly_kernel_svc, X_train, y_train, cv=2)

array([0.9699    , 0.96813333])

In [8]:
X_train_new = X_train[:1000,:]
y_train_new = y_train[:1000]

In [10]:
X_train_new.shape

(1000, 784)

In [11]:
cross_val_score(svc_clf, X_train_new, y_train_new, cv=5)

array([0.895, 0.87 , 0.895, 0.89 , 0.87 ])

In [14]:
param_grid = {'kernel':['poly', 'rbf'], 'degree':[2,3,4,5], 'coef0':[0.5,1,3], 'C':[0.1,1,5]}

grid = GridSearchCV(svc_clf, param_grid, cv=5, scoring='accuracy')

In [15]:
grid.fit(X_train_new, y_train_new)

In [16]:
grid.best_params_

{'C': 5, 'coef0': 0.5, 'degree': 2, 'kernel': 'rbf'}

In [18]:
svc_best_clf = grid.best_estimator_

In [19]:
svc_best_clf.fit(X_train, y_train)

In [20]:
y_pred = svc_best_clf.predict(X_test)

In [21]:
accuracy_score(y_test, y_pred)

0.9841

Exercise 10

In [196]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

In [197]:
housing.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])

In [198]:
housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [200]:
X = housing['data']
y = housing['target']

In [201]:
scaler = StandardScaler()

In [202]:
scaler.fit(X)

In [203]:
X_ = scaler.transform(X)

In [204]:
X_train, X_test, y_train, y_test = train_test_split(X_, y, test_size=0.2, random_state=42)

In [218]:
svm_reg_clf = LinearSVR(epsilon=1.5, max_iter=10000)

In [219]:
svm_reg_clf.fit(X_train, y_train)

In [220]:
y_pred = svm_reg_clf.predict(X_test)

In [221]:
lin_svr_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [222]:
print(lin_svr_rmse)

0.8937676718470506


In [211]:
lin_reg_clf = LinearRegression()

In [212]:
lin_reg_clf.fit(X_train, y_train)

In [213]:
y_pred = lin_reg_clf.predict(X_test)

In [214]:
lin_reg_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [215]:
print(lin_reg_rmse)

0.7455813830127764


In [216]:
scores = cross_val_score(lin_reg_clf, X_train, y_train, cv=10, scoring='neg_mean_squared_error')
print(np.sqrt(-scores).mean())

0.720472599387967


In [223]:
scores = cross_val_score(svm_reg_clf, X_train, y_train, cv=10, scoring='neg_mean_squared_error')
print(np.sqrt(-scores).mean())



0.8863304430903733


In [224]:
svm_poly_clf = SVR(kernel='poly', degree=2, C=10, epsilon=0.1)

In [226]:
scores = cross_val_score(svm_poly_clf, X_train, y_train, cv=2, scoring='neg_mean_squared_error')
print(np.sqrt(-scores).mean())

7.104806564287218


In [227]:
svm_poly_clf = SVR()

In [228]:
param_grid = {'kernel':['poly', 'rbf'], 'degree':[2,3], 'epsilon':[0.1,0.5], 'C':[1,100,200]}

grid = GridSearchCV(svm_poly_clf, param_grid, cv=3, scoring='neg_mean_squared_error')