In [1]:
from sklearn.datasets import load_digits, load_diabetes, load_iris, load_wine
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, RandomizedSearchCV
from joblib import dump, load

In [2]:
digits = load_digits()
diabetes = load_diabetes()
iris = load_iris()
wine = load_wine()

In [3]:
x, y = digits.data, digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
param_grid = {'kernel': ['linear', 'poly', 'rbf'], 'C': [0.1, 1, 5, 10, 20]}
svm = SVC()
svm = GridSearchCV(svm, param_grid, cv=5)
svm.fit(x_train, y_train)
print(svm.best_score_)
print(svm.best_params_)
y_pred = svm.predict(x_test)
print(accuracy_score(y_pred, y_test))

0.9909552845528454
{'C': 10, 'kernel': 'rbf'}
0.9861111111111112


In [4]:
x, y = digits.data, digits.target
svm = SVC()
pca = PCA(n_components=30)
pca.fit(x)
x = pca.transform(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
svm = SVC()
svm.fit(x_train, y_train)
y_pred = svm.predict(x_test)
accuracy_score(y_pred, y_test)

0.9861111111111112

In [5]:
x, y = diabetes.data, diabetes.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
lin_reg = LinearRegression()
lin_reg.fit(x, y)
y_pred = lin_reg.predict(x_test)
mean_squared_error(y_pred, y_test)

2392.0647551140514

In [6]:
x, y = iris.data, iris.target
kmeans = KMeans(n_clusters=3)
kmeans.fit(x)
kmeans.predict([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]])

array([1, 1], dtype=int32)

In [7]:
x, y = wine.data, wine.target
scaler = StandardScaler()
scaler.fit(x)
x = scaler.transform(x)
log_reg = LogisticRegression()
log_reg.fit(x, y)
cross_val_score(log_reg, x, y, cv=10)

array([0.94444444, 0.94444444, 0.94444444, 0.94444444, 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ])

In [8]:
from numpy.random import uniform
x, y = diabetes.data, diabetes.target
svr = SVR()
param_grid = {'kernel': ['linear', 'poly', 'rbf'], 'C': uniform(0, 100, 50)}
svr = RandomizedSearchCV(svr, param_grid, cv=5)
svr.fit(x, y)
print(svr.best_score_)
print(svr.best_params_)
dump(svr, 'svr.joblib')
svr = load('svr.joblib')

0.45870543995154556
{'kernel': 'rbf', 'C': 8.044012816742496}
