# 예제 1. Digit Classification

In [None]:
from sklearn import datasets

# Load the digits dataset object; its images and labels are used below.
digits = datasets.load_digits()

# Report both array shapes first, then the first image and its label.
for array in (digits.images, digits.target):
    print(array.shape)
for array in (digits.images, digits.target):
    print(array[0])

In [None]:
import matplotlib.pyplot as plt

# One row of five axes: show the first five images, each titled with its label.
_, axes = plt.subplots(nrows=1, ncols=5, figsize=(10, 5))
for index, ax in enumerate(axes):
    ax.set_axis_off()
    ax.imshow(digits.images[index], cmap='gray')
    ax.set_title(f'Target: {digits.target[index]}')

In [None]:
# Flatten every image into one feature row: (n_samples, rows, cols) -> (n_samples, rows * cols).
num_images = digits.images.shape[0]
images = digits.images.reshape((num_images, -1))

print(images.shape)

In [None]:
from sklearn.model_selection import train_test_split

# shuffle=False keeps the original sample order, so the split is a plain
# head/tail cut with 20% of the rows held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    digits.target,
    test_size=0.2,
    shuffle=False,
)

for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
from sklearn.linear_model import LogisticRegression

# The pixel features are fed in unscaled, and the library's default iteration
# cap (100) is typically too low for the solver to converge on them, which
# yields a ConvergenceWarning and an under-fitted model. max_iter is only an
# upper bound: fitting still stops as soon as the solver converges.
classifier = LogisticRegression(random_state=0, max_iter=10000)

classifier.fit(X_train, y_train)

# Score of the fitted classifier on the held-out split.
print(classifier.score(X_test, y_test))

In [None]:
y_test_pred = classifier.predict(X_test)

# Show the first five test samples with the label the classifier predicted.
_, axes = plt.subplots(nrows=1, ncols=5, figsize=(10, 5))
for index, ax in enumerate(axes):
    ax.set_axis_off()
    # Test rows are flattened feature vectors; restore the 8x8 image for display.
    ax.imshow(X_test[index].reshape(8, 8), cmap='gray')
    ax.set_title(f'Prediction: {y_test_pred[index]}')

In [None]:
from sklearn import metrics

# Text summary comparing the true test labels with the predictions.
report = metrics.classification_report(y_test, y_test_pred)
print(report)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix of true labels against predicted labels on the test split.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_test_pred)
plt.show()

# 예제 2. Iris Classification

In [None]:
from sklearn import datasets

# return_X_y=True yields the (features, labels) pair directly.
X, y = datasets.load_iris(return_X_y=True)

print(X.shape, y.shape, sep='\n')

In [None]:
import numpy as np

# Distinct label values present in y.
labels = np.unique(y)
print(labels)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The shuffle is seeded so the split,
# and therefore every score printed after it, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbours classifier with the library's default settings.
classifier = KNeighborsClassifier()
classifier.fit(X_train, y_train)

# Score of the fitted classifier on the held-out split.
test_score = classifier.score(X_test, y_test)
print(test_score)

In [None]:
y_test_pred = classifier.predict(X_test)

# True labels on the first line, predictions on the second, for comparison.
print(y_test, y_test_pred, sep='\n')

In [None]:
# pyplot is imported in this cell so the example does not rely on an import
# made in a different example having been executed first.
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix of true labels against predicted labels on the test split.
ConfusionMatrixDisplay.from_predictions(y_test, y_test_pred)
plt.show()

# 예제 3. Diabetes Regression

In [None]:
from sklearn import datasets

# return_X_y=True yields the (features, targets) pair directly.
X, y = datasets.load_diabetes(return_X_y=True)

for array in (X, y):
    print(array.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The shuffle is seeded so the split,
# and therefore every metric printed after it, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
from sklearn import linear_model

# Ordinary linear regression fitted on the training split.
regressor = linear_model.LinearRegression()
regressor.fit(X_train, y_train)

# Score of the fitted model on the held-out split.
test_score = regressor.score(X_test, y_test)
print(test_score)

In [None]:
# Learned parameters: the coefficients first, then the intercept.
print(regressor.coef_, regressor.intercept_, sep='\n')

In [None]:
from sklearn.metrics import mean_squared_error

y_test_pred = regressor.predict(X_test)

# Mean squared error between the true test targets and the predictions.
mse = mean_squared_error(y_test, y_test_pred)
print(mse)

# 예제 4. Underfitting and Overfitting

In [None]:
import numpy as np

# Evenly spaced sample points on [0, 10] (np.linspace's default point count).
grid = np.linspace(0, 10)

# Inputs as a single-feature column; targets are sin(x) at the same points.
X = grid.reshape(-1, 1)
y = np.sin(grid)

print(X.shape, y.shape, sep='\n')

In [None]:
from sklearn.model_selection import train_test_split

# Half of the points are used for training and half for testing. The shuffle
# is seeded so the under-/over-fitting plots and scores that follow are the
# same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
import matplotlib.pyplot as plt

# Two scatter series on the same axes: training points first, then test points.
for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(inputs, targets)
plt.show()

In [None]:
from sklearn import linear_model

# Plain linear model on the raw single-feature input.
regressor = linear_model.LinearRegression()
regressor.fit(X_train, y_train)

# Training score first, then test score.
for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    print(regressor.score(inputs, targets))

In [None]:
# Predict over the full input grid so the fitted model can be drawn as a line.
y_pred = regressor.predict(X)

for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(inputs, targets)
plt.plot(X, y_pred)
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

polynomial = PolynomialFeatures(degree=10)

# The expansion is fitted on the training inputs only and then reused,
# unchanged, for the test inputs and for the full grid.
X_train_ = polynomial.fit_transform(X_train)
X_test_ = polynomial.transform(X_test)
X_ = polynomial.transform(X)

for expanded in (X_, X_train_, X_test_):
    print(expanded.shape)

In [None]:
# Fresh linear model, this time fitted on the polynomial-expanded features.
regressor = linear_model.LinearRegression()
regressor.fit(X_train_, y_train)

# Training score first, then test score.
for inputs, targets in ((X_train_, y_train), (X_test_, y_test)):
    print(regressor.score(inputs, targets))

In [None]:
# Predict from the expanded grid, but plot against the original inputs.
y_pred = regressor.predict(X_)

for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(inputs, targets)
plt.plot(X, y_pred)
plt.show()

In [None]:
# Same expansion as before, now with a higher polynomial degree.
polynomial = PolynomialFeatures(degree=20)

# Fitted on the training inputs only, then reused for the test inputs and grid.
X_train_ = polynomial.fit_transform(X_train)
X_test_ = polynomial.transform(X_test)
X_ = polynomial.transform(X)

for expanded in (X_, X_train_, X_test_):
    print(expanded.shape)

In [None]:
# Refit a fresh linear model on the current polynomial-expanded features.
regressor = linear_model.LinearRegression()
regressor.fit(X_train_, y_train)

# Training score first, then test score.
for inputs, targets in ((X_train_, y_train), (X_test_, y_test)):
    print(regressor.score(inputs, targets))

In [None]:
# Predict from the expanded grid, but plot against the original inputs.
y_pred = regressor.predict(X_)

for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(inputs, targets)
plt.plot(X, y_pred)
plt.show()

# 예제 5. Clustering

In [None]:
from sklearn import datasets

# Synthetic blob data with the generator's default settings. The generator is
# seeded so the same points (and hence the same plots below) appear every run.
X, y = datasets.make_blobs(random_state=0)

print(X.shape)
print(y.shape)

In [None]:
import matplotlib.pyplot as plt

# Plot the first feature against the second, without any cluster colouring.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature)
plt.show()

In [None]:
from sklearn.cluster import KMeans

# n_init is set explicitly because its default differs between scikit-learn
# releases (and some releases warn when it is left unset); random_state seeds
# the centroid initialisation so the cluster assignment is repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)

# Fit on X and return the cluster index assigned to each sample.
y_pred = kmeans.fit_predict(X)

In [None]:
# Same scatter as before, coloured by the cluster index assigned to each point.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y_pred)
plt.show()

In [None]:
from sklearn.cluster import KMeans

# Cluster the same data again, this time asking for five clusters.
# n_init is set explicitly because its default differs between scikit-learn
# releases (and some releases warn when it is left unset); random_state seeds
# the centroid initialisation so the cluster assignment is repeatable.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0)

# Fit on X and return the cluster index assigned to each sample.
y_pred = kmeans.fit_predict(X)

In [None]:
# Scatter of the two features, coloured by the current cluster assignment.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y_pred)
plt.show()

# 예제 6. Preprocessing

In [None]:
from sklearn import datasets

# return_X_y=True yields the (features, labels) pair directly.
X, y = datasets.load_digits(return_X_y=True)

print(X.shape, y.shape, sep='\n')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The shuffle is seeded so the split,
# and therefore the score and confusion matrix below, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# The scaler is fitted on the training split only; the test split is
# transformed with those same fitted statistics.
X_train_ = scaler.fit_transform(X_train)
X_test_ = scaler.transform(X_test)

# First training sample before and after scaling.
print(X_train[0], X_train_[0], sep='\n')

In [None]:
from sklearn.linear_model import RidgeClassifier

# Ridge classifier with default settings, trained on the scaled features.
classifier = RidgeClassifier()
classifier.fit(X_train_, y_train)

# Score of the fitted classifier on the scaled held-out split.
test_score = classifier.score(X_test_, y_test)
print(test_score)

In [None]:
# pyplot is imported in this cell so the example does not rely on an import
# made in a different example having been executed first.
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Predict on the scaled test features (the classifier was trained on scaled data).
y_test_pred = classifier.predict(X_test_)

# Confusion matrix of true labels against predicted labels on the test split.
ConfusionMatrixDisplay.from_predictions(y_test, y_test_pred)
plt.show()

# 예제 7. Hyperparameter Search

In [None]:
from sklearn import datasets

# return_X_y=True yields the (features, labels) pair directly.
X, y = datasets.load_iris(return_X_y=True)

for array in (X, y):
    print(array.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The shuffle is seeded so the split,
# and therefore the search results reported below, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

classifier = KNeighborsClassifier()

# Every combination of these candidate values is tried by the grid search.
param_grid = {'n_neighbors': [3, 5, 10], 'p': [1, 2]}

search = GridSearchCV(estimator=classifier, param_grid=param_grid)
search.fit(X_train, y_train)

# Score of the fitted search object on the held-out split.
test_score = search.score(X_test, y_test)
print(test_score)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Predictions come from the fitted search object.
y_test_pred = search.predict(X_test)

# Confusion matrix of true labels against predicted labels on the test split.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_test_pred)
plt.show()

In [None]:
import pandas as pd

# Wrap the search's cv_results_ mapping in a DataFrame for tabular display.
cv_results = search.cv_results_
results = pd.DataFrame(cv_results)

# Bare expression on the last line so a notebook renders the table.
results