In [None]:
# NOTE(review): star import -- every public name exported by `preamble` lands in
# the notebook namespace. `mglearn` (used further down for the scatter-matrix
# colormap) is never imported explicitly in this notebook, so it presumably
# comes from here -- confirm before replacing this with explicit imports.
from preamble import *
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### 1.4 必要なライブラリとツール

#### 1.4.1 Jupyter Notebook

#### 1.4.2 NumPy

In [None]:
### [1]
import numpy as np

# Build a two-row, three-column array from a nested list (one inner list per row).
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print("x:\n{}".format(x))

#### 1.4.3 SciPy

#### 1.4.4 matplotlib

In [None]:
### [5]
%matplotlib inline
import matplotlib.pyplot as plt

# Generate a sequence of numbers from -10 to 10 with 100 steps in between
x = np.linspace(-10, 10, 100)
# Create a second array using sine
y = np.sin(x)
# The plot function makes a line chart of one array against another
plt.plot(x, y, marker="x")

#### 1.4.5 pandas

In [None]:
### [6]
import pandas as pd
# Imported explicitly so this cell does not depend on `display` having been
# injected into the namespace by the notebook front end or by a star import.
from IPython.display import display

# A small example dataset of people: one dict key per column,
# one list entry per row.
data = {
    'Name': ["John", "Anna", "Peter", "Linda"],
    'Location': ["New York", "Paris", "Berlin", "London"],
    'Age': [24, 13, 53, 33],
}

data_pandas = pd.DataFrame(data)
# display() shows the DataFrame using its rich notebook representation
# rather than the plain-text form print() would give.
display(data_pandas)

In [None]:
### [7]
# Keep only the rows whose Age value exceeds 30, selected with a boolean row mask.
display(data_pandas.loc[data_pandas['Age'] > 30])

### 1.6 本書で用いているバージョン

In [None]:
### [8]
# Report the interpreter and library versions this notebook is running against.
import sys

import IPython
import matplotlib
import numpy as np
import pandas as pd
import scipy as sp
import sklearn

print("Python version:", sys.version)

# (label, module) pairs, listed in the order they are reported.
for label, module in (
        ("pandas version:", pd),
        ("matplotlib version:", matplotlib),
        ("NumPy version:", np),
        ("SciPy version:", sp),
        ("IPython version:", IPython),
        ("scikit-learn version:", sklearn),
):
    print(label, module.__version__)

### 1.7 最初のアプリケーション：アイリスのクラス分類
![sepal_petal](images/iris_petal_sepal.png)
#### 1.7.1 データを読む

In [None]:
### [9]
from sklearn.datasets import load_iris

# The returned object is indexed by key in the cells below
# ('DESCR', 'target_names', 'feature_names', 'data', 'target').
iris_dataset = load_iris()

In [None]:
### [10]
# List the entries available in the loaded dataset object.
dataset_keys = iris_dataset.keys()
print("Keys of iris_dataset:\n", dataset_keys)

In [None]:
### [11]
# Show only the opening of the dataset description (its first 193 characters),
# followed by a "..." line to signal that the text was cut.
description_head = iris_dataset['DESCR'][:193]
print(description_head + "\n...")

In [None]:
### [12]
# Names of the target classes; a later cell indexes this entry with the
# predicted label to get a readable name.
target_names = iris_dataset['target_names']
print("Target names:", target_names)

In [None]:
### [13]
# Names of the features; reused later as the DataFrame column labels.
feature_names = iris_dataset['feature_names']
print("Feature names:\n", feature_names)

In [None]:
### [14]
# Report which Python type holds the 'data' entry.
data_type = type(iris_dataset['data'])
print("Type of data:", data_type)

In [None]:
### [15]
# Dimensions of the 'data' entry.
data_shape = iris_dataset['data'].shape
print("Shape of data:", data_shape)

In [None]:
### [16]
# Peek at the first five rows rather than printing the whole 'data' entry.
first_rows = iris_dataset['data'][:5]
print("First five rows of data:\n", first_rows)

In [None]:
### [17]
# Report which Python type holds the 'target' entry.
target_type = type(iris_dataset['target'])
print("Type of target:", target_type)

In [None]:
### [18]
# Dimensions of the 'target' entry.
target_shape = iris_dataset['target'].shape
print("Shape of target:", target_shape)

In [None]:
### [19]
# Print every value of the 'target' entry.
target_values = iris_dataset['target']
print("Target:\n", target_values)

#### 1.7.2 成功度合いの測定：訓練データとテストデータ

In [None]:
### [20]
from sklearn.model_selection import train_test_split

# Split the 'data' and 'target' entries into a training part and a test part.
# random_state=0 pins the pseudo-random split so that reruns give the same
# partition.
features, labels = iris_dataset['data'], iris_dataset['target']
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=0)

In [None]:
### [21]
# Shapes of the training inputs and of their matching labels.
for label, array in (("X_train shape:", X_train),
                     ("y_train shape:", y_train)):
    print(label, array.shape)

In [None]:
### [22]
# Shapes of the test inputs and of their matching labels.
for label, array in (("X_test shape:", X_test),
                     ("y_test shape:", y_test)):
    print(label, array.shape)

#### 1.7.3 最初にすべきこと：データをよく観察する

In [None]:
### [23]
# Wrap the training inputs in a DataFrame whose columns are labelled with the
# dataset's feature names.
feature_columns = iris_dataset['feature_names']
iris_dataframe = pd.DataFrame(X_train, columns=feature_columns)

# Pairwise scatter plots of every feature against every other one, with each
# point colored by its training label and 20-bin histograms drawn via
# hist_kwds. The result is bound to `_` so the cell does not echo it.
# NOTE(review): `mglearn` is not imported in this cell; it is presumably
# provided by the notebook's preamble import -- confirm.
_ = pd.plotting.scatter_matrix(
    iris_dataframe,
    c=y_train,
    figsize=(15, 15),
    marker='o',
    hist_kwds={'bins': 20},
    s=60,
    alpha=.8,
    cmap=mglearn.cm3,
)

#### 1.7.4 最初のモデル：k-最近傍法

In [None]:
### [24]
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbors classifier configured to consult a single neighbor.
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
### [25]
# Fit the classifier on the training split. The call is left as the cell's
# last expression, so the notebook shows whatever fit() returns.
knn.fit(X=X_train, y=y_train)

#### 1.7.5 予測を行う

In [None]:
### [26]
# One new sample to classify. The measurements are wrapped in an outer list so
# the array is two-dimensional: a single row of four values.
new_measurements = [5, 2.9, 1, 0.2]
X_new = np.array([new_measurements])
print("X_new.shape:", X_new.shape)

In [None]:
### [27]
# Predict the label of the new sample, then look up the matching class name
# by indexing 'target_names' with the predicted label.
prediction = knn.predict(X_new)
print("Prediction:", prediction)

predicted_names = iris_dataset['target_names'][prediction]
print("Predicted target name:", predicted_names)

#### 1.7.6 モデルの評価

In [None]:
### [28]
# Predict a label for every sample in the test split.
y_pred = knn.predict(X=X_test)
print("Test set predictions:\n", y_pred)

In [None]:
### [29]
# Accuracy computed by hand: compare predictions with the true labels
# element-wise and take the mean of the resulting matches.
is_correct = y_pred == y_test
accuracy = np.mean(is_correct)
print("Test set score: {:.2f}".format(accuracy))

In [None]:
### [30]
# Same evaluation, this time using the classifier's own score() method.
test_score = knn.score(X_test, y_test)
print("Test set score: {:.2f}".format(test_score))

### 1.8 まとめと今後の展望

In [None]:
### [31]
# Recap of the steps above in a single cell: split the data, fit the
# one-neighbor classifier, and report its score on the test split.
features, labels = iris_dataset['data'], iris_dataset['target']
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=0)

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

test_score = knn.score(X_test, y_test)
print("Test set score: {:.2f}".format(test_score))