# ロジスティック回帰

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# データセットの読み込み
df = load_breast_cancer()

X = df.data
y = df.target

# データセットを訓練データとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ロジスティック回帰モデルの作成
model = LogisticRegression()

# モデルの訓練
model.fit(X_train, y_train)

# テストデータを用いたモデルの評価
score = model.score(X_test, y_test)

print(f'Test Accuracy: {score * 100:.2f}%')

Test Accuracy: 96.49%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# ナイーブベイズ

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Irisデータセットの読み込み
iris = load_iris()
X = iris.data
y = iris.target

# データセットを訓練データとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ガウシアンナイーブベイズモデルの作成
model = GaussianNB()

# モデルの訓練
model.fit(X_train, y_train)

# テストデータを用いたモデルの評価
score = model.score(X_test, y_test)

print(f'Test Accuracy: {score * 100:.2f}%')

Test Accuracy: 100.00%


# K近傍法

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# データセットのロード
iris = load_iris()
X = iris.data
y = iris.target

# データセットの分割（訓練データ80%、テストデータ20%）
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# KNNモデルの作成と訓練（ここでは近傍点数を5としています）
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# テストデータを用いたモデルの評価
score = knn.score(X_test, y_test)

print(f'Test Accuracy: {score * 100:.2f}%')


Test Accuracy: 100.00%


# 決定木

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# データセットのロード
iris = load_iris()
X = iris.data
y = iris.target

# データセットの分割（訓練データ80%、テストデータ20%）
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# 決定木モデルの作成と訓練（ここでは深さを3としています）
dt = DecisionTreeClassifier(max_depth=3, random_state=1)
dt.fit(X_train, y_train)

# テストデータを用いたモデルの評価
score = dt.score(X_test, y_test)

print(f'Test Accuracy: {score * 100:.2f}%')


Test Accuracy: 96.67%


# ランダムフォレスト

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# データセットのロード
iris = load_iris()
X = iris.data
y = iris.target

# データセットの分割（訓練データ80%、テストデータ20%）
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# 決定木モデルの作成と訓練
dt = RandomForestClassifier(random_state=1)
dt.fit(X_train, y_train)

# テストデータを用いたモデルの評価
score = dt.score(X_test, y_test)

print(f'Test Accuracy: {score * 100:.2f}%')

Test Accuracy: 96.67%


# 勾配ブースティング(lightgbm)

In [6]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

# データセットのロード
iris = datasets.load_iris()
X = iris.data
y = iris.target

# データセットの分割（訓練データ80%、テストデータ20%）
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# LightGBMモデルの作成と訓練
lgb = LGBMClassifier(random_state=1)
lgb.fit(X_train, y_train)

# テストデータに対する予測と精度の計算
y_pred = lgb.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000027 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 88
[LightGBM] [Info] Number of data points in the train set: 120, number of used features: 4
[LightGBM] [Info] Start training from score -1.123930
[LightGBM] [Info] Start training from score -1.176574
[LightGBM] [Info] Start training from score -1.003302
Accuracy: 0.97


# pycaret 実践

!pip install pycaret==3.0.4  # 書籍執筆時点のバージョンをインストール


# 最新版をインストール場合はこちら
# !pip install pycaret 最新版をインストール

In [7]:
import pycaret


pycaret.__version__

'3.3.2'

In [8]:
from pycaret.datasets import get_data


data = get_data('iris')

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [9]:
from pycaret.classification import *


s = setup(data, target='species', session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,species
2,Target type,Multiclass
3,Target mapping,"Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2"
4,Original data shape,"(150, 5)"
5,Transformed data shape,"(150, 5)"
6,Transformed train set shape,"(105, 5)"
7,Transformed test set shape,"(45, 5)"
8,Numeric features,4
9,Preprocess,True


In [10]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.49
knn,K Neighbors Classifier,0.9718,0.983,0.9718,0.978,0.9712,0.9573,0.9609,0.044
qda,Quadratic Discriminant Analysis,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.016
lda,Linear Discriminant Analysis,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.016
lightgbm,Light Gradient Boosting Machine,0.9536,0.9935,0.9536,0.9634,0.9528,0.9298,0.9356,0.051
nb,Naive Bayes,0.9445,0.9868,0.9445,0.9525,0.9438,0.9161,0.9207,0.019
et,Extra Trees Classifier,0.9445,0.9935,0.9445,0.9586,0.9426,0.9161,0.9246,0.089
gbc,Gradient Boosting Classifier,0.9355,0.0,0.9355,0.9416,0.9325,0.9023,0.9083,0.187
dt,Decision Tree Classifier,0.9264,0.9429,0.9264,0.9502,0.9201,0.8886,0.904,0.019
rf,Random Forest Classifier,0.9264,0.9909,0.9264,0.9343,0.9232,0.8886,0.8956,0.109


In [11]:
best