In [1]:
# ライブラリのインポート
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Step 1: load the wine sample dataset shipped with scikit-learn.
dataset = load_wine()

# Step 2 (inputs): the feature matrix and the class labels used for modelling.
X = dataset.data  # explanatory variables
y = dataset.target  # target variable

# Tabular copy for inspection only; the model below is trained on X / y.
df = pd.DataFrame(X, columns=dataset.feature_names)
df["category"] = y
print("データ数とカラム数:", df.shape)
print(df.head())

# Step 2 (split): hold out 30% of the rows for evaluation (7:3 split).
# random_state is fixed so the same rows are selected on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=5,
)

print("X:", X.shape, "X_train:", X_train.shape, "X_test:", X_test.shape)
print("y:", y.shape, "y_train:", y_train.shape, "y_test:", y_test.shape)

# Steps 3-4: build the decision tree and train it in one expression
# (fit() returns the estimator itself, so `model` is the fitted classifier).
model = DecisionTreeClassifier(random_state=3).fit(X_train, y_train)

# Step 5: evaluate on the held-out rows.
y_pred = model.predict(X_test)
print("予測結果:", y_pred)
print("正解ラベル:", y_test)

# Accuracy computed explicitly from the predictions ...
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("正解率（accuracy_score）:", accuracy)

# ... and via the estimator's own score() method on the same held-out data.
print("正解率（scoreメソッド）:", model.score(X_test, y_test))

# Step 6: classify three hand-written wine samples.
# Each row holds 13 values — presumably in dataset.feature_names order.
X_real = np.array(
    [
        [13, 1.6, 2.2, 16, 118, 2.6, 2.9, 0.21, 1.6, 5.8, 0.92, 3.2, 1011],
        [12, 2.8, 2.2, 18, 100, 2.5, 2.3, 0.25, 2.0, 2.2, 1.15, 3.3, 1000],
        [14, 4.1, 2.7, 24, 101, 1.6, 0.7, 0.53, 1.4, 9.4, 0.61, 1.6, 560],
    ]
)
pred_real = model.predict(X_real)
print("未知ワインの予測種別:", pred_real)


データ数とカラム数: (178, 14)
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline 

In [2]:
# Random-forest variant of the wine classification pipeline:
# load -> split -> fit -> evaluate -> predict on hand-written samples.
#
# All imports this cell needs are gathered here so the cell's dependencies
# are visible at a glance instead of being scattered between statements.
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# 1. Load the sample data
dataset = load_wine()

# Convert to a pandas DataFrame for inspection
df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

# Add the category column (wine class label)
df['category'] = dataset.target

# Check the number of rows and columns.
# A bare `df.shape` in the middle of a cell is evaluated and then discarded
# (only a cell's final expression is displayed), so it is printed explicitly.
print("データ数とカラム数:", df.shape)

# 2. Split the sample data
X = dataset.data  # explanatory variables
y = dataset.target  # target variable

# Hold out 30% of the rows for evaluation; random_state fixes the selection.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=5)

# Confirm the record counts of every split
print(X.shape, X_train.shape, X_test.shape, y.shape, y_train.shape, y_test.shape)

# 3. Instantiate the random-forest model (the only change from the
#    decision-tree version of this pipeline)
model = RandomForestClassifier(random_state=3)

# 4. Train the model
model.fit(X_train, y_train)

# 5. Evaluate the model on the held-out rows
y_pred = model.predict(X_test)

# Accuracy computed with accuracy_score
print("正解率（accuracy_score）:", accuracy_score(y_test, y_pred))

# Accuracy reported by the estimator's own score() method
print("正解率（model.score）:", model.score(X_test, y_test))

# 6. Predict on dummy data: three hand-written samples with 13 values each
#    (presumably in dataset.feature_names order)
X_real = np.array([
    [13, 1.6, 2.2, 16, 118, 2.6, 2.9, 0.21, 1.6, 5.8, 0.92, 3.2, 1011],
    [12, 2.8, 2.2, 18, 100, 2.5, 2.3, 0.25, 2.0, 2.2, 1.15, 3.3, 1000],
    [14, 4.1, 2.7, 24, 101, 1.6, 0.7, 0.53, 1.4, 9.4, 0.61, 1.6, 560]])

# Predict the class of each sample
predictions = model.predict(X_real)
print("予測結果:", predictions)


(178, 13) (124, 13) (54, 13) (178,) (124,) (54,)
正解率（accuracy_score）: 0.9629629629629629
正解率（model.score）: 0.9629629629629629
予測結果: [0 1 2]
