# 1. データの読み込み，モデルの学習

In [29]:
# Imports needed for loading the data, training the model, and scoring it
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and pull out its data and target arrays
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and train it in one step
# (scikit-learn's fit() returns the fitted estimator itself)
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict on the held-out samples and report the accuracy
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 1.0


# 2. データとモデルの保存

In [26]:
import pickle
from pathlib import Path

import pandas as pd
import numpy as np

# Make sure the output directory exists before writing anything into it.
# DataFrame.to_csv and open(..., 'wb') both fail when the parent directory
# is missing (e.g. on a fresh checkout), so create it up front; exist_ok
# makes re-running this cell harmless.
Path('./data').mkdir(parents=True, exist_ok=True)

# Convert the training and test splits to DataFrames, naming the columns
# feature_0 .. feature_{n-1} and appending the labels as a 'target' column
train_data = pd.DataFrame(X_train, columns=[f'feature_{i}' for i in range(X_train.shape[1])])
train_data['target'] = y_train

test_data = pd.DataFrame(X_test, columns=[f'feature_{i}' for i in range(X_test.shape[1])])
test_data['target'] = y_test

# Save both splits as CSV files (index=False: do not write the row index)
train_data.to_csv('./data/train_data.csv', index=False)
test_data.to_csv('./data/test_data.csv', index=False)

# Serialize the trained model to a pickle file
with open('./data/random_forest_model.pkl', 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)

# 3. 保存したファイルからモデルとデータを読み込んで推論

In [32]:
# Read the test split back from the CSV file
test_data = pd.read_csv("./data/test_data.csv")

# Split the frame into the label vector ('target') and the remaining feature columns
y_test = np.array(test_data['target'])
X_test = np.array(test_data.drop(columns=['target']))

display(X_test, y_test)

# Restore the classifier from its pickle file.
# NOTE: pickle.load can execute arbitrary code -- only unpickle files you trust.
with open('./data/random_forest_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

display(model)

array([[6.1, 2.8, 4.7, 1.2],
       [5.7, 3.8, 1.7, 0.3],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.9, 4.5, 1.5],
       [6.8, 2.8, 4.8, 1.4],
       [5.4, 3.4, 1.5, 0.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.9, 3.1, 5.1, 2.3],
       [6.2, 2.2, 4.5, 1.5],
       [5.8, 2.7, 3.9, 1.2],
       [6.5, 3.2, 5.1, 2. ],
       [4.8, 3. , 1.4, 0.1],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.1, 3.8, 1.5, 0.3],
       [6.3, 3.3, 4.7, 1.6],
       [6.5, 3. , 5.8, 2.2],
       [5.6, 2.5, 3.9, 1.1],
       [5.7, 2.8, 4.5, 1.3],
       [6.4, 2.8, 5.6, 2.2],
       [4.7, 3.2, 1.6, 0.2],
       [6.1, 3. , 4.9, 1.8],
       [5. , 3.4, 1.6, 0.4],
       [6.4, 2.8, 5.6, 2.1],
       [7.9, 3.8, 6.4, 2. ],
       [6.7, 3. , 5.2, 2.3],
       [6.7, 2.5, 5.8, 1.8],
       [6.8, 3.2, 5.9, 2.3],
       [4.8, 3. , 1.4, 0.3],
       [4.8, 3.1, 1.6, 0.2]])

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [33]:
# Evaluate the reloaded model on the reloaded test data
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Loaded Model Accuracy: {accuracy}')

Loaded Model Accuracy: 1.0
