In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Train a random-forest classifier on the merged dataset, report accuracy on a
# held-out split, persist the fitted model, and then score it on a second CSV.

# Load the training dataset.
data = pd.read_csv('./_Dataset/_Merged/merged.csv')

# Separate features from the target. 'label' is the target column;
# 'Sample_Number' is also excluded from the features
# (presumably a row identifier — TODO confirm).
X = data.drop(['label', 'Sample_Number'], axis=1)
y = data['label']

# Hold out 20% of the rows for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=24)

# Build and fit the random forest. random_state is pinned (same seed as the
# split) so that re-running the cell trains the same forest; left unset, the
# bootstrap samples and per-split feature subsets differ on every run.
model = RandomForestClassifier(random_state=24)
model.fit(X_train, y_train)

# Predict once on the held-out split ...
y_pred = model.predict(X_test)

# ... and score those predictions directly. For a classifier this is the same
# mean accuracy that model.score(X_test, y_test) reports, without running
# predict a second time.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Persist the fitted model to disk.
joblib.dump(model, 'color_classifier.pkl')

# Load the second dataset used as an additional evaluation set.
new_data = pd.read_csv('./_Dataset/_TestData/test_merged.csv')

# Same feature/target separation as for the training data.
X_new = new_data.drop(['label', 'Sample_Number'], axis=1)
y_new = new_data['label']

# The in-memory `model` fitted above is reused here; it is the same estimator
# that was just written to 'color_classifier.pkl', so nothing is reloaded.

# Predict on the second dataset.
y_pred_new = model.predict(X_new)

# Report accuracy on the second dataset.
# NOTE(review): the output recorded with this cell shows 1.0 for both scores —
# worth confirming that no feature column encodes the label.
accuracy_new = accuracy_score(y_new, y_pred_new)
print("Accuracy (new dataset):", accuracy_new)

Accuracy: 1.0
Accuracy (new dataset): 1.0


In [21]:
# Show the last five entries of the label column (five is tail's default).
y.tail(5)

3796    4
3797    4
3798    4
3799    4
3800    4
Name: label, dtype: int64

In [15]:
# Print the importance score of every feature, one line per feature,
# labelled by the feature's positional index in the importance array.
feature_importances = model.feature_importances_
for position, score in enumerate(feature_importances):
    print(f"Feature {position}: Importance = {score}")

Feature 0: Importance = 0.0
Feature 1: Importance = 0.0
Feature 2: Importance = 0.0
Feature 3: Importance = 0.0
Feature 4: Importance = 0.0
Feature 5: Importance = 1.8273776174350425e-05
Feature 6: Importance = 7.758583737922692e-05
Feature 7: Importance = 5.808479403357072e-05
Feature 8: Importance = 0.00010231140544894914
Feature 9: Importance = 0.0005575053965680607
Feature 10: Importance = 0.00022499794143551768
Feature 11: Importance = 5.8423292034340333e-05
Feature 12: Importance = 0.0033577911972131036
Feature 13: Importance = 0.0002840993350301304
Feature 14: Importance = 0.00011893406809471514
Feature 15: Importance = 0.00033715434430883426
Feature 16: Importance = 9.517464395004775e-05
Feature 17: Importance = 0.0
Feature 18: Importance = 2.3901674160983945e-05
Feature 19: Importance = 5.080791843230771e-05
Feature 20: Importance = 5.449439502619443e-05
Feature 21: Importance = 8.077423691424228e-06
Feature 22: Importance = 0.0
Feature 23: Importance = 0.0
Feature 24: Importa

In [16]:
import numpy as np

# Rank feature positions from highest to lowest importance: argsort yields
# positions in ascending order of importance, so the result is reversed.
ascending_order = np.argsort(feature_importances)
sorted_indices = ascending_order[::-1]

# Header line, then one line per feature in descending order of importance.
print("特徴量の重要度（降順）:")
for position in sorted_indices:
    print(f"Feature {position}: Importance = {feature_importances[position]}")

特徴量の重要度（降順）:
Feature 571: Importance = 0.0197482244005237
Feature 600: Importance = 0.019513980784547982
Feature 181: Importance = 0.017129774812063372
Feature 68: Importance = 0.0165339931417189
Feature 570: Importance = 0.01646028042379078
Feature 574: Importance = 0.015790203073948985
Feature 573: Importance = 0.01440166147884837
Feature 516: Importance = 0.013007546188315783
Feature 210: Importance = 0.012653224466342209
Feature 151: Importance = 0.012631311149221256
Feature 686: Importance = 0.012539216841059957
Feature 593: Importance = 0.012521218900223615
Feature 544: Importance = 0.012463608794061984
Feature 179: Importance = 0.012436979121046061
Feature 237: Importance = 0.01202764558997489
Feature 602: Importance = 0.011648749315047086
Feature 238: Importance = 0.0111428658737619
Feature 572: Importance = 0.011052067045344166
Feature 543: Importance = 0.010475107525465856
Feature 565: Importance = 0.010193822235575859
Feature 603: Importance = 0.010131223875804968
Feature 20