In [3]:
import joblib
from sklearn.feature_extraction.text import CountVectorizer

# Load the persisted model, label encoder and vectorizer (in that order).
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading; only point it at artifact files from a trusted source.
model, label_encoder, vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in (
        'logistic_regression_model.pkl',
        'label_encoder.pkl',
        'vectorizer.pkl',
    )
)

# Prediction helper
def predict_category(title):
    """Predict the category of a single title.

    The title is vectorized with the module-level ``vectorizer``, scored by
    the module-level ``model``, and the predicted label is decoded through
    the module-level ``label_encoder``.

    Args:
        title: The text to classify. It is wrapped in a one-element list
            before being handed to ``vectorizer.transform``.

    Returns:
        A ``(predicted_category, probabilities)`` tuple, where
        ``predicted_category`` is the decoded label for ``title`` and
        ``probabilities`` is the first row returned by
        ``model.predict_proba`` for it.
    """
    features = vectorizer.transform([title])

    # Only one sample is scored, so keep just the first row / first entry.
    class_probabilities = model.predict_proba(features)[0]
    encoded_label = model.predict(features)[0]

    decoded_labels = label_encoder.inverse_transform([encoded_label])
    return decoded_labels[0], class_probabilities

# Smoke test: classify one sample headline and print the outcome.
title = "The stock market is experiencing unprecedented growth."
predicted_category, probabilities = predict_category(title)

print(f"Title: {title}")
print(f"Predicted Category: {predicted_category}")
print(f"Probabilities: {probabilities}")

# Print the probability of each category.
# The columns of predict_proba follow model.classes_ (the encoded labels the
# classifier was fitted on), which is not guaranteed to line up with
# label_encoder.classes_ if some encoded label never reached the classifier.
# Decoding model.classes_ keeps every name paired with its own probability;
# when both class sets coincide the output is unchanged.
categories = label_encoder.inverse_transform(model.classes_)
for category, probability in zip(categories, probabilities):
    print(f"{category}: {probability:.4f}")


Title: The stock market is experiencing unprecedented growth.
Predicted Category: b
Probabilities: [0.94157397 0.02711228 0.00761151 0.02370224]
b: 0.9416
e: 0.0271
m: 0.0076
t: 0.0237
