In [41]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib

# 1: Dataset 
# בנו מודל Random Forest שמנבא האם לקוח יקבל פיצוי
age = np.array([25, 38, 29, 47, 35, 53, 31, 42, 40, 50])
past_claims = np.array([0, 2, 1, 3, 1, 4, 0, 2, 2, 3])
insurance_amount = np.array([120, 200, 150, 300, 220, 400, 130, 180, 250, 350])
compensation = np.array(['no', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes'])

# Combine features
X = np.column_stack((age, past_claims, insurance_amount))
y = compensation

# 2: Split train/test 
#חלקו את הדאטה לסט אימון וסט בדיקה (train/test split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 3: Random Forest model 
# אימנו את המודל על סט האימון 
model = RandomForestClassifier(
    n_estimators=100, 
    oob_score=True,    
    random_state=42
)

# Train model
model.fit(X_train, y_train)

# 4: Predictions and accuracy 
# חשבו את דיוק המודל (Model Accuracy) על סט הבדיקה
y_pred = model.predict(X_test)

# Test accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")

# OOB accuracy
print(f"OOB Accuracy: {model.oob_score_:.2f}")

# Classification report
# הציגו דוח סיווג (classification report) עם precision, recall ו־f1-score
print("\nClassification Report")
print(classification_report(y_test, y_pred))

# 5: Save model
# חשבו את דיוק ה־Out-of-Bag (OOB Accuracy) של המודל
joblib.dump(model, "random_forest_model.pkl")
print("\nModel saved as random_forest_model.pkl")

# Load model again
# שמרו את המודל באמצעות joblib ולאחר מכן טענו אותו ובצעו פרדיקציה
loaded_model = joblib.load("random_forest_model.pkl")

# Predict for new customer
new_customer = np.array([[30, 1, 180]])
prediction = loaded_model.predict(new_customer)
print(f"\nPrediction for new customer: {prediction[0]}")

# הסבירו בקצרה מה ההבדל בין דיוק רגיל ל־OOB Accuracy
#Test Accuracy – how good the model is on the test data.
#OOB Accuracy – how good the model is on samples not used in training each tree.

Test Accuracy: 1.00
OOB Accuracy: 0.86

Classification Report
              precision    recall  f1-score   support

         yes       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Model saved as random_forest_model.pkl

Prediction for new customer: no
