In [4]:
# Task 3: Predictive Analytics for Resource Allocation
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, recall_score
from sklearn.preprocessing import StandardScaler

print("--- Task 3: Predictive Analytics (Issue Priority Proxy) ---")

# Overview: the breast-cancer dataset is used as a stand-in for issue
# prioritisation. Target 0 (malignant) is treated as a "High Priority" issue
# and target 1 (benign) as a "Low Priority" issue.

# 1. Load Data
cancer = load_breast_cancer(as_frame=True)
X = cancer.data
y = cancer.target  # 0: Malignant (High Priority), 1: Benign (Low Priority)

# 2. Split Data
# Split BEFORE any preprocessing so that no statistic of the held-out rows can
# influence the training pipeline. `stratify=y` keeps the class ratio the same
# in both folds; `random_state` makes the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 2.1 Scaling Data (Good practice for many classifiers)
# The scaler is fitted on the training fold only and then merely applied to the
# test fold. Fitting it on the full dataset (as was done previously) leaks the
# test set's mean/variance into preprocessing and biases the evaluation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell referring to `X_scaled` keeps working; it is the
# full feature matrix transformed with the train-fitted scaler and is NOT used
# for training or evaluation below.
X_scaled = scaler.transform(X)

# 3. Train Model (Random Forest Classifier for Issue Priority)
# Predicting High Priority (Malignant = 0) vs. Low Priority (Benign = 1)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
print("\nRandom Forest Classifier Trained Successfully.")

# 4. Evaluation
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# Weighted F1 averages the per-class F1 scores by class support, so it reflects
# both Precision and Recall while accounting for the uneven class sizes.
f1 = f1_score(y_test, y_pred, average='weighted')

print("\n--- Predictive Analytics Model Results ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score (Weighted): {f1:.4f}")

# Detailed report showing Precision and Recall for each class (Priority).
# `target_names` are matched to the sorted labels, i.e. 0 first, then 1.
print("\nClassification Report (0=High Priority Issue, 1=Low Priority Issue):")
print(classification_report(y_test, y_pred, target_names=['High Priority (0)', 'Low Priority (1)']))

# 5. Insight for Resource Allocation
# The goal is to maximize the Recall for 'High Priority (0)' issues.
# High Recall means fewer critical issues are missed.

# `pos_label=0` makes recall_score treat the High Priority class as "positive";
# the default (pos_label=1) would report recall for the Low Priority class.
high_priority_recall = recall_score(y_test, y_pred, pos_label=0)
print(f"\nRecall for High Priority Issues (0): {high_priority_recall:.4f}")
# Report the measured recall instead of a literal "X%" placeholder.
print(
    "Interpretation: This model correctly identifies critical (High Priority) "
    f"issues {high_priority_recall:.1%} of the time, allowing project managers "
    "to prioritize resources effectively."
)

--- Task 3: Predictive Analytics (Issue Priority Proxy) ---

Random Forest Classifier Trained Successfully.

--- Predictive Analytics Model Results ---
Accuracy: 0.9357
F1 Score (Weighted): 0.9356

Classification Report (0=High Priority Issue, 1=Low Priority Issue):
                   precision    recall  f1-score   support

High Priority (0)       0.92      0.91      0.91        64
 Low Priority (1)       0.94      0.95      0.95       107

         accuracy                           0.94       171
        macro avg       0.93      0.93      0.93       171
     weighted avg       0.94      0.94      0.94       171


Recall for High Priority Issues (0): 0.9062
Interpretation: This model correctly identifies critical (High Priority) issues X% of the time, allowing project managers to prioritize resources effectively.
