In [14]:
# [1] Setup, Imports, and Project Paths
# --------------------------------------------------------------------------
import os
from pathlib import Path
import numpy as np
import pandas as pd

# Display settings: show every column and use a display width of 120.
pd.options.display.max_columns = None
pd.options.display.width = 120

# 1. Project and Data Paths
# The project root is the parent of the current working directory (the
# directory this notebook runs from); data files live in <root>/data.
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT.joinpath("data")

print(f"Project root (Corrected): {PROJECT_ROOT.resolve()}")
print(f"Data dir (Corrected): {DATA_DIR.resolve()}")

Project root (Corrected): /Users/hc/Documents/projects/roku-patch-stability-analytics
Data dir (Corrected): /Users/hc/Documents/projects/roku-patch-stability-analytics/data


In [16]:
# Diagnostic: list every column name exactly as loaded, wrapped in brackets
# and followed by its length so stray whitespace in a name becomes visible.
# NOTE(review): this cell reads `monitoring_df`, which is assigned in the
# "[2] Load Input Data" cell further down; on a fresh kernel that cell must
# be executed before this one.
loaded_columns = list(monitoring_df.columns)

print("\n--- Diagnostic: ALL Columns Loaded (Copy the EXACT name from here) ---")
report_lines = [f"[{col}] - Length: {len(col)}" for col in loaded_columns]
if report_lines:
    print("\n".join(report_lines))


--- Diagnostic: ALL Columns Loaded (Copy the EXACT name from here) ---
[firmware_version] - Length: 16
[code_churn_score] - Length: 16
[churn_norm] - Length: 10
[risk_score] - Length: 10
[high_risk_flag] - Length: 14
[QA_priority_score] - Length: 17
[QA_bucket] - Length: 9
[avg_device_age_days] - Length: 19


Input file loaded and feature restored. Data shape: (50, 8)

Required Columns Check (Passed):
   risk_score  avg_device_age_days
0    0.016229           702.219142
1    0.005618           426.954965
2    0.006206           336.319311
3    0.620677           340.087869
4    0.201233           467.025999


In [17]:
# [2] Load Input Data, Validate, and Restore Missing Feature
# --------------------------------------------------------------------------
# Input file is the output from the QA Prioritization notebook (Rec 2)
input_file = DATA_DIR / "firmware_qa_priority.csv"
# Original raw features file; source of 'avg_device_age_days'.
raw_features_file = DATA_DIR / "test_patch_features.csv"

if not input_file.exists():
    raise FileNotFoundError(
        f"Input file not found at {input_file.resolve()}. \n"
        "Please ensure Notebook 04 (QA Prioritization) was run successfully."
    )

# Same explicit check for the second input, so a missing file fails with a
# clear message instead of an error from inside pandas.
if not raw_features_file.exists():
    raise FileNotFoundError(
        f"Raw features file not found at {raw_features_file.resolve()}."
    )

monitoring_df = pd.read_csv(input_file)
monitoring_df.columns = monitoring_df.columns.str.strip() # Clean column names

# CRITICAL FIX: The 'avg_device_age_days' column was lost in a prior export.
# We restore it by merging from the original raw features file.
raw_df = pd.read_csv(raw_features_file)
raw_df.columns = raw_df.columns.str.strip() # Clean column names
age_data = raw_df[['firmware_version', 'avg_device_age_days']]

# Merge only when the column is really absent: merging a column that already
# exists would yield 'avg_device_age_days_x' / '_y' and break the check below.
if 'avg_device_age_days' not in monitoring_df.columns:
    monitoring_df = monitoring_df.merge(
        age_data,
        on='firmware_version',
        how='left',
        # Raises pandas.errors.MergeError if a firmware_version appears more
        # than once in the raw file, rather than silently duplicating rows.
        validate='many_to_one'
    )

# Robust Validation (matching Notebook 04 style)
REQUIRED_COLS = ['risk_score', 'avg_device_age_days', 'firmware_version']
missing_cols = [col for col in REQUIRED_COLS if col not in monitoring_df.columns]

if missing_cols:
    raise KeyError(
        f"Missing required columns after merge: {missing_cols}. "
        "Check merge key ('firmware_version') and input file."
    )

# The left join keeps firmware versions with no match in the raw file; their
# age is NaN. Report them so the gap is visible before scoring.
n_missing_age = int(monitoring_df['avg_device_age_days'].isna().sum())
if n_missing_age:
    print(
        f"WARNING: {n_missing_age} row(s) have no 'avg_device_age_days' value "
        "(no match in the raw features file)."
    )

print(f"Input file loaded and key feature restored. Data shape: {monitoring_df.shape}")

Input file loaded and key feature restored. Data shape: (50, 8)


In [18]:
# [3] Calculate Monitoring Priority Score and Tier
# --------------------------------------------------------------------------

# --- 3a. Thresholds and weights ---
# Author's rationale: older devices tend to have hardware failures or be stuck
# on older minor OS versions, so fleet age is blended into the priority score.
DEVICE_AGE_THRESHOLD = 600             # days; at or above this a fleet counts as "older"
IMMEDIATE_MONITORING_THRESHOLD = 0.90  # composite score cut-off for the "Immediate" tier
ENHANCED_MONITORING_THRESHOLD = 0.65   # composite score cut-off for the "Enhanced" tier

# Weighting: 70% model risk, 30% device-age flag (age is the secondary factor).
W_RISK = 0.70
W_AGE_FLAG = 0.30

# --- 3b. Older-device flag: 1 when average age >= threshold, else 0 ---
monitoring_df['older_device_flag'] = (
    monitoring_df['avg_device_age_days'].ge(DEVICE_AGE_THRESHOLD).astype(int)
)

# --- 3c. Composite score: weighted sum of risk score and the age flag ---
monitoring_df['monitoring_priority'] = (
    monitoring_df['risk_score'].mul(W_RISK)
    .add(monitoring_df['older_device_flag'].mul(W_AGE_FLAG))
)

# --- 3d. Bucketing Function ---
def bucket_monitoring_tier(score, immediate_threshold=None, enhanced_threshold=None):
    """Map a composite monitoring score to an operational monitoring tier.

    Parameters
    ----------
    score : float
        Composite monitoring priority score.
    immediate_threshold : float, optional
        Minimum score for "Immediate Monitoring". When omitted, the
        module-level IMMEDIATE_MONITORING_THRESHOLD is used.
    enhanced_threshold : float, optional
        Minimum score for "Enhanced Monitoring". When omitted, the
        module-level ENHANCED_MONITORING_THRESHOLD is used.

    Returns
    -------
    str
        "Immediate Monitoring", "Enhanced Monitoring" or "Standard Monitoring".
        A NaN score fails both comparisons and therefore maps to
        "Standard Monitoring".
    """
    # Resolve defaults at call time so the notebook constants stay the single
    # source of truth when no explicit thresholds are passed.
    if immediate_threshold is None:
        immediate_threshold = IMMEDIATE_MONITORING_THRESHOLD
    if enhanced_threshold is None:
        enhanced_threshold = ENHANCED_MONITORING_THRESHOLD

    # Both boundaries are inclusive (>=).
    if score >= immediate_threshold:
        return "Immediate Monitoring"
    if score >= enhanced_threshold:
        return "Enhanced Monitoring"
    return "Standard Monitoring"

# Label each row with the tier that corresponds to its composite score.
monitoring_df['monitoring_tier'] = (
    monitoring_df['monitoring_priority'].map(bucket_monitoring_tier)
)

print("Monitoring Priority Scores calculated.")

Monitoring Priority Scores calculated.


In [11]:
# [4] Feature Engineering and Composite Score Calculation
# --------------------------------------------------------------------------
# NOTE: this cell recomputes the same two columns as steps 3b/3c of section
# [3] and prints a sample. It uses the constants defined in section [3]
# (DEVICE_AGE_THRESHOLD, W_RISK, W_AGE_FLAG); the names it referenced before
# (OLDER_DEVICE_AGE_THRESHOLD, W_AGE) are not defined anywhere in this
# notebook and raised NameError on a fresh kernel.

# 1. Create Older Device Flag
# Binary flag: 1 when the average device age is at or above
# DEVICE_AGE_THRESHOLD days, 0 otherwise.
monitoring_df["older_device_flag"] = (
    monitoring_df["avg_device_age_days"] >= DEVICE_AGE_THRESHOLD
).astype(int)

# 2. Compute Composite Monitoring Priority Score
# Weighted sum of the model's risk score and the older-device flag.
monitoring_df["monitoring_priority"] = (
    W_RISK * monitoring_df["risk_score"] +
    W_AGE_FLAG * monitoring_df["older_device_flag"]
)

print("New columns created: older_device_flag and monitoring_priority")
print("\nSample of calculated scores:")
print(monitoring_df[['firmware_version', 'risk_score', 'older_device_flag', 'monitoring_priority']].head())

New columns created: older_device_flag and monitoring_priority

Sample of calculated scores:
  firmware_version  risk_score  older_device_flag  monitoring_priority
0           10.0.1    0.016229                  1             0.311360
1           10.0.2    0.005618                  0             0.003933
2           10.0.3    0.006206                  0             0.004344
3           10.0.4    0.620677                  0             0.434474
4           10.1.0    0.201233                  0             0.140863


In [19]:
# [5] Final Output and Reporting (Matching Notebook 04 Style)
# --------------------------------------------------------------------------
# Selects the reporting columns, ranks rows by composite score (highest
# first), writes the result to CSV and prints the top five rows.

# 1. Define final output columns
FINAL_OUTPUT_COLS = [
    "firmware_version",
    "risk_score",
    "avg_device_age_days",
    "older_device_flag",
    "monitoring_priority",
    "monitoring_tier"
]

final_monitoring_df = monitoring_df[FINAL_OUTPUT_COLS].sort_values(
    by="monitoring_priority", ascending=False
)

# 2. Export to CSV (index omitted; it only reflects the pre-sort row order)
output_path = DATA_DIR / "monitoring_priority.csv"
final_monitoring_df.to_csv(output_path, index=False)

# 3. Print Final Report
# The check mark below was stored as mis-encoded bytes; restored to the
# intended character.
print(f"\n✅ Monitoring priority sheet generated and saved to: {output_path.name}")
print("\n--- Top 5 Monitoring Priorities (Recommendation #3) ---")
print(final_monitoring_df.head(5))


✅ Monitoring priority sheet generated and saved to: monitoring_priority.csv

--- Top 5 Monitoring Priorities (Recommendation #3) ---
   firmware_version  risk_score  avg_device_age_days  older_device_flag  monitoring_priority       monitoring_tier
36           10.7.5    0.979955           623.264342                  1             0.985968  Immediate Monitoring
11           10.5.1    0.977943           633.542533                  1             0.984560  Immediate Monitoring
47          10.10.0    0.968586           669.768117                  1             0.978010  Immediate Monitoring
49          10.10.2    0.961969           622.175698                  1             0.973379  Immediate Monitoring
10           10.5.0    0.930101           699.662343                  1             0.951071  Immediate Monitoring
