# Kratos Defense Intelligence Agent - ML Models

This notebook trains and registers ML models for the Kratos Intelligence Agent:
1. **PROGRAM_RISK_PREDICTOR**: Predicts program cost/schedule risk
2. **SUPPLIER_RISK_PREDICTOR**: Predicts supplier quality/delivery risk
3. **PRODUCTION_FORECASTER**: Forecasts manufacturing order volume


In [None]:
# Import libraries
from snowflake.snowpark.context import get_active_session
from snowflake.ml.modeling.ensemble import RandomForestClassifier, RandomForestRegressor
from snowflake.ml.registry import Registry
import pandas as pd

# Pick up the Snowpark session that is already active and report which
# database/schema it currently points at.
session = get_active_session()
print(
    "Connected to: "
    f"{session.get_current_database()}.{session.get_current_schema()}"
)


## Model 1: Program Risk Predictor


In [None]:
# Prepare training data for program risk prediction.
#
# The query returns one row for every row of RAW.PROGRAMS whose
# program_status is 'ACTIVE', with these columns:
#   funded_ratio        funded_value / total_contract_value
#                       (0.5 when an operand is NULL or the divisor is 0)
#   cost_ratio          costs_incurred / funded_value
#                       (0.5 when an operand is NULL or the divisor is 0)
#   revenue_cost_ratio  revenue_recognized / costs_incurred
#                       (0.8 when an operand is NULL or the divisor is 0)
#   margin_pct          margin_percentage (10 when NULL)
#   days_active         days from start_date to today (0 when start_date is NULL)
#   days_remaining      days from today to planned_end_date
#                       (a NULL planned_end_date is treated as one year from today)
#   is_development      1 when program_type is 'DEVELOPMENT', else 0
#   is_at_risk          label: 1 when risk_level is 'HIGH', or when it is
#                       'MEDIUM' and costs_incurred exceeds 90% of funded_value;
#                       otherwise 0
#
# start_date is wrapped in COALESCE so that days_active is never NULL, in line
# with the NULL fallbacks applied to every other feature in this query.
#
# NOTE(review): cost_ratio is built from the same costs_incurred / funded_value
# relationship that the MEDIUM branch of the label tests, so the label is
# partly recoverable from that feature alone - confirm this is intended.
program_train_query = """
SELECT
    p.program_id,
    COALESCE(p.funded_value / NULLIF(p.total_contract_value, 0), 0.5) AS funded_ratio,
    COALESCE(p.costs_incurred / NULLIF(p.funded_value, 0), 0.5) AS cost_ratio,
    COALESCE(p.revenue_recognized / NULLIF(p.costs_incurred, 0), 0.8) AS revenue_cost_ratio,
    COALESCE(p.margin_percentage, 10) AS margin_pct,
    DATEDIFF('day', COALESCE(p.start_date, CURRENT_DATE()), CURRENT_DATE()) AS days_active,
    DATEDIFF('day', CURRENT_DATE(), COALESCE(p.planned_end_date, DATEADD('year', 1, CURRENT_DATE()))) AS days_remaining,
    CASE WHEN p.program_type = 'DEVELOPMENT' THEN 1 ELSE 0 END AS is_development,
    CASE WHEN p.risk_level = 'HIGH' THEN 1 WHEN p.risk_level = 'MEDIUM' AND p.costs_incurred > p.funded_value * 0.9 THEN 1 ELSE 0 END AS is_at_risk
FROM KRATOS_INTELLIGENCE.RAW.PROGRAMS p
WHERE p.program_status = 'ACTIVE'
"""

program_df = session.sql(program_train_query)

# Count once and fail fast: an empty training set would otherwise only surface
# later as an error raised from inside model fitting.
program_sample_count = program_df.count()
print(f"Training samples: {program_sample_count}")
if program_sample_count == 0:
    raise ValueError(
        "No ACTIVE rows returned from KRATOS_INTELLIGENCE.RAW.PROGRAMS; "
        "cannot train PROGRAM_RISK_PREDICTOR on an empty dataset."
    )
program_df.show(5)


In [None]:
# Fit the program risk classifier on program_df, then publish it to the
# model registry as PROGRAM_RISK_PREDICTOR / v1.
label_col = 'IS_AT_RISK'
feature_cols = [
    'FUNDED_RATIO',
    'COST_RATIO',
    'REVENUE_COST_RATIO',
    'MARGIN_PCT',
    'DAYS_ACTIVE',
    'DAYS_REMAINING',
    'IS_DEVELOPMENT',
]

# Column names are upper-case here although the query aliases are written in
# lower case.
program_risk_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    input_cols=feature_cols,
    label_cols=[label_col],
    output_cols=['PREDICTED_RISK'],
)
program_risk_model.fit(program_df)

# The registry handle is kept in `reg` because the later model cells and the
# verification cell call it as well.
reg = Registry(
    session=session,
    database_name='KRATOS_INTELLIGENCE',
    schema_name='ANALYTICS',
)
reg.log_model(
    model=program_risk_model,
    model_name='PROGRAM_RISK_PREDICTOR',
    version_name='v1',
    comment='Predicts program cost/schedule risk',
)
print("Program risk model trained and registered")


## Model 2: Supplier Risk Predictor


In [None]:
# Prepare and train supplier risk model.
#
# The query returns one row for every row of RAW.SUPPLIERS whose
# supplier_status is 'ACTIVE', with these columns:
#   quality_rating     quality_rating (0.8 when NULL)
#   delivery_rating    delivery_rating (0.8 when NULL)
#   spend_millions     total_spend / 1,000,000 (NULL spend counts as 0)
#   days_as_supplier   days from first_order_date to today (0 when NULL)
#   is_small_business  1 when is_small_business is true, else 0
#   is_tier_1          1 when supplier_type is 'TIER_1', else 0
#   is_at_risk         label: 1 when either rating (after the 0.8 fallback)
#                      is below 0.75, else 0
#
# NOTE(review): is_at_risk is computed from the very same quality_rating and
# delivery_rating values that are fed to the model as features, so the
# classifier can reproduce the label from those two inputs alone. Confirm this
# is intended, or replace the label with an independently observed outcome.
supplier_train_query = """
SELECT
    s.supplier_id,
    COALESCE(s.quality_rating, 0.8) AS quality_rating,
    COALESCE(s.delivery_rating, 0.8) AS delivery_rating,
    COALESCE(s.total_spend, 0) / 1000000 AS spend_millions,
    DATEDIFF('day', COALESCE(s.first_order_date, CURRENT_DATE()), CURRENT_DATE()) AS days_as_supplier,
    CASE WHEN s.is_small_business THEN 1 ELSE 0 END AS is_small_business,
    CASE WHEN s.supplier_type = 'TIER_1' THEN 1 ELSE 0 END AS is_tier_1,
    CASE WHEN COALESCE(s.quality_rating, 0.8) < 0.75 OR COALESCE(s.delivery_rating, 0.8) < 0.75 THEN 1 ELSE 0 END AS is_at_risk
FROM KRATOS_INTELLIGENCE.RAW.SUPPLIERS s
WHERE s.supplier_status = 'ACTIVE'
"""

supplier_df = session.sql(supplier_train_query)

# Count once and fail fast: fitting on an empty result set would otherwise fail
# inside the training call, and nothing useful could be registered.
supplier_sample_count = supplier_df.count()
print(f"Training samples: {supplier_sample_count}")
if supplier_sample_count == 0:
    raise ValueError(
        "No ACTIVE rows returned from KRATOS_INTELLIGENCE.RAW.SUPPLIERS; "
        "cannot train SUPPLIER_RISK_PREDICTOR on an empty dataset."
    )

# Column names are upper-case here although the query aliases are written in
# lower case.
feature_cols = ['QUALITY_RATING', 'DELIVERY_RATING', 'SPEND_MILLIONS', 'DAYS_AS_SUPPLIER', 'IS_SMALL_BUSINESS', 'IS_TIER_1']
supplier_risk_model = RandomForestClassifier(
    input_cols=feature_cols,
    label_cols=['IS_AT_RISK'],
    output_cols=['PREDICTED_RISK'],
    n_estimators=100,
    max_depth=8,
    random_state=42
)

supplier_risk_model.fit(supplier_df)

# `reg` is the registry handle created in the program risk model cell.
reg.log_model(
    model=supplier_risk_model,
    model_name='SUPPLIER_RISK_PREDICTOR',
    version_name='v1',
    comment='Predicts supplier quality/delivery risk'
)
print("Supplier risk model trained and registered")


## Model 3: Production Forecaster


In [None]:
# Prepare and train production forecasting model.
#
# The query aggregates RAW.MANUFACTURING_ORDERS over the two years ending
# today. Each output row is one (month, year, day-of-week) combination:
#   month_num    MONTH(order_date)
#   year_num     YEAR(order_date)
#   day_of_week  DAYOFWEEK(order_date)
#   order_count  regression target: number of orders in that combination
#
# NOTE(review): the window is a rolling two years from CURRENT_DATE(), so the
# oldest and the current calendar month are only partially covered and their
# order_count values are lower than for full months - confirm whether the
# window should be aligned to month boundaries.
production_train_query = """
SELECT
    MONTH(m.order_date) AS month_num,
    YEAR(m.order_date) AS year_num,
    DAYOFWEEK(m.order_date) AS day_of_week,
    COUNT(*) AS order_count
FROM KRATOS_INTELLIGENCE.RAW.MANUFACTURING_ORDERS m
WHERE m.order_date >= DATEADD('year', -2, CURRENT_DATE())
GROUP BY MONTH(m.order_date), YEAR(m.order_date), DAYOFWEEK(m.order_date)
"""

production_df = session.sql(production_train_query)

# Count once and fail fast: fitting on an empty result set would otherwise fail
# inside the training call, and nothing useful could be registered.
production_sample_count = production_df.count()
print(f"Training samples: {production_sample_count}")
if production_sample_count == 0:
    raise ValueError(
        "No rows from the last two years returned from "
        "KRATOS_INTELLIGENCE.RAW.MANUFACTURING_ORDERS; "
        "cannot train PRODUCTION_FORECASTER on an empty dataset."
    )

# Column names are upper-case here although the query aliases are written in
# lower case.
feature_cols = ['MONTH_NUM', 'YEAR_NUM', 'DAY_OF_WEEK']
production_model = RandomForestRegressor(
    input_cols=feature_cols,
    label_cols=['ORDER_COUNT'],
    output_cols=['PREDICTED_ORDERS'],
    n_estimators=100,
    max_depth=8,
    random_state=42
)

production_model.fit(production_df)

# `reg` is the registry handle created in the program risk model cell.
reg.log_model(
    model=production_model,
    model_name='PRODUCTION_FORECASTER',
    version_name='v1',
    comment='Forecasts manufacturing order volume'
)
print("Production forecasting model trained and registered")


## Verify Registered Models


In [None]:
# Show what the registry now contains. show_models() returns a pandas
# DataFrame, so it is printed with print() rather than with .show().
models = reg.show_models()
print("Registered Models:", models, sep="\n")

# Closing banner and follow-up checklist; each entry lands on its own line.
print(
    "\n\nAll models trained and registered successfully!",
    "\nNext steps:",
    "1. Run sql/ml/07_create_model_wrapper_functions.sql",
    "2. Run sql/agent/08_create_intelligence_agent.sql",
    "3. Test the agent in Snowsight",
    sep="\n",
)
