# Kratos Defense ML Models


In [None]:
import warnings
warnings.filterwarnings('ignore')

from snowflake.snowpark.context import get_active_session
from snowflake.ml.modeling.preprocessing import StandardScaler, OneHotEncoder
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.linear_model import LinearRegression
from snowflake.ml.modeling.ensemble import RandomForestClassifier
from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from snowflake.ml.registry import Registry

# Pick up the session provided by the hosting Snowflake environment; no
# connection parameters are supplied here.
# NOTE(review): presumably this runs inside a Snowflake Notebook — confirm.
session = get_active_session()
print("✅ Connected")

## Model 1: Program Risk Predictor


In [None]:
# Feature query for the program-risk classifier: two categorical columns
# (program_type, contract_type), four numeric columns, and the binary
# IS_AT_RISK label (1 when risk_level is 'HIGH', otherwise 0).
# The WHERE clause keeps only rows with positive funded and total contract
# values, so neither ratio divides by zero.
# NOTE(review): cost_ratio and days_active have no COALESCE, so they will be
# NULL if costs_incurred or start_date is NULL — confirm the source
# table has no NULLs in those columns.
program_query = """
SELECT
    program_type,
    COALESCE(contract_type, 'FFP') AS contract_type,
    (funded_value / NULLIF(total_contract_value, 0))::FLOAT AS funded_ratio,
    (costs_incurred / NULLIF(funded_value, 0))::FLOAT AS cost_ratio,
    COALESCE(margin_percentage, 10)::FLOAT AS margin_pct,
    DATEDIFF('day', start_date, CURRENT_DATE())::FLOAT AS days_active,
    CASE WHEN risk_level = 'HIGH' THEN 1 ELSE 0 END AS is_at_risk
FROM KRATOS_INTELLIGENCE.RAW.PROGRAMS
WHERE funded_value > 0 AND total_contract_value > 0
"""
program_df = session.sql(program_query)

# Row count followed by a three-row preview.
print(f"Programs: {program_df.count()}")
program_df.show(3)

In [None]:
# 80/20 train/test split of the program rows; the seed is fixed at 42.
prog_split_weights = [0.8, 0.2]
train_prog, test_prog = program_df.random_split(prog_split_weights, seed=42)
print(f"Train: {train_prog.count()}, Test: {test_prog.count()}")

# Step 1: one-hot encode the two categorical columns, dropping the originals.
prog_encoder = OneHotEncoder(
    input_cols=["PROGRAM_TYPE", "CONTRACT_TYPE"],
    output_cols=["PROG_ENC", "CONT_ENC"],
    drop_input_cols=True,
    handle_unknown="ignore",
)

# Step 2: random forest (50 trees, depth <= 5) predicting IS_AT_RISK into PRED.
prog_classifier = RandomForestClassifier(
    label_cols=["IS_AT_RISK"],
    output_cols=["PRED"],
    n_estimators=50,
    max_depth=5,
)

prog_pipeline = Pipeline([("enc", prog_encoder), ("clf", prog_classifier)])
prog_pipeline.fit(train_prog)
print("✅ Program model trained")

In [None]:
# Predict on the held-out programs and compare PRED against IS_AT_RISK.
prog_pred = prog_pipeline.predict(test_prog)
acc = accuracy_score(
    df=prog_pred,
    y_true_col_names="IS_AT_RISK",
    y_pred_col_names="PRED",
)
print(f"Accuracy: {acc:.4f}")

# Registry handle for KRATOS_INTELLIGENCE.ANALYTICS; later cells reuse `reg`.
reg = Registry(
    session,
    database_name="KRATOS_INTELLIGENCE",
    schema_name="ANALYTICS",
)

# Log the fitted pipeline as version V1 with the rounded accuracy attached.
prog_metrics = {"accuracy": round(acc, 4)}
reg.log_model(
    prog_pipeline,
    model_name="PROGRAM_RISK_PREDICTOR",
    version_name="V1",
    metrics=prog_metrics,
)
print("✅ PROGRAM_RISK_PREDICTOR registered")

## Model 2: Supplier Risk Predictor


In [None]:
# Feature query for the supplier-risk classifier: two categorical columns,
# two ratings (defaulting to 0.85 when NULL), spend in millions, and the
# binary IS_AT_RISK label. Only suppliers with positive total spend are kept.
# NOTE(review): the label is computed from quality_rating and delivery_rating
# (either below 0.75), and both ratings are also selected as features — this
# looks like target leakage; confirm whether that is intended.
supplier_query = """
SELECT
    supplier_type,
    COALESCE(supplier_category, 'UNKNOWN') AS supplier_category,
    COALESCE(quality_rating, 0.85)::FLOAT AS quality_rating,
    COALESCE(delivery_rating, 0.85)::FLOAT AS delivery_rating,
    (total_spend / 1000000)::FLOAT AS spend_millions,
    CASE WHEN quality_rating < 0.75 OR delivery_rating < 0.75 THEN 1 ELSE 0 END AS is_at_risk
FROM KRATOS_INTELLIGENCE.RAW.SUPPLIERS
WHERE total_spend > 0
"""
supplier_df = session.sql(supplier_query)

# Row count followed by a three-row preview.
print(f"Suppliers: {supplier_df.count()}")
supplier_df.show(3)

In [None]:
# 80/20 train/test split of the supplier rows; the seed is fixed at 42.
sup_split_weights = [0.8, 0.2]
train_sup, test_sup = supplier_df.random_split(sup_split_weights, seed=42)
print(f"Train: {train_sup.count()}, Test: {test_sup.count()}")

# Step 1: one-hot encode the two categorical columns, dropping the originals.
sup_encoder = OneHotEncoder(
    input_cols=["SUPPLIER_TYPE", "SUPPLIER_CATEGORY"],
    output_cols=["SUP_ENC", "CAT_ENC"],
    drop_input_cols=True,
    handle_unknown="ignore",
)

# Step 2: random forest (50 trees, depth <= 5) predicting IS_AT_RISK into PRED.
sup_classifier = RandomForestClassifier(
    label_cols=["IS_AT_RISK"],
    output_cols=["PRED"],
    n_estimators=50,
    max_depth=5,
)

sup_pipeline = Pipeline([("enc", sup_encoder), ("clf", sup_classifier)])
sup_pipeline.fit(train_sup)
print("✅ Supplier model trained")

In [None]:
# Predict on the held-out suppliers and compare PRED against IS_AT_RISK.
sup_pred = sup_pipeline.predict(test_sup)
acc = accuracy_score(
    df=sup_pred,
    y_true_col_names="IS_AT_RISK",
    y_pred_col_names="PRED",
)
print(f"Accuracy: {acc:.4f}")

# Log the fitted pipeline as version V1 through the `reg` handle created in
# an earlier cell, with the rounded accuracy attached.
sup_metrics = {"accuracy": round(acc, 4)}
reg.log_model(
    sup_pipeline,
    model_name="SUPPLIER_RISK_PREDICTOR",
    version_name="V1",
    metrics=sup_metrics,
)
print("✅ SUPPLIER_RISK_PREDICTOR registered")

## Model 3: Production Forecaster


In [None]:
# Monthly aggregates of manufacturing orders: order count, total quantity and
# total cost, keyed by the month number of order_date.
# Grouping is by MONTH(order_date) only, so the result has at most 12 rows and
# the same calendar month from different years is merged into one row.
production_query = """
SELECT
    MONTH(order_date)::FLOAT AS month_num,
    COUNT(*)::FLOAT AS order_count,
    SUM(quantity_ordered)::FLOAT AS total_qty,
    SUM(total_cost)::FLOAT AS total_cost
FROM KRATOS_INTELLIGENCE.RAW.MANUFACTURING_ORDERS
GROUP BY MONTH(order_date)
"""
prod_df = session.sql(production_query)

# Row count followed by a preview using show()'s default row limit.
print(f"Months: {prod_df.count()}")
prod_df.show()

In [None]:
# 80/20 train/test split of the monthly rows; the seed is fixed at 42.
train_prod, test_prod = prod_df.random_split([0.8, 0.2], seed=42)
print(f"Train: {train_prod.count()}, Test: {test_prod.count()}")

# The scaler writes its results to new columns and leaves the raw inputs in
# the DataFrame. When a Snowflake ML estimator is given no input_cols it uses
# every column that is not a label as a feature, so the regressor would
# otherwise be fitted on both the raw and the scaled copy of each predictor
# (six perfectly collinear features instead of three). Point it explicitly at
# the scaled columns so the scaling step actually determines the model inputs.
raw_features = ["MONTH_NUM", "TOTAL_QTY", "TOTAL_COST"]
scaled_features = ["M_SC", "Q_SC", "C_SC"]

prod_pipeline = Pipeline([
    ("scaler", StandardScaler(input_cols=raw_features, output_cols=scaled_features)),
    ("reg", LinearRegression(input_cols=scaled_features, label_cols=["ORDER_COUNT"], output_cols=["PRED"]))
])
prod_pipeline.fit(train_prod)
print("✅ Production model trained")

In [None]:
# Predict on the held-out months and compute the mean absolute error of PRED
# against ORDER_COUNT.
prod_pred = prod_pipeline.predict(test_prod)
mae = mean_absolute_error(
    df=prod_pred,
    y_true_col_names="ORDER_COUNT",
    y_pred_col_names="PRED",
)
print(f"MAE: {mae:.2f}")

# Log the fitted pipeline as version V1 through the `reg` handle created in
# an earlier cell, with the rounded MAE attached.
prod_metrics = {"mae": round(mae, 2)}
reg.log_model(
    prod_pipeline,
    model_name="PRODUCTION_FORECASTER",
    version_name="V1",
    metrics=prod_metrics,
)
print("✅ PRODUCTION_FORECASTER registered")

In [None]:
# Final summary: print whatever the registry reports for its models, then a
# closing reminder.
print("\n=== Models Registered ===")
registered_models = reg.show_models()
print(registered_models)
print("\n✅ Done - add models to Intelligence Agent")