# 🔍 Change Order Classifier

## ATLAS Capital Delivery - ML Model for Hidden Discovery

This notebook builds the CO Classifier model that enables the "Hidden Discovery" feature:
- Classifies change orders into categories (SCOPE_GAP, DESIGN_ERROR, FIELD_CONDITION, etc.)
- Uses keyword-flag and text-length features derived from the `REASON_TEXT` field
- Provides SHAP explanations for transparency
- Enables clustering for pattern detection

**Key Outcome**: Identify that 156 small COs across 12 projects share a common root cause - missing grounding specifications.

In [None]:
# Snowpark imports
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, lit
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
import shap

# Open a Snowpark session from the connection parameters below
# (they reference a connection named "demo").
connection_params = {"connection_name": "demo"}
session_builder = Session.builder.configs(connection_params)
session = session_builder.create()

# Select the database and schema used by every later table lookup.
session.use_database("CAPITAL_PROJECTS_DB")
session.use_schema("ATOMIC")

# Echo the account so the reader can confirm which environment is in use.
current_account = session.get_current_account()
print(f"Connected to: {current_account}")

In [None]:
# Pull the CHANGE_ORDER table into a local pandas DataFrame
change_order_table = session.table("CHANGE_ORDER")
co_df = change_order_table.to_pandas()

n_change_orders = len(co_df)
print(f"Loaded {n_change_orders} change orders")

def extract_features(text):
    """Turn one change-order reason string into keyword and length features.

    Matching is case-insensitive and substring-based (e.g. ``'ground'`` also
    matches ``'grounding'`` and ``'underground'``).

    Args:
        text: Free-text reason for the change order. Missing values
            (``None``, NaN, ``pd.NA``) are treated as an empty string; any
            other value is converted with ``str()``.

    Returns:
        dict: 0/1 keyword flags (``has_grounding``, ``has_missing``,
        ``has_omitted``, ``has_not_specified``, ``has_unforeseen``,
        ``has_error``) followed by ``text_length``, the character count of
        the lower-cased text. Key order is fixed because it determines the
        column order of the feature matrix built from these dicts.
    """
    # Missing values must not be stringified: str(None) -> 'none' and
    # str(nan) -> 'nan' would report a bogus text_length of 4 / 3.
    is_missing = pd.api.types.is_scalar(text) and pd.isna(text)
    text_lower = "" if is_missing else str(text).lower()

    return {
        'has_grounding': int('ground' in text_lower),
        'has_missing': int('missing' in text_lower or 'not included' in text_lower),
        'has_omitted': int('omit' in text_lower),
        'has_not_specified': int('not specified' in text_lower),
        'has_unforeseen': int('unforeseen' in text_lower),
        'has_error': int('error' in text_lower),
        'text_length': len(text_lower),
    }

# Build the feature frame once from a list of per-row dicts. This avoids
# `.apply(pd.Series)`, which constructs a separate Series for every row and
# becomes very slow on large tables. Passing the original index keeps the
# rows of `features` aligned with `co_df`.
feature_records = co_df['REASON_TEXT'].map(extract_features).tolist()
features = pd.DataFrame(feature_records, index=co_df.index)

# Model inputs: feature matrix and target labels (missing CO_TYPE -> 'OTHER').
X = features.values
y = co_df['CO_TYPE'].fillna('OTHER').values

In [None]:
# Train XGBoost classifier
# Encode the string CO types as integers 0..n_classes-1.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# NOTE(review): the split is not stratified, so a rare CO type may be absent
# from the test fold (or the training fold) - consider `stratify=y_encoded`
# once every class has at least two samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

model = XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Predict once and reuse the result.
y_pred = model.predict(X_test)

# Pass the full label list explicitly: without it, `target_names` is matched
# only against the classes that appear in y_test/y_pred, which raises (or
# mislabels rows) whenever a class is missing from the test fold.
all_labels = np.arange(len(le.classes_))
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=all_labels,
    target_names=le.classes_,
    zero_division=0,  # classes with no test/predicted samples score 0 without a warning
))

# SHAP analysis
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# The layout of `shap_values` depends on the SHAP version and class count:
#   * list of (n_samples, n_features) arrays, one per class (older releases)
#   * (n_samples, n_features, n_classes) array (newer releases, multiclass)
#   * (n_samples, n_features) array (binary / single output)
# Indexing with [0] is only meaningful for the first layout, and even then it
# covers class 0 alone. Normalise to 3-D and average |SHAP| over samples and
# classes to get one global importance score per feature.
if isinstance(shap_values, list):
    abs_shap = np.abs(np.stack(shap_values, axis=-1))
else:
    abs_shap = np.abs(np.asarray(shap_values))
if abs_shap.ndim == 2:
    abs_shap = abs_shap[:, :, np.newaxis]

mean_abs_shap = abs_shap.mean(axis=(0, 2))
top_feature_idx = np.argsort(-mean_abs_shap)[:5]
print("\nTop features by importance:", features.columns[top_feature_idx].tolist())

In [None]:
# Validate Hidden Discovery - Grounding Pattern
# Select change orders whose reason text mentions 'ground' (case-insensitive
# substring match; rows with missing text are excluded via na=False).
grounding_mask = co_df['REASON_TEXT'].str.lower().str.contains('ground', na=False)
grounding_cos = co_df[grounding_mask]

n_grounding_cos = len(grounding_cos)
n_projects_affected = grounding_cos['PROJECT_ID'].nunique()
total_amount = grounding_cos['APPROVED_AMOUNT'].sum()
average_amount = grounding_cos['APPROVED_AMOUNT'].mean()

print("🔍 HIDDEN DISCOVERY VALIDATION")
print("=" * 40)
print(f"Grounding-related COs: {n_grounding_cos}")
print(f"Projects Affected: {n_projects_affected}")
print(f"Total Amount: ${total_amount:,.0f}")
# The mean is NaN when nothing matched (or every amount is null); show "n/a"
# instead of printing "$nan".
if pd.isna(average_amount):
    print("Average CO Size: n/a")
else:
    print(f"Average CO Size: ${average_amount:,.0f}")

# Only claim confirmation when the data supports it: the pattern is
# "systemic" only if grounding-related COs exist and span more than one
# project. Previously the success message was printed unconditionally.
if n_grounding_cos > 0 and n_projects_affected > 1:
    print("\n✅ Pattern confirmed - systemic design gap identified!")
else:
    print("\nPattern NOT confirmed - grounding-related COs do not span multiple projects.")