# Snowcore Anomaly Detection - Multi-Model Training

## Business Objective

Snowcore Industries faces intermittent quality yield issues on the Avalanche X1 bobsled production line. This notebook trains three complementary anomaly detection models:

| Model | Asset Focus | Method | Output |
|-------|-------------|--------|--------|
| **Model A** | Layup Room | Linear Regression | Scrap risk from humidity |
| **Model B** | Autoclave | PCA Autoencoder | Cure cycle anomaly score |
| **Model C** | Cross-Asset | Graph Analysis | Downstream impact propagation |

## Hidden Discovery

The key insight: **High humidity (>65%) in the Layup Room causes a 3x scrap rate 6 hours later during autoclave cure** due to moisture-induced delamination.

## Output Tables

- `PDM.ANOMALY_EVENTS` - Detected anomalies with severity and root cause
- `PDM.MODEL_METRICS` - Model performance metrics
- `PDM.MODEL_DIAGNOSTICS` - Per-model pass/fail diagnostic checks

## 1. Environment Setup

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error, r2_score

import networkx as nx
import matplotlib.pyplot as plt

# Dark figure theme shared by every plot in this notebook.
plt.style.use('dark_background')

_FIGURE_BACKGROUND = '#121212'
_RC_OVERRIDES = {
    'figure.facecolor': _FIGURE_BACKGROUND,
    'axes.facecolor': _FIGURE_BACKGROUND,
    'text.color': '#E5E5E7',
    'figure.dpi': 150,
    'figure.figsize': (12, 5),
}
plt.rcParams.update(_RC_OVERRIDES)

# Named palette for charts.
COLORS = dict(
    primary='#29B5E8',
    secondary='#11567F',
    accent='#FF9F0A',
    warning='#ef4444',
    success='#22c55e',
)

# Fixed seed so the synthetic-data fallbacks and random draws are repeatable.
np.random.seed(42)
print('[OK] Libraries imported')

In [None]:
from snowflake.snowpark.context import get_active_session

# Reuse the session the hosting notebook environment already holds.
session = get_active_session()

print('[OK] Connected to Snowflake')
active_database = session.get_current_database()
print(f'  Database: {active_database}')

## 2. Data Loading

In [None]:
print('Loading data from Snowflake...')

# These three tables are required: a failure here is allowed to propagate.
cure_results_df = session.table('PDM.CURE_RESULTS').to_pandas()
print(f'  PDM.CURE_RESULTS: {len(cure_results_df):,} rows')

asset_graph_df = session.table('CONFIG.ASSET_GRAPH').to_pandas()
print(f'  CONFIG.ASSET_GRAPH: {len(asset_graph_df):,} rows')

asset_status_df = session.table('DATA_MART.ASSET_STATUS').to_pandas()
print(f'  DATA_MART.ASSET_STATUS: {len(asset_status_df):,} rows')

# Sensor history is optional: when the query fails, an empty frame is used so
# the next cell can substitute synthetic readings.
# NOTE(review): LIMIT without ORDER BY does not pin which 10,000 rows are
# returned - confirm an arbitrary sample of the last 30 days is acceptable.
try:
    sensors_wide_df = session.sql("""
        SELECT * FROM ATOMIC.ASSET_SENSORS_WIDE 
        WHERE EVENT_TIMESTAMP > DATEADD('day', -30, CURRENT_TIMESTAMP())
        LIMIT 10000
    """).to_pandas()
    print(f'  ATOMIC.ASSET_SENSORS_WIDE: {len(sensors_wide_df):,} rows')
except Exception as exc:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still
    # stop the notebook, and report the reason instead of hiding it.
    sensors_wide_df = pd.DataFrame()
    print('  ATOMIC.ASSET_SENSORS_WIDE: No data (will use synthetic)')
    print(f'    Reason: {type(exc).__name__}: {exc}')

print('\n[OK] Data loaded')

In [None]:
# Fallback generators: only run when the corresponding Snowflake load came back
# empty. The order of the np.random calls below is deliberate - with the fixed
# seed set earlier, reordering them would change the generated data.
if cure_results_df.empty:
    print('Generating synthetic cure results with humidity-scrap correlation...')
    
    n_batches = 500
    cure_results_df = pd.DataFrame({
        'RESULT_ID': [f'CR-{i:04d}' for i in range(n_batches)],
        'BATCH_ID': [f'BATCH-{i:04d}' for i in range(n_batches)],
        'AUTOCLAVE_ID': np.random.choice(['AUTOCLAVE_01', 'AUTOCLAVE_02'], n_batches),
        # Lowercase 'h' is the supported hourly alias; the uppercase 'H' form is
        # deprecated in recent pandas and the global warning filter hides that.
        'CURE_TIMESTAMP': pd.date_range(end=datetime.now(), periods=n_batches, freq='2h'),
        'LAYUP_HUMIDITY_AVG': np.random.uniform(45, 75, n_batches),
        'LAYUP_HUMIDITY_PEAK': np.random.uniform(50, 85, n_batches),
    })
    
    # Scrap probability is 48% when peak humidity exceeds 65 and 5% otherwise;
    # this is what plants the humidity -> scrap relationship in the data.
    cure_results_df['SCRAP_FLAG'] = (
        (cure_results_df['LAYUP_HUMIDITY_PEAK'] > 65) & 
        (np.random.random(n_batches) < 0.48)
    ) | (
        (cure_results_df['LAYUP_HUMIDITY_PEAK'] <= 65) & 
        (np.random.random(n_batches) < 0.05)
    )
    
    # Scrapped batches draw a score from [0.6, 1.0); good batches from [0.0, 0.3).
    cure_results_df['DELAMINATION_SCORE'] = np.where(
        cure_results_df['SCRAP_FLAG'],
        np.random.uniform(0.6, 1.0, n_batches),
        np.random.uniform(0.0, 0.3, n_batches)
    )
    
    print(f'  Generated {n_batches} synthetic batches')
    print(f'  Scrap rate: {cure_results_df["SCRAP_FLAG"].mean():.1%}')

if sensors_wide_df.empty:
    print('Generating synthetic sensor data...')
    
    assets = ['AUTOCLAVE_01', 'AUTOCLAVE_02', 'LAYUP_ROOM', 'CNC_MILL_01', 'CNC_MILL_02']
    timestamps = pd.date_range(end=datetime.now(), periods=1000, freq='1min')
    
    # One row per (timestamp, asset); sensors that do not apply to an asset
    # type are left as None.
    rows = []
    for ts in timestamps:
        for asset in assets:
            row = {
                'EVENT_TIMESTAMP': ts,
                'ASSET_ID': asset,
                'TEMPERATURE_C': np.random.uniform(150, 200) if 'AUTOCLAVE' in asset else np.random.uniform(20, 25),
                'PRESSURE_PSI': np.random.uniform(80, 120) if 'AUTOCLAVE' in asset else None,
                'VACUUM_MBAR': np.random.uniform(-1.0, -0.85) if 'AUTOCLAVE' in asset else None,
                'HUMIDITY_PCT': np.random.uniform(45, 72) if asset == 'LAYUP_ROOM' else None,
                'VIBRATION_G': np.random.uniform(0.1, 0.6) if 'CNC' in asset else None,
            }
            rows.append(row)
    
    sensors_wide_df = pd.DataFrame(rows)
    print(f'  Generated {len(sensors_wide_df):,} sensor readings')

print('\n[OK] Data ready for training')

## 3. Data Exploration - Humidity-Scrap Correlation

In [None]:
section_rule = '=' * 60
print(section_rule)
print('HUMIDITY-SCRAP CORRELATION ANALYSIS')
print(section_rule)

# Bucket every batch by whether its peak layup humidity exceeded 65.
peak_above_65 = cure_results_df['LAYUP_HUMIDITY_PEAK'] > 65
cure_results_df['HUMIDITY_CLASS'] = np.where(peak_above_65, 'HIGH', 'NORMAL')

# Per-class batch count, scrap count, scrap rate and mean delamination score.
correlation = (
    cure_results_df
    .groupby('HUMIDITY_CLASS')
    .agg(
        Batches=('BATCH_ID', 'count'),
        Scrapped=('SCRAP_FLAG', 'sum'),
        Scrap_Rate=('SCRAP_FLAG', 'mean'),
        Avg_Delamination=('DELAMINATION_SCORE', 'mean'),
    )
    .round(3)
)
print('\nScrap Rate by Humidity Class:')
print(correlation.to_string())

high_humidity_rate = cure_results_df.loc[peak_above_65, 'SCRAP_FLAG'].mean()
normal_humidity_rate = cure_results_df.loc[~peak_above_65, 'SCRAP_FLAG'].mean()

# Ratio of the two scrap rates; reported as 0 when the baseline is not positive.
if normal_humidity_rate > 0:
    multiplier = high_humidity_rate / normal_humidity_rate
else:
    multiplier = 0

print(f'\n[KEY FINDING] High humidity batches have {multiplier:.1f}x higher scrap rate!')
print(f'  HIGH humidity (>65%): {high_humidity_rate:.1%} scrap rate')
print(f'  NORMAL humidity: {normal_humidity_rate:.1%} scrap rate')

## 4. Model A: Linear Regression (Layup Humidity)

Predict delamination score from humidity features to identify at-risk batches.

In [None]:
print('=' * 60)
print('MODEL A: LAYUP HUMIDITY REGRESSION')
print('=' * 60)

# Regress the delamination score on the two layup humidity features.
feature_cols = ['LAYUP_HUMIDITY_AVG', 'LAYUP_HUMIDITY_PEAK']
X = cure_results_df[feature_cols].values
y = cure_results_df['DELAMINATION_SCORE'].values

# Positional 80/20 split: the first 80% of rows train, the rest evaluate.
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

model_layup = LinearRegression()
model_layup.fit(X_train, y_train)

y_pred = model_layup.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is computed with numpy directly; the diagnostics cell reads it from
# model_a_metrics['rmse'], which previously was never populated.
rmse = float(np.sqrt(np.mean((y_test - y_pred) ** 2)))

print(f'\nModel Performance:')
print(f'  R2 Score: {r2:.4f}')
print(f'  MAE: {mae:.4f}')
print(f'  RMSE: {rmse:.4f}')

print(f'\nCoefficients:')
for col, coef in zip(feature_cols, model_layup.coef_):
    print(f'  {col}: {coef:.4f}')
print(f'  Intercept: {model_layup.intercept_:.4f}')

# Plain-Python floats only, so the dict can be passed to json.dumps later.
model_a_metrics = {
    'model_name': 'LAYUP_HUMIDITY_MODEL',
    'model_type': 'LinearRegression',
    'r2_score': float(r2),
    'mae': float(mae),
    'rmse': rmse,
    'coefficients': dict(zip(feature_cols, [float(c) for c in model_layup.coef_])),
    'intercept': float(model_layup.intercept_)
}

print('\n[OK] Model A trained')

In [None]:
def predict_scrap_risk(humidity_avg, humidity_peak):
    """Score one layup humidity reading with the Model A regression.

    Args:
        humidity_avg: Average layup humidity fed to the model.
        humidity_peak: Peak layup humidity fed to the model; also selects the
            root-cause and suggested-fix text (above 65 vs. not).

    Returns:
        dict with 'predicted_delamination' (rounded to 3 places), 'severity'
        ('CRITICAL' above 0.6, 'WARNING' above 0.4, else 'NORMAL'),
        'root_cause' and 'suggested_fix'.
    """
    features = [[humidity_avg, humidity_peak]]
    predicted_score = model_layup.predict(features)[0]

    severity = 'NORMAL'
    if predicted_score > 0.6:
        severity = 'CRITICAL'
    elif predicted_score > 0.4:
        severity = 'WARNING'

    # The explanatory text is driven by the raw peak reading, not the model output.
    if humidity_peak > 65:
        root_cause = 'High ambient humidity during layup'
        suggested_fix = 'Activate dehumidifiers, delay layup operations'
    else:
        root_cause = 'Normal humidity levels'
        suggested_fix = 'Continue normal operations'

    return {
        'predicted_delamination': round(predicted_score, 3),
        'severity': severity,
        'root_cause': root_cause,
        'suggested_fix': suggested_fix,
    }

print('Testing Model A inference:')
# (average humidity, peak humidity) pairs to run through Model A.
test_cases = [(50, 55), (62, 68), (70, 78)]

for avg, peak in test_cases:
    result = predict_scrap_risk(avg, peak)
    severity_label = result["severity"]
    print(f'\n  Humidity: {avg}/{peak}% -> {severity_label}')

## 5. Model B: PCA Autoencoder (Autoclave Anomaly)

Detect anomalies in autoclave cure cycles by measuring reconstruction error.

In [None]:
section_rule = '=' * 60
print(section_rule)
print('MODEL B: AUTOCLAVE ANOMALY DETECTION')
print(section_rule)

autoclave_features = ['TEMPERATURE_C', 'PRESSURE_PSI', 'VACUUM_MBAR']

# Keep autoclave rows only, then discard any row missing one of the features.
is_autoclave = sensors_wide_df['ASSET_ID'].str.contains('AUTOCLAVE', na=False)
autoclave_data = sensors_wide_df[is_autoclave].copy()
autoclave_data = autoclave_data.dropna(subset=autoclave_features)

print(f'Autoclave readings: {len(autoclave_data):,}')

# Standardise, project onto two principal components, and map back.
scaler_autoclave = StandardScaler()
X_scaled = scaler_autoclave.fit_transform(autoclave_data[autoclave_features])

pca_autoclave = PCA(n_components=2)
X_compressed = pca_autoclave.fit_transform(X_scaled)
X_reconstructed = pca_autoclave.inverse_transform(X_compressed)

# Per-row mean squared difference between the scaled input and its reconstruction.
residual = X_scaled - X_reconstructed
reconstruction_errors = np.mean(residual**2, axis=1)

# The cut-off is the 95th percentile of the training errors.
threshold_autoclave = np.percentile(reconstruction_errors, 95)
print(f'Anomaly threshold (95th percentile): {threshold_autoclave:.4f}')

flagged = reconstruction_errors > threshold_autoclave
anomaly_rate = flagged.mean()
print(f'Training anomaly rate: {anomaly_rate:.1%}')

total_explained = pca_autoclave.explained_variance_ratio_.sum()
print(f'\nPCA explained variance: {total_explained:.1%}')

model_b_metrics = dict(
    model_name='AUTOCLAVE_ANOMALY_MODEL',
    model_type='PCA_Autoencoder',
    n_components=2,
    explained_variance=float(total_explained),
    threshold=float(threshold_autoclave),
    training_anomaly_rate=float(anomaly_rate),
)

print('\n[OK] Model B trained')

In [None]:
def detect_autoclave_anomaly(temperature, pressure, vacuum):
    """Score a single autoclave reading and attach a rule-based diagnosis.

    The reading is scaled with the fitted scaler, pushed through the fitted
    PCA and back, and the mean squared reconstruction error is compared with
    the training threshold. Independently of that, fixed range checks on
    vacuum, temperature and pressure (in that priority order) pick the
    anomaly type; 'PATTERN_ANOMALY' is used only when no range check fires
    but the reconstruction error exceeds the threshold.

    Returns:
        dict with 'is_anomaly', 'anomaly_score' (error / (2 * threshold),
        capped at 1.0, rounded to 3 places), 'anomaly_type' (None when nothing
        fired), 'severity', 'root_cause' and 'suggested_fix'.
    """
    reading = np.array([[temperature, pressure, vacuum]])
    scaled = scaler_autoclave.transform(reading)
    rebuilt = pca_autoclave.inverse_transform(pca_autoclave.transform(scaled))
    error = np.mean((scaled - rebuilt)**2)

    is_anomaly = error > threshold_autoclave
    anomaly_score = min(1.0, error / (threshold_autoclave * 2))

    # First matching rule wins; the fall-through is the "all clear" diagnosis.
    if vacuum > -0.9:
        diagnosis = (
            'VACUUM_DEGRADATION',
            'Vacuum seal wear or leak detected',
            'Inspect door seal and gaskets, check vacuum pump',
        )
    elif temperature > 195 or temperature < 155:
        diagnosis = (
            'TEMPERATURE_DEVIATION',
            'Temperature outside optimal cure range',
            'Check heating elements and thermocouples',
        )
    elif pressure > 115 or pressure < 85:
        diagnosis = (
            'PRESSURE_DEVIATION',
            'Pressure regulation issue',
            'Inspect pressure relief valves and regulators',
        )
    elif is_anomaly:
        diagnosis = (
            'PATTERN_ANOMALY',
            'Unusual sensor pattern detected',
            'Review cure cycle parameters',
        )
    else:
        diagnosis = (None, 'Normal operation', 'Continue monitoring')
    anomaly_type, root_cause, suggested_fix = diagnosis

    # Severity depends only on the reconstruction-error score.
    if anomaly_score > 0.7:
        severity = 'CRITICAL'
    elif anomaly_score > 0.4:
        severity = 'WARNING'
    else:
        severity = 'INFO'

    return {
        'is_anomaly': is_anomaly,
        'anomaly_score': round(anomaly_score, 3),
        'anomaly_type': anomaly_type,
        'severity': severity,
        'root_cause': root_cause,
        'suggested_fix': suggested_fix,
    }

print('Testing Model B inference:')
# (temperature, pressure, vacuum) triples to run through Model B.
test_cases = [(175, 100, -0.95), (180, 105, -0.88), (200, 120, -0.75)]

for temp, pressure, vacuum in test_cases:
    result = detect_autoclave_anomaly(temp, pressure, vacuum)
    if result['is_anomaly']:
        status = 'ANOMALY'
    else:
        status = 'NORMAL'
    print(f'\n  T={temp}C, P={pressure}psi, V={vacuum}mbar -> {status}')

## 6. Model C: Graph Propagation (Cross-Asset)

Propagate anomaly signals through the asset dependency graph to predict downstream impacts.

In [None]:
section_rule = '=' * 60
print(section_rule)
print('MODEL C: CROSS-ASSET GRAPH PROPAGATION')
print(section_rule)

G = nx.DiGraph()

# Edge tuples are (source, target, edge type, weight, lag in hours). They come
# from CONFIG.ASSET_GRAPH when it has rows, otherwise from the built-in layout.
if asset_graph_df.empty:
    edge_specs = [
        ('LAYUP_ROOM', 'LAYUP_BOT_01', 'ENV_INFLUENCE', 0.8, 0),
        ('LAYUP_ROOM', 'LAYUP_BOT_02', 'ENV_INFLUENCE', 0.8, 0),
        ('LAYUP_BOT_01', 'AUTOCLAVE_01', 'MATERIAL_FLOW', 1.0, 2),
        ('LAYUP_BOT_02', 'AUTOCLAVE_02', 'MATERIAL_FLOW', 1.0, 2),
        ('AUTOCLAVE_01', 'CNC_MILL_01', 'MATERIAL_FLOW', 1.0, 4),
        ('AUTOCLAVE_02', 'CNC_MILL_02', 'MATERIAL_FLOW', 1.0, 4),
        ('CNC_MILL_01', 'QC_STATION_01', 'MATERIAL_FLOW', 1.0, 1),
        ('CNC_MILL_02', 'QC_STATION_02', 'MATERIAL_FLOW', 1.0, 1),
    ]
else:
    edge_specs = [
        (
            row['SOURCE_ASSET'],
            row['TARGET_ASSET'],
            row['EDGE_TYPE'],
            row['WEIGHT'],
            row['LAG_HOURS'],
        )
        for _, row in asset_graph_df.iterrows()
    ]

for src, tgt, etype, weight, lag in edge_specs:
    G.add_edge(src, tgt, edge_type=etype, weight=weight, lag_hours=lag)

node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f'Graph nodes: {node_count}')
print(f'Graph edges: {edge_count}')
print(f'\nAssets: {list(G.nodes())}')

In [None]:
def propagate_anomaly(source_asset, anomaly_score, anomaly_type):
    """Push an anomaly score from one asset to everything downstream of it.

    For each descendant of ``source_asset`` in the graph ``G``, the score is
    multiplied by every edge weight along the shortest path and the edge lags
    are summed. Descendants whose propagated score is not above 0.3 are
    dropped.

    Returns:
        A list of impact dicts sorted by propagated score, highest first;
        an empty list when ``source_asset`` is not a node of ``G``.
    """
    if source_asset not in G:
        return []

    impacts = []

    for downstream in nx.descendants(G, source_asset):
        try:
            hops = nx.shortest_path(G, source_asset, downstream)
        except nx.NetworkXNoPath:
            continue

        # Walk consecutive node pairs along the path, decaying the score and
        # accumulating the delay.
        score = anomaly_score
        lag = 0
        for tail, head in zip(hops, hops[1:]):
            link = G[tail][head]
            score *= link['weight']
            lag += link['lag_hours']

        if not score > 0.3:
            continue

        if score > 0.7:
            risk = 'HIGH'
        elif score > 0.4:
            risk = 'MEDIUM'
        else:
            risk = 'LOW'

        impacts.append({
            'target_asset': downstream,
            'propagated_score': round(score, 3),
            'lag_hours': lag,
            'path': ' -> '.join(hops),
            'risk_level': risk,
            'anomaly_type': f'PROPAGATED_{anomaly_type}',
            'root_cause': f'Upstream anomaly from {source_asset}',
            'expected_impact_time': f'+{lag}h',
        })

    impacts.sort(key=lambda impact: impact['propagated_score'], reverse=True)
    return impacts

print('Testing Model C propagation (HIGH_HUMIDITY in LAYUP_ROOM):')
# Seed a 0.85 humidity anomaly at the layup room and list what it reaches.
propagation = propagate_anomaly('LAYUP_ROOM', 0.85, 'HIGH_HUMIDITY')

for asset in propagation:
    target_name = asset["target_asset"]
    risk_summary = f'{asset["risk_level"]} ({asset["propagated_score"]})'
    print(f'\n  {target_name}:')
    print(f'    Risk: {risk_summary}')
    print(f'    Expected impact: {asset["expected_impact_time"]}')

## 7. Combined Inference Function

In [None]:
def detect_all_anomalies(asset_id, sensor_data):
    """Route one asset's sensor reading to the detector that applies to it.

    The asset type is chosen by substring match on ``asset_id``, checked in
    this order: 'LAYUP_ROOM' (humidity regression plus graph propagation),
    'AUTOCLAVE' (PCA reconstruction check), 'CNC_MILL' (fixed vibration
    threshold). Missing sensor keys fall back to the defaults used below.

    Returns:
        A list of anomaly dicts (possibly empty), each carrying 'asset_id',
        'anomaly_type', 'anomaly_score', 'severity', 'root_cause',
        'suggested_fix' and 'model'.
    """
    findings = []

    if 'LAYUP_ROOM' in asset_id:
        avg_humidity = sensor_data.get('HUMIDITY_AVG', 50)
        peak_humidity = sensor_data.get('HUMIDITY_PEAK', sensor_data.get('HUMIDITY_PCT', 50))

        risk = predict_scrap_risk(avg_humidity, peak_humidity)
        if risk['severity'] == 'NORMAL':
            return findings

        # Score scales with how far the peak sits above 65, capped at 1.0;
        # a flagged reading at or below 65 gets a flat 0.3.
        if peak_humidity > 65:
            score = min(1.0, (peak_humidity - 65) / 20)
        else:
            score = 0.3

        findings.append({
            'asset_id': asset_id,
            'anomaly_type': 'HIGH_HUMIDITY',
            'anomaly_score': score,
            'severity': risk['severity'],
            'root_cause': risk['root_cause'],
            'suggested_fix': risk['suggested_fix'],
            'model': 'LAYUP_HUMIDITY_MODEL'
        })

        # Add one entry per downstream asset the anomaly is expected to reach.
        for hit in propagate_anomaly(asset_id, score, 'HIGH_HUMIDITY'):
            if hit['propagated_score'] > 0.5:
                downstream_severity = 'WARNING'
            else:
                downstream_severity = 'INFO'
            findings.append({
                'asset_id': hit['target_asset'],
                'anomaly_type': hit['anomaly_type'],
                'anomaly_score': hit['propagated_score'],
                'severity': downstream_severity,
                'root_cause': hit['root_cause'],
                'suggested_fix': f'Monitor for impact in {hit["expected_impact_time"]}',
                'model': 'GRAPH_PROPAGATION'
            })
        return findings

    if 'AUTOCLAVE' in asset_id:
        verdict = detect_autoclave_anomaly(
            sensor_data.get('TEMPERATURE_C', 175),
            sensor_data.get('PRESSURE_PSI', 100),
            sensor_data.get('VACUUM_MBAR', -0.95),
        )

        # Report when either the reconstruction check or a range rule fired.
        if verdict['is_anomaly'] or verdict['anomaly_type']:
            findings.append({
                'asset_id': asset_id,
                'anomaly_type': verdict['anomaly_type'] or 'CURE_ANOMALY',
                'anomaly_score': verdict['anomaly_score'],
                'severity': verdict['severity'],
                'root_cause': verdict['root_cause'],
                'suggested_fix': verdict['suggested_fix'],
                'model': 'AUTOCLAVE_ANOMALY_MODEL'
            })
        return findings

    if 'CNC_MILL' in asset_id:
        vibration = sensor_data.get('VIBRATION_G', 0.3)

        if vibration > 0.8:
            if vibration > 1.0:
                vibration_severity = 'CRITICAL'
            else:
                vibration_severity = 'WARNING'
            findings.append({
                'asset_id': asset_id,
                'anomaly_type': 'VIBRATION_SPIKE',
                'anomaly_score': min(1.0, vibration / 1.2),
                'severity': vibration_severity,
                'root_cause': 'Spindle bearing wear or imbalance',
                'suggested_fix': 'Check spindle alignment, inspect bearings',
                'model': 'THRESHOLD_DETECTION'
            })

    return findings

section_rule = '=' * 60
print(section_rule)
print('COMBINED INFERENCE TEST')
print(section_rule)

# One scenario per asset type; this list is reused when sample events are written.
test_scenarios = [
    ('LAYUP_ROOM', {'HUMIDITY_AVG': 68, 'HUMIDITY_PEAK': 75}),
    ('AUTOCLAVE_01', {'TEMPERATURE_C': 180, 'PRESSURE_PSI': 105, 'VACUUM_MBAR': -0.82}),
    ('CNC_MILL_01', {'VIBRATION_G': 0.95}),
]

for asset_id, sensor_data in test_scenarios:
    print(f'\n{asset_id}:')
    anomalies = detect_all_anomalies(asset_id, sensor_data)
    if not anomalies:
        print('  [OK] No anomalies detected')
        continue
    for a in anomalies:
        print(f'  [{a["severity"]}] {a["anomaly_type"]}: {a["anomaly_score"]:.2f}')

## 8. Write Results to Snowflake

In [None]:
print('Writing model metrics to Snowflake...')

# Make sure the target table exists before appending to it.
create_metrics_table_sql = """
CREATE TABLE IF NOT EXISTS PDM.MODEL_METRICS (
    METRIC_ID STRING DEFAULT UUID_STRING(),
    MODEL_NAME STRING,
    MODEL_TYPE STRING,
    METRICS VARIANT,
    TRAINED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
)
"""
session.sql(create_metrics_table_sql).collect()

# Models A and B each carry their own name/type; the full dict is stored as JSON.
metric_rows = [
    {
        'MODEL_NAME': trained['model_name'],
        'MODEL_TYPE': trained['model_type'],
        'METRICS': json.dumps(trained),
    }
    for trained in (model_a_metrics, model_b_metrics)
]

# The graph model has no metrics dict, so its size and node list are recorded.
graph_summary = {
    'nodes': G.number_of_nodes(),
    'edges': G.number_of_edges(),
    'assets': list(G.nodes()),
}
metric_rows.append({
    'MODEL_NAME': 'GRAPH_PROPAGATION_MODEL',
    'MODEL_TYPE': 'NetworkX_DiGraph',
    'METRICS': json.dumps(graph_summary),
})

metrics_df = pd.DataFrame(metric_rows)

session.write_pandas(metrics_df, 'MODEL_METRICS', database='SNOWCORE_PDM', schema='PDM', overwrite=False)
print(f'  PDM.MODEL_METRICS: {len(metrics_df)} rows written')
print('\n[OK] Model metrics saved')

In [None]:
print('Writing model diagnostics to Snowflake...')

# One diagnostic row per model: a headline metric, the threshold it is checked
# against, the resulting status, and supporting values as a JSON string.
diagnostics = []

# Model A passes when R2 reaches 0.7.
diagnostics.append({
    'MODEL_NAME': model_a_metrics['model_name'],
    'MODEL_VERSION': '1.0',
    'METRIC_NAME': 'r2_score',
    'METRIC_VALUE': model_a_metrics['r2_score'],
    'THRESHOLD_VALUE': 0.7,
    'STATUS': 'PASS' if model_a_metrics['r2_score'] >= 0.7 else 'FAIL',
    # .get() because the metrics dict may not carry an 'rmse' entry; a missing
    # value is written as JSON null instead of raising KeyError.
    'DETAILS': json.dumps({'rmse': model_a_metrics.get('rmse'), 'mae': model_a_metrics['mae']})
})

# Model B passes when its reconstruction-error threshold is at most 0.5.
diagnostics.append({
    'MODEL_NAME': model_b_metrics['model_name'],
    'MODEL_VERSION': '1.0',
    'METRIC_NAME': 'reconstruction_error_threshold',
    'METRIC_VALUE': model_b_metrics['threshold'],
    'THRESHOLD_VALUE': 0.5,
    'STATUS': 'PASS' if model_b_metrics['threshold'] <= 0.5 else 'WARN',
    # The training cell stores this value under 'explained_variance'; the
    # emitted JSON key stays 'variance_explained'.
    'DETAILS': json.dumps({'variance_explained': model_b_metrics['explained_variance'], 'n_components': model_b_metrics['n_components']})
})

# Model C passes when the graph has at least half as many edges as nodes.
diagnostics.append({
    'MODEL_NAME': 'GRAPH_PROPAGATION_MODEL',
    'MODEL_VERSION': '1.0',
    'METRIC_NAME': 'graph_connectivity',
    # max(..., 1) avoids dividing by zero on an empty graph.
    'METRIC_VALUE': G.number_of_edges() / max(G.number_of_nodes(), 1),
    'THRESHOLD_VALUE': 0.5,
    'STATUS': 'PASS' if G.number_of_edges() >= G.number_of_nodes() * 0.5 else 'WARN',
    'DETAILS': json.dumps({'nodes': G.number_of_nodes(), 'edges': G.number_of_edges()})
})

diagnostics_df = pd.DataFrame(diagnostics)
# NOTE(review): no CREATE TABLE for PDM.MODEL_DIAGNOSTICS is visible in this
# notebook - confirm the table is provisioned elsewhere before this append runs.
session.write_pandas(diagnostics_df, 'MODEL_DIAGNOSTICS', database='SNOWCORE_PDM', schema='PDM', overwrite=False)
print(f'  PDM.MODEL_DIAGNOSTICS: {len(diagnostics_df)} rows written')
print('\n[OK] Model diagnostics saved')

In [None]:
print('Writing sample anomaly events to Snowflake...')

# Re-run the combined detector over the demo scenarios and flatten every
# detection into one event row, stamped with the time the row is built.
sample_anomalies = [
    {
        'ASSET_ID': event['asset_id'],
        'TIMESTAMP': datetime.now(),
        'ANOMALY_TYPE': event['anomaly_type'],
        'ANOMALY_SCORE': event['anomaly_score'],
        'SEVERITY': event['severity'],
        'ROOT_CAUSE': event['root_cause'],
        'SUGGESTED_FIX': event['suggested_fix'],
        'RESOLVED': False,
    }
    for scenario_asset, scenario_reading in test_scenarios
    for event in detect_all_anomalies(scenario_asset, scenario_reading)
]

# Nothing is written when no scenario produced a detection.
if sample_anomalies:
    anomalies_df = pd.DataFrame(sample_anomalies)
    session.write_pandas(anomalies_df, 'ANOMALY_EVENTS', database='SNOWCORE_PDM', schema='PDM', overwrite=False)
    print(f'  PDM.ANOMALY_EVENTS: {len(anomalies_df)} rows written')

print('\n[OK] Sample anomaly events saved')

## 9. Summary

In [None]:
section_rule = '=' * 60
print(section_rule)
print('ANOMALY DETECTION TRAINING - COMPLETE')
print(section_rule)

# Recap of the three models using the metrics gathered during training.
print('\nModels Trained:')
print('  1. LAYUP_HUMIDITY_MODEL (LinearRegression)')
print('     R2: {:.4f}, MAE: {:.4f}'.format(model_a_metrics["r2_score"], model_a_metrics["mae"]))
print('  2. AUTOCLAVE_ANOMALY_MODEL (PCA Autoencoder)')
print('     Variance: {:.1%}, Threshold: {:.4f}'.format(
    model_b_metrics["explained_variance"], model_b_metrics["threshold"]
))
print('  3. GRAPH_PROPAGATION_MODEL (NetworkX)')
print('     Nodes: {}, Edges: {}'.format(G.number_of_nodes(), G.number_of_edges()))

print('\nKey Findings:')
print('  - High humidity (>65%) causes {:.1f}x higher scrap rate'.format(multiplier))
print('  - Anomalies propagate through graph with decay')
# The 7-hour figure is a fixed string, not derived from the graph in use.
print('  - Expected lag from LAYUP_ROOM to QC: 7 hours')

print('\nTables Updated:')
print('  - PDM.MODEL_METRICS: Model performance tracking')
print('  - PDM.ANOMALY_EVENTS: Sample anomaly detections')

print('\n[OK] Ready for inference deployment')