# Model Export for Production

This notebook trains the final models on the full dataset and exports them, together with the fitted scaler and a metadata file, for use in the Supply Chain Analyzer.

In [1]:
import json
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## 1. Load Data and Train Final Models

In [2]:
# Load data
X_raw = pd.read_csv("data/features.csv")
y = pd.read_csv("data/labels.csv").values.ravel()

with open("data/feature_columns.json", encoding="utf-8") as f:
    FEATURE_COLUMNS = json.load(f)

# FEATURE_COLUMNS is published in metadata.json as the feature contract, so the
# training matrix must contain exactly those columns in exactly that order.
# NOTE(review): assumes feature_columns.json holds a flat list of column names.
missing_columns = [col for col in FEATURE_COLUMNS if col not in X_raw.columns]
if missing_columns:
    raise ValueError(
        f"data/features.csv is missing columns listed in feature_columns.json: {missing_columns}"
    )

# Selecting by the list both reorders the columns and drops any column that is
# not part of the contract (for example a stray index column written by to_csv).
X = X_raw[FEATURE_COLUMNS]

if len(X) != len(y):
    raise ValueError(f"Feature/label row count mismatch: {len(X)} features vs {len(y)} labels")

print(f"Training on {len(X)} samples")

Training on 100 samples


In [3]:
# Learn the scaling statistics from the full feature matrix, then apply them to
# that same matrix. The fitted `scaler` is exported later alongside the models.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [4]:
# Fit the three production models. No hold-out split is made here: every
# available sample is used for training.

# 1. Package Risk Scorer: random forest, trained on the unscaled features
rf_model = RandomForestClassifier(
    n_estimators=100, max_depth=10, min_samples_split=5,
    class_weight="balanced", random_state=42, n_jobs=-1,
).fit(X, y)
print("Random Forest trained")

# 2. Typosquat Classifier: logistic regression, trained on the scaled features
lr_model = LogisticRegression(
    C=1.0, class_weight="balanced", random_state=42, max_iter=1000,
).fit(X_scaled, y)
print("Logistic Regression trained")

# 3. Anomaly Detector: isolation forest, trained only on the rows labelled 0
#    (the rows this notebook treats as legitimate), using unscaled features
X_legitimate = X.loc[y == 0]
iso_model = IsolationForest(
    n_estimators=100, contamination=0.1, random_state=42, n_jobs=-1,
).fit(X_legitimate)
print("Isolation Forest trained")

Random Forest trained
Logistic Regression trained
Isolation Forest trained


## 2. Export Models

In [5]:
# Create models directory (parents=True so a missing intermediate directory
# is created instead of raising)
models_dir = Path("../models")
models_dir.mkdir(parents=True, exist_ok=True)

# File stem -> fitted estimator. These file names are repeated in the metadata
# written by the next cell, so they must stay in sync with it.
artifacts = {
    "risk_scorer": rf_model,
    "typosquat_classifier": lr_model,
    "anomaly_detector": iso_model,
    "scaler": scaler,
}

# Export models
exported_paths = []
for stem, estimator in artifacts.items():
    target = models_dir / f"{stem}.joblib"
    joblib.dump(estimator, target)
    exported_paths.append(target)

print(f"Models exported to {models_dir.absolute()}")
print("\nExported files:")
# Report only the files written by this run. Globbing the directory would also
# list stale *.joblib files left by earlier runs. Sorted for a stable order.
for f in sorted(exported_paths):
    print(f"  - {f.name} ({f.stat().st_size / 1024:.1f} KB)")

Models exported to C:\Users\ombis\ag2\notebooks\..\models

Exported files:
  - anomaly_detector.joblib (405.9 KB)
  - risk_scorer.joblib (64.8 KB)
  - scaler.joblib (1.2 KB)
  - typosquat_classifier.joblib (0.9 KB)


In [6]:
# Save model metadata: the feature contract, one entry per exported model
# (estimator type, artifact file, whether inputs must go through the scaler),
# and the environment information needed to load the artifacts safely.
metadata = {
    "feature_columns": FEATURE_COLUMNS,
    "models": {
        "risk_scorer": {
            "type": "RandomForestClassifier",
            "file": "risk_scorer.joblib",
            "scaler_required": False
        },
        "typosquat_classifier": {
            "type": "LogisticRegression",
            "file": "typosquat_classifier.joblib",
            "scaler_required": True
        },
        "anomaly_detector": {
            "type": "IsolationForest",
            "file": "anomaly_detector.joblib",
            "scaler_required": False
        }
    },
    "scaler_file": "scaler.joblib",
    "version": "1.0.0",
    # Pickled estimators are not guaranteed to load (or to behave the same)
    # under a different scikit-learn release, so record the producing version
    # for the consumer to check before calling joblib.load.
    "sklearn_version": sklearn.__version__
}

with open(models_dir / "metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2)

print("Model metadata saved")

Model metadata saved


## 3. Test Loading Models

In [7]:
# Test loading and prediction: reload every exported artifact and confirm it
# reproduces the outputs of the in-memory estimator it was dumped from.
loaded_rf = joblib.load(models_dir / "risk_scorer.joblib")
loaded_lr = joblib.load(models_dir / "typosquat_classifier.joblib")
loaded_iso = joblib.load(models_dir / "anomaly_detector.joblib")
loaded_scaler = joblib.load(models_dir / "scaler.joblib")

# Round-trip checks on a handful of rows. Scores are compared with allclose
# rather than exact equality because the forests run with n_jobs=-1 and may
# accumulate per-tree results in a different order between calls.
check_rows = X.head(10)
check_rows_scaled = loaded_scaler.transform(check_rows)

assert np.allclose(check_rows_scaled, scaler.transform(check_rows)), \
    "Reloaded scaler does not match the fitted scaler"
assert np.allclose(loaded_rf.predict_proba(check_rows), rf_model.predict_proba(check_rows)), \
    "Reloaded risk scorer does not match the trained model"
# The typosquat classifier was trained on scaled features, so it is checked
# on the output of the reloaded scaler.
assert np.allclose(loaded_lr.predict_proba(check_rows_scaled), lr_model.predict_proba(check_rows_scaled)), \
    "Reloaded typosquat classifier does not match the trained model"
assert np.allclose(loaded_iso.decision_function(check_rows), iso_model.decision_function(check_rows)), \
    "Reloaded anomaly detector does not match the trained model"

# Test prediction on a sample
test_sample = X.iloc[[0]]
prediction = loaded_rf.predict(test_sample)
probability = loaded_rf.predict_proba(test_sample)

print(f"Test sample prediction: {prediction[0]}")
print(f"Prediction probability: {probability[0]}")
print("\n✅ Models loaded and working correctly!")

Test sample prediction: 0
Prediction probability: [1. 0.]

✅ Models loaded and working correctly!


## Usage in Supply Chain Analyzer

```python
import joblib
from pathlib import Path

# Load models
models_dir = Path("models")
risk_model = joblib.load(models_dir / "risk_scorer.joblib")

# Predict risk score
features = extract_features(package_metadata)
risk_score = risk_model.predict_proba([features])[0][1]
```