# Save Model to MinIO

Upload the trained model to MinIO storage and create the deployment variables file (`vars.txt`) for the next notebook.

In [None]:
import os
import json
import pickle
from datetime import datetime
from sklearn.pipeline import Pipeline
import boto3
from botocore.client import Config

In [None]:
# MinIO connection settings.
# Each value can be overridden by an environment variable of the same name;
# the fallbacks target the MinIO service in the OpenShift namespace.
NAMESPACE = os.environ.get('NAMESPACE', 'spending-transaction-monitor')
MINIO_ENDPOINT = os.environ.get(
    'MINIO_ENDPOINT',
    f'http://minio-service.{NAMESPACE}.svc.cluster.local:9000',
)
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'minio')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'minio123')
BUCKET_NAME = os.environ.get('BUCKET_NAME', 'models')

# Echo the non-secret settings so the cell output records the target.
print(
    "MinIO Configuration:",
    f"  Endpoint: {MINIO_ENDPOINT}",
    f"  Bucket: {BUCKET_NAME}",
    f"  Namespace: {NAMESPACE}",
    sep="\n",
)

In [None]:
# Build the S3-compatible client that talks to the MinIO endpoint.
# The signature version (s3v4) and the region are set explicitly.
s3_client = boto3.client(
    service_name='s3',
    region_name='us-east-1',
    endpoint_url=MINIO_ENDPOINT,
    aws_access_key_id=MINIO_ACCESS_KEY,
    aws_secret_access_key=MINIO_SECRET_KEY,
    config=Config(signature_version='s3v4'),
)

print("✅ MinIO client initialized")

In [None]:
# Create the bucket if it doesn't exist, treating "already exists" as success.
# botocore raises ClientError for service-side failures and exposes the
# service error code in the parsed response, so the code is compared directly
# instead of searching the exception's string form.
from botocore.exceptions import ClientError

try:
    s3_client.create_bucket(Bucket=BUCKET_NAME)
except ClientError as e:
    error_code = e.response.get('Error', {}).get('Code', '')
    if error_code not in ('BucketAlreadyOwnedByYou', 'BucketAlreadyExists'):
        # Any other service error (permissions, invalid name, ...) is fatal.
        print(f"Error creating bucket: {e}")
        raise
    print(f"✅ Bucket already exists: {BUCKET_NAME}")
except Exception as e:
    # Non-service failures (e.g. the endpoint is unreachable): report and
    # re-raise so the notebook stops here.
    print(f"Error creating bucket: {e}")
    raise
else:
    print(f"✅ Created bucket: {BUCKET_NAME}")

In [None]:
# Identify this upload: a fixed model name plus a version string built from
# the current time (two-digit year, month, day, then HHMMSS).
model_name = 'alert-recommender'
model_version = format(datetime.now(), "%y-%m-%d-%H%M%S")

print(f"Model: {model_name}", f"Version: {model_version}", sep="\n")

In [None]:
# Check that the pickled model is on disk before doing anything else.
model_path = 'models/model.pkl'

if os.path.exists(model_path):
    # Report the file size in kilobytes.
    model_size = os.path.getsize(model_path)
    print(f"✅ Model file found: {model_size / 1024:.2f} KB")
else:
    raise FileNotFoundError(f"Model file not found: {model_path}. Make sure training completed successfully.")

In [None]:
# Read the pickled artifacts back into memory and show which keys they hold.
with open(model_path, 'rb') as model_file:
    model_artifacts = pickle.load(model_file)

artifact_keys = list(model_artifacts.keys())
print(f"Model artifacts keys: {artifact_keys}")

# Save KNNRecommender class as a Python module that MLServer can import.
# The class source is held in the string literal below and written verbatim
# to models/knn_recommender.py, so the text of the literal IS the module.
# NOTE(review): the generated predict() passes per-row lists, whose lengths
# depend on how many alert types reach the threshold, to
# np.array(..., dtype=object). NumPy may shape the result differently when all
# rows have equal length than when they differ — confirm consumers handle both.

# These two imports mirror the ones inside the generated module; they are not
# referenced by the visible notebook code outside the string literal.
from sklearn.base import BaseEstimator
import numpy as np

knn_module_code = '''"""
KNN Recommender Module - for MLServer import
"""
from sklearn.base import BaseEstimator
import numpy as np

class KNNRecommender(BaseEstimator):
    """Custom KNN wrapper that provides predict() for MLServer compatibility"""
    
    def __init__(self, knn_model, alert_labels, alert_types, threshold=0.4):
        self.knn_model = knn_model
        self.alert_labels = alert_labels
        self.alert_types = alert_types
        self.threshold = threshold
    
    def predict(self, X):
        """
        Predict recommendations for given features.
        Returns numpy array for MLServer compatibility.
        """
        k_neighbors = min(5, len(self.alert_labels))
        distances, indices = self.knn_model.kneighbors(X, n_neighbors=k_neighbors)
        
        # Generate recommendations for each input
        all_recommendations = []
        for idx_list in indices:
            similar_labels = self.alert_labels[idx_list]
            probabilities = similar_labels.mean(axis=0)
            
            recommendations = []
            for i, alert_type in enumerate(self.alert_types):
                if probabilities[i] >= self.threshold:
                    recommendations.append({
                        'alert_type': alert_type,
                        'probability': float(probabilities[i]),
                        'confidence': 'high' if probabilities[i] >= 0.7 else 'medium'
                    })
            
            all_recommendations.append(recommendations)
        
        # Return as numpy array for MLServer compatibility
        return np.array(all_recommendations, dtype=object)
    
    def kneighbors(self, X, n_neighbors=None):
        """Forward kneighbors to the underlying KNN model"""
        return self.knn_model.kneighbors(X, n_neighbors=n_neighbors)
'''

# Save the module file
# (exist_ok=True makes the directory creation a no-op when models/ exists)
os.makedirs('models', exist_ok=True)
with open('models/knn_recommender.py', 'w') as f:
    f.write(knn_module_code)

print("✅ Saved KNNRecommender module to models/knn_recommender.py")

# Import the class from the saved module.
# The class has to come from the knn_recommender module (not be defined
# inline) so that the pickled pipeline refers to it by module name.
import importlib
import sys

# Make models/ importable without stacking a duplicate sys.path entry every
# time this cell is re-run.
if 'models' not in sys.path:
    sys.path.insert(0, 'models')

# models/knn_recommender.py is written at runtime, so drop any directory
# listings the import system has cached before looking the module up.
importlib.invalidate_caches()

if 'knn_recommender' in sys.modules:
    # Cell re-run: reload so the class matches the file currently on disk
    # rather than a stale copy cached from an earlier execution.
    _knn_module = importlib.reload(sys.modules['knn_recommender'])
else:
    _knn_module = importlib.import_module('knn_recommender')
KNNRecommender = _knn_module.KNNRecommender

# Create KNN recommender wrapper around the artifacts loaded from the pickle
knn_recommender = KNNRecommender(
    knn_model=model_artifacts['knn_model'],
    alert_labels=model_artifacts['alert_labels'],
    alert_types=model_artifacts['alert_types'],
    threshold=0.4
)

# Assemble the sklearn Pipeline for MLServer: the scaler taken from the
# loaded artifacts, followed by the KNNRecommender wrapper.
pipeline_steps = [
    ('scaler', model_artifacts['scaler']),
    ('recommender', knn_recommender),
]
pipeline = Pipeline(steps=pipeline_steps)

# Serialize the pipeline (KNNRecommender is pickled by module reference).
pipeline_path = 'models/pipeline.pkl'
with open(pipeline_path, 'wb') as pipeline_file:
    pickle.dump(pipeline, pipeline_file)

# Report the size of the pickle and the names of the pipeline steps.
pipeline_size = os.path.getsize(pipeline_path)
step_names = [step_name for step_name, _ in pipeline.steps]
print(f"✅ Created sklearn Pipeline with KNNRecommender wrapper: {pipeline_size / 1024:.2f} KB")
print(f"   Pipeline steps: {step_names}")
print("✅ KNNRecommender will be imported from knn_recommender module by MLServer")

In [None]:
# Object keys: everything for this model is stored under "<model_name>/".
s3_model_path = f'{model_name}/'
pipeline_key = s3_model_path + 'pipeline.pkl'
module_key = s3_model_path + 'knn_recommender.py'

# Push the pickled pipeline first ...
print(f"Uploading pipeline to s3://{BUCKET_NAME}/{pipeline_key}")
s3_client.upload_file(Filename=pipeline_path, Bucket=BUCKET_NAME, Key=pipeline_key)
print("✅ Pipeline uploaded successfully")

# ... then the module that defines KNNRecommender, stored next to it.
print(f"Uploading KNNRecommender module to s3://{BUCKET_NAME}/{module_key}")
s3_client.upload_file(Filename='models/knn_recommender.py', Bucket=BUCKET_NAME, Key=module_key)
print("✅ KNNRecommender module uploaded successfully")

In [None]:
# Build model-settings.json for MLServer: the model name, the runtime
# implementation class, and the URI the pipeline pickle is loaded from.
model_settings = dict(
    name=model_name,
    implementation="mlserver_sklearn.SKLearnModel",
    parameters=dict(uri="/mnt/models/pipeline.pkl"),
)

settings_key = f'{s3_model_path}model-settings.json'
# Serialize once; the same text is uploaded and echoed below.
settings_json = json.dumps(model_settings, indent=2)

print(f"Uploading model-settings.json to s3://{BUCKET_NAME}/{settings_key}")

s3_client.put_object(Bucket=BUCKET_NAME, Key=settings_key, Body=settings_json)

print("✅ model-settings.json uploaded")
print("\nModel settings:")
print(settings_json)

In [None]:
# Verify uploads
def list_objects(prefix):
    """Print every object in the MinIO bucket whose key starts with ``prefix``.

    The listing is read through the ``list_objects_v2`` paginator, so results
    that span more than one response page are still printed in full.

    Args:
        prefix: Key prefix used to filter the objects in ``BUCKET_NAME``.

    Returns:
        None. One ``key (size bytes)`` line is printed per object, or
        ``No objects found`` when nothing matches.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    found_any = False
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=prefix):
        # A page without matches carries no 'Contents' key.
        for obj in page.get('Contents', []):
            found_any = True
            print(f"  {obj['Key']} ({obj['Size']} bytes)")
    if not found_any:
        print("  No objects found")


print(f"\nObjects in s3://{BUCKET_NAME}/{s3_model_path}:")
list_objects(s3_model_path)

In [None]:
# Create vars.txt for next notebook.
# The deployment notebook receives its inputs as key=value lines.
deployment_vars = [
    f'model_version={model_version}\n',
    f'model_name={model_name}\n',
    f's3_bucket={BUCKET_NAME}\n',
    f's3_model_path={s3_model_path}\n',
]
with open("vars.txt", "w") as vars_file:
    vars_file.writelines(deployment_vars)

# Read the file back so the cell output shows exactly what was written.
print("\n✅ vars.txt created with deployment variables:")
with open("vars.txt", "r") as vars_file:
    print(vars_file.read())

In [None]:
# Closing summary: where the model went, which version, and what to run next.
banner = "=" * 60
summary_lines = [
    "\n" + banner,
    "✅ Model Save Complete!",
    banner,
    f"\nModel uploaded to: s3://{BUCKET_NAME}/{pipeline_key}",
    f"Model version: {model_version}",
    f"MinIO endpoint: {MINIO_ENDPOINT}",
    "\nNext step: Deploy to OpenShift AI (run notebook 3)",
]
print("\n".join(summary_lines))