# Model Persistence

Learn how to save and load trained models with metadata.

**Topics covered:**
- Saving models with metadata
- Loading saved models
- Different formats (joblib, pickle)
- Versioning and tracking

In [1]:
import os

import numpy as np

import mkyz

mkyz package initialized. Version: 0.2.1


## 1. Train a Model

In [2]:
# Run the Titanic CSV through mkyz's data preparation. The returned tuple is
# kept whole in `data` because mkyz.train takes it as-is further down.
data = mkyz.prepare_data(
    'data/titanic.csv', target_column='Survived', test_size=0.2, random_state=42
)

# Unpack the eight items so later cells can use the individual pieces
(
    X_train, X_test,
    y_train, y_test,
    df, target, num_cols, cat_cols,
) = data

# Fit a Random Forest classifier (model='rf') with a pinned random_state
rf_settings = {'n_estimators': 100, 'random_state': 42}
model = mkyz.train(data, task='classification', model='rf', **rf_settings)

print("Model trained successfully!")

INFO:mkyz.data_processing:First 5 rows of the dataset:
INFO:mkyz.data_processing:   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803 

Model trained successfully!


In [3]:
# Score the model on the test split; the save cell later reads
# metrics['accuracy'] and metrics['f1_score'] for the stored metadata
predictions = model.predict(X_test)
metrics = mkyz.classification_metrics(y_test, predictions)

print("Model Performance:")
for metric_name, score in metrics.items():
    # Four decimal places for every metric
    print(f"  {metric_name}: {score:.4f}")

Model Performance:
  accuracy: 0.8000
  precision: 0.7980
  recall: 0.8000
  f1_score: 0.7915
  mcc: 0.5405
  cohen_kappa: 0.5283


## 2. Save Model with Metadata

In [4]:
# Ensure the output directory exists. exist_ok=True makes this a no-op when
# the directory is already present, so no separate existence check is needed
# and the cell can be re-run safely.
os.makedirs('models', exist_ok=True)

# Save the model with metadata. The metric values come from the `metrics`
# dict computed in the evaluation cell.
model_path = mkyz.save_model(
    model,
    'models/titanic_classifier',
    format='joblib',
    metadata={
        'accuracy': metrics['accuracy'],
        'f1_score': metrics['f1_score'],
        'version': '1.0.0',
        'description': 'Titanic survival prediction model',
        # Column names can only be recorded when X_train exposes `.columns`;
        # otherwise the placeholder string 'numpy array' is stored instead.
        'features': list(X_train.columns) if hasattr(X_train, 'columns') else 'numpy array',
        'n_training_samples': len(X_train)
    }
)

print(f"Model saved to: {model_path}")

Model saved to: c:\Users\mmust\Desktop\mkyz\examples\models\titanic_classifier.joblib


## 3. Load Model and Metadata

In [5]:
# Load the model together with the metadata stored alongside it
loaded_model, metadata = mkyz.load_model(
    'models/titanic_classifier.joblib',
    return_metadata=True
)

print("Model loaded successfully!")
print("\nMetadata:")
print("=" * 50)
for key, value in metadata.items():
    # A list of feature names is summarised as a count. Any other value
    # (such as the 'numpy array' placeholder written at save time) is printed
    # unchanged, so it no longer gets a misleading " features" suffix.
    if key == 'features' and isinstance(value, list):
        print(f"  {key}: {len(value)} features")
    else:
        print(f"  {key}: {value}")

Model loaded successfully!

Metadata:
  accuracy: 0.8
  f1_score: 0.7914614121510674
  version: 1.0.0
  description: Titanic survival prediction model
  features: numpy array features
  n_training_samples: 576
  loaded_at: 2026-01-15T20:03:52.760060
  saved_at: 2026-01-15T20:03:49.513369
  mkyz_version: 0.2.1


## 4. Verify Loaded Model

In [6]:
# Make predictions with the reloaded model on the same test data
loaded_predictions = loaded_model.predict(X_test)

# np.array_equal is True only when both arrays have the same shape and elements
# (numpy is imported in the notebook's import cell)
matches = np.array_equal(predictions, loaded_predictions)

print(f"Predictions match original: {matches}")

# Fail loudly so that a Run All cannot pass silently after a bad round-trip
assert matches, "Loaded model predictions differ from the original model"

Predictions match original: True


In [7]:
# Recompute the classification metrics from the reloaded model's predictions
loaded_metrics = mkyz.classification_metrics(y_test, loaded_predictions)

print("\nLoaded Model Performance:")
print("=" * 40)
for metric_name, score in loaded_metrics.items():
    # Four decimal places for every metric
    print(f"  {metric_name}: {score:.4f}")


Loaded Model Performance:
  accuracy: 0.8000
  precision: 0.7980
  recall: 0.8000
  f1_score: 0.7915
  mcc: 0.5405
  cohen_kappa: 0.5283


## 5. Different Save Formats

In [8]:
# Metadata stored with the pickle copy: the accuracy measured earlier and a version tag
pickle_metadata = {'accuracy': metrics['accuracy'], 'version': '2.0.0'}

# Save the same trained model again, this time requesting format='pickle'
pickle_path = mkyz.save_model(
    model, 'models/titanic_classifier_v2', format='pickle', metadata=pickle_metadata
)

print(f"Pickle model saved to: {pickle_path}")

Pickle model saved to: c:\Users\mmust\Desktop\mkyz\examples\models\titanic_classifier_v2.pkl


In [9]:
# List every entry in the 'models' directory with its size in bytes
print("\nSaved Models in 'models' directory:")
print("=" * 40)
# os.listdir returns entries in arbitrary order; sorting keeps the listing
# identical from one run to the next.
for file_name in sorted(os.listdir('models')):
    file_path = os.path.join('models', file_name)
    size = os.path.getsize(file_path)
    print(f"  {file_name} ({size:,} bytes)")


Saved Models in 'models' directory:
  my_model.joblib (3,107,630 bytes)
  titanic_classifier.joblib (3,143,751 bytes)
  titanic_classifier_v2.pkl (3,132,201 bytes)


## 6. Load Model Without Metadata

In [10]:
# Call load_model without return_metadata; the result is used directly as the model
simple_model = mkyz.load_model('models/titanic_classifier.joblib')

# Predict on just the first five test rows as a quick smoke check
first_rows = X_test[:5]
simple_predictions = simple_model.predict(first_rows)
print("Quick predictions:", simple_predictions)

Quick predictions: [1 0 0 0 0]


## Summary

In this notebook, we learned:

1. **Save Models** - Save trained models with custom metadata
2. **Load Models** - Load models and access stored metadata
3. **Formats** - Use joblib or pickle format
4. **Verification** - Verify that loaded models produce the same results as the original

### Best Practices

- Always include model version in metadata
- Store evaluation metrics for tracking
- Document which features were used
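- Prefer joblib for models that hold large NumPy arrays (it is typically faster than pickle for these)
- Only load model files from sources you trust: both joblib and pickle can execute arbitrary code while loading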