# MLServe.com core features

In a few lines of code we will:
1. deploy an xgboost model
2. make predictions
3. monitor endpoint health and model performance
4. provide feedback for online metrics estimation
5. run model A/B test and compare multiple versions

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn.datasets import load_iris
from xgboost import XGBClassifier
from mlserve_sdk.client import MLServeClient
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
def generate_churn_data(n_samples=1000, missing_frac=0.05, random_state=42):
    """
    Generate synthetic churn dataset for ML benchmarking.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    missing_frac : float
        Fraction of missing values to inject per column (0–1).
    random_state : int
        Seed for reproducibility.

    Returns
    -------
    X : pd.DataFrame
        Feature matrix with categorical & numerical features.
    y : pd.Series
        Binary churn target (0 = no churn, 1 = churn).
    """
    np.random.seed(random_state)

    # Generate synthetic features
    data = {
        "customer_id": np.arange(1, n_samples+1),
        "age": np.random.randint(18, 80, n_samples),
        "tenure_months": np.random.randint(1, 72, n_samples),
        "monthly_charges": np.round(np.random.uniform(20, 120, n_samples), 2),
        "total_charges": np.round(np.random.uniform(20, 8000, n_samples), 2),
        "contract_type": np.random.choice(
            ["Month-to-month", "One year", "Two year"], n_samples, p=[0.6, 0.25, 0.15]
        ),
        "payment_method": np.random.choice(
            ["Electronic check", "Mailed check", "Bank transfer", "Credit card"], n_samples
        ),
        "internet_service": np.random.choice(
            ["DSL", "Fiber optic", "No"], n_samples, p=[0.3, 0.5, 0.2]
        ),
        "gender": np.random.choice(["Male", "Female"], n_samples),
        "has_phone_service": np.random.choice(["Yes", "No"], n_samples, p=[0.9, 0.1]),
        "num_dependents": np.random.poisson(1, n_samples),  # ~0-4 mostly
    }

    X = pd.DataFrame(data)

    # Inject missing values
    if missing_frac > 0:
        for col in X.columns.drop("customer_id"):
            X.loc[X.sample(frac=missing_frac, random_state=random_state).index, col] = np.nan

    # Churn probability (synthetic rules + noise)
    prob_churn = (
        0.3 * (X["contract_type"] == "Month-to-month").astype(float) +
        0.25 * (X["internet_service"] == "Fiber optic").astype(float) +
        0.15 * (X["payment_method"] == "Electronic check").astype(float) +
        0.002 * (X["monthly_charges"].fillna(60)) +
        0.01 * (X["num_dependents"].fillna(0) == 0).astype(float) +
        np.random.normal(0, 0.1, n_samples)
    )
    prob_churn = 1 / (1 + np.exp(-prob_churn))  # sigmoid

    y = pd.Series(np.random.binomial(1, prob_churn), name="churn")

    return X, y

In [4]:
X, y = generate_churn_data(n_samples=1000, missing_frac=0.05)
X.drop(columns=["customer_id"], inplace=True)
for col in ["contract_type", "payment_method", "internet_service", "gender", "has_phone_service"]:
    X[col] = X[col].astype("category")

model = XGBClassifier(enable_categorical=True)
model.fit(X, y)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,True


In [5]:
USERNAME = os.getenv("USERNAME")
TOKEN = os.getenv("TOKEN")

client = MLServeClient()
client.login(USERNAME, TOKEN)

In [6]:
try:
    lv=client.get_latest_version("churn")
    next_version=lv["next_version"]
except:
    next_version="v1"

print(next_version)

v3


In [7]:
client.deploy(
    model=model,
    name="churn",
    version=next_version,
    features=list(X),
    background_df=X.sample(500),
    metrics={'accuracy':model.score(X, y)},
    task_type='classification'
)

{'predict_url': 'https://mlserve.com/api/v1/predict/churn/v3'}

In [31]:
%%time

TEST_DATA = {
    "features": X.columns.tolist(),
    "inputs": X.values.tolist()
}
preds = client.predict("churn", next_version, TEST_DATA, explain=True)
print("Explanations:", preds['explanations'][0])

Explanations: [{'feature': 'age', 'value': 56, 'shap_value': 0.554, 'impact': 'positive'}, {'feature': 'monthly_charges', 'value': 64.64, 'shap_value': 0.395, 'impact': 'positive'}, {'feature': 'payment_method', 'value': 'Mailed check', 'shap_value': 0.344, 'impact': 'positive'}]
CPU times: user 55.6 ms, sys: 7.97 ms, total: 63.6 ms
Wall time: 3.23 s


In [9]:
preds.keys()

dict_keys(['predictions', 'explanations', 'prediction_ids'])

In [11]:
%%time

TEST_DATA = {
    "features": X.columns.tolist(),
    "inputs": X.values.tolist()
}
preds = client.predict_weighted("churn", TEST_DATA)
# you can pass entity ids and each entity id 
# will be always served by the same model in future predictions (sticky bucketing)
preds = client.predict_weighted("churn", TEST_DATA, entity_ids=["user-133"]*len(X))

CPU times: user 101 ms, sys: 8.86 ms, total: 110 ms
Wall time: 4.48 s


In [35]:
# Predict using a Redis DB as a feature store for quick lookups

TEST_DATA = {
    "inputs": ['1', '2', '3'] # the endpoint connections to the feature store and loads features for these ids
}
preds = client.predict_weighted("churn", TEST_DATA, fs_url="redis://your-redis-fs:port", fs_entity_name='entity')

In [12]:
metrics = client.get_metrics("churn", next_version, as_dataframe=True)
metrics

Unnamed: 0_level_0,requests,predictions,throughput_rps,prediction_rps,avg_latency_ms,p50_latency_ms,p95_latency_ms,p99_latency_ms,avg_latency_per_element_ms,p50_latency_per_element_ms,p95_latency_per_element_ms,p99_latency_per_element_ms,error_rate
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-10-08 19:00:00+00:00,6,3000,0.001667,0.833333,727.61296,848.700684,1084.149318,1087.485561,1.455226,1.697401,2.168299,2.174971,0.0


In [13]:
d = client.get_data_quality("churn", next_version, as_dataframe=True)

In [14]:
d['missingness']

Unnamed: 0,feature,missing_fraction
0,age,0.047281
1,tenure_months,0.047281
2,monthly_charges,0.047281
3,total_charges,0.047281
4,num_dependents,0.047281
5,contract_type,0.047281
6,payment_method,0.047281
7,internet_service,0.047281
8,gender,0.047281
9,has_phone_service,0.047281


In [15]:
d['drift']

Unnamed: 0,feature,pct_mean_diff,zscore_outlier_fraction,topk_shift,status
0,age,0.000572,0.0,,ok
1,tenure_months,0.006111,0.0,,ok
2,monthly_charges,0.000695,0.0,,ok
3,total_charges,0.002067,0.0,,ok
4,num_dependents,0.021155,0.0,,ok
5,contract_type,,,0.406541,alert
6,payment_method,,,0.198561,alert
7,internet_service,,,0.341921,alert
8,gender,,,0.379875,alert
9,has_phone_service,,,0.64026,alert


In [16]:
d['outliers']

Unnamed: 0,feature,zscore_fraction,status
0,age,0.0,ok
1,tenure_months,0.0,ok
2,monthly_charges,0.0,ok
3,total_charges,0.0,ok
4,num_dependents,0.0,ok
5,contract_type,,not_applicable
6,payment_method,,not_applicable
7,internet_service,,not_applicable
8,gender,,not_applicable
9,has_phone_service,,not_applicable


In [32]:
# let's go back to predictions we made above
# each prediction carries a prediction id
# you can use this id to send feedback about the prediction
test_id=preds["prediction_ids"][:1][0]

# we provide the true value of the prediction which we receive after the prediction is made
# we also provide the associated reward of the prediction, i.e. the business value
feedback=[
    {"prediction_id":test_id, "true_value":1, "reward":10}
]
client.send_feedback(feedback)

{'status': 'ok', 'updated': 1, 'not_found': []}

In [33]:
client.get_online_metrics("churn", next_version, window_hours=24, as_dataframe=True)

Unnamed: 0,model,version,window_hours,n,n_supervised,mean_reward,n_rewards
0,churn,v3,24,2692,1,10.0,1


In [34]:
# now let's give feedback for 10 more predictions
test_ids=preds["prediction_ids"][:10]

feedback=[]
for tid in test_ids:
    val=np.random.randint(0, 2)
    r=np.random.normal(10, 7)
    feedback.append({"prediction_id":tid, "true_value":val, "reward":r})

client.send_feedback(feedback)

{'status': 'ok', 'updated': 10, 'not_found': []}

In [35]:
client.get_online_metrics("churn", next_version, window_hours=24, as_dataframe=True)

Unnamed: 0,model,version,window_hours,n,n_supervised,accuracy,f1,brier,mean_reward,n_rewards
0,churn,v3,24,2692,10,0.9,0.888889,0.1,9.062107,10


## Comparing with a newer version

In [36]:
lv=client.get_latest_version("churn")
next_version=lv["next_version"]

feats=list(X)

client.deploy(
    model=model,
    name="churn",
    version=next_version,
    features=feats,
    background_df=X.sample(200),
    metrics={'accuracy':0.2},
    task_type='classification'
)

{'predict_url': 'https://mlserve.com/api/v1/predict/churn/v4'}

In [37]:
%%time

TEST_DATA = {
    "features": X.columns.tolist(),
    "inputs": X.values.tolist()
}
preds = client.predict("churn", next_version, TEST_DATA)
#print("Predictions:", preds['predictions'])

CPU times: user 48.5 ms, sys: 4.43 ms, total: 52.9 ms
Wall time: 1.57 s


In [38]:
# now let's give feedback for a few predictions but as true values we will put the predictions (accuracy 100%)
test_ids=preds["prediction_ids"][1:30]
true_values=preds['predictions'][1:30]

feedback=[]
for tid, val in zip(test_ids,true_values):
    r=np.random.normal(10, 7)
    feedback.append({"prediction_id":tid, "true_value":val, "reward":r})

client.send_feedback(feedback)

{'status': 'ok', 'updated': 29, 'not_found': []}

In [39]:
client.get_online_metrics("churn", next_version, window_hours=24, as_dataframe=True)

Unnamed: 0,model,version,window_hours,n,n_supervised,accuracy,f1,brier,mean_reward,n_rewards
0,churn,v4,24,1000,29,1.0,1.0,0.0,9.688586,29


In [40]:
client.get_model_evolution("churn", as_dataframe=True)

Unnamed: 0,version,deployed_at,accuracy,f1,brier,accuracy_delta_pct,f1_delta_pct,brier_delta_pct
0,v1,2025-10-08T19:05:02.718689+00:00,0.527273,0.551724,0.472727,,,
1,v2,2025-10-08T19:15:38.047186+00:00,,,,,,
2,v3,2025-10-08T19:46:22.912614+00:00,0.9,0.888889,0.1,,,
3,v4,2025-10-08T20:07:08.579906+00:00,1.0,1.0,0.0,11.111111,12.5,-100.0


## AB testing model versions

In [41]:
client.get_abtests("churn")

[]

In [42]:
# Let's create an ab test between versions 1 and 2
# observe how the first version gets a zero weight
# now when you use predict_weighted you will randomly get a prediction from each model based on these probabilities
client.configure_abtest("churn", weights={"v1":0.5, "v2":0.5})

{'status': 'ok',
 'model': 'churn',
 'weights': {'v1': 0.5, 'v2': 0.5, 'v3': 0.0, 'v4': 0.0}}

In [43]:
# only 1 ab test so far
client.get_abtests("churn")

[{'id': 1,
  'created_at': '2025-10-08T20:08:35.344722+00:00',
  'weights': {'v1': 0.5, 'v2': 0.5}}]

In [44]:
# this endpoint will return predictions based on probabilities assigned in the ab test configuration
preds=[]
for i in range(10):
    TEST_DATA = {
        "features": X.columns.tolist(),
        "inputs": [X.values[i,:].tolist()]
    }
    pred = client.predict_weighted("churn", TEST_DATA)
    preds.append(pred)

In [45]:
feedback=[]
for pred in preds:
    if pred['versions']=='v1':
        val=1
        r=1
    else:
        val=pred['predictions'][0]
        r=10
    feedback.append({"prediction_id":pred['prediction_ids'][0], "true_value":val, "reward":r})

client.send_feedback(feedback)

{'status': 'ok', 'updated': 10, 'not_found': []}

In [47]:
client.get_model_evolution("churn", as_dataframe=True)

Unnamed: 0,version,deployed_at,accuracy,f1,brier,accuracy_delta_pct,f1_delta_pct,brier_delta_pct
0,v1,2025-10-08T19:05:02.718689+00:00,0.54386,0.580645,0.45614,,,
1,v2,2025-10-08T19:15:38.047186+00:00,,,,,,
2,v3,2025-10-08T19:46:22.912614+00:00,0.9,0.888889,0.1,,,
3,v4,2025-10-08T20:07:08.579906+00:00,1.0,1.0,0.0,11.111111,12.5,-100.0


In [48]:
# now I am sure I want v2 in prod. Let's adjust the ab test
client.configure_abtest("churn", weights={"v2":1})

{'status': 'ok',
 'model': 'churn',
 'weights': {'v3': 0.0, 'v4': 0.0, 'v1': 0.0, 'v2': 1.0}}

In [49]:
# I don't need v1 anymore. Let's stop it
client.stop_model("churn", "v1", remove=True)

{'status': 'ok',
 'message': 'Successfully stopped churn:v1 and removed its container and image'}