# MLOps: Automating the Machine Learning Stack

Visualise the data

In [None]:
import seaborn as sns


# Load the "penguins" example dataset through seaborn.
penguins = sns.load_dataset("penguins")

# Flipper length against body mass, coloured by species and with the
# marker style chosen by island.
scatter_encodings = {
    "x": "flipper_length_mm",
    "y": "body_mass_g",
    "hue": "species",
    "style": "island",
}
sns.scatterplot(data=penguins, **scatter_encodings)

Tidy and split the data

In [None]:
from sklearn.model_selection import train_test_split


# Fixed seed so the train/test split -- and therefore the accuracy reported
# further down -- is the same on every Restart & Run All.
SEED = 42

# Drop rows with any missing value. Rebinding the name (rather than using
# inplace=True) leaves the originally loaded DataFrame object untouched while
# `penguins` still refers to the cleaned data afterwards.
penguins = penguins.dropna()

# Features: one categorical column (island) and two numeric measurements.
X = penguins[["island", "flipper_length_mm", "body_mass_g"]]
# Target: the species label to predict.
y = penguins["species"]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

Set up and train the model

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    OneHotEncoder,
    StandardScaler,
)


# Column groups, split by the kind of preprocessing each one receives
numeric_features = ["flipper_length_mm", "body_mass_g"]
categorical_features = ["island"]

# Scale the numeric columns and one-hot encode the categorical column
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(), categorical_features),
    ]
)

# Preprocessing followed by a k-nearest-neighbours classifier, fitted on the
# training split
model = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("model", KNeighborsClassifier()),
    ]
)
model.fit(X_train, y_train)

Make predictions using the model

In [None]:
# Predict a species label for every row of the held-out test features.
y_pred = model.predict(X_test)

Accuracy for unseen test data

In [None]:
from sklearn.metrics import accuracy_score


# Compare the predictions with the true test labels; as the last expression
# in the cell, the score is displayed as the cell output.
accuracy_score(y_true=y_test, y_pred=y_pred)

Create a Vetiver model

In [None]:
import vetiver


# Wrap the fitted pipeline in a VetiverModel, giving it a name, a description
# and the test features as prototype data.
v_model = vetiver.VetiverModel(
    model,
    model_name="k-nn",
    prototype_data=X_test,
    description="penguin-species",
)

Examine Vetiver model

In [None]:
# Show the model's description and its metadata, one per line.
print(v_model.description, v_model.metadata, sep="\n")

Deploy model locally

In [None]:
from vetiver import VetiverAPI


# Build the API object around the Vetiver model, with prototype checking
# enabled.
app = VetiverAPI(v_model, check_prototype=True)

View the running model

In [None]:
# Start the API on port 8080 (the port the example client below connects to).
app.run(port=8080)

You can try making predictions via the user interface for different input data. If working locally, you can also open a separate terminal, run `python` to start the Python console, and run queries programmatically:

```python
from vetiver.server import predict, vetiver_endpoint
import pandas as pd


endpoint = vetiver_endpoint("http://127.0.0.1:8080/predict")

new_penguin_dict = {
    "island": ["Biscoe"],
    "flipper_length_mm": [220],
    "body_mass_g": [5500],
}
new_penguin = pd.DataFrame(new_penguin_dict)
predict(endpoint, new_penguin)
```