# Model Registry

In [1]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.17.5-py3-none-macosx_11_0_arm64.whl.metadata (10 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.11.0-py2.py3-none-any.whl.metadata (14 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp312-cp312-macosx_10_9_universal2.whl.metadata (9.9 kB)
Downloading wandb-0.17.5-py3-none-macosx_11_0_arm64.whl (6.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Downloading sentry_sdk-2.11.0-py2.py3-none-any.whl (303 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.6/303.6 kB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading setproctitle-1.3.3-cp312-cp312-macosx_10_9_universal2.whl (16 kB)
I

# Save Model to Registry

In [36]:
%%writefile save_model_to_registry.py
# save_model_to_registry.py
import os
from argparse import ArgumentParser

import pandas as pd
import psycopg2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib
import wandb

# 0. parse CLI arguments
# Done before anything else so that `--help` or an invalid flag exits without
# creating a W&B run, querying the database, or training a model.
parser = ArgumentParser()
parser.add_argument("--model-name", dest="model_name", type=str, default="sk_model")
args = parser.parse_args()

# 1. set wandb environments
run = wandb.init(project="sklearn", entity="seongyeonkim")

# 2. get data
# Connection settings may be overridden with the PG* environment variables;
# the fallbacks are the values this script previously hardcoded, so running it
# with none of them set behaves as before.
db_connect = psycopg2.connect(
    user=os.environ.get("PGUSER", "myuser"),
    password=os.environ.get("PGPASSWORD", "mypassword"),
    host=os.environ.get("PGHOST", "localhost"),
    port=int(os.environ.get("PGPORT", "5432")),
    database=os.environ.get("PGDATABASE", "mydatabase"),
)
try:
    # Newest 100 rows of iris_data (highest ids first).
    df = pd.read_sql("SELECT * FROM iris_data ORDER BY id DESC LIMIT 100", db_connect)
finally:
    # Release the connection even if the query fails; everything below works
    # on the in-memory DataFrame only.
    db_connect.close()

# Every column except the bookkeeping ones and the label is used as a feature.
X = df.drop(["id", "timestamp", "target"], axis="columns")
y = df["target"]
# Fixed random_state makes the 80/20 split reproducible for identical input rows.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, random_state=2022)

# 3. model development and train
model_pipeline = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
model_pipeline.fit(X_train, y_train)

train_pred = model_pipeline.predict(X_train)
valid_pred = model_pipeline.predict(X_valid)

train_acc = accuracy_score(y_true=y_train, y_pred=train_pred)
valid_acc = accuracy_score(y_true=y_valid, y_pred=valid_pred)

print("Train Accuracy :", train_acc)
print("Valid Accuracy :", valid_acc)

# Log metrics to wandb
wandb.log({"train_accuracy": train_acc, "valid_accuracy": valid_acc})

# 4. save model
# The fitted pipeline (scaler + classifier) is serialized to
# "<model-name>.joblib" in the current working directory.
model_path = f"{args.model_name}.joblib"
joblib.dump(model_pipeline, model_path)

# Log the model to wandb
wandb.save(model_path)

# The run path ("entity/project/run_id") identifies where the model file was
# uploaded; wandb.restore(..., run_path=...) needs it to fetch the file later.
print("W&B run path   :", run.path)

# 5. save data
# Snapshot of the exact rows used for this run.
df.to_csv("data.csv", index=False)

# Finish the wandb run
wandb.finish()

Writing save_model_to_registry.py


## Load Model and Predict (`load_and_predict.py`)

In [35]:
%%writefile load_and_predict.py
# load_and_predict.py
import os
import pandas as pd
import psycopg2
# from sklearn.pipeline import Pipeline

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVC
import joblib
import wandb

# 0. set wandb environments
wandb.init(project="sklearn", entity="seongyeonkim")

# 1. download model from wandb
# wandb.restore() looks the file up in the *current* run unless `run_path`
# ("entity/project/run_id") points at another one. Set the script-specific
# WANDB_MODEL_RUN_PATH environment variable to the run that uploaded the model;
# when it is unset, run_path stays None and the current-run lookup is used.
model_path = "sk_model.joblib"
run_path = os.environ.get("WANDB_MODEL_RUN_PATH")
restored_file = wandb.restore(model_path, run_path=run_path)
if restored_file is None:
    raise FileNotFoundError(
        f"Could not restore {model_path!r} from W&B "
        f"(run_path={run_path!r}); set WANDB_MODEL_RUN_PATH to the "
        "entity/project/run_id of the run that saved the model."
    )
# restore() returns an open file handle; only its location is needed here.
restored_file.close()

# 2. load the model
# Load from the path restore() actually wrote to, not from a same-named file
# that may happen to sit in the current working directory.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only restore models from runs you trust.
model_pipeline = joblib.load(restored_file.name)

# 3. get new data for prediction
# Connection settings may be overridden with the PG* environment variables;
# the fallbacks are the values this script previously hardcoded.
db_connect = psycopg2.connect(
    user=os.environ.get("PGUSER", "myuser"),
    password=os.environ.get("PGPASSWORD", "mypassword"),
    host=os.environ.get("PGHOST", "localhost"),
    port=int(os.environ.get("PGPORT", "5432")),
    database=os.environ.get("PGDATABASE", "mydatabase"),
)
try:
    # Newest 100 rows of iris_data (highest ids first).
    df = pd.read_sql("SELECT * FROM iris_data ORDER BY id DESC LIMIT 100", db_connect)
finally:
    # Release the connection even if the query fails.
    db_connect.close()

X = df.drop(["id", "timestamp", "target"], axis="columns")
y = df["target"]
# Fixed random_state makes the 80/20 split reproducible for identical input rows.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, random_state=2022)

# 4. predict with the restored model
# The model is deliberately NOT re-fitted here: fitting would replace the
# restored weights and the scores below would no longer describe the model
# that was downloaded.
train_pred = model_pipeline.predict(X_train)
valid_pred = model_pipeline.predict(X_valid)

train_acc = accuracy_score(y_true=y_train, y_pred=train_pred)
valid_acc = accuracy_score(y_true=y_valid, y_pred=valid_pred)

print("Train Accuracy :", train_acc)
print("Valid Accuracy :", valid_acc)

# Finish the wandb run
wandb.finish()

Overwriting load_and_predict.py
