# OCI Data Science Model Catalog - Model Version Set Automation

- conda environment: generalml_p311_cpu_x86_64_v1
- Author: Assaf Rabinowicz
- Date: 14Jan2026 

# Notebook Description

* This notebook demonstrates how to automate the model registration process using a job.
* The job script is defined in a separate notebook: version_set_registration_job.ipynb.
* This notebook covers the use of model version sets, including:
    1. Defining a version set
    2. Adding multiple model versions to a version set via job runs
* version_set_registration_job.ipynb also includes logic for defining a retention mechanism.
* The notebook relies heavily on the ADS SDK.

# Package imports and resource principal authentication

In [None]:
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np

import io

import ads
from ads.model import ModelVersionSet
from ads.jobs import Job, DataScienceJob, PythonRuntime
import oci

In [None]:
# Configure ADS to authenticate with the resource principal
# (no API key / config file is referenced anywhere in this notebook).
ads.set_auth("resource_principal")

# Simulating an ongoing data-batch pipeline

In [None]:
# Fetch the OpenML "adult" dataset (version 2) as a pandas-backed bunch.
# Dataset listing:
# https://www.openml.org/search?type=data&sort=version&status=any&order=asc&exact_name=adult
data = fetch_openml(
    name="adult",
    version=2,
    as_frame=True,
)

# Full table (features + target) as a single DataFrame.
df = data.frame

In [None]:
# Build the modelling frame from the untouched download instead of mutating it
# in place. The previous in-place version was not re-runnable: a second
# execution raised KeyError on the already-dropped 'fnlwgt' column, and
# re-encoding an already-encoded 'class' column would have turned every label
# into 0. Deriving `df` from `data.frame` makes this cell idempotent.
df = (
    data.frame
    # dropping 'sampling weights' column for simplification
    .drop(columns=['fnlwgt'])
    # binary target: 1 when the income class is '>50K', otherwise 0
    # ('class' is a Python keyword, hence the dict unpacking)
    .assign(**{'class': lambda frame: (frame['class'] == '>50K').astype(int)})
)

In [None]:
# Split the rows into three consecutive, near-equal "daily" batches.
# The row positions are split rather than the DataFrame itself: calling
# np.array_split directly on a DataFrame goes through DataFrame.swapaxes,
# which is deprecated and emits a FutureWarning in recent pandas releases.
# The resulting partition of rows is the same as before.
df_day1, df_day2, df_day3 = (
    df.iloc[positions]
    for positions in np.array_split(np.arange(len(df)), 3)
)

In [None]:
# Bucket that receives the simulated data batches.
bucket_name = 'filesdemo'

# Object Storage client authenticated through a resource-principal signer;
# an empty config dict is passed alongside the signer.
signer = oci.auth.signers.get_resource_principals_signer()
object_storage = oci.object_storage.ObjectStorageClient(config={}, signer=signer)

# Look the tenancy namespace up once and reuse it for every upload.
namespace = object_storage.get_namespace().data

In [None]:
def upload_df_as_csv(df, object_name, *, client=None, namespace_name=None, bucket=None):
    """Serialize a DataFrame to CSV and upload it as a single object.

    The frame is written without its index, encoded as UTF-8 and sent with
    content type ``text/csv``. The destination URI is printed before the
    upload starts.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload.
    object_name : str
        Name of the object to create inside the bucket.
    client : optional
        Object exposing ``put_object(...)``. Defaults to the notebook-level
        ``object_storage`` client.
    namespace_name : str, optional
        Object Storage namespace. Defaults to the notebook-level ``namespace``.
    bucket : str, optional
        Target bucket. Defaults to the notebook-level ``bucket_name``.

    Returns
    -------
    Whatever ``client.put_object`` returns.
    """
    # Fall back to the notebook globals only when no override is given, so
    # existing two-argument calls behave exactly as before.
    client = object_storage if client is None else client
    namespace_name = namespace if namespace_name is None else namespace_name
    bucket = bucket_name if bucket is None else bucket

    csv_buffer = io.BytesIO(df.to_csv(index=False).encode("utf-8"))

    print(f"Uploading to oci://{bucket}@{namespace_name}/{object_name}")

    return client.put_object(
        namespace_name=namespace_name,
        bucket_name=bucket,
        object_name=object_name,
        put_object_body=csv_buffer,
        content_type="text/csv",
    )

# Creating a Model Version Set

In [None]:
# Settings for the version set that groups every retrained model of this demo.
version_set_settings = {
    "name": "adults-income-version-set-demo",
    "description": "automatated data pipeline refitting the model with new data",
    "freeform_tags": {"project": "IncomePrediction"},
}

# Define the version set locally, register it in the model catalog,
# then print its representation.
mvs = ModelVersionSet(**version_set_settings)
mvs.create()
print(mvs)

# Creating and Running the Job

In [None]:
# Infrastructure: logging target, compute shape and block storage for the job.
infrastructure = (
    DataScienceJob()
    .with_log_group_id("<enter-log-group-ocid-here>")
    .with_shape_name("VM.Standard.E4.Flex")
    .with_shape_config_details(memory_in_gbs=4, ocpus=1)
    .with_block_storage_size(50)  # original author's note: 50 is the minimum
)

# Runtime: conda environment, the notebook used as job source, and the
# environment variable through which the data batch name is passed.
runtime = (
    PythonRuntime()
    .with_service_conda("generalml_p311_cpu_x86_64_v1")
    .with_source("/home/datascience/code/model_catalog/vesion_set_automation/version_set_registration_job.ipynb")
    # Placeholder only; the job run cell overrides DATANAME via env_var.
    .with_environment_variable(DATANAME="PLACEHOLDER")
)

# Assemble the job definition and register it.
job = (
    Job(name="Training adult income with new data")
    .with_infrastructure(infrastructure)
    .with_runtime(runtime)
)
job.create()

In [None]:
# Name of the data batch this run should train on; handed to the job through
# the DATANAME environment variable.
data_name = 'df_day2.csv'

# Upload the batch before starting the run. No other cell in this notebook
# calls upload_df_as_csv, so on a fresh "Restart & Run All" the object would
# otherwise never reach the bucket.
# NOTE(review): assumes the job script reads the object called DATANAME from
# this same bucket -- confirm against version_set_registration_job.ipynb.
upload_df_as_csv(df_day2, data_name)

# Start the run with the real batch name, then stream its logs until it ends.
job_run = job.run(
    name="use job folder",
    env_var={'DATANAME': data_name},
)

job_run.watch()