# OCI Data Science Model Catalog - Model Version Set Automation

- conda environment: generalml_p311_cpu_x86_64_v1
- Author: Assaf Rabinowicz
- Date: 14Jan2026 

# Notebook Description

* This notebook demonstrates how to automate the model registration process using a job.
* The job script is defined in a separate notebook: version_set_registration_job.ipynb.
* This notebook covers the use of model version sets, including:
1. Defining a version set
2. Adding multiple model versions to a version set via job runs
* version_set_registration_job.ipynb also includes logic for defining a retention mechanism.
* The notebook heavily relies on the ADS SDK.

# Package imports and resource principal authentication

In [1]:
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np

import io

import ads
from ads.model import ModelVersionSet
from ads.jobs import Job, DataScienceJob, PythonRuntime
import oci

In [2]:
# Have ADS authenticate with the resource principal; no credentials are stored in this notebook.
ads.set_auth(auth="resource_principal")

# Simulating an ongoing pipeline of data batches

In [3]:
# Download version 2 of the OpenML "adult" dataset, requesting pandas objects (as_frame=True).
data = fetch_openml(name="adult", version=2, as_frame=True) # https://www.openml.org/search?type=data&sort=version&status=any&order=asc&exact_name=adult
# Work with the dataset as one DataFrame; later cells read its 'fnlwgt' and 'class' columns.
df = data.frame

In [4]:
# Build the modelling frame from the untouched download instead of mutating it in place,
# so this cell gives the same result every time it is run. (An in-place drop raises
# KeyError on a second run, and re-encoding an already-encoded 'class' column would
# silently turn every label into 0.)
df = data.frame.drop(columns=['fnlwgt'])  # dropping 'sampling weights' column for simplification
# Binary target: 1 where the original label is '>50K', 0 otherwise.
df['class'] = (df['class'] == '>50K').astype(int)

In [5]:
# Split the rows into three consecutive, near-equal batches, one per simulated day.
# The split is computed on row positions rather than on the DataFrame itself:
# passing a DataFrame to np.array_split goes through the deprecated
# DataFrame.swapaxes and emits a FutureWarning on recent pandas versions.
row_batches = np.array_split(np.arange(len(df)), 3)
df_day1, df_day2, df_day3 = (df.iloc[rows] for rows in row_batches)

  return bound(*args, **kwds)



In [6]:
# Build an Object Storage client that signs requests with the resource principal.
signer = oci.auth.signers.get_resource_principals_signer()
# The config dict is left empty because the signer is supplied explicitly.
object_storage = oci.object_storage.ObjectStorageClient({}, signer=signer)
namespace = object_storage.get_namespace().data  # Get namespace once
# Target bucket; read as a global by upload_df_as_csv.
bucket_name='filesdemo'

In [7]:
def upload_df_as_csv(df, object_name):
    """Serialize a DataFrame to CSV and upload it to Object Storage.

    The destination and client come from the notebook-level variables
    ``bucket_name``, ``namespace`` and ``object_storage``.

    Args:
        df: DataFrame to upload; it is written without its index.
        object_name: Name of the object to create in the bucket.

    Returns:
        The value returned by ``object_storage.put_object``.
    """
    # Serialize in memory: CSV text -> UTF-8 bytes -> file-like object.
    csv_text = df.to_csv(index=False)
    payload = io.BytesIO(csv_text.encode("utf-8"))

    print(f"Uploading to oci://{bucket_name}@{namespace}/{object_name}")

    put_arguments = {
        "namespace_name": namespace,
        "bucket_name": bucket_name,
        "object_name": object_name,
        "put_object_body": payload,
        "content_type": "text/csv",
    }
    return object_storage.put_object(**put_arguments)

# Creating a Model Version Set

In [5]:
# Describe the version set that will group the model versions registered by the job.
version_set_tags = {"project": "IncomePrediction"}
mvs = ModelVersionSet(
    name="adults-income-version-set-demo",
    description="automatated data pipeline refitting the model with new data",
    freeform_tags=version_set_tags,
)

# Create the version set and print its resulting spec (including the generated id).
mvs.create()
print(mvs)


kind: modelVersionSet
spec:
  compartmentId: ocid1.compartment.oc1..aaaaaaaaenvaxcmsbmrio4gieevntz7ryuji6quq65rnbwjqtweahitw4dza
  definedTags:
    Default_Tags:
      AutoStop: 'Yes'
      CostTrackingCompartment: Specialists
      CreatedBy: ocid1.datasciencenotebooksession.oc1.eu-frankfurt-1.amaaaaaaeicj2tia5kesm5xrcumc5fpc7kflmawra64gborapmu2w2dnxfgq
  description: automatated data pipeline refitting the model with new data
  freeformTags:
    project: IncomePrediction
  id: ocid1.datasciencemodelversionset.oc1.eu-frankfurt-1.amaaaaaaeicj2tiasjogoyloflepsvhduim6bmocccutlucyhceomm5qosea
  name: adults-income-version-set-demo
  projectId: ocid1.datascienceproject.oc1.eu-frankfurt-1.amaaaaaaeicj2tia3noqgbegva53whrsznt2oy7txmxjcm4lggskw7n7i2sq
type: modelVersionSet



# Creating and Running Job

In [6]:
# Where the job runs: log group, flexible VM shape and block storage.
job_infrastructure = (
    DataScienceJob()
    .with_log_group_id("ocid1.loggroup.oc1.eu-frankfurt-1.amaaaaaaeicj2tia4cocgvb633rdxs332osruo2jscng5ohsdypaqtco7mwq")
    .with_shape_name("VM.Standard.E4.Flex")
    .with_shape_config_details(memory_in_gbs=4, ocpus=1)
    .with_block_storage_size(50)  # minimum is 50
)

# What the job runs: the registration notebook inside the service conda environment.
# DATANAME is only a placeholder here; each job run overrides it with the batch to process.
job_runtime = (
    PythonRuntime()
    .with_service_conda("generalml_p311_cpu_x86_64_v1")
    .with_source("/home/datascience/code/model_catalog/vesion_set_automation/version_set_registration_job.ipynb")
    .with_environment_variable(DATANAME="PLACEHOLDER")
)

job = (
    Job(name="Training adult income with new data")
    .with_infrastructure(job_infrastructure)
    .with_runtime(job_runtime)
)
job.create()


kind: job
spec:
  id: ocid1.datasciencejob.oc1.eu-frankfurt-1.amaaaaaaeicj2tia4yniwtxxfrt4wirjnh2sfp64zgkafjwm2pn2jpc7bq6q
  infrastructure:
    kind: infrastructure
    spec:
      blockStorageSize: 50
      compartmentId: ocid1.compartment.oc1..aaaaaaaaenvaxcmsbmrio4gieevntz7ryuji6quq65rnbwjqtweahitw4dza
      displayName: Training adult income with new data
      jobInfrastructureType: ME_STANDALONE
      jobType: DEFAULT
      logGroupId: ocid1.loggroup.oc1.eu-frankfurt-1.amaaaaaaeicj2tia4cocgvb633rdxs332osruo2jscng5ohsdypaqtco7mwq
      projectId: ocid1.datascienceproject.oc1.eu-frankfurt-1.amaaaaaaeicj2tia3noqgbegva53whrsznt2oy7txmxjcm4lggskw7n7i2sq
      shapeConfigDetails:
        memoryInGBs: 4.0
        ocpus: 1.0
      shapeName: VM.Standard.E4.Flex
    type: dataScienceJob
  name: Training adult income with new data
  runtime:
    kind: runtime
    spec:
      conda:
        slug: generalml_p311_cpu_x86_64_v1
        type: service
      env:
      - name: DATANAME
        

In [7]:
# Start a run of the job, overriding the DATANAME placeholder with the batch file to use.
# NOTE(review): assumes df_day2.csv is already in the bucket; no upload call is visible in this notebook. TODO confirm.
job_run = job.run(
    name="use job folder",
    env_var={'DATANAME': 'df_day2.csv'}
)

# Stream the run's status changes and log messages into the cell output.
job_run.watch()

Job OCID: ocid1.datasciencejob.oc1.eu-frankfurt-1.amaaaaaaeicj2tia4yniwtxxfrt4wirjnh2sfp64zgkafjwm2pn2jpc7bq6q
Job Run OCID: ocid1.datasciencejobrun.oc1.eu-frankfurt-1.amaaaaaaeicj2tiag24n3c3eg4jn3dawkxlgl7p2eg3dr2s2e5vbgnjdcxuq
2026-01-19 10:03:50 - Job Run ACCEPTED
2026-01-19 10:04:02 - Job Run ACCEPTED, Infrastructure provisioning.
2026-01-19 10:05:49 - Job Run ACCEPTED, Job run bootstrap starting.
2026-01-19 10:07:46 - Job Run ACCEPTED, Job run bootstrap complete. Artifact execution starting.
2026-01-19 10:08:02 - Job Run IN_PROGRESS, Job run artifact execution in progress.
2026-01-19 10:07:58 - df_day2.csv
2026-01-19 10:08:00 - 0.8790220979857589
2026-01-19 10:08:26 - prepare was completed
2026-01-19 10:08:26 - 
2026-01-19 10:08:26 - metadata was completed
2026-01-19 10:08:26 - !!!before registrating
2026-01-19 10:08:31 - ['output_schema.json', 'score.py', 'runtime.yaml', 'model.joblib', '.model-ignore', 'input_schema.json']
2026-01-19 10:08:31 - Model is successfully loaded.
2026

kind: jobRun
spec:
  id: ocid1.datasciencejobrun.oc1.eu-frankfurt-1.amaaaaaaeicj2tiag24n3c3eg4jn3dawkxlgl7p2eg3dr2s2e5vbgnjdcxuq
  infrastructure:
    kind: infrastructure
    spec:
      blockStorageSize: 50
      compartmentId: ocid1.compartment.oc1..aaaaaaaaenvaxcmsbmrio4gieevntz7ryuji6quq65rnbwjqtweahitw4dza
      displayName: use job folder
      jobInfrastructureType: ME_STANDALONE
      jobType: DEFAULT
      logGroupId: ocid1.loggroup.oc1.eu-frankfurt-1.amaaaaaaeicj2tia4cocgvb633rdxs332osruo2jscng5ohsdypaqtco7mwq
      projectId: ocid1.datascienceproject.oc1.eu-frankfurt-1.amaaaaaaeicj2tia3noqgbegva53whrsznt2oy7txmxjcm4lggskw7n7i2sq
      shapeConfigDetails:
        memoryInGBs: 4.0
        ocpus: 1.0
      shapeName: VM.Standard.E4.Flex
    type: dataScienceJob
  name: use job folder
  runtime:
    kind: runtime
    spec:
      conda:
        slug: generalml_p311_cpu_x86_64_v1
        type: service
      definedTags:
        Default_Tags:
          AutoStop: 'Yes'
          Co