# OCI Data Science Model Catalog - Model Version Set

- conda environment: generalml_p311_cpu_x86_64_v1
- Author: Assaf Rabinowicz
- Date: 14Jan2026 

# Notebook Description

* This notebook acts as the job script that is orchestrated through version_set_orchestrating.ipynb.

# Package imports and resource principal authentication

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import pandas as pd
from pathlib import Path

import os
import io

import ads
from ads.model import SklearnModel
from ads.model.datascience_model import DataScienceModel
import oci

In [2]:
# Configure ADS to authenticate with the OCI resource principal for all subsequent ADS calls.
ads.set_auth(auth="resource_principal")

# Data Loading and Processing

In [4]:
# Name of the CSV object to train on; supplied through the DATANAME environment variable.
data_name = os.getenv('DATANAME')

# os.getenv returns None when the variable is missing. Fail here with a clear message
# instead of letting None reach the Object Storage request and the model version label.
if not data_name:
    raise RuntimeError(
        "Environment variable 'DATANAME' is not set; "
        "it must hold the name of the CSV object to load from Object Storage."
    )

df_day1.csv


In [5]:
# Where the training data lives: a fixed bucket, and the object named by `data_name`.
bucket_name = 'filesdemo'
file_name = data_name

# Object Storage client authenticated with the resource principal signer
# (the config dict is left empty because the signer carries the credentials).
signer = oci.auth.signers.get_resource_principals_signer()
object_storage = oci.object_storage.ObjectStorageClient({}, signer=signer)

# The tenancy's Object Storage namespace is looked up at runtime.
namespace = object_storage.get_namespace().data

# Download the object and parse its bytes as CSV into a DataFrame.
obj = object_storage.get_object(
    namespace_name=namespace,
    bucket_name=bucket_name,
    object_name=file_name,
)
df = pd.read_csv(io.BytesIO(obj.data.content))

In [6]:
# Separate the target column ('class') from the features.
X = df.drop(columns='class')
y = df['class']

# One-hot encode the features with pandas' default get_dummies behaviour.
X = pd.get_dummies(X)

# Hold out 30% of the rows for evaluation. A fixed random_state makes the split,
# and therefore the AUC recorded in the model catalog, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Model fitting

In [7]:
# Random forest with default hyper-parameters. The seed is fixed so that repeated
# runs on the same data produce the same model and comparable metrics.
rfc = RandomForestClassifier(random_state=42)

# fit() returns the estimator itself, so `random_forest_model` and `rfc` refer to the same object.
random_forest_model = rfc.fit(X_train, y_train)

In [8]:
# Predicted class probabilities for the hold-out rows; column index 1 is used as the score.
# NOTE(review): this presumes a binary target whose positive class is the second entry
# of the estimator's classes_ - confirm against the data.
test_probabilities = random_forest_model.predict_proba(X_test)
y_test_prob = test_probabilities[:, 1]

# Area under the ROC curve on the hold-out set.
auc = roc_auc_score(y_true=y_test, y_score=y_test_prob)

0.8763714305972095


# Model Serialization

In [9]:
# Wrap the fitted estimator in an ADS SklearnModel; artifacts go to 'random-forest-model/'.
# Note: this rebinds `random_forest_model` from the fitted estimator to the ADS wrapper.
random_forest_model = SklearnModel(
    artifact_dir='random-forest-model/',
    estimator=rfc,
)



In [11]:
# The same conda environment slug is used for both training and inference.
conda_env_slug = "generalml_p311_cpu_x86_64_v1"

# Generate the model artifacts, using the hold-out set as the sample input/output.
# force_overwrite=True lets the cell be re-run over an existing artifact directory.
random_forest_model.prepare(
    inference_conda_env=conda_env_slug,
    training_conda_env=conda_env_slug,
    X_sample=X_test,
    y_sample=y_test,
    force_overwrite=True,
)
print('prepare was completed')

prepare was completed                                                                                                                                                                                                                                                ?, ?it/s]


In [12]:
# Attach the hold-out ROC AUC to the model's custom metadata so it is stored with the
# catalog entry. The description now names the metric that is actually stored (ROC AUC);
# it previously said "Accuracy". replace=True overwrites any existing entry with this key.
random_forest_model.metadata_custom.add(
    key="test_set_auc",
    value=auc,
    description="ROC AUC measured on the hold-out test set.",
    replace=True
)

metadata was completed


# Registering to the Model Version Set

In [13]:
# Saving the input schema is optional. Here the schema is heavy,
# so it is cleared before saving and is not stored with the model.
random_forest_model.schema_input = None

In [None]:
# Retention mechanism applied to the saved model.
# NOTE(review): confirm whether "delete_after_days" is counted from creation or from archival.
retention_policy = {
    "archive_after_days": 30,
    "delete_after_days": 30,
}

# Save the model into the model version set, labelling this version with the
# name of the data file it was trained on (`data_name`).
model_id = random_forest_model.save(
    display_name="adult_income_random_forest",
    model_version_set='adults-income-version-set-demo',
    version_label=data_name,
    retention_setting=retention_policy,
)