#### Summary of the Complete Workflow.  
  
Connected to your MLflow instance.  
Identified and selected a specific version of a registered model.  
Downloaded the model's artifacts to your local notebook environment.  
Authenticated with Google Cloud Storage.  
Uploaded the model artifacts into a well-organized structure in your GCS bucket.  

In [20]:
import mlflow
from mlflow.tracking import MlflowClient
from google.cloud import storage
import os

In [13]:
project_id = "tough-processor-312510"  # <-- Replace with your actual project ID
client = storage.Client(project=project_id)


In [15]:

try:
    # storage_client = storage.Client()
    project_id = "tough-processor-312510"  # <-- Replace with your actual project ID
    storage_client = storage.Client(project=project_id)
    print("Successfully created storage client")
    # Optionally, list buckets to verify further
    buckets = storage_client.list_buckets()
    print("Buckets:")
    for bucket in buckets:
        print(f"- {bucket.name}")

except Exception as e:
    print(f"An error occurred: {e}")


Successfully created storage client
Buckets:
- mmotl_mlflow_artifacts


In [16]:

# --- MLflow Configuration ---
# Set the MLflow tracking URI.
# This should point to your MLflow server.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# mlflow.set_experiment("dtc_persona_analysis")

# --- Google Cloud Configuration ---
# The name of your GCS bucket
gcs_bucket_name = "mmotl_mlflow_artifacts"

# Optional: Define a "folder" within your GCS bucket to store the models
gcs_destination_path = "mlflow-models/"

# It's recommended to authenticate using Application Default Credentials (ADC).
# Run `gcloud auth application-default login` in your terminal before starting the notebook.
# If you are running this in a GCP environment (like a Vertex AI Notebook), authentication should be handled automatically.
# Alternatively, you can explicitly point to a service account key file:
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service-account-key.json"

In [21]:
# Initialize the MLflow client
client = MlflowClient()


In [22]:

# List all registered models
print("Registered Models:")
for model in client.search_registered_models():
    print(f"- {model.name}")


Registered Models:
- dtc_persona_clustering_model


In [23]:
model.name

'dtc_persona_clustering_model'

In [24]:

# --- Specify the model and version you want to upload ---
# Replace with the name of your model
model_name = model.name

# You can either get the latest version or specify a particular version number.
# To get the latest version:
latest_versions = client.get_latest_versions(name=model_name, stages=["None"])
if not latest_versions:
    raise Exception(f"No versions found for model '{model_name}'")
model_version = latest_versions[0].version
print(f"Selected latest version: {model_version} for model '{model_name}'")

# Or, to specify a version directly:
# model_version = "2" # for version 2

Selected latest version: 1 for model 'dtc_persona_clustering_model'


  latest_versions = client.get_latest_versions(name=model_name, stages=["None"])


In [25]:
# Create a local directory to temporarily store the model files
local_download_path = f"./{model_name}_v{model_version}"
if not os.path.exists(local_download_path):
    os.makedirs(local_download_path)

# Download the model artifacts
print(f"Downloading model '{model_name}' version {model_version} from MLflow...")
local_path = client.download_artifacts(
    run_id=latest_versions[0].run_id,  # Assumes you are using the latest version from the previous step
    path="model",  # The 'path' is usually 'model' for artifacts logged with `mlflow.pyfunc.log_model`
    dst_path=local_download_path
)
print(f"Model downloaded to: {local_path}")

# Verify the downloaded files
print("Downloaded files:")
for root, _, files in os.walk(local_path):
    for name in files:
        print(os.path.join(root, name))

Downloading model 'dtc_persona_clustering_model' version 1 from MLflow...


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Model downloaded to: /Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model
Downloaded files:
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/python_env.yaml
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/requirements.txt
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/MLmodel
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/model.pkl
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/registered_model_meta
/Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/conda.yaml


In [26]:
def upload_folder_to_gcs(bucket_name, source_folder, destination_blob_name):
    """Uploads a folder to the GCS bucket."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    for local_file in os.walk(source_folder):
        # local_file is a tuple (dirpath, dirnames, filenames)
        dirpath, _, filenames = local_file
        for filename in filenames:
            # Get the full local path
            local_file_path = os.path.join(dirpath, filename)

            # Create the destination path in GCS
            relative_path = os.path.relpath(local_file_path, source_folder)
            gcs_file_path = os.path.join(destination_blob_name, relative_path)

            # Upload the file
            blob = bucket.blob(gcs_file_path)
            blob.upload_from_filename(local_file_path)
            print(f"Uploaded {local_file_path} to gs://{bucket_name}/{gcs_file_path}")

# Define the full destination path in GCS for this specific model and version
gcs_model_path = os.path.join(gcs_destination_path, model_name, f"v{model_version}")

# Upload the entire folder
print(f"\nUploading model files to gs://{gcs_bucket_name}/{gcs_model_path}...")
upload_folder_to_gcs(gcs_bucket_name, local_path, gcs_model_path)

print("\nUpload complete!")


Uploading model files to gs://mmotl_mlflow_artifacts/mlflow-models/dtc_persona_clustering_model/v1...
Uploaded /Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/python_env.yaml to gs://mmotl_mlflow_artifacts/mlflow-models/dtc_persona_clustering_model/v1/python_env.yaml
Uploaded /Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/requirements.txt to gs://mmotl_mlflow_artifacts/mlflow-models/dtc_persona_clustering_model/v1/requirements.txt
Uploaded /Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/MLmodel to gs://mmotl_mlflow_artifacts/mlflow-models/dtc_persona_clustering_model/v1/MLmodel
Uploaded /Users/matthiasmotl/neuefische/repositories/dtc/dtc_persona_analysis/01_model/dtc_persona_clustering_model_v1/model/model.pkl to gs://mmotl_mlflow_artifacts/mlflow-models/dtc_persona_clustering_model/