In [0]:
%sql
-- Create the Unity Catalog volume `workspace.default.checkpoints` if it does
-- not already exist (IF NOT EXISTS makes this cell safe to re-run).
-- Per the original author it backs the checkpoints used when reading the raw
-- tweet stream; NOTE(review): nothing in this notebook reads or writes it -
-- confirm which downstream notebook depends on it.
CREATE VOLUME IF NOT EXISTS workspace.default.checkpoints;

In [0]:
# Install the Hugging Face stack into the notebook's Python environment:
# transformers (pinned to 4.35.2) plus torch and torchvision.
# NOTE(review): torch and torchvision are unpinned, so the resolved versions
# can differ between runs - consider pinning them for reproducibility.
%pip install transformers==4.35.2 torch torchvision --quiet
# Restart the Python process so the freshly installed packages are importable.
# This clears all Python state, so this cell must run before the cells below.
dbutils.library.restartPython()

In [0]:
import mlflow
from mlflow import MlflowClient
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Point the MLflow model registry at Unity Catalog.
# Note: Free Edition has limited permissions - model registration must be done via UI (one-time setup)
mlflow.set_registry_uri("databricks-uc")

# Model identifiers used throughout the notebook:
#   HF_MODEL_NAME - checkpoint id passed to from_pretrained()
#   UC_MODEL_NAME - name the model is registered under in Unity Catalog
HF_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
UC_MODEL_NAME = "workspace.default.tweet_sentiment_model"

print(f"🤗 Loading Hugging Face model: {HF_MODEL_NAME}")
print("   This may take a few minutes on first download...")

# Load the tokenizer and the sequence-classification model for the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_NAME)

# The size and class names below are descriptive text, not read from the model.
# NOTE(review): confirm the checkpoint's id2label actually uses these class
# names (it may expose generic LABEL_0/1/2 ids) before relying on them downstream.
print("✅ Model loaded successfully!")
print("   Model size: ~125M parameters (RoBERTa base)")
print("   Output classes: 3 (negative, neutral, positive)")

In [0]:
# Log model to MLflow run storage (works in Free Edition)
# Note: Model REGISTRATION must be done via UI in Free Edition (see instructions below)
print("📦 Logging model to MLflow with transformers flavor...")

with mlflow.start_run(run_name="tweet_sentiment_hf_model") as run:
    # Log the model + tokenizer pair with the transformers flavor. The input
    # example lets MLflow infer the model signature.
    model_info = mlflow.transformers.log_model(
        transformers_model={
            "model": model,
            "tokenizer": tokenizer,
        },
        artifact_path="model",
        input_example=["This is a great day!"],  # Example for schema inference
        task="text-classification",
    )

    # Log model metadata for reference in a single batched call. num_labels is
    # read from the loaded model's config so it cannot drift from the model
    # that was actually logged.
    mlflow.log_params(
        {
            "hf_model_name": HF_MODEL_NAME,
            "task": "sentiment-classification",
            "num_labels": model.config.num_labels,
            "model_type": "twitter-roberta-base",
        }
    )

    # run_id and model_uri are used by the registration instructions that follow.
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"

    print(f"\n✅ Model logged to MLflow run: {run_id}")
    print(f"   Model URI: {model_uri}")

In [0]:
# Free Edition requires manual registration via Databricks UI.
# This cell only prints the step-by-step instructions; it depends on `run_id`
# (set by the model-logging cell) and `UC_MODEL_NAME` (set by the config cell).
print("\n📋 ONE-TIME SETUP: Register Model via Databricks UI")
print("=" * 70)
print("\n⚠️  IMPORTANT: Only do this ONCE when first setting up the lab!")
print("   If model already registered, skip to verification below.\n")
print("📝 Manual Registration Steps:")
print("   1. In Databricks workspace, click 'Machine Learning' in left sidebar")
print("   2. Click 'Experiments' tab")
print(f"   3. Find and click the experiment containing run: {run_id}")
print("   4. Click the run to open run details")
print("   5. Scroll down to 'Artifacts' section → click 'model' folder")
print("   6. Click 'Register Model' button (top right)")
print("   7. In dialog:")
print("      - Model: Select 'Create New Model'")
print(f"      - Model Name: {UC_MODEL_NAME}")
print("      - Click 'Register'")
print("\n   ✅ Registration complete! Proceed to verification below.")
print("=" * 70)

In [0]:
# Verify model is registered in Unity Catalog.
# Looks up every registered version of UC_MODEL_NAME and reports the newest
# one, or tells the user to complete the manual registration if none exist.
client = MlflowClient()
try:
    # Only the registry call is guarded, so a failure here is reported as a
    # lookup problem and errors in the reporting code are not masked.
    model_versions = client.search_model_versions(f"name='{UC_MODEL_NAME}'")
except Exception as e:
    # Best-effort check: keep the notebook running and surface the registry
    # error together with the remediation steps.
    model_versions = []
    print(f"❌ Model not found: {e}")
    print("\n⚠️  Please complete the ONE-TIME manual registration above!")
    print("   Follow the UI registration steps, then rerun this cell to verify.")
else:
    if model_versions:
        # The search result order is not relied upon: pick the highest
        # version number explicitly so "Latest version" is accurate.
        latest_version = max(model_versions, key=lambda mv: int(mv.version))

        print("\n✅ Model registered successfully in Unity Catalog!")
        print(f"   Name: {latest_version.name}")
        print(f"   Description: {latest_version.description or 'N/A'}")
        print(f"   Latest version: {latest_version.version}")
        print(f"   Status: {latest_version.status}")
        print(f"\n   Model URI: models:/{UC_MODEL_NAME}/{latest_version.version}")
    else:
        # An empty result means the model has no versions yet (previously this
        # case raised IndexError and was misreported as a lookup failure).
        print("   ⚠️  No versions found - complete manual registration above!")