In [6]:
import pandas as pd
import numpy as np
from datetime import datetime
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import os
import hashlib
import base64
import json

def domino_short_id(length=8):
    """Build a short, stable identifier for the current Domino user/project.

    The user name and project id come from the ``DOMINO_STARTING_USERNAME``
    and ``DOMINO_PROJECT_ID`` environment variables, falling back to
    ``"demo_user"`` and ``"demo_project"`` when they are unset.  The string
    ``"<user>/<project>"`` is hashed with SHA-256, the digest is encoded as
    URL-safe base64 with the ``=`` padding stripped, and the first ``length``
    characters of that encoding are appended to the user name.

    Args:
        length: Number of encoded characters to keep after the user name.

    Returns:
        A string of the form ``"<user>_<encoded prefix>"``.
    """
    env = os.environ
    user = env.get("DOMINO_STARTING_USERNAME", "demo_user")
    project = env.get("DOMINO_PROJECT_ID", "demo_project")

    # Same user/project pair always hashes to the same suffix.
    key = "/".join((user, project)).encode()
    token = base64.urlsafe_b64encode(hashlib.sha256(key).digest())
    suffix = token.decode("utf-8").rstrip("=")[:length]
    return "_".join((user, suffix))

def generate_data(n=200, random_state=None, noise_scale=0.1):
    """Generate fake data for model training.

    Four standard-normal feature columns (``feature_1`` .. ``feature_4``) are
    drawn, and the target is their row-wise sum plus Gaussian noise scaled by
    ``noise_scale``.

    Args:
        n: Number of rows to generate.
        random_state: ``None`` (default) draws from NumPy's global random
            state, exactly as the function did before this parameter existed.
            An int seeds a private ``np.random.RandomState`` so the output is
            reproducible; an existing ``RandomState`` instance is used as-is.
        noise_scale: Multiplier applied to the standard-normal noise added to
            the target.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame with ``n`` rows and four
        feature columns and ``y`` is a Series of length ``n``.
    """
    if random_state is None:
        # Keep the historical behaviour: consume the global NumPy stream.
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Columns are drawn in order feature_1..feature_4, then the noise term,
    # which matches the draw order of the original implementation.
    X = pd.DataFrame({f"feature_{i}": rng.randn(n) for i in range(1, 5)})
    y = X.sum(axis=1) + rng.randn(n) * noise_scale
    return X, y

# Catalogue of demo models: each entry pairs the short registry name ("name")
# with a human-readable description ("long_name").
MODELS = [
    dict(name=short, long_name=title)
    for short, title in (
        ("HelpBot", "Internal Helpdesk Chatbot"),
        ("PressFinder", "Customer Press Release Discovery & Summary Tool"),
        ("VoiceOverPro", "Internal Video Voice-Over Tool"),
        ("FINREP_Extractor", "Internal Tool: Data Extractor from Financial Reports"),
        ("Fitch_Portfolio_Optimizer", "Portfolio Optimization Tool for Subscribers"),
        ("EntityTagger_Pro", "Third-Party Press Release Tagging Tool"),
        ("ResumeRanker_Pro", "Recruitment Selection Support Tool"),
    )
]

def train_and_register_model(model_info):
    """Train a demo regressor inside an MLflow run and register it.

    A run named ``"<name>_run"`` is started, synthetic data from
    ``generate_data()`` is split 80/20 (``random_state=42``), and a
    ``LinearRegression`` is fitted on the training part.  The run receives:

    * params: ``model_name``, ``data_points`` (training rows), ``features``
    * metrics: ``rmse`` and ``r2_score`` on the test split, plus a synthetic
      ``health`` value that is drawn at random rather than measured
    * tags: ``long-name`` and an ISO ``timestamp``
    * the fitted model, logged under the artifact path ``"model"``

    The logged model is then registered under ``model_info["name"]`` with a
    ``long-name`` tag.

    Args:
        model_info: Mapping with the keys ``"name"`` and ``"long_name"``.

    Returns:
        The id of the MLflow run that was created.
    """
    short_name = model_info["name"]

    with mlflow.start_run(run_name=f"{short_name}_run") as active_run:
        features, target = generate_data()
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        regressor = LinearRegression().fit(X_train, y_train)
        predictions = regressor.predict(X_test)

        # Parameters describing this training run
        mlflow.log_params({
            "model_name": short_name,
            "data_points": len(X_train),
            "features": list(features.columns),
        })

        # Test-split error metrics, computed by hand from the residuals
        squared_error = (y_test - predictions) ** 2
        rmse = np.sqrt(squared_error.mean())
        total_variance = ((y_test - y_test.mean()) ** 2).sum()
        r2 = 1 - squared_error.sum() / total_variance

        # Synthetic health score: 70% of the time in [0.95, 0.99),
        # otherwise anywhere in [0.3, 0.95).
        health = (
            np.random.uniform(0.95, 0.99)
            if np.random.rand() < 0.7
            else np.random.uniform(0.3, 0.95)
        )

        mlflow.log_metrics({"rmse": rmse, "r2_score": r2, "health": health})

        # Run-level tags, including the descriptive long name
        long_name = model_info["long_name"]
        mlflow.set_tags({
            "long-name": long_name,
            "timestamp": datetime.now().isoformat(),
        })

        # Store the model artifact, then register it from this run's URI
        run_id = active_run.info.run_id
        mlflow.sklearn.log_model(regressor, "model")
        mlflow.register_model(
            model_uri=f"runs:/{run_id}/model",
            name=f"{short_name}",
            tags={"long-name": long_name},
        )
        return run_id

if __name__ == "__main__":
    # Script entry point: train and register every entry of MODELS in one
    # shared MLflow experiment, then write a JSON summary and print a recap.
    print(f"📝 Training {len(MODELS)} models\n")

    # One experiment for all models, suffixed with the user/project short id
    experiment_name = f"Model_Trainingv2_{domino_short_id()}"
    mlflow.set_experiment(experiment_name)

    all_runs = []
    for model_info in MODELS:
        try:
            print(f"\n🔄 Processing {model_info['name']}...")
            run_id = train_and_register_model(model_info)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next model
            print(f"❌ Error processing {model_info['name']}: {exc!s}")
        else:
            all_runs.append({"name": f"{model_info['name']}", "run_id": run_id})

    # Save summary (only successful runs are listed under "runs")
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    summary_path = f"/tmp/method2_summary_{stamp}.json"
    summary = {
        "experiment_name": experiment_name,
        "models_processed": len(MODELS),
        "total_registrations": len(all_runs),
        "runs": all_runs,
    }
    with open(summary_path, 'w') as summary_file:
        json.dump(summary, summary_file, indent=2)

    for line in (
        "\n✨ Training Complete with Method 2 only!",
        f"📈 MLflow Experiment: {experiment_name}",
        f"📄 Summary: {summary_path}",
        f"🔍 Total models: {len(MODELS)}",
        f"📋 Total registrations: {len(all_runs)}",
    ):
        print(line)


📝 Training 7 models


🔄 Processing HelpBot...


Registered model 'HelpBot' already exists. Creating a new version of this model...
2025/09/11 21:13:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: HelpBot, version 3
Created version '3' of model 'HelpBot'.


🏃 View run HelpBot_run at: http://127.0.0.1:8768/#/experiments/1830/runs/f212f338f3b84fa7a419e83daf486bc2
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing PressFinder...


Registered model 'PressFinder' already exists. Creating a new version of this model...
2025/09/11 21:13:44 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: PressFinder, version 3
Created version '3' of model 'PressFinder'.


🏃 View run PressFinder_run at: http://127.0.0.1:8768/#/experiments/1830/runs/520de617929a4ad4b114e8148862ea05
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing VoiceOverPro...


Registered model 'VoiceOverPro' already exists. Creating a new version of this model...
2025/09/11 21:13:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: VoiceOverPro, version 3
Created version '3' of model 'VoiceOverPro'.


🏃 View run VoiceOverPro_run at: http://127.0.0.1:8768/#/experiments/1830/runs/e326c3a5622a4423a08eae5842e6b7d3
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing FINREP_Extractor...


Registered model 'FINREP_Extractor' already exists. Creating a new version of this model...
2025/09/11 21:13:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: FINREP_Extractor, version 3
Created version '3' of model 'FINREP_Extractor'.


🏃 View run FINREP_Extractor_run at: http://127.0.0.1:8768/#/experiments/1830/runs/91bae52401ff4d1c9e041c055af08d0d
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing Fitch_Portfolio_Optimizer...


Registered model 'Fitch_Portfolio_Optimizer' already exists. Creating a new version of this model...
2025/09/11 21:14:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Fitch_Portfolio_Optimizer, version 3
Created version '3' of model 'Fitch_Portfolio_Optimizer'.


🏃 View run Fitch_Portfolio_Optimizer_run at: http://127.0.0.1:8768/#/experiments/1830/runs/cb7b9fd8a62b48318fed3466e9a429a3
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing EntityTagger_Pro...


Registered model 'EntityTagger_Pro' already exists. Creating a new version of this model...
2025/09/11 21:14:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: EntityTagger_Pro, version 3
Created version '3' of model 'EntityTagger_Pro'.


🏃 View run EntityTagger_Pro_run at: http://127.0.0.1:8768/#/experiments/1830/runs/8f9eadb5a24645618aef889eda8a3011
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

🔄 Processing ResumeRanker_Pro...


Registered model 'ResumeRanker_Pro' already exists. Creating a new version of this model...
2025/09/11 21:14:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ResumeRanker_Pro, version 3


🏃 View run ResumeRanker_Pro_run at: http://127.0.0.1:8768/#/experiments/1830/runs/d844dc49b15a45a48666252088181333
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1830

✨ Training Complete with Method 2 only!
📈 MLflow Experiment: Model_Trainingv2_nick_goble_lR__vLoo
📄 Summary: /tmp/method2_summary_20250911_211411.json
🔍 Total models: 7
📋 Total registrations: 7


Created version '3' of model 'ResumeRanker_Pro'.
