In [None]:
! pip install clearml[s3] scikit-learn matplotlib seaborn numpy joblib 

In [None]:
import os

# Turn off TLS certificate verification for the ClearML API host.
# NOTE(review): presumably needed because the *.clearml.local endpoints use a
# self-signed certificate -- confirm, and do not reuse against a real server.
os.environ.update({'CLEARML_API_HOST_VERIFY_CERT': 'false'})

In [None]:
import os
from getpass import getpass

# Endpoints of the ClearML deployment under test.
os.environ['CLEARML_WEB_HOST'] = 'https://app.clearml.local/'
os.environ['CLEARML_API_HOST'] = 'https://api.clearml.local'
os.environ['CLEARML_FILES_HOST'] = 'https://files.clearml.local/'

# Credentials must never be stored in the notebook (or echoed into its saved
# output, which `%env NAME=value` does). Values already present in the
# environment are used as-is; otherwise they are prompted for without echo.
# SECURITY: the key pair that used to be hard-coded in this cell must be
# treated as leaked and revoked in the ClearML web UI.
for _cred_var in ('CLEARML_API_ACCESS_KEY', 'CLEARML_API_SECRET_KEY'):
    if not os.environ.get(_cred_var):
        os.environ[_cred_var] = getpass(f'{_cred_var}: ').strip()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib
import seaborn as sns
from clearml import Task, Logger
import time
import os

# Configuration
# The server URLs are read from the same CLEARML_* environment variables the
# ClearML SDK uses, so the links printed in the summary cannot drift away from
# the endpoints the SDK actually talks to. The literals are only fallbacks for
# when a variable is not set. Trailing slashes are stripped so the values can
# be joined with "/..." paths.
CLEARML_SERVER = os.environ.get("CLEARML_API_HOST", "https://api.clearml.local").rstrip("/")
CLEARML_WEB_SERVER = os.environ.get("CLEARML_WEB_HOST", "https://app.clearml.local").rstrip("/")
CLEARML_FILES_SERVER = os.environ.get("CLEARML_FILES_HOST", "https://files.clearml.local").rstrip("/")

# MinIO configuration for model storage
MINIO_ENDPOINT = "minio.local:80"  # host:port, used to build the s3:// output URI
MODEL_BUCKET = "clearml-artifacts"  # Make sure this bucket exists in MinIO

def _log_test_scalars(logger, iterations=10):
    """Report four synthetic scalar series (two titles) for each iteration."""
    print("\n1. Testing scalar logging...")
    for i in range(iterations):
        logger.report_scalar(
            title='test_metrics',
            series='accuracy',
            value=np.random.random() * 0.3 + 0.7,
            iteration=i
        )
        logger.report_scalar(
            title='test_metrics',
            series='loss',
            value=np.random.random() * 0.5,
            iteration=i
        )
        logger.report_scalar(
            title='learning_curves',
            series='train_loss',
            value=1.0 - (i * 0.1) + np.random.random() * 0.1,
            iteration=i
        )
        logger.report_scalar(
            title='learning_curves',
            series='val_loss',
            value=1.2 - (i * 0.08) + np.random.random() * 0.15,
            iteration=i
        )
    print("✓ Scalar metrics logged")


def _log_test_plots(logger):
    """Build a 2x2 matplotlib figure from synthetic data and report it."""
    print("\n2. Testing plot/image storage (should go to fileserver)...")

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Panel 1: line plot
    x = np.linspace(0, 10, 100)
    axes[0, 0].plot(x, np.sin(x), label='sin(x)')
    axes[0, 0].plot(x, np.cos(x), label='cos(x)')
    axes[0, 0].set_title('Trigonometric Functions')
    axes[0, 0].legend()
    axes[0, 0].grid(True)

    # Panel 2: scatter plot
    x_scatter = np.random.randn(100)
    y_scatter = 2 * x_scatter + np.random.randn(100) * 0.5
    axes[0, 1].scatter(x_scatter, y_scatter, alpha=0.5)
    axes[0, 1].set_title('Random Scatter')
    axes[0, 1].set_xlabel('X values')
    axes[0, 1].set_ylabel('Y values')

    # Panel 3: histogram
    data = np.random.randn(1000)
    axes[1, 0].hist(data, bins=30, edgecolor='black')
    axes[1, 0].set_title('Normal Distribution')
    axes[1, 0].set_xlabel('Value')
    axes[1, 0].set_ylabel('Frequency')

    # Panel 4: bar plot
    categories = ['A', 'B', 'C', 'D', 'E']
    values = np.random.randint(10, 100, 5)
    axes[1, 1].bar(categories, values, color=['red', 'green', 'blue', 'yellow', 'purple'])
    axes[1, 1].set_title('Category Comparison')
    axes[1, 1].set_ylabel('Values')

    fig.tight_layout()
    logger.report_matplotlib_figure(
        title='Test Plots',
        series='combined_plots',
        figure=fig,
        iteration=0
    )
    # Close this specific figure instead of relying on pyplot's "current figure".
    plt.close(fig)
    print("✓ Matplotlib plots logged")


def _train_and_report_model(logger):
    """Train a random forest on synthetic data, report its metrics, return the model."""
    print("\n3. Creating ML model and confusion matrix...")

    # Synthetic 3-class dataset; fixed seeds keep the model itself reproducible.
    X, y = make_classification(
        n_samples=1000,
        n_features=20,
        n_informative=15,
        n_redundant=5,
        n_classes=3,
        random_state=42
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    logger.report_single_value('test_accuracy', accuracy)
    print(f"✓ Model trained with accuracy: {accuracy:.4f}")

    # Confusion matrix as a heatmap on an explicitly owned figure.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    logger.report_matplotlib_figure(
        title='Model Performance',
        series='confusion_matrix',
        figure=fig,
        iteration=0
    )
    plt.close(fig)
    print("✓ Confusion matrix logged")

    # Classification report goes to the task log only, not to the console.
    report = classification_report(y_test, y_pred)
    logger.report_text(
        "Classification Report:\n" + report,
        print_console=False
    )
    return model


def _save_and_register_model(task, model, model_filename):
    """Dump the model to a local file and register that file as the task's output model."""
    print("\n4. Testing model storage (should go to MinIO)...")

    joblib.dump(model, model_filename)
    print(f"✓ Model saved locally as {model_filename}")

    # Registering the file is what should trigger the upload to the task's output_uri.
    task.update_output_model(
        model_path=model_filename,
        model_name='test_random_forest',
        iteration=0,
        comment='Test model for storage validation'
    )
    print("✓ Model registered and should be uploaded")


def _log_test_images(logger):
    """Report one random RGB image and one random grayscale image."""
    print("\n5. Testing multiple image formats...")

    image_array = np.random.rand(100, 100, 3)
    logger.report_image(
        title='Random Images',
        series='random_rgb',
        iteration=0,
        image=image_array
    )

    gray_image = np.random.rand(100, 100)
    logger.report_image(
        title='Random Images',
        series='random_grayscale',
        iteration=0,
        image=gray_image
    )
    print("✓ Additional images logged")


def _log_console_output():
    """Emit a few timed console lines so console capture can be checked in the UI."""
    print("\n6. Testing console logging...")
    print("This is a test console output")
    print("It should appear in the ClearML console logs")
    for i in range(5):
        print(f"  Progress: {i+1}/5")
        time.sleep(0.5)
    print("✓ Console logging complete")


def _print_summary(task):
    """Print the task identifiers, the expected storage locations and the web links."""
    project_name = task.get_project_name()
    print("\n" + "="*50)
    print("STORAGE TEST SUMMARY")
    print("="*50)
    print(f"Task ID: {task.id}")
    print(f"Task Name: {task.name}")
    print(f"Project: {project_name}")
    print("\nExpected storage locations:")
    print(f"  - Plots/Images: {CLEARML_FILES_SERVER}")
    print(f"  - Model: s3://{MINIO_ENDPOINT}/{MODEL_BUCKET}")
    print("  - Metrics/Logs: MongoDB/Elasticsearch (internal)")
    print("\nYou can verify the results at:")
    print(f"  Web UI: {CLEARML_WEB_SERVER}")
    print(f"  Direct link: {CLEARML_WEB_SERVER}/projects/{task.get_project_id(project_name)}/experiments/{task.id}")


def test_clearml_storage(task):
    """
    Exercise a ClearML installation through an already-initialised task.

    The task is used to:
    1. Log scalar metrics
    2. Report matplotlib plots and raw images (expected on the fileserver)
    3. Train a small model and register it as the output model
       (expected in the MinIO bucket behind the task's output URI)
    4. Produce console output and print a summary with links to the web UI

    Args:
        task: The ClearML task to report to. It is created by the caller;
            this function neither initialises nor closes it.

    Returns:
        The same ``task`` object that was passed in.

    Any exception raised by a reporting or upload step propagates to the
    caller. The temporary local model file is removed either way.
    """
    print("Initializing ClearML Task...")

    # Get the logger for plots
    logger = task.get_logger()

    # Log some configuration parameters
    task.connect({
        'test_param_1': 42,
        'test_param_2': 'hello_clearml',
        'learning_rate': 0.01,
        'epochs': 100,
        'batch_size': 32
    })

    _log_test_scalars(logger)
    _log_test_plots(logger)
    model = _train_and_report_model(logger)

    model_filename = 'test_model.pkl'
    try:
        _save_and_register_model(task, model, model_filename)
        _log_test_images(logger)
        _log_console_output()
        _print_summary(task)
    finally:
        # Runs on failure too, so an aborted test does not leave the pickle
        # behind in the working directory.
        if os.path.exists(model_filename):
            os.remove(model_filename)
            print(f"\n✓ Cleaned up local model file: {model_filename}")

    print("\n✅ All tests completed successfully!")

    return task

if __name__ == "__main__":
    # Stays None when Task.init itself fails, so the cleanup below never
    # touches an unbound name (or a stale task left over from an earlier run
    # of this cell).
    task = None
    try:
        task = Task.init(
            project_name='storage_test',
            task_name=f'test_storage_{time.strftime("%Y%m%d_%H%M%S")}',
            tags=['test', 'storage-validation','test-tag'],
            reuse_last_task_id=True,
            # Models/artifacts are sent to the MinIO bucket instead of the fileserver.
            output_uri=f"s3://{MINIO_ENDPOINT}/{MODEL_BUCKET}"
        )
        test_clearml_storage(task)
        print("\n🎉 Test completed! Check your ClearML Web UI to verify the results.")
    except Exception as e:
        print(f"\n❌ Error during testing: {str(e)}")
        print("\nTroubleshooting tips:")
        print("1. Verify ClearML server is running: kubectl get pods -n <your-namespace>")
        print("2. Check ingress is working: kubectl get ingress -n <your-namespace>")
        print("3. Ensure DNS resolution for *.clearml.local domains")
        print("4. Verify MinIO is accessible and buckets exist")
        print("5. Check ClearML credentials are set correctly")
        # Re-raise so the notebook cell still shows the full traceback.
        raise
    finally:
        # Close the task on both the success and the failure path, but only
        # if it was actually created.
        if task is not None:
            task.close()