In [None]:
!pip install ultralytics opencv-python

In [None]:
!sudo apt-get update 
!sudo apt-get install -y libgl1
!sudo apt-get install -y poppler-utils

In [None]:
# Import necessary libraries
from ultralytics import YOLO, settings
import os
import torch
import logging
import mlflow
import mlflow.pyfunc
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec
import pandas as pd
import numpy as np
import yaml
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import json
import shutil
from datetime import datetime
import time
import re

settings.update({"mlflow": False})
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

mlflow.autolog()
# Set up MLflow experiment
mlflow.set_experiment("YOLOv11_Signature_Detection")

print("Libraries imported and MLflow experiment initialized!")

In [None]:
# Check CUDA availability
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
    # Try a different CUDNN version if available
    torch.backends.cudnn.enabled = True
    # Print memory info
    logger.info(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    logger.info(f"Available GPU memory: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB")
else:
    device = torch.device("cpu")
    logger.info("CUDA not available, using CPU")

print(f"Device set up complete: {device}")

In [None]:
def sanitize_mlflow_key(key):
    """Sanitize MLflow metric names by replacing invalid characters"""
    # Replace parentheses and other invalid characters with underscores
    sanitized = re.sub(r'[^\w\-\.\s:\/]', '_', key)
    return sanitized

model_name="yolo11n.pt"
data_yaml="/home/jovyan/datafabric/Signature_Detection/data.yaml"
epochs=100               
img_size=640
device_id=0
patience=5
batch_size=16

    # Start MLflow run
with mlflow.start_run(run_name=f"YOLOv11_train_{datetime.now().strftime('%Y%m%d_%H%M%S')}") as run:
    try:
        run_name = f"YOLOv11_train_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Log parameters
        params = {
            "model_name": model_name,
            "data_yaml": data_yaml,
            "epochs": epochs,
            "img_size": img_size,
            "device_id": device_id,
            "patience": patience,
            "batch_size": batch_size,
        }
        mlflow.log_params(params)
        
        # Load model
        model = YOLO(model_name)
           
        # Set training parameters
        train_args = {
            "data": data_yaml,
            "epochs": epochs,
            "imgsz": img_size,
            "device": device_id,
            "patience": patience,
            "batch": batch_size,
            "save": True,
            "cache": False,  # Disable cache completely
            "plots": True,
            "amp": False,   # Disable mixed precision
            "optimizer": "AdamW",
            "mosaic": 0.0,  # Disable mosaic augmentation
            "workers": 0,   # Use ZERO workers to avoid shared memory issues
            "name": run_name.replace(':', '_'),  
            "project": "YOLOv11_Signature_Detection"
        }
        
               
        # Train the model
        print("Starting training with adjusted parameters to avoid shared memory issues...")
        results = model.train(**train_args)
        print("Training completed successfully!")
        
                             
        runs_dir = os.path.join("runs", "detect", run_name.replace(':', '_'))
        best_model_path = os.path.join(runs_dir, "weights", "best.pt")
        last_model_path = os.path.join(runs_dir, "weights", "last.pt")

        example_img = "pdf-digital-signature-example.png"

        artifacts = {"yolo_model": best_model_path}
 
       
    except Exception as e:
        print(f"Error during training: {e}")
        import traceback
        traceback.print_exc()

In [None]:
mlflow.log_artifacts(best_model_path, artifact_path="model", run_id="5f9b23608b6d4c8c9b0ddb45e833cde5")

In [74]:
class YOLOv11MLflowModel(mlflow.pyfunc.PythonModel):
    def __init__(self):
        # Don't keep any state in the constructor
        super().__init__()
    
    def load_context(self, context):
        # Import the libraries only when needed
        # This ensures no thread locks or other unpicklable objects 
        # are created during serialization
        from ultralytics import YOLO
        
        # Get the model path from artifacts
        model_path = context.artifacts["model"]
        print(f"Using model path: {model_path}")
        
        # Load the model here
        self.model = YOLO(model_path)
    
    def predict(self, context, image_input):
        # Import libraries when needed to avoid serialization issues
        import json
        import numpy as np
        
        image = image_input.get("image")
        
        if isinstance(image, list):
            image = np.array(image, dtype=np.uint8)
        
        # Run prediction
        results = self.model.predict(image)
        
        # Convert to JSON string
        output = []
        for result in results:
            result_dict = {
                "boxes": result.boxes.xyxy.cpu().numpy().tolist() if result.boxes is not None else [],
                "scores": result.boxes.conf.cpu().numpy().tolist() if result.boxes is not None else [],
                "classes": result.boxes.cls.cpu().numpy().astype(int).tolist() if result.boxes is not None else [],
                "names": result.names,
                "shape": list(result.orig_shape),
            }
            output.append(result_dict)
        
        # Return as JSON string
        return {"results_json": json.dumps(output[0] if output else {})}

In [82]:
import mlflow.pyfunc
import os
import tempfile

# Create a simpler module file
module_content = '''
import json
import numpy as np
import os
from PIL import Image
import pandas as pd

# Try to import YOLO without loading any models yet
try:
    from ultralytics import YOLO
except ImportError:
    pass  # We'll handle this during loading

def load_image(image_source):
    """Load image from various input types"""
    if isinstance(image_source, str) and os.path.isfile(image_source):
        # Load from file path
        return np.array(Image.open(image_source))
    elif isinstance(image_source, (list, np.ndarray)):
        # Convert list or numpy array
        return np.array(image_source)
    elif isinstance(image_source, dict) and "image" in image_source:
        # Extract from dict
        return load_image(image_source["image"])
    elif isinstance(image_source, pd.DataFrame) and "image" in image_source.columns:
        # Extract from DataFrame
        return load_image(image_source["image"].iloc[0])
    else:
        # Return as is, let YOLO handle it
        return image_source

class YOLOModel:
    def __init__(self, model_path):
        self.model_path = model_path
        self._model = None
    
    @property
    def model(self):
        if self._model is None:
            from ultralytics import YOLO
            self._model = YOLO(self.model_path)
        return self._model
    
    def predict(self, data):
        # Load the image data
        image = load_image(data)
        
        # Run prediction
        results = self.model.predict(image)
        
        # Convert to JSON string
        output = []
        for result in results:
            result_dict = {
                "boxes": result.boxes.xyxy.cpu().numpy().tolist() if result.boxes is not None else [],
                "scores": result.boxes.conf.cpu().numpy().tolist() if result.boxes is not None else [],
                "classes": result.boxes.cls.cpu().numpy().astype(int).tolist() if result.boxes is not None else [],
                "names": result.names,
                "shape": list(result.orig_shape) if hasattr(result, "orig_shape") else [],
            }
            output.append(result_dict)
        
        # Return as JSON string
        return {"results_json": json.dumps(output[0] if output else {})}

def _load_pyfunc(model_path):
    """Load function that MLflow will call"""
    model = YOLOModel(model_path)
    return model
'''

# Create a temporary file for the module
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
    f.write(module_content)
    module_path = f.name

# Log the model without an input example
mlflow.pyfunc.log_model(
    artifact_path="model",
    loader_module=os.path.basename(module_path).split('.')[0],
    data_path=model_path,
    code_paths=[module_path],
    # Skip input_example for now
    pip_requirements=[
        "mlflow>=2.0.0",
        "ultralytics>=8.0.0",
        "torch>=1.7.0",
        "numpy>=1.18.0",
        "pillow>=7.0.0",
        "pandas>=1.0.0"
    ]
)

# Clean up the temp file
os.unlink(module_path)

print("Model logged successfully! The artifacts are saved.")



Model logged successfully! The artifacts are saved.


In [85]:
import mlflow
import mlflow.pyfunc
import os
import pandas as pd
from mlflow.models import ModelSignature
from mlflow.types import Schema, ColSpec

# Step 1: Create the wrapper module file
module_path = "yolo_wrapper.py"

with open(module_path, "w") as f:
    f.write('''
import json
import numpy as np
import os
from PIL import Image
import pandas as pd

# Try to import YOLO without loading any models yet
try:
    from ultralytics import YOLO
except ImportError:
    pass  # We'll handle this during loading

def load_image(image_source):
    """Load image from various input types"""
    if isinstance(image_source, str) and os.path.isfile(image_source):
        # Load from file path
        return np.array(Image.open(image_source))
    elif isinstance(image_source, (list, np.ndarray)):
        # Convert list or numpy array
        return np.array(image_source)
    elif isinstance(image_source, dict) and "image" in image_source:
        # Extract from dict
        return load_image(image_source["image"])
    elif isinstance(image_source, pd.DataFrame) and "image" in image_source.columns:
        # Extract from DataFrame
        return load_image(image_source["image"].iloc[0])
    else:
        # Return as is, let YOLO handle it
        return image_source

class YOLOModel:
    def __init__(self, model_path):
        self.model_path = model_path
        self._model = None
    
    @property
    def model(self):
        if self._model is None:
            from ultralytics import YOLO
            self._model = YOLO(self.model_path)
        return self._model
    
    def predict(self, data):
        # Load the image data
        image = load_image(data)
        
        # Run prediction
        results = self.model.predict(image)
        
        # Convert to JSON string
        output = []
        for result in results:
            result_dict = {
                "boxes": result.boxes.xyxy.cpu().numpy().tolist() if result.boxes is not None else [],
                "scores": result.boxes.conf.cpu().numpy().tolist() if result.boxes is not None else [],
                "classes": result.boxes.cls.cpu().numpy().astype(int).tolist() if result.boxes is not None else [],
                "names": result.names,
                "shape": list(result.orig_shape) if hasattr(result, "orig_shape") else [],
            }
            output.append(result_dict)
        
        # Return as JSON string
        return {"results_json": json.dumps(output[0] if output else {})}

def _load_pyfunc(model_path):
    """Load function that MLflow will call"""
    model = YOLOModel(model_path)
    return model
''')

# Step 2: Define model schemas using the correct approach
input_schema = Schema([ColSpec(type="string", name="image")])
output_schema = Schema([ColSpec(type="string", name="results_json")])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# Step 3: Create a simple example input matching the signature
example_input = pd.DataFrame({"image": ["path_to_image.jpg"]})

# Make sure you have an active run
if mlflow.active_run() is None:
    mlflow.start_run()

# Step 4: Log the model with proper signature
model_path = "/phoenix/mlflow/111502198059668542/5f9b23608b6d4c8c9b0ddb45e833cde5/artifacts/weights/best.pt"

mlflow.pyfunc.log_model(
    artifact_path="model",
    loader_module="yolo_wrapper",
    data_path=model_path,
    code_paths=[module_path],
    signature=signature,
    input_example=example_input,
    pip_requirements=[
        "mlflow>=2.0.0",
        "ultralytics>=8.0.0",
        "torch>=1.7.0",
        "numpy>=1.18.0",
        "pillow>=7.0.0",
        "pandas>=1.0.0"
    ]
)

# Step 5: Get the current run ID
run_id = mlflow.active_run().info.run_id

# Step 6: Register the model with the MLflow Model Registry
model_name = "YOLOv11_Model"  # Choose a name for your model
model_version = mlflow.register_model(
    model_uri=f"runs:/{run_id}/model",
    name=model_name
)

print(f"Model registered as {model_name} version {model_version.version}")

# End the run
mlflow.end_run()




  "dataframe_split": {
    "columns": [
      "image"
    ],
    "data": [
      [
        "path_to_image.jpg"
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: path_to_image.jpg does not exist


Model registered as YOLOv11_Model version 1


Successfully registered model 'YOLOv11_Model'.
Created version '1' of model 'YOLOv11_Model'.


MlflowException: The run 851a1d7d5be44c00a63fd3f3031fa53a must be in 'active' lifecycle_stage.

In [None]:
loaded_pyfunc_model = mlflow.pyfunc.load_model(model_uri=best_model_path)

In [None]:
prediction = loaded_pyfunc_model.predict("../temp/pdfimg.jpeg")
for p in prediction:
    print(p)

In [89]:
model_name = "YOLOv11_Model"
loaded_model = mlflow.pyfunc.load_model(f"models:/{model_name}/1")  # Load version 1

# Create a proper test input (you need a real image path)
test_input = pd.DataFrame({"image": ["../temp/pdfimg.jpeg"]})  # Replace with an actual image path

# You can test the model (if you have a valid image)
results = loaded_model.predict(test_input)
results.crop()



0: 640x512 1 signature, 23.2ms
Speed: 1.7ms preprocess, 23.2ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 512)


AttributeError: 'dict' object has no attribute 'crop'