## Imports

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.39.1-py3-none-any.whl.metadata (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m320.2 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Using cached huggingface_hub-0.21.4-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Using cached tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.19.3->transformers)
  Using cached fsspec-2024.3.1-py3-none-any.whl.metadata (6.8 kB)
Downloading 

In [3]:
import mlflow
from mlflow import MlflowClient
from mlflow.types.schema import Schema, ColSpec
from mlflow.types import ParamSchema, ParamSpec
from mlflow.models import ModelSignature
from transformers import pipeline
import torch
import json
import os

2024-03-22 19:41:36.838065: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Model

In [4]:
# Model identifier handed to transformers' pipeline() in the next cell
# (presumably a Hugging Face Hub repo id — the cell output shows its files being downloaded).
MODEL = "morgana-rodrigues/bert_qa"

In [5]:
# Question-answering pipeline built from MODEL; device=-1 keeps inference on the CPU.
qa = pipeline(task="question-answering", model=MODEL, device=-1)

config.json:   0%|          | 0.00/582 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [6]:
class DistilBERTModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around a Hugging Face question-answering pipeline.

    ``load_context`` rebuilds the pipeline from the ``"model"`` artifact, and
    ``predict`` answers the first (context, question) pair of the model input.
    ``log_model`` saves the weights locally and logs them, together with a
    signature and pinned pip requirements, to the active MLflow run.
    """

    def _preprocess(self, inputs):
        """Return the first context and the first question from ``inputs``.

        Args:
            inputs: Mapping-like object indexable by ``'context'`` and
                ``'question'``, each of which is indexable by ``0``.

        Returns:
            Tuple ``(context, question)``.
        """
        context = inputs['context'][0]
        question = inputs['question'][0]
        print("pre processing", context, question)
        return context, question

    def load_context(self, context):
        """Build the question-answering pipeline from the logged ``"model"`` artifact.

        Args:
            context: MLflow model context; ``context.artifacts["model"]`` is
                passed to ``pipeline`` as the model location.
        """
        self.model = pipeline(
            'question-answering',
            model=context.artifacts["model"],
            device=-1  # -1 means running on CPU
        )

    def predict(self, context, model_input, params=None):
        """Answer the question contained in ``model_input``.

        Args:
            context: MLflow model context (unused here).
            model_input: Input holding ``'context'`` and ``'question'`` entries.
            params: Optional inference parameters. Defaults to ``None`` so the
                method can also be called without params, matching the
                ``PythonModel.predict`` base signature.

        Returns:
            Whatever the underlying pipeline returns for the pair.
        """
        # NOTE(review): ``params`` (e.g. ``show_score``) is not read, and the
        # pipeline's full output is returned even though the logged signature
        # declares a single ``answer`` column — confirm the intended contract.
        in_ctx, question = self._preprocess(model_input)
        output = self.model(context=in_ctx, question=question)
        return output

    @classmethod
    def log_model(cls, model_name, trainer=None, pipeline=None, demo_folder="demo"):
        """Save the model locally and log it to the active MLflow run as a pyfunc model.

        Args:
            model_name: Local directory the weights are saved to; also used as
                the artifact path of the logged model.
            trainer: Optional object exposing ``save_model(path)``; takes
                precedence over ``pipeline`` when given.
            pipeline: Optional object exposing ``save_pretrained(path)``; used
                only when ``trainer`` is ``None``.
            demo_folder: Local path logged as the ``"demo"`` artifact.
        """
        input_schema = Schema(
            [
                ColSpec("string", "context"),
                ColSpec("string", "question"),
            ]
        )
        output_schema = Schema(
            [
                ColSpec("string", "answer")
            ]
        )

        params_schema = ParamSchema(
            [
                ParamSpec("show_score", "boolean", False)
            ]
        )

        # ModelSignature only describes inputs, outputs and params. The demo
        # folder is shipped through ``artifacts`` below, not through the
        # signature (the previous ``demo_folder=demo`` argument referenced an
        # undefined name).
        signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=params_schema)
        if trainer is not None:
            trainer.save_model(model_name)
        elif pipeline is not None:
            pipeline.save_pretrained(model_name)

        requirements = [
            "transformers==4.37.0",
            "mlflow==2.6.0",
            "numpy==1.24.3",
            "torch==2.0.0",
            "tqdm==4.65.0",
        ]
        mlflow.pyfunc.log_model(
            model_name,
            python_model=cls(),
            artifacts={"model": model_name, "demo": demo_folder},
            signature=signature,
            pip_requirements=requirements
        )

## Model Registry

In [7]:
# Make 'BERT for Q&A' the active experiment; the returned Experiment is shown as the cell output.
mlflow.set_experiment(experiment_name="BERT for Q&A")

2024/03/22 19:42:23 INFO mlflow.tracking.fluent: Experiment with name 'BERT for Q&A' does not exist. Creating a new experiment.


<Experiment: artifact_location='/phoenix/mlflow/601758365276661609', creation_time=1711136543633, experiment_id='601758365276661609', last_update_time=1711136543633, lifecycle_stage='active', name='BERT for Q&A', tags={}>

In [8]:
# Log the QA pipeline as a pyfunc model inside a new run, then register that run's model artifact.
with mlflow.start_run(run_name="BERT_QA") as run:
    print(f"Run's Artifact URI: {run.info.artifact_uri}")
    DistilBERTModel.log_model(model_name="BERT_QA", pipeline=qa)
    mlflow.register_model(
        model_uri=f"runs:/{run.info.run_id}/BERT_QA",
        name="BERT_QA",
    )

Run's Artifact URI: /phoenix/mlflow/601758365276661609/9aeef08ecb3443dc879c64de75431d75/artifacts


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Registered model 'BERT_QA' already exists. Creating a new version of this model...
2024/03/22 19:42:44 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: BERT_QA, version 2
Created version '2' of model 'BERT_QA'.


## Testing the latest registered model

In [9]:
# Look up the newest BERT_QA version in the "None" stage and print it with its signature.
client = mlflow.MlflowClient()
model_metadata = client.get_latest_versions("BERT_QA", stages=["None"])
latest_model_version = model_metadata[0].version
print(
    latest_model_version,
    mlflow.models.get_model_info(f"models:/BERT_QA/{latest_model_version}").signature,
)

2 inputs: 
  ['context': string, 'question': string]
outputs: 
  ['answer': string]
params: 
  ['show_score': boolean (default: False)]



In [10]:
# Load the registered version back through the pyfunc interface and ask it one question.
model = mlflow.pyfunc.load_model(
    model_uri=f"models:/BERT_QA/{latest_model_version}"
)
context = "Marta is mother of John and Amanda"
question = "what is the name of Marta's daugther?"
model.predict(
    {
        "context": [context],
        "question": [question],
    }
)

 - transformers (current: 4.39.1, required: transformers==4.37.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


pre processing ['Marta is mother of John and Amanda'] ["what is the name of Marta's daugther?"]


{'score': 0.6202739477157593, 'start': 28, 'end': 34, 'answer': 'Amanda'}