<a href="https://colab.research.google.com/github/olonok69/LLM_Notebooks/blob/main/mlflow/custom/FineTuned_Vit_save_to_MLFLOW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ViT Transformer

The Vision Transformer (ViT) is a transformer encoder model (BERT-like) pretrained on a large collection of images in a supervised fashion, namely ImageNet-21k, at a resolution of 224x224 pixels.

Images are presented to the model as a sequence of fixed-size patches (resolution 16x16), which are linearly embedded. One also adds a [CLS] token to the beginning of a sequence to use it for classification tasks. One also adds absolute position embeddings before feeding the sequence to the layers of the Transformer encoder.

Note that this model does not provide any fine-tuned heads, as these were zeroed by Google researchers. However, the model does include the pre-trained pooler, which can be used for downstream tasks (such as image classification).

By pre-training the model, it learns an inner representation of images that can then be used to extract features useful for downstream tasks: if you have a dataset of labeled images for instance, you can train a standard classifier by placing a linear layer on top of the pre-trained encoder. One typically places a linear layer on top of the [CLS] token, as the last hidden state of this token can be seen as a representation of an entire image.


https://huggingface.co/google/vit-base-patch16-224-in21k


### Paper
https://arxiv.org/pdf/2010.11929



### Training data
The ViT model was pretrained on ImageNet-21k, a dataset consisting of 14 million images and 21k classes. https://www.image-net.org/

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install mlflow   optimum open_clip_torch --quiet

! pip install psutil pynvml -q

In [None]:
# Transformers installation

! pip install transformers[torch] -q
! pip install accelerate -U -q
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

In [None]:
! pip install onnxruntime -q
! pip install optimum[onnxruntime] -q

In [None]:
path_model ="/content/drive/MyDrive/models/nsfw_pytorch"

In [None]:
from google.colab import userdata

from transformers import ViTImageProcessor, ViTForImageClassification
import os
import sys
import platform
from PIL import Image

In [None]:


from datasets import load_dataset
from functools import partial
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, Trainer, TrainingArguments, BitsAndBytesConfig, \
    DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
from torch import cuda, bfloat16
import transformers
import openai

import torch.nn as nn
from google.colab import userdata
import mlflow
import numpy as np

In [None]:

from google.colab import output
output.enable_custom_widget_manager()

from transformers.utils import logging
from transformers import pipeline

In [None]:
logging.set_verbosity_error()

os.environ["TRANSFORMERS_VERBOSITY"] = "error"

In [None]:


# Pick the active CUDA device when one is available; otherwise run on the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'
device


In [None]:
MLFLOW_TRACKING_URI="databricks"
# Specify the workspace hostname and token
DATABRICKS_HOST="https://adb-2467347032368999.19.azuredatabricks.net/"
DATABRICKS_TOKEN=userdata.get('DATABRCKS_TTOKEN')

In [None]:


# Export the MLflow/Databricks settings as environment variables, but only
# for the names that are not already present in the environment.
os.environ.update(
    {
        name: value
        for name, value in (
            ("MLFLOW_TRACKING_URI", MLFLOW_TRACKING_URI),
            ("DATABRICKS_HOST", DATABRICKS_HOST),
            ("DATABRICKS_TOKEN", DATABRICKS_TOKEN),
        )
        if name not in os.environ
    }
)

In [None]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [None]:

mlflow.set_experiment("/Users/pepe@kk.com/nsfw_pytorch")


In [None]:
mlflow.end_run()

In [None]:

processor = ViTImageProcessor.from_pretrained(path_model)
model = ViTForImageClassification.from_pretrained(path_model)
model = model.to(device)

In [None]:
image_path = "/content/drive/MyDrive/data/beach.jpg"
# Open the file inside a context manager so its handle is released right
# away; convert() returns a separate RGB copy that stays usable afterwards.
with Image.open(image_path) as _source_image:
    image = _source_image.convert("RGB")

In [None]:
np.array(image).shape

In [None]:
np.array(image)

In [None]:
pipe = pipeline( model=model, image_processor=processor, task= "image-classification")

In [None]:
pipe.predict(image.resize((224, 224)))

In [None]:
transformers.__version__

In [None]:
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from optimum.onnxruntime import ORTQuantizer
from optimum.pipelines import pipeline
import mlflow
from mlflow.models.signature import infer_signature
from mlflow.pyfunc import PythonModel
import pprint

In [None]:
pipe.predict(Image.fromarray(np.array(image.resize((224, 224)))))

In [None]:
from mlflow.models import infer_signature

model_output= [{'label': 'neutral', 'score': 0.9923934936523438},
 {'label': 'drawings', 'score': 0.3168586194515228},
 {'label': 'sexy', 'score': 0.27099496126174927},
 {'label': 'porn', 'score': 0.22660772502422333},
 {'label': 'hentai', 'score': 0.13095062971115112}]
infer_signature(model_input=np.array(image.resize((224, 224))),model_output=model_output)

In [None]:
class NSFW_Classifier(PythonModel):
    """MLflow ``PythonModel`` wrapper around a ViT image-classification pipeline.

    The classifier and its image processor are loaded from the ``"snapshot"``
    artifact; predictions are made for images referenced by file path.
    """

    def load_context(self, context):
        """
        Load the ViT classification model and its image processor from the
        ``"snapshot"`` artifact and build the image-classification pipeline.
        """
        from transformers import ViTImageProcessor, ViTForImageClassification
        from transformers import pipeline

        snapshot = context.artifacts["snapshot"]
        self.model = ViTForImageClassification.from_pretrained(snapshot)
        # The attribute name is kept for compatibility; it holds the ViT image
        # processor, not a text tokenizer.
        self.tokenizer = ViTImageProcessor.from_pretrained(snapshot)
        self.pipe = pipeline(
            model=self.model,
            image_processor=self.tokenizer,
            task="image-classification",
        )

    def predict(self, context, model_input, params=None):
        """
        Classify the image(s) referenced by ``model_input["path_image"]``.

        ``model_input["path_image"]`` may be a single path or an iterable of
        paths (for example a column holding one path per row).

        Returns the pipeline output for the image when exactly one path is
        given, otherwise a list with one pipeline output per path.
        """
        # Imported locally so the method does not depend on notebook globals.
        import os
        from PIL import Image

        path_image = model_input["path_image"]
        if isinstance(path_image, (str, os.PathLike)):
            paths = [path_image]
        else:
            paths = list(path_image)

        results = []
        for path in paths:
            # Use the path supplied by the caller and close the file as soon
            # as the RGB copy has been created.
            with Image.open(path) as source:
                image = source.convert("RGB")
            results.append(self.pipe.predict(image))

        # A single path keeps the original return shape.
        return results[0] if len(results) == 1 else results


In [None]:
import numpy as np
import pandas as pd

import mlflow
from mlflow.models.signature import ModelSignature
from mlflow.types import ColSpec, DataType, ParamSchema, ParamSpec, Schema

from mlflow.models import infer_signature

# Hand-written sample of the classifier output (one label/score dict per
# class), used only to infer the output part of the signature.
model_output= [{'label': 'neutral', 'score': 0.9923934936523438},
 {'label': 'drawings', 'score': 0.3168586194515228},
 {'label': 'sexy', 'score': 0.27099496126174927},
 {'label': 'porn', 'score': 0.22660772502422333},
 {'label': 'hentai', 'score': 0.13095062971115112}]

# Sample input used for signature inference: a bare image path string.
model_input= "/content/drive/MyDrive/data/beach.jpg"

# NOTE(review): the signature is inferred from a bare string, while
# input_example below and NSFW_Classifier.predict() use a mapping keyed by
# "path_image" -- confirm the inferred input schema matches what callers send.
signature = infer_signature(model_input=model_input,model_output=model_output)



# Define input example
input_example = {"path_image":"/content/drive/MyDrive/data/beach.jpg"}

In [None]:
signature

In [None]:
input_example

In [None]:
import datetime
now = datetime.datetime.now()
now.strftime("%Y-%m-%d_%H:%M:%S")

In [None]:
# Get the current base version of torch that is installed, without specific version modifiers
torch_version = torch.__version__.split("+")[0]

In [None]:

# Start an MLflow run and log the NSFW_Classifier pyfunc wrapper together
# with its signature and input example.
now = datetime.datetime.now()

description= """Log NSFW fine tunned model with mlflow"""
with mlflow.start_run(run_name=f"nsfw_log_{now.strftime('%Y-%m-%d_%H:%M:%S')}", description=description) as run:
    model_info = mlflow.pyfunc.log_model(
        "nsfw_image_classification",
        python_model=NSFW_Classifier(),
        # NOTE: the "snapshot" key is critical: NSFW_Classifier.load_context()
        # reads the model directory from context.artifacts["snapshot"].
        artifacts={"snapshot": "/content/drive/MyDrive/models/nsfw_pytorch"},

        # Pin torch and transformers to the versions used in this notebook;
        # pillow is listed without a version pin.
        pip_requirements=[
            f"torch=={torch_version}",
            f"transformers=={transformers.__version__}",
            "pillow",


        ],
        input_example=input_example,
        signature=signature,
    )

In [None]:
run.to_dictionary()

In [None]:

model_info.model_uri

In [None]:
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

In [None]:
loaded_model

In [None]:

time1=  datetime.datetime.now()
response = loaded_model.predict({"path_image":"/content/drive/MyDrive/data/beach.jpg"})
time2=  datetime.datetime.now()
print(time2-time1)

In [None]:

pprint.pprint(response)

In [None]:
result = mlflow.register_model(
    model_info.model_uri, "vit_nsfw"
)

In [None]:
from mlflow import MlflowClient

client = MlflowClient()

In [None]:
client.get_model_version(name="vit_nsfw", version=2)

In [None]:
import mlflow.pyfunc

# Name and version of the registered model to load.
# NOTE(review): the version is hard-coded to 2; presumably the version created
# by the register_model() call earlier is exposed on its return value
# (`result`) -- confirm which version is intended.
model_name = "vit_nsfw"
model_version = 2

# Load the registered model through a models:/<name>/<version> URI.
# This rebinds `model`, which earlier in the notebook held the ViT model.
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")

# Run a prediction on the sample image path.
model.predict({"path_image":"/content/drive/MyDrive/data/beach.jpg"})

In [None]:
f"models:/{model_name}/{model_version}"