In [1]:
import mlflow

mlflow.set_tracking_uri("http://mlserver:8080")

In [2]:
from datasets import load_dataset

dataset = load_dataset("yelp_review_full")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

In [4]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
small_validation_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000,2000))

In [5]:
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
trained_model = AutoModelForSequenceClassification.from_pretrained("models/bert_base_cased/yelp", num_labels=5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
model_name = "fine-turned-bert"

mlflow.set_experiment("Yelp")

run_name = "fine-tuned-bert"

with mlflow.start_run():
  mlflow.log_param("model_name", model_name)
  #mlflow.transformers.log_model(trained_model, tokenizer, model=model_name)

  # Load a test dataset with labels
  test_data = small_validation_dataset["text"]
  test_labels = small_validation_dataset["label"]

  # Evaluate the model performance on the test dataset
  mlflow.evaluate(model=trained_model, data=test_data, targets=test_labels,model_type="classifier")
  


MlflowException: If the `data` argument is a numpy array, it must be a 2-dimensional array, with the second dimension representing the number of features. If the `data` argument is a list, each of its elements must be a feature array of the numpy array or list, and all elements must have the same length.

In [None]:
from transformers import pipeline

pipe_base = pipeline(
    "text-classification",
    model=base_model,
    batch_size=8,
    tokenizer=tokenizer,
    device=0,
    top_k=1
)

pipe_trained = pipeline(
    "text-classification",
    model=trained_model,
    batch_size=8,
    tokenizer=tokenizer,
    device=0,
    top_k=1
)

In [None]:
for n in range(10,20):
    print("Review Text",small_validation_dataset[n]["text"])
    print("Model prediction (trained): ", pipe_trained(small_validation_dataset[n]["text"]))
    print("Model prediction (base)", pipe_base(small_validation_dataset[n]["text"]))
    print("Label ", small_validation_dataset[n]["label"])

In [None]:
small_validation_dataset

In [None]:
small_validation_dataset["features"]