### Install dependencies

In [None]:
#import evaluate
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict, ClassLabel
from transformers import AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import pipeline
import numpy as np
import evaluate
import torch
import mlflow

### Running Lazy huggingface pipeline

In [2]:
from transformers import pipeline

ner = pipeline(task="ner", model="NYTK/named-entity-recognition-nerkor-hubert-hungarian")
input_text = "A Kovácsné Nagy Erzsébet nagyon jól érzi magát a Nokiánál, azonban a Németországból érkezett Kovács Péter nehezen boldogul a beilleszkedéssel."

print(ner(input_text, aggregation_strategy="simple"))


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity_group': 'PER', 'score': 0.9886593, 'word': 'Kovácsné Nagy Erzsébet', 'start': 2, 'end': 24}, {'entity_group': 'ORG', 'score': 0.9960921, 'word': 'Nokián', 'start': 49, 'end': 55}, {'entity_group': 'LOC', 'score': 0.99254215, 'word': 'Németország', 'start': 69, 'end': 80}, {'entity_group': 'PER', 'score': 0.99941576, 'word': 'Kovács Péter', 'start': 93, 'end': 105}]


### Running Huggingface Transformer (Pre trained BERT)

In [3]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

pre_trained_model="dslim/bert-base-NER"

#model="nlpso/m3_hierarchical_ner_ocr_ptrn_cmbert_iob2"

tokenizer = AutoTokenizer.from_pretrained(pre_trained_model)
model = AutoModelForTokenClassification.from_pretrained(pre_trained_model)

nlp = pipeline("ner", model=model, tokenizer=tokenizer)
example = "My name is Wolfgang and I live in Berlin"

ner_results = nlp(example)
print(ner_results)


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'entity': 'B-PER', 'score': 0.9990139, 'index': 4, 'word': 'Wolfgang', 'start': 11, 'end': 19}, {'entity': 'B-LOC', 'score': 0.999645, 'index': 9, 'word': 'Berlin', 'start': 34, 'end': 40}]


In [4]:
tokenizer(example)

{'input_ids': [101, 1422, 1271, 1110, 14326, 1105, 146, 1686, 1107, 3206, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

### IOB to IOB2 convertor

In [5]:
"""
IOB1:  O I I B I
IOB2:  O B I B I
"""

from typing import List

def iob2(tags: List[str]):
    """
    Check that tags have a valid IOB format.
    Tags in IOB1 format are converted to IOB2.
    """
    for i, tag in enumerate(tags):
        if tag == 'O':
            continue
        split = tag.split('-')
        if len(split) != 2 or split[0] not in ['I', 'B']:
            return False
        if split[0] == 'B':
            continue
        elif i == 0 or tags[i - 1] == 'O':  # conversion IOB1 to IOB2
            tags[i] = 'B' + tag[1:]
        elif tags[i - 1][1:] == tag[1:]:
            continue
        else:  # conversion IOB1 to IOB2
            tags[i] = 'B' + tag[1:]
    return True

### MLFLOW initiations

In [6]:
# Ensure MLflow directory exists
mlruns_dir = '/Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/mlruns'
if not os.path.exists(mlruns_dir):
    os.makedirs(mlruns_dir)

mlruns_trash_dir = '/Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/mlruns/.trash'
if not os.path.exists(mlruns_trash_dir):
    os.makedirs(mlruns_trash_dir)
    

import mlflow

mlflow.set_tracking_uri('file:///Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/mlruns')

#experiment_id = mlflow.create_experiment('NER_Casestudy_Experiment')

# Create or get the experiment
experiment_name = "NER_Casestudy_Experiment2"
mlflow.set_experiment(experiment_name)



<Experiment: artifact_location='file:///Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/mlruns/871896506229020652', creation_time=1719024964319, experiment_id='871896506229020652', last_update_time=1719024964319, lifecycle_stage='active', name='NER_Casestudy_Experiment2', tags={}>

### Reading out input data

In [7]:

# Load the dataset with a specified encoding
file_path = '/Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/data/ner_dataset.csv'  # Replace with your file path
data = pd.read_csv(file_path, encoding='ISO-8859-1')

### Trucating for quick experimentation

In [8]:
data=data.head(1000)

In [9]:
data.isnull().sum()

Sentence #    957
Word            0
POS             0
Tag             0
dtype: int64

### Dropping records having null values for our features

In [10]:

# Drop rows with NaN values
data = data.dropna(subset=['Word','POS','Tag'])


In [11]:
data.isnull().sum()

Sentence #    957
Word            0
POS             0
Tag             0
dtype: int64

### Looking at the Tag Counts

In [None]:
from collections import Counter as C
C(data['Tag'])

### Restructuring the data in desired format

In [13]:

# Group the data by sentences
data['Sentence #'] = data['Sentence #'].ffill()  # Fill forward to propagate sentence IDs
sentences = data.groupby('Sentence #').apply(lambda s: [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),
                                                                                      s['POS'].values.tolist(),
                                                                                      s['Tag'].values.tolist())])
# Convert the groupby object to a list of sentences
sentences = [s for s in sentences]

# Split the dataset into training, validation, and test sets (20% for test)
train_sentences, test_sentences = train_test_split(sentences, test_size=0.20, random_state=42)
train_sentences, val_sentences = train_test_split(train_sentences, test_size=0.25, random_state=42)  # 0.25 * 0.80 = 0.20


In [14]:
train_sentences

[[('Poor', 'JJ', 'O'),
  ('residents', 'NNS', 'O'),
  ('often', 'RB', 'O'),
  ('complain', 'VBP', 'O'),
  ('they', 'PRP', 'O'),
  ('have', 'VBP', 'O'),
  ('been', 'VBN', 'O'),
  ('cheated', 'VBN', 'O'),
  ('out', 'IN', 'O'),
  ('of', 'IN', 'O'),
  ('the', 'DT', 'O'),
  ('huge', 'JJ', 'O'),
  ('riches', 'NNS', 'O'),
  ('extracted', 'VBN', 'O'),
  ('from', 'IN', 'O'),
  ('their', 'PRP$', 'O'),
  ('tribal', 'JJ', 'O'),
  ('lands', 'NNS', 'O'),
  ('-', ':', 'O'),
  ('where', 'WRB', 'O'),
  ('the', 'DT', 'O'),
  ('bulk', 'NN', 'O'),
  ('of', 'IN', 'O'),
  ('Nigeria', 'NNP', 'B-gpe'),
  ("'s", 'POS', 'O'),
  ('2.3', 'CD', 'O'),
  ('million', 'CD', 'O'),
  ('barrels', 'NNS', 'O'),
  ('of', 'IN', 'O'),
  ('petroleum', 'NN', 'O'),
  ('are', 'VBP', 'O'),
  ('pumped', 'VBN', 'O'),
  ('daily', 'RB', 'O'),
  ('.', '.', 'O')],
 [('The', 'DT', 'O'),
  ('European', 'NNP', 'B-org'),
  ('Union', 'NNP', 'I-org'),
  (',', ',', 'O'),
  ('with', 'IN', 'O'),
  ('U.S.', 'NNP', 'B-gpe'),
  ('backing', 'NN', 'O

In [15]:
train_sentences[0][0:5]

[('Poor', 'JJ', 'O'),
 ('residents', 'NNS', 'O'),
 ('often', 'RB', 'O'),
 ('complain', 'VBP', 'O'),
 ('they', 'PRP', 'O')]

### Converting into Hugging Face desired format

In [16]:

# Convert to Hugging Face Datasets format
def convert_to_dict(sentences):
    words = [[word for word, pos, tag in sentence] for sentence in sentences]
    pos_tags = [[pos for word, pos, tag in sentence] for sentence in sentences]
    ner_tags = [[tag for word, pos, tag in sentence] for sentence in sentences]
    return {"tokens": words, "pos_tags": pos_tags, "ner_tags": ner_tags}

train_data = convert_to_dict(train_sentences)
val_data = convert_to_dict(val_sentences)
test_data = convert_to_dict(test_sentences)

# Create a dataset dictionary
dataset_dict = DatasetDict({
    'train': Dataset.from_dict(train_data),
    'validation': Dataset.from_dict(val_data),
    'test': Dataset.from_dict(test_data)
})



In [17]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags'],
        num_rows: 25
    })
    validation: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags'],
        num_rows: 9
    })
    test: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags'],
        num_rows: 9
    })
})

In [18]:
dataset_dict["train"][0]["tokens"]

['Poor',
 'residents',
 'often',
 'complain',
 'they',
 'have',
 'been',
 'cheated',
 'out',
 'of',
 'the',
 'huge',
 'riches',
 'extracted',
 'from',
 'their',
 'tribal',
 'lands',
 '-',
 'where',
 'the',
 'bulk',
 'of',
 'Nigeria',
 "'s",
 '2.3',
 'million',
 'barrels',
 'of',
 'petroleum',
 'are',
 'pumped',
 'daily',
 '.']

In [19]:
dataset_dict["train"][0]["ner_tags"]

['O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-gpe',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O']

In [20]:
dataset_dict["train"].features['ner_tags']

Sequence(feature=Value(dtype='string', id=None), length=-1, id=None)

### Numbering the labels so that network can accept them

In [21]:
# Define unique tags
unique_tags = list(set(tag for doc in dataset_dict['train']['ner_tags'] for tag in doc))
tag2id = {tag: id for id, tag in enumerate(unique_tags)}
id2tag = {id: tag for tag, id in tag2id.items()}

In [22]:
tag2id,id2tag

({'B-org': 0,
  'O': 1,
  'I-org': 2,
  'B-gpe': 3,
  'I-art': 4,
  'I-per': 5,
  'B-per': 6,
  'B-geo': 7,
  'B-tim': 8,
  'I-geo': 9,
  'B-art': 10},
 {0: 'B-org',
  1: 'O',
  2: 'I-org',
  3: 'B-gpe',
  4: 'I-art',
  5: 'I-per',
  6: 'B-per',
  7: 'B-geo',
  8: 'B-tim',
  9: 'I-geo',
  10: 'B-art'})

In [23]:
# Tokenizer
# model_checkpoint = "dslim/bert-base-NER"
# model_checkpoint="nlpso/m3_hierarchical_ner_ref_cmbert_iob2"
tokenizer = AutoTokenizer.from_pretrained(pre_trained_model)

In [24]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
    
    #if labelize==True:
    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(tag2id[label[word_idx]])
            else:
                label_ids.append(tag2id[label[word_idx]] if True else -100)
            previous_word_idx = word_idx
        labels.append(label_ids)
        #labels.append(label)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs
    
# global labelize
# labelize=False    
tokenized_datasets = dataset_dict.map(tokenize_and_align_labels, batched=True)



Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

In [25]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
    validation: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 9
    })
    test: Dataset({
        features: ['tokens', 'pos_tags', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 9
    })
})

### Configuring BitsandBytes and Lora (For future usage)

In [26]:
from transformers import BitsAndBytesConfig
import torch


nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16,
)

In [27]:
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)



### Data collection, model training & logging

In [28]:
# Data collator
data_collator = DataCollatorForTokenClassification(tokenizer)

device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# Model
model = AutoModelForTokenClassification.from_pretrained(pre_trained_model, num_labels=len(unique_tags), ignore_mismatched_sizes=True,
                                                        #quantization_config=nf4_config
                                                        )
model.classifier = torch.nn.Linear(model.classifier.in_features, len(unique_tags))
model.num_labels = len(unique_tags)

# Metrics
metric = evaluate.load("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [id2tag[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [id2tag[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }


# Training arguments
args = TrainingArguments(
    output_dir='./results',
    #"test-ner",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    use_mps_device=True,
    logging_dir='./logs',
    save_total_limit=2,
)

# # Trainer
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    #peft_config=peft_config
)

# Trainer
# trainer = SFTTrainer(
#     model,
#     args,
#     train_dataset=tokenized_datasets["train"],
#     eval_dataset=tokenized_datasets["validation"],
#     data_collator=data_collator,
#     tokenizer=tokenizer,
#     compute_metrics=compute_metrics,
#     peft_config=peft_config
# )



# Start MLflow run
with mlflow.start_run() as run:
    # Train the model
    trainer.train()



#     # Log metrics to MLflow
#     mlflow.log_metrics(results)

    


"""

    # Log the model artifact
    trainer.save_model(os.path.join("results", "model"))
    tokenizer.save_pretrained(os.path.join("results", "model"))

    mlflow.log_artifacts("results/model")


    # Log other artifacts if needed
    # For example, logging training args
    with open("results/training_args.bin", "wb") as f:
        torch.save(args, f)
    mlflow.log_artifact("results/training_args.bin")

"""




Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dslim/bert-base-NER and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([9]) in the checkpoint and torch.Size([11]) in the model instantiated
- classifier.weight: found shape torch.Size([9, 768])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.785125732421875, 'eval_precision': 0.02631578947368421, 'eval_recall': 0.041666666666666664, 'eval_f1': 0.03225806451612904, 'eval_accuracy': 0.7488789237668162, 'eval_runtime': 0.1992, 'eval_samples_per_second': 45.181, 'eval_steps_per_second': 5.02, 'epoch': 1.0}
{'train_runtime': 3.1709, 'train_samples_per_second': 7.884, 'train_steps_per_second': 0.631, 'train_loss': 2.152837038040161, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))


'\n\n    # Log the model artifact\n    trainer.save_model(os.path.join("results", "model"))\n    tokenizer.save_pretrained(os.path.join("results", "model"))\n\n    mlflow.log_artifacts("results/model")\n\n\n    # Log other artifacts if needed\n    # For example, logging training args\n    with open("results/training_args.bin", "wb") as f:\n        torch.save(args, f)\n    mlflow.log_artifact("results/training_args.bin")\n\n'

### Running predictions

In [29]:
# Predict with the model
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple",device=device)
sample_text = "Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge."
ner_results = ner_pipeline(sample_text)
print(ner_results)

[{'entity_group': 'LABEL_8', 'score': 0.15944915, 'word': 'Hugging', 'start': 0, 'end': 7}, {'entity_group': 'LABEL_1', 'score': 0.1302425, 'word': 'Face Inc', 'start': 8, 'end': 16}, {'entity_group': 'LABEL_9', 'score': 0.18290514, 'word': '. is a', 'start': 16, 'end': 22}, {'entity_group': 'LABEL_1', 'score': 0.20455617, 'word': 'company based', 'start': 23, 'end': 36}, {'entity_group': 'LABEL_2', 'score': 0.19775945, 'word': 'in', 'start': 37, 'end': 39}, {'entity_group': 'LABEL_3', 'score': 0.15408538, 'word': 'New', 'start': 40, 'end': 43}, {'entity_group': 'LABEL_7', 'score': 0.14170787, 'word': 'York City', 'start': 44, 'end': 53}, {'entity_group': 'LABEL_9', 'score': 0.19538349, 'word': '.', 'start': 53, 'end': 54}, {'entity_group': 'LABEL_1', 'score': 0.2051506, 'word': 'Its', 'start': 55, 'end': 58}, {'entity_group': 'LABEL_9', 'score': 0.19571926, 'word': 'headquarters', 'start': 59, 'end': 71}, {'entity_group': 'LABEL_1', 'score': 0.18712822, 'word': 'are in', 'start': 72, 

### Function to convert numbered labels into NER tags using lookup

In [30]:
# Function to replace entity_group with corresponding key from the lookup
def replace_entity_group(data, lookup):
    # Create a reverse lookup dictionary to map indices to their labels
    reverse_lookup = {v: k for k, v in lookup.items()}
    # Update each dictionary in the list
    for item in data:
        entity_group_value = int(item['entity_group'].split('_')[1]) - 1  # Extract the number and adjust to zero-indexed
        item['entity_group'] = reverse_lookup.get(entity_group_value, item['entity_group'])
    return data

lookup=tag2id
data=ner_results

# Replace entity_group in the data
updated_data = replace_entity_group(data, lookup)

# Print the updated data
print(updated_data)

[{'entity_group': 'B-geo', 'score': 0.15944915, 'word': 'Hugging', 'start': 0, 'end': 7}, {'entity_group': 'B-org', 'score': 0.1302425, 'word': 'Face Inc', 'start': 8, 'end': 16}, {'entity_group': 'B-tim', 'score': 0.18290514, 'word': '. is a', 'start': 16, 'end': 22}, {'entity_group': 'B-org', 'score': 0.20455617, 'word': 'company based', 'start': 23, 'end': 36}, {'entity_group': 'O', 'score': 0.19775945, 'word': 'in', 'start': 37, 'end': 39}, {'entity_group': 'I-org', 'score': 0.15408538, 'word': 'New', 'start': 40, 'end': 43}, {'entity_group': 'B-per', 'score': 0.14170787, 'word': 'York City', 'start': 44, 'end': 53}, {'entity_group': 'B-tim', 'score': 0.19538349, 'word': '.', 'start': 53, 'end': 54}, {'entity_group': 'B-org', 'score': 0.2051506, 'word': 'Its', 'start': 55, 'end': 58}, {'entity_group': 'B-tim', 'score': 0.19571926, 'word': 'headquarters', 'start': 59, 'end': 71}, {'entity_group': 'B-org', 'score': 0.18712822, 'word': 'are in', 'start': 72, 'end': 78}, {'entity_group

In [31]:
ner_results[0]

{'entity_group': 'B-geo',
 'score': 0.15944915,
 'word': 'Hugging',
 'start': 0,
 'end': 7}

In [32]:
tag2id

{'B-org': 0,
 'O': 1,
 'I-org': 2,
 'B-gpe': 3,
 'I-art': 4,
 'I-per': 5,
 'B-per': 6,
 'B-geo': 7,
 'B-tim': 8,
 'I-geo': 9,
 'B-art': 10}

### Signature Definition

In [33]:
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# Predict with the model
tuned_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple",device=device)
input_example = "Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge."
# ner_results = ner_pipeline(sample_text)
# print(ner_results)




# Infer the model signature, including a representative input, the expected output, and the parameters that we would like to be able to override at inference time.
signature = mlflow.models.infer_signature(
    ["This is a test!", "And this is also a test."],
    mlflow.transformers.generate_signature_output(
        tuned_pipeline, ["This is a test response!", "So is this."]
    ),
    #params=model_config,
)


### Logging the pipeline to the existing training run

In [34]:
#Log the pipeline to the existing training run
with mlflow.start_run(run_id=run.info.run_id):
    model_info = mlflow.transformers.log_model(
        transformers_model=tuned_pipeline,
        #artifact_path="fine_tuned",
        artifact_path="model",
        signature=signature,
        input_example=["Pass in a string", "And have it mark as spam or not."],
        #model_config=model_config,
    )

  model_info = mlflow.transformers.log_model(
  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


### Loading and testing

In [35]:
# Load our saved model in the native transformers format
loaded = mlflow.transformers.load_model(model_uri=model_info.model_uri)

# Define a test example that we expect to be classified as spam
validation_text = (
    "Want to learn how to make MILLIONS with no effort? Click HERE now! See for yourself! Guaranteed to make you instantly rich! "
    "Don't miss out you could be a winner!"
)

validation_text=("Hello! there")

# validate the performance of our fine-tuning
loaded(validation_text)

  loaded = mlflow.transformers.load_model(model_uri=model_info.model_uri)
2024/06/23 01:45:54 INFO mlflow.transformers: 'runs:/e8b2a54edc2c46a4b4f83ec458021a4d/model' resolved as 'file:///Users/lukishyadav/Desktop/engineering/case_studies/ner_casestudy/mlruns/871896506229020652/e8b2a54edc2c46a4b4f83ec458021a4d/artifacts/model'


[{'entity': 'LABEL_1',
  'score': 0.18289715,
  'index': 1,
  'word': 'Hello',
  'start': 0,
  'end': 5},
 {'entity': 'LABEL_1',
  'score': 0.18411833,
  'index': 2,
  'word': '!',
  'start': 5,
  'end': 6},
 {'entity': 'LABEL_1',
  'score': 0.17381199,
  'index': 3,
  'word': 'there',
  'start': 7,
  'end': 12}]

### FInal inference testing of invocations api

In [37]:
import requests

headers = {
    # Already added when you pass json=
    # 'Content-Type': 'application/json',
}

json_data = {
    'inputs': [
        'Hello',
    ],
}

response = requests.post('http://en.wikipedia.org/curl', headers=headers, json=json_data)
response = requests.post('http://127.0.0.1:5000/invocations', headers=headers, json=json_data)
response.text

''