# Deploying a Masked Language Model

DistilBERT is a language model derived from Google's BERT, which can be used for masked language modeling and next sentence prediction tasks.

### 1. Define a project and a `huggingfaceserve` function 

In [1]:
import digitalhub as dh

# Project handle named "llm"; the later cells create functions and look up models through it.
project = dh.get_or_create_project("llm")

In [2]:
# Declare the serving function for the base checkpoint referenced by the
# `huggingface://` path; the model is exposed under the name "mymodel".
llm_function = project.new_function(
    name="llm_masked",
    kind="huggingfaceserve",
    model_name="mymodel",
    path="huggingface://distilbert/distilbert-base-uncased",
)

### 2. Serve the model

LLMs have particular hardware requirements. When serving a model within the platform, you can use one of the preconfigured profiles, which define the resources that will be allocated.

**NOTE**: when requesting a GPU node for the service, it may take some time for the service to start.

In [None]:
# Start the serving run with the "1xa100" resource profile.
# NOTE(review): wait=True presumably blocks until the service is up - confirm.
llm_run = llm_function.run(
    action="serve",
    profile="1xa100",
    wait=True,
)

### 3. Try the model

Note that BERT answers reflect its training on English Wikipedia and BookCorpus.

In [30]:
model_name = "mymodel"

# Request body: a single BYTES input named "input-0" carrying the masked sentence.
# It is called `payload` rather than `json` so that it does not reuse the name
# of the standard-library module and `json=payload` below reads unambiguously.
payload = {
    "inputs": [
        {
            "name": "input-0",
            "shape": [1],
            "datatype": "BYTES",
            "data": ["Cats are [MASK]."],
        },
    ]
}

# Last expression of the cell: the decoded JSON response is displayed as its output.
llm_run.invoke(model_name=model_name, json=payload).json()

{'model_name': 'mymodel',
 'model_version': None,
 'id': 'fde416ef-1b04-455e-8f2d-780c2a68b8af',
 'parameters': None,
 'outputs': [{'name': 'output-0',
   'shape': [1],
   'datatype': 'BYTES',
   'parameters': None,
   'data': ['nocturnal']}]}

# Adapt the Model to the Movie Reviews Domain

### 1. Fine-tune the model

Define and run a training function that will create a new model trained on the IMDb dataset.

In [None]:
%%writefile "train_model.py"
import os
from datasets import load_dataset
from digitalhub_runtime_python import handler
from transformers import AutoModelForMaskedLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling

@handler()
def train(project):
    """Fine-tune DistilBERT for masked language modeling on IMDb and log it.

    The function loads the ``distilbert/distilbert-base-uncased`` checkpoint
    and the ``imdb`` dataset, tokenizes the reviews, regroups the tokens into
    fixed-size chunks, trains on a down-sampled split, saves the model and the
    tokenizer to the local ``model`` directory and logs that directory to the
    project as the ``test_llm_model`` model.

    Args:
        project: Project object (presumably injected by the ``handler``
            decorator); only its ``log_model`` method is used here.
    """
    # Function-scope import so the script's top-level imports stay untouched.
    from itertools import chain

    model_id = "distilbert/distilbert-base-uncased"
    model = AutoModelForMaskedLM.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    dataset = load_dataset("imdb")

    def tokenize_function(examples):
        """Tokenize a batch of reviews, adding word ids for fast tokenizers."""
        result = tokenizer(examples["text"])
        if tokenizer.is_fast:
            result["word_ids"] = [result.word_ids(i) for i in range(len(result["input_ids"]))]
        return result

    tokenized_datasets = dataset.map(
        tokenize_function, batched=True, remove_columns=["text", "label"]
    )

    chunk_size = 128

    def group_texts(examples):
        """Concatenate a batch of tokenized texts and cut it into chunks."""
        # chain.from_iterable flattens in linear time, whereas sum(lists, [])
        # re-copies the accumulated list on every addition (quadratic).
        concatenated_examples = {
            k: list(chain.from_iterable(examples[k])) for k in examples.keys()
        }
        total_length = len(next(iter(concatenated_examples.values())))
        # Drop the trailing remainder that does not fill a whole chunk.
        total_length = (total_length // chunk_size) * chunk_size
        result = {
            k: [t[i : i + chunk_size] for i in range(0, total_length, chunk_size)]
            for k, t in concatenated_examples.items()
        }
        # The labels start as a copy of the inputs; masking is applied later
        # by the data collator.
        result["labels"] = result["input_ids"].copy()
        return result

    lm_datasets = tokenized_datasets.map(group_texts, batched=True)

    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

    # Train on a down-sampled split: 10,000 chunks, plus 10% of that for evaluation.
    train_size = 10_000
    test_size = int(0.1 * train_size)

    downsampled_dataset = lm_datasets["train"].train_test_split(
        train_size=train_size, test_size=test_size, seed=42
    )

    batch_size = 64
    # One log entry per pass over the training split.
    logging_steps = len(downsampled_dataset["train"]) // batch_size

    training_args = TrainingArguments(
        output_dir=f"{model_id}-finetuned-imdb",
        overwrite_output_dir=True,
        eval_strategy="epoch",
        learning_rate=2e-5,
        weight_decay=0.01,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        fp16=True,
        logging_steps=logging_steps,
    )

    # NOTE(review): recent transformers releases deprecate `tokenizer=` in
    # favour of `processing_class=`; confirm against the installed version.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=downsampled_dataset["train"],
        eval_dataset=downsampled_dataset["test"],
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    trainer.train()

    save_dir = "model"
    # exist_ok avoids the race-prone "check, then create" sequence.
    os.makedirs(save_dir, exist_ok=True)

    trainer.save_model(save_dir)
    tokenizer.save_pretrained(save_dir)

    project.log_model(
        name="test_llm_model",
        kind="huggingface",
        base_model=model_id,
        source=save_dir,
    )

In [6]:
# Packages passed to the function as its `requirements`.
train_requirements = [
    "hf_xet",
    "datasets",
    "transformers[torch]",
    "torch",
    "torchvision",
    "accelerate",
]

# Register the `train` handler from train_model.py as a Python function of the project.
train_func = project.new_function(
    name="train_model",
    kind="python",
    python_version="PYTHON3_10",
    code_src="train_model.py",
    handler="train",
    requirements=train_requirements,
)

In [None]:
# Run the training function as a job on the "1xa100" resource profile.
# NOTE(review): wait=True presumably blocks until the job finishes - confirm.
train_run = train_func.run(
    action="job",
    profile="1xa100",
    wait=True,
)

### 2. Serve the fine-tuned model

Create and run the serving function (this will create a new version of the function created during the first step).

In [9]:
# Look up the model that train_model.py logged as "test_llm_model" in the "llm" project.
model = dh.get_model("test_llm_model", project="llm")

In [10]:
# Declare "llm_masked" again, this time serving the fine-tuned model from
# the path stored in the logged model's spec.
llm_function = project.new_function(
    name="llm_masked",
    kind="huggingfaceserve",
    model_name="test_llm_model",
    path=model.spec.path,
)

**NOTE**: at the time of writing, specifying a volume was a temporary workaround to overcome directory space limitations and might not be necessary anymore.

In [None]:
# "empty_dir" volume mounted at /shared with a 10Gi size limit.
shared_volume = {
    "name": "volumellm",
    "volume_type": "empty_dir",
    "mount_path": "/shared",
    "spec": {"sizeLimit": "10Gi"},
}

# Start serving the fine-tuned model on the "1xa100" profile with the volume attached.
llm_run_finetuned = llm_function.run(
    action="serve",
    profile="1xa100",
    volumes=[shared_volume],
    wait=True,
)

### 3. Test the fine-tuned model

In [32]:
model_name_finetuned = "test_llm_model"

# Request body: a single BYTES input named "input-0" carrying the masked sentence.
# It is called `payload` rather than `json` so that it does not reuse the name
# of the standard-library module and `json=payload` below reads unambiguously.
payload = {
    "inputs": [
        {
            "name": "input-0",
            "shape": [1],
            "datatype": "BYTES",
            "data": ["This [MASK] was great."],
        }
    ]
}

# Last expression of the cell: the decoded JSON response is displayed as its output.
llm_run_finetuned.invoke(model_name=model_name_finetuned, json=payload).json()

{'model_name': 'test_llm_model',
 'model_version': None,
 'id': '53893f51-a958-4101-bd66-86e7aadd0147',
 'parameters': None,
 'outputs': [{'name': 'output-0',
   'shape': [1],
   'datatype': 'BYTES',
   'parameters': None,
   'data': ['movie']}]}

### 4. Create a Streamlit app

Write the model name and the run key in an environment file that will be accessible by the Streamlit app.

In [13]:
# Persist the served model's name and the key of its run as KEY=value lines in .env.
with open(".env", "w") as env_file:
    env_file.writelines(
        (
            f"model_name={model_name_finetuned}\n",
            f"model_run_key={llm_run_finetuned.key}",
        )
    )

In [None]:
pip install streamlit dotenv

In [None]:
%%writefile "app.py"
import digitalhub as dh
import streamlit as st
import os
import json
from pathlib import Path
from dotenv import load_dotenv

st.title("Chat Demo")

# read the variables stored in the local .env file into the environment
env_path = Path(".env")
load_dotenv(dotenv_path=env_path)

# resolve the model name and its run once, then keep them in the session state
if "model_config" not in st.session_state:
    served_model_name = os.environ["model_name"]
    served_run = dh.get_run(os.environ["model_run_key"])
    st.session_state["model_config"] = {
        "model_name": served_model_name,
        "model_run": served_run,
    }

# the chat history lives in the session state so that it survives reruns
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# replay every stored message inside a chat box labelled with its role
for past_message in st.session_state["messages"]:
    role, content = past_message["role"], past_message["content"]
    with st.chat_message(role):
        st.markdown(content)

def get_completion(prompt):
    """Ask the served model to fill the ``[MASK]`` placeholder in *prompt*.

    Args:
        prompt: Sentence typed by the user.

    Returns:
        The first element of the first output's ``data`` list, or a fixed
        apology string when the response body is not valid UTF-8/JSON or
        does not have the expected structure.
    """
    # Named `payload` to avoid shadowing the `input` builtin.
    payload = {
        "inputs": [
            {
                "name": "input-0",
                "shape": [1],
                "datatype": "BYTES",
                "data": [prompt],
            }
        ]
    }

    model_name = st.session_state.model_config["model_name"]
    run = st.session_state.model_config["model_run"]

    # Gather the raw byte chunks and decode once: decoding chunk by chunk
    # fails when a multi-byte UTF-8 character is split across two chunks.
    raw_body = b"".join(run.invoke(model_name=model_name, json=payload))

    try:
        return json.loads(raw_body.decode("utf-8"))["outputs"][0]["data"][0]
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError covers both invalid UTF-8 and invalid JSON; the other
        # exceptions cover a response without the expected outputs/data shape.
        return "Sorry, something went wrong."
            
#show the chat input box; prompt holds the submitted text, if any
prompt = st.chat_input("Give me a sentence to fill (use [MASK] as placeholder)")
if prompt:
    #remember the user's message, then render it as a chat message
    user_entry = {"role": "user", "content": prompt}
    st.session_state.messages.append(user_entry)
    with st.chat_message("user"):
        st.markdown(prompt)

    #query the model and render its answer as the assistant's message
    with st.chat_message("assistant"):
        response = get_completion(prompt)
        st.markdown(response)
    #remember the model's answer as well
    assistant_entry = {"role": "assistant", "content": response}
    st.session_state.messages.append(assistant_entry)

In [None]:
# Shell escape: start the Streamlit server for app.py.
! streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.3.101:8501
  External URL: http://172.213.107.20:8501



If you are running this notebook inside a Coder workspace, navigate to your workspace and click on "Open ports" to find a link to the Streamlit app.