In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download
import torch
import mlflow
import pandas as pd

In [None]:
# Hugging Face Hub repository that holds the model files.
REPO_ID = "cerebras/Cerebras-GPT-111M"

# Fetch a snapshot of the repository; the returned local path is reused by the
# cells below to load the model and to log it as an MLflow artifact.
download_path = snapshot_download(REPO_ID)

In [None]:
# Load tokenizer and weights from the snapshot downloaded above.
tokenizer = AutoTokenizer.from_pretrained(download_path)
model = AutoModelForCausalLM.from_pretrained(download_path)

# Prefer the first CUDA device, but fall back to CPU so the notebook still
# runs on machines without a GPU instead of failing when the pipeline is built.
if torch.cuda.is_available():
    pipeline_device = torch.device(type='cuda', index=0)
else:
    pipeline_device = torch.device('cpu')

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=pipeline_device,
)

In [None]:
# Smoke-test the pipeline on two prompts before wrapping it for MLflow.
prompts = {"prompts": ["Generative AI is", "So, today we are"]}
input_df = pd.DataFrame(prompts)

# do_sample=False disables sampling; no_repeat_ngram_size=2 forbids repeating
# any 2-gram; max_length caps the generated sequence length.
generated_text = pipe(
    input_df["prompts"].tolist(),
    max_length=256,
    do_sample=False,
    no_repeat_ngram_size=2,
)

# Each result is indexable; its first entry carries the 'generated_text' field.
outputs = [candidates[0]['generated_text'] for candidates in generated_text]
output_df = pd.DataFrame(outputs, columns=["outputs"])
print(output_df)

In [None]:
# Derive the model signature from the example input/output frames built above.
signature = mlflow.models.signature.infer_signature(input_df, output_df)

# Artifacts logged with the model; the wrapper reads the path back through
# context.artifacts["cached_model_path"].
artifacts = dict(cached_model_path=download_path)

class LLMWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around a Hugging Face text-generation pipeline.

    The tokenizer and model are loaded from the ``cached_model_path`` artifact
    in ``load_context``; ``predict`` generates one continuation for each entry
    of the ``prompts`` column of its input.
    """

    def load_context(self, context):
        """Build the generation pipeline from the logged artifacts.

        Args:
            context: MLflow model context. ``context.artifacts`` must contain
                the key ``"cached_model_path"`` pointing at the model files.
        """
        # Imported locally so the wrapper does not rely on notebook-level imports.
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
        import torch

        model_path = context.artifacts["cached_model_path"]
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)

        # Prefer the first CUDA device, but fall back to CPU so the model can
        # still be loaded on hosts without a GPU.
        if torch.cuda.is_available():
            device = torch.device(type='cuda', index=0)
        else:
            device = torch.device('cpu')

        # Use the objects loaded from the artifact above. Referring to the
        # notebook globals `model` / `tokenizer` here would break (or silently
        # depend on notebook state) when the model is loaded in a fresh process.
        self.pipe = pipeline(
            task="text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=device,
        )

    def predict(self, context, model_input):
        """Generate text for every prompt in ``model_input``.

        Args:
            context: MLflow model context (not used here).
            model_input: Object indexable by ``"prompts"`` whose values can be
                converted with ``.values.tolist()`` (e.g. a pandas DataFrame).

        Returns:
            list: The ``'generated_text'`` of the first result for each prompt,
            in input order.
        """
        generated_text = self.pipe(
            model_input["prompts"].values.tolist(),
            max_length=256,
            do_sample=False,
            no_repeat_ngram_size=2,
        )
        # NOTE(review): the signature logged in this notebook is inferred from a
        # DataFrame with an "outputs" column, while this method returns a plain
        # list. The list return is kept for compatibility; confirm which shape
        # consumers expect.
        return [text[0]['generated_text'] for text in generated_text]

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Placeholders: replace with the identifiers of the target Azure ML workspace.
subscription_id = "SUBSCRIPTION_ID"
resource_group = "RESOURCE_GROUP"
workspace = "AML_WORKSPACE_NAME"

# Client for the workspace, authenticated through DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Look up the workspace's MLflow tracking URI and point MLflow at it so the
# runs below are recorded in Azure ML.
azureml_mlflow_uri = ml_client.workspaces.get(ml_client.workspace_name).mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_mlflow_uri)

# Select the experiment that the following runs are logged under.
exp = mlflow.set_experiment("chapter6-llm-notebook")

In [None]:
# Artifact sub-path the model is logged under; later cells rebuild the
# runs:/ URI from this name and the run id.
mlflow_model_dir = 'llm_model'

# Log the wrapper, its artifacts and the inferred signature in a new run.
# 'environment.yaml' is resolved relative to the current working directory.
with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        artifact_path=mlflow_model_dir,
        python_model=LLMWrapper(),
        artifacts=artifacts,
        signature=signature,
        conda_env='environment.yaml',
    )

In [None]:
# Load the model back from the run that was just logged to check it round-trips.
logged_model_uri = f"runs:/{run.info.run_id}/{mlflow_model_dir}/"
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model_uri)

In [None]:
# Score the reloaded model on sample prompts.
sample_prompts = {"prompts": ["Generative AI is", "So, today we are"]}
# Build the frame from `sample_prompts` defined on the previous line, not from
# the `prompts` variable left over from an earlier cell.
sample_input_df = pd.DataFrame(sample_prompts)

print(loaded_model.predict(sample_input_df))

In [None]:
# Register the logged model under a fixed name in the model registry.
registration_source_uri = f"runs:/{run.info.run_id}/{mlflow_model_dir}/"
mlflow.register_model(model_uri=registration_source_uri, name='chapter6-cerebras-gpt')