# Training and Deploying Mistral 7B

In [1]:
!pip install sagemaker

Collecting boto3<2.0,>=1.29.6 (from sagemaker)
  Using cached boto3-1.34.37-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.37 (from boto3<2.0,>=1.29.6->sagemaker)
  Using cached botocore-1.34.37-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3<2.0,>=1.29.6->sagemaker)
  Using cached s3transfer-0.10.0-py3-none-any.whl.metadata (1.7 kB)
Using cached boto3-1.34.37-py3-none-any.whl (139 kB)
Using cached botocore-1.34.37-py3-none-any.whl (11.9 MB)
Using cached s3transfer-0.10.0-py3-none-any.whl (82 kB)
Installing collected packages: botocore, s3transfer, boto3
  Attempting uninstall: botocore
    Found existing installation: botocore 1.31.64
    Uninstalling botocore-1.31.64:
      Successfully uninstalled botocore-1.31.64
  Attempting uninstall: s3transfer
    Found existing installation: s3transfer 0.7.0
    Uninstalling s3transfer-0.7.0:
      Successfully uninstalled s3transfer-0.7.0
  Attempting uninstall: boto3
    Found existi

# Modeling

In [2]:
import sagemaker
import boto3

sess = sagemaker.Session()

#preparing the necessary AWS resources and permissions to ensure 
#SageMaker can access the data it needs and has the permissions to perform operations on behalf of the user.

# sagemaker_session_bucket -> used for uploading data, models and logs
# sagemaker_will_automatically create this bucket if it not exists

sagemaker_session_bucket=None

if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::566086704797:role/service-role/AmazonSageMaker-ExecutionRole-20240123T114211
sagemaker session region: ap-southeast-2


## Hugging Face Deep Learning Container

In [5]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# retrieve the llm image uri
llm_image = get_huggingface_llm_image_uri(
    "huggingface",
    version="1.1.0"
)

# print ecr image uri
print(f"llm image uri: {llm_image}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
llm image uri: 763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04


## Configuration

In [6]:
import json
from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.g5.2xlarge"
n_gpu = 1
health_check_timeout = 300

# Model configuration
config = {
    'HF_MODEL_ID': "mistralai/Mistral-7B-Instruct-v0.2", # model_id for Mistral 7B
    'SM_NUM_GPUS': json.dumps(n_gpu),
    'MAX_INPUT_LENGTH': json.dumps(2048),  # Max length of input text
    'MAX_TOTAL_TOKENS': json.dumps(4096),  # Max length of the generation (including input text)
    'MAX_BATCH_TOTAL_TOKENS': json.dumps(8192),  # Limits the number of tokens that can be processed in parallel during the generation
    'HUGGING_FACE_HUB_TOKEN': json.dumps("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")
    # 'HF_MODEL_QUANTIZE': "bitsandbytes",
}

# HF Model Class
hf_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=config
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Deploy Mistral 7B

In [7]:
# deploy the HuggingFaceModel to Amazon SageMaker
# Creates an endpoint that will contain the model
llm = hf_model.deploy(
  initial_instance_count= 1,
  instance_type= instance_type,
  container_startup_health_check_timeout= health_check_timeout,
)

-----------------*

UnexpectedStatusException: Error hosting endpoint huggingface-pytorch-tgi-inference-2024-02-08-01-18-34-737: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint..

## Structure and configure response

In [10]:
messages = [
  { "role": "system","content": "You are a helpful assistant that will carefully understand a user's request, and correctly provide answers."}
]

instruction = 'what 2 scientists uncovered behavioural economics?'

messages.append({"role": "user", "content": instruction})

### Hyper params

In [9]:
#config response
body_input_data = {
    'inputs':messages,
    'parameters':{
        'do_sample': True,
        'top_p': 0.6,
        'temperature': 0.3,
        'top_k': 50,
        'max_new_tokens': 512,
        'repetition_penalty': 1.03
    }
}

body_input_data_json = json.dumps(input_data)# needs to be in json format to be passed in the future

NameError: name 'prompt' is not defined

### Request the response

In [None]:
import io

#sagemaker runtime client
sagemaker_runtime = boto3.client('sagemaker-runtime')
endpoint = 'huggingface-pytorch-tgi-inference-2024-02-08-00-38-52-468'
content_type = 'application/json'


# Requests inference from AWS SageMaker endpoint
# Model deployed - Mistral 7B
response = sagemaker_runtime.invoke_endpoint(
    EndpointName = endpoint,
    ContentType = content_type,
    Body = body_input_data_json.encode('utf-8')
)

### Parse the response
the response is given in json format.

In [None]:
response_body = response['Body'].read().decode('utf-8')
response_json = json.loads(response_body)

### Extract and Print Response

In [None]:
generated_text = response_json[0]['generated_text']
print(generated_text[len(prompt):])

## Fine tuning

### Preprocessing

In [None]:
# out_path = '/home/sagemaker-user'

In [4]:
# from datasets import load_dataset

# dataset = load_dataset("b-mc2/sql-create-context")

# dataset_splits = {"train": dataset["train"]}
# out_path = get_data_path(data_dir)

# out_path.parent.mkdir(parents=True, exist_ok=True)

# for key, ds in dataset_splits.items():
#     with open(out_path, "w") as f:
#         for item in ds:
#             newitem = {
#                 "input": item["question"],
#                 "context": item["context"],
#                 "output": item["answer"],
#             }
#             f.write(json.dumps(newitem) + "\n")

Downloading readme:   0%|          | 0.00/4.43k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

NameError: name 'get_data_path' is not defined

In [None]:
from datasets import load_dataset

    dataset = load_dataset("b-mc2/sql-create-context")

    dataset_splits = {"train": dataset["train"]}
    out_path = get_data_path(data_dir)

    out_path.parent.mkdir(parents=True, exist_ok=True)

    for key, ds in dataset_splits.items():
        with open(out_path, "w") as f:
            for item in ds:
                newitem = {
                    "input": item["question"],
                    "context": item["context"],
                    "output": item["answer"],
                }
                f.write(json.dumps(newitem) + "\n")