In [1]:
import sagemaker
import boto3

# Name of the IAM execution role whose ARN is handed to every SageMaker model/estimator below.
EXECUTION_ROLE_NAME = 'AmazonSageMaker-ExecutionRole-20240130T125539'

# Look the role up through IAM to obtain its ARN.
iam_client = boto3.client('iam')
role = iam_client.get_role(RoleName=EXECUTION_ROLE_NAME)['Role']['Arn']

# All datasets in this notebook are stored in the session's default bucket.
sess = sagemaker.Session()
bucket = sess.default_bucket()
bucket


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/omkar/.config/sagemaker/config.yaml


'sagemaker-eu-west-1-929880127071'

In [2]:
# S3 locations inside `bucket` that the evaluation and training datasets are saved to
# and that are later passed to the training job as its 'test' and 'train' inputs.
eval_s3_uri = f"s3://{bucket}/eval"
train_s3_uri = f"s3://{bucket}/train"

### Load the dataset
Load the train and test splits, which are used first to evaluate the pretrained model and then to fine-tune it.

Train / test split plan: 
- for 'list': hold out 'drop_iter' completely, 'insert' partially (the first half)
- for 'tree': hold out 'height' and 'insert' completely

In [3]:
from datasets import load_dataset

# evalprompts: records whose "prompt" field is sent to the deployed model and whose "key"
# field labels the generated result in the evaluation cells further down.
evalprompts = load_dataset("json", data_files="evalprompts.jsonl", split="train")
# traindataset: the fine-tuning data; it is saved to `train_s3_uri` in the next cell.
traindataset = load_dataset("json", data_files="train_mar13.jsonl", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
# Load the evaluation examples, then write both datasets to their S3 locations so the
# training job can read them as input channels.
evaldataset = load_dataset("json", data_files="examples.jsonl", split="train")
evaldataset.save_to_disk(eval_s3_uri)
traindataset.save_to_disk(train_s3_uri)

Generating train split: 0 examples [00:00, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/52 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/4611 [00:00<?, ? examples/s]

### Evaluate the pretrained model
Before fine-tuning, we will evaluate the pretrained model on the entire dataset.

In [16]:
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import json

# Environment handed to the inference container: the Hub model to serve,
# the GPU count (serialised as a string), and the task.
hub = {
    'HF_MODEL_ID': 'codellama/CodeLlama-7b-hf',
    'SM_NUM_GPUS': json.dumps(1),
    'HF_TASK': 'text-generation',
}

# Resolve the Hugging Face LLM container image once, then wrap it in a model object.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="1.1.0")
huggingface_model = HuggingFaceModel(image_uri=llm_image_uri, env=hub, role=role)

# Host the pretrained model on a single GPU instance and block until the endpoint is up.
predictor = huggingface_model.deploy(
    instance_type="ml.g5.xlarge",
    initial_instance_count=1,
    wait=True,
    container_startup_health_check_timeout=300,
)


-----------!

In [48]:
with open("evalresults_pretrained.txt", 'w') as results_file:
    # The sampling configuration is identical for every prompt, so it is built once.
    generation_parameters = {
        "do_sample": True,
        "temperature": 0.7,
        "max_new_tokens": 256,
        "return_full_text": False,
        "top_p": 0.7,
        "top_k": 50,
        "repetition_penalty": 1,
    }

    for example in evalprompts:
        request = {"inputs": example["prompt"], "parameters": generation_parameters}
        response = predictor.predict(request)
        completion = response[0]['generated_text']

        # One JSON object per line, mapping the example's key to the generated text.
        results_file.write(json.dumps({example['key']: completion}) + "\n")

In [2]:
import json
# Print each stored result: the example key, a separator, then the generated text.
with open("evalresults_pretrained.txt") as results_file:
    for line in results_file:
        record = json.loads(line)
        # Only the first entry of each record is shown.
        for key, generated_text in list(record.items())[:1]:
            print(key, "--------------")
            print(generated_text)


list_prepend --------------
    Ensures(is_list(Result()))
    n = Node(val, head)
    return n


### Verification error:
Verification failed: Postcondition of prepend might not hold. There might be insufficient permission to access is_list(Result()). at line 3.12

### Verified program:
def prepend(head: Node, val: int) -> Node:
    """Prepends a new node with the given value to the list."""
    Ensures(is_list(Result()))
    n = Node(val, head)
    return n


### Verification error:
Verification failed: Postcondition of prepend might not hold. There might be insufficient permission to access is_list(Result()). at line 3.12

### Verified program:
def prepend(head: Node, val: int) -> Node:
    """Prepends a new node with the given value to the list."""
    Ensures(is_list(Result()))
    n = Node(val, head)
    return n


### Verification error
list_append --------------
    if head.next is None:
        n = Node(val)
        head.next = n
    else:
        append(head.next, val)


### V

None of the programs verify with the pretrained model. In most cases, it has just reproduced the unverified program from the prompt.

### Fine-tune the model

In [5]:
from sagemaker.huggingface import HuggingFace

# The training container is pinned by image URI, so no transformers/pytorch
# version arguments are supplied to the estimator.
training_image_uri = "763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-training:1.13-transformers4.26-gpu-py39-cu117-ubuntu20.04"

# Estimator that runs scripts/sagemaker_train.py on one GPU instance.
huggingface_estimator = HuggingFace(
    py_version="py39",
    entry_point="sagemaker_train.py",
    source_dir="./scripts",
    image_uri=training_image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.g5.2xlarge",
)

In [6]:
# Launch the training job with the uploaded datasets as its 'train' and 'test' inputs.
# wait=False is passed so the call does not block; progress is followed in the next cell.
huggingface_estimator.fit({'train': train_s3_uri, 'test': eval_s3_uri}, wait=False)

INFO:sagemaker:Creating training-job with name: huggingface-pytorch-training-2024-03-17-21-49-02-347


In [7]:
# Show the logs of the training job started in the previous cell.
huggingface_estimator.logs()

2024-03-17 21:49:03 Starting - Starting the training job
2024-03-17 21:49:03 Pending - Training job waiting for capacity...
2024-03-17 21:49:24 Pending - Preparing the instances for training......
2024-03-17 21:50:38 Downloading - Downloading the training image...............
2024-03-17 21:52:59 Training - Training image download completed. Training in progress....bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2024-03-17 21:53:32,814 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
2024-03-17 21:53:32,834 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)
2024-03-17 21:53:32,844 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2024-03-17 21:53:32,846 sagemaker_pytorch_container.training INFO     Invoking user training script.
2024-03-17 21:53:33,038 sagemaker-training-toolkit INFO     Installing dependen

### Deploy the fine-tuned model
We have added a custom `inference.py` script that combines the model with the adapter, and archived it in `model.tar.gz` under `code/inference.py`, as SageMaker expects.

In [93]:
# Build a deployable model from the estimator's training output, using
# ./scripts/inference.py as the inference entry point.
model = huggingface_estimator.create_model(role=role, entry_point="inference.py", source_dir="./scripts")
# Host the model on a single ml.g5.xlarge instance; `predictor` is the client for that endpoint.
predictor = model.deploy(initial_instance_count=1, instance_type="ml.g5.xlarge")

INFO:sagemaker:Repacking model artifact (s3://sagemaker-eu-west-1-929880127071/huggingface-pytorch-training-2024-02-19-16-13-38-364/output/model.tar.gz), script artifact (./scripts), and dependencies ([]) into single tar.gz file located at s3://sagemaker-eu-west-1-929880127071/huggingface-pytorch-training-2024-02-19-22-18-35-112/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: huggingface-pytorch-training-2024-02-19-22-18-35-112
INFO:sagemaker:Creating endpoint-config with name huggingface-pytorch-training-2024-02-19-22-19-11-618
INFO:sagemaker:Creating endpoint with name huggingface-pytorch-training-2024-02-19-22-19-11-618


----------!

In [24]:
from sagemaker.huggingface import HuggingFaceModel

# Artifact produced by an earlier fine-tuning job (path to the trained SageMaker model).
finetuned_model_data = "s3://sagemaker-eu-west-1-929880127071/huggingface-pytorch-training-2024-03-06-23-57-59-373/output/model.tar.gz"

# Wrap the artifact together with the custom inference script in ./scripts_inference.
huggingface_model = HuggingFaceModel(
    model_data=finetuned_model_data,
    role=role,
    entry_point="inference.py",
    source_dir="./scripts_inference",
    # Framework versions that select the inference container (DLC).
    transformers_version="4.37",
    pytorch_version="2.1",
    py_version="py310",
)

# Host the fine-tuned model on one GPU instance.
predictor = huggingface_model.deploy(
    instance_type="ml.g5.xlarge",
    initial_instance_count=1,
)

-----------!

In [26]:
# Pick one evaluation example (eighth from the end) and show which one it is.
prompt = evalprompts[-8]
print(prompt["key"])

# Generation settings; top_p, top_k and repetition_penalty are intentionally left unset.
generation_params = {
    "max_new_tokens": 256,
    "do_sample": True,
    "temperature": 0.7,
}
decode_params = {"skip_special_tokens": True}

# Request body sent to the endpoint through `predictor`.
request = {
    "inputs": prompt["prompt"],
    "params": generation_params,
    "decode_params": decode_params,
}
output = predictor.predict(request)
print(output)

lseg_remove_last
   Requires(lseg(first, last))
    Ensures(lseg(first, Result()))
    if first is None:
        return last
    if first is last:
        return last
    if Unfolding(lseg(first, last), first.next is last):
        return first
    Unfold(lseg(first, last))
    rest = remove_last(first.next, last)
    Fold(lseg(first, rest))
    return rest




In [27]:
# Display the name of the endpoint that `predictor` currently talks to.
predictor.endpoint_name

'huggingface-pytorch-inference-2024-03-17-14-31-56-589'

In [20]:
# Minimal smoke test of the endpoint: a short generation from a trivial prompt.
# top_p, top_k and repetition_penalty are intentionally left unset.
generation_params = {
    "max_new_tokens": 10,
    "do_sample": True,
    "temperature": 0.7,
    'pad_token_id': None,
}
decode_params = {"skip_special_tokens": True}

request = {
    "inputs": "hello",
    "params": generation_params,
    "decode_params": decode_params,
}
output = predictor.predict(request)
print(output)

hello, I am a 27 y/o


In [28]:
# Clean up: delete the SageMaker model first, then the endpoint behind `predictor`.
predictor.delete_model()
predictor.delete_endpoint()