In [9]:
!pip install --upgrade --force-reinstall --no-cache boto3
!pip install --upgrade --force-reinstall --no-cache botocore
!pip install --upgrade --force-reinstall --no-cache awscli

Collecting boto3
  Downloading boto3-1.34.84-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.84 (from boto3)
  Downloading botocore-1.34.84-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.1-py3-none-any.whl.metadata (1.7 kB)
Collecting python-dateutil<3.0.0,>=2.1 (from botocore<1.35.0,>=1.34.84->boto3)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3!=2.2.0,<3,>=1.25.4 (from botocore<1.35.0,>=1.34.84->boto3)
  Downloading urllib3-2.2.1-py3-none-any.whl.metadata (6.4 kB)
Collecting six>=1.5 (from python-dateutil<3.0.0,>=2.1->botocore<1.35.0,>=1.34.84->boto3)
  Downloading six-1.16.0-py2.py3-none-any.whl.metadata (1.8 kB)
Downloading boto3-1.34.84-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/1

In [60]:
import boto3
import io
import time
import json
import base64
import numpy as np
from PIL import Image
from ipywidgets import Dropdown
import matplotlib.pyplot as plt
import sagemaker
from sagemaker.utils import name_from_base

sess = sagemaker.Session()
bucket = sess.default_bucket() # Set a default S3 bucket

s3_prefix ="titan-finetuning/multi-modal-embedding"
iam_client = boto3.client('iam')
sts_client = boto3.client('sts')
bedrock = boto3.client('bedrock')
bedrock_runtime = boto3.client('bedrock-runtime')

## account info
session = boto3.session.Session()
region = session.region_name
account_id = sts_client.get_caller_identity()["Account"]

%store -r train_jsonl_path
%store -r valid_jsonl_path
%store -r train_data_path
%store -r valid_data_path
    
base_model_id = "amazon.titan-embed-image-v1:0"
role_name = name_from_base(f"FineTuning-{s3_prefix.split('/')[-1]}")
s3_bedrock_ft_access_policy=f"{role_name}-policy"
customization_role = f"arn:aws:iam::{account_id}:role/{role_name}"

## Fine tune job preparation - Creating role and policies requirements

We will now prepare the necessary role for the fine-tune job. That includes creating the policies required to run customization jobs with Amazon Bedrock.

### Create Trust relationship
This JSON object defines the trust relationship that allows the bedrock service to assume a role that will give it the ability to talk to other required AWS services. The conditions set restrict the assumption of the role to a specfic account ID and a specific component of the bedrock service (model_customization_jobs)

In [61]:
# This JSON object defines the trust relationship that allows the bedrock service to assume a role that will give it the ability to talk to other required AWS services. The conditions set restrict the assumption of the role to a specfic account ID and a specific component of the bedrock service (model_customization_jobs)
ROLE_DOC = f"""{{
    "Version": "2012-10-17",
    "Statement": [
        {{
            "Effect": "Allow",
            "Principal": {{
                "Service": "bedrock.amazonaws.com"
            }},
            "Action": "sts:AssumeRole",
            "Condition": {{
                "StringEquals": {{
                    "aws:SourceAccount": "{account_id}"
                }},
                "ArnEquals": {{
                    "aws:SourceArn": "arn:aws:bedrock:{region}:{account_id}:model-customization-job/*"
                }}
            }}
        }}
    ]
}}
"""

In [62]:
ACCESS_POLICY_DOC = f"""{{
    "Version": "2012-10-17",
    "Statement": [
        {{
            "Effect": "Allow",
            "Action": [
                "s3:AbortMultipartUpload",
                "s3:DeleteObject",
                "s3:PutObject",
                "s3:GetObject",
                "s3:GetBucketAcl",
                "s3:GetBucketNotification",
                "s3:ListBucket",
                "s3:PutBucketNotification"
            ],
            "Resource": [
                "arn:aws:s3:::{bucket}",
                "arn:aws:s3:::{bucket}/*"
            ]
        }}
    ]
}}"""

### Create IAM role and attach policies

Let's now create the IAM role with the created trust policy and attach the s3 policy to it

In [63]:
response = iam_client.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=ROLE_DOC,
    Description="Role for Bedrock to access S3 for finetuning",
)

In [64]:
role_arn = response["Role"]["Arn"]
response = iam_client.create_policy(
    PolicyName=s3_bedrock_ft_access_policy,
    PolicyDocument=ACCESS_POLICY_DOC,
)
policy_arn = response["Policy"]["Arn"]
iam_client.attach_role_policy(
    RoleName=role_name,
    PolicyArn=policy_arn,
)

{'ResponseMetadata': {'RequestId': 'cf5bd65d-bb4d-401f-8b09-a284c5e58f59',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 25 Apr 2024 22:34:01 GMT',
   'x-amzn-requestid': 'cf5bd65d-bb4d-401f-8b09-a284c5e58f59',
   'content-type': 'text/xml',
   'content-length': '212'},
  'RetryAttempts': 0}}

### > Create a Customization Job

Now that we have all the requirements in place, let's create the fine-tuning job with the Titan Image Generator model.

To do so, we need to set the model **hyperparameters** for `stepCount`, `batchSize` and `learningRate` and provide the path to your training data

In [65]:
# Select the customization type from "FINE_TUNING" or "CONTINUED_PRE_TRAINING". 
customization_type = "FINE_TUNING"

# Specify the roleArn for your customization job
customization_role = role_arn

# Create a customization job name
customization_job_name = name_from_base(s3_prefix.split("/")[-1])

# Create a customized model name for your fine-tuned Llama2 model
custom_model_name = customization_job_name

# Define the hyperparameters for fine-tuning Llama2 model
hyper_parameters = {
    "epochCount": "auto",
    "batchSize": "576",
    "learningRate": "0.00005",
}

# Specify your data path for training, validation(optional) and output
training_data_config = {"s3Uri": train_jsonl_path}


output_data_config = {"s3Uri": f"s3://{bucket}/titan-finetuning"}

validation_data_config = {
    'validators': [
        {
            's3Uri': valid_jsonl_path
        },
    ]
}

# Create the customization job
bedrock.create_model_customization_job(
    customizationType=customization_type,
    jobName=customization_job_name,
    customModelName=custom_model_name,
    roleArn=customization_role,
    baseModelIdentifier=base_model_id,
    hyperParameters=hyper_parameters,
    trainingDataConfig=training_data_config,
    validationDataConfig=validation_data_config,
    outputDataConfig=output_data_config
)

{'ResponseMetadata': {'RequestId': '1ae6187d-a678-4cc9-a7be-63820ea6b32c',
  'HTTPStatusCode': 201,
  'HTTPHeaders': {'date': 'Thu, 25 Apr 2024 22:34:05 GMT',
   'content-type': 'application/json',
   'content-length': '118',
   'connection': 'keep-alive',
   'x-amzn-requestid': '1ae6187d-a678-4cc9-a7be-63820ea6b32c'},
  'RetryAttempts': 0},
 'jobArn': 'arn:aws:bedrock:us-west-2:374212921621:model-customization-job/amazon.titan-embed-image-v1:0/l177wqjcpshe'}

### Waiting until customization job is completed
Once the customization job is finished, you can check your existing custom model(s) and retrieve the modelArn of your fine-tuned model.

<div class=\"alert alert-block alert-warning\">
    <b>Warning:</b> The model customization job can take hours to run. With 5000 steps, 0.000001 learning rate, 64 of batch size and 60 images, it takes around 4 hours to complete
</div>

In [None]:
# check model customization status
status = bedrock.list_model_customization_jobs(
    nameContains=customization_job_name
)["modelCustomizationJobSummaries"][0]["status"]
while status == 'InProgress':
    time.sleep(50)
    status = bedrock.list_model_customization_jobs(
        nameContains=customization_job_name
    )["modelCustomizationJobSummaries"][0]["status"]
    print(status)

InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress

Once Complete, get the new `customModelARN`

In [67]:
custom_model_arn = bedrock.list_model_customization_jobs(
    nameContains=customization_job_name
)["modelCustomizationJobSummaries"][0]["customModelArn"]

### Create Provisioned Model Throughput
Note: Creating provisioned throughput will take around 20-30mins to complete.
You will need to create provisioned throughput to be able to evaluate the model performance. You can do so through the console or use the following api call.

In [68]:
custom_model_name = f"{customization_job_name}-v0"

# Create the provision throughput job and retrieve the provisioned model id
provisioned_model_id = bedrock.create_provisioned_model_throughput(
    modelUnits=1,
    # create a name for your provisioned throughput model
    provisionedModelName=custom_model_name, 
    modelId=custom_model_arn
)['provisionedModelArn']

In [69]:
%%time
# check provisioned throughput job status
import time
status_provisioning = bedrock.get_provisioned_model_throughput(provisionedModelId = provisioned_model_id)['status'] 
while status_provisioning == 'Creating':
    time.sleep(60)
    status_provisioning = bedrock.get_provisioned_model_throughput(provisionedModelId=provisioned_model_id)['status']
    print(status_provisioning)

Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
InService
CPU times: user 46.3 ms, sys: 16.4 ms, total: 62.8 ms
Wall time: 11min 2s


### Running model invocation experiments
We will now run some model experiments using the bedrock-runtime client with the invoke_model function to invoke both fine-tuned and pre-trained models.

To invoke the provisioned custom model, notice you will need to run the previous step (create provisioned throughput) before proceeding.

In [70]:
body = json.dumps({
    "inputText": "monkeys swinging from vines in jungle",
    # "taskType": "TEXT_IMAGE",
    "embeddingConfig": {
        "outputEmbeddingLength": 1024
    }
})

response = bedrock_runtime.invoke_model(
    body=body,
    modelId=provisioned_model_id,
    accept="application/json",
    contentType="application/json"
)
response_body = json.loads(response.get("body").read())
response_body.get("embedding")

[0.030883789,
 0.028442383,
 -0.001914978,
 0.0022277832,
 0.001625061,
 0.040283203,
 0.15234375,
 0.013000488,
 0.0029296875,
 0.01550293,
 0.036621094,
 -0.033935547,
 -0.0095825195,
 -0.037353516,
 -0.0045776367,
 0.02722168,
 -0.020996094,
 0.022705078,
 0.026977539,
 -0.0075683594,
 0.021362305,
 -0.025390625,
 0.040771484,
 0.03173828,
 0.022583008,
 -0.014099121,
 0.041748047,
 -0.01574707,
 0.044189453,
 0.010620117,
 -0.012084961,
 -0.010131836,
 0.045654297,
 0.018188477,
 -0.0010299683,
 0.013122559,
 0.04711914,
 -0.0061950684,
 0.019897461,
 0.018920898,
 0.038085938,
 0.0010604858,
 -0.036376953,
 0.038330078,
 -0.040039062,
 0.028320312,
 0.03564453,
 0.0003452301,
 0.053466797,
 0.04663086,
 0.0047912598,
 0.001876831,
 0.029174805,
 -0.040771484,
 0.025756836,
 0.055419922,
 -0.033691406,
 -0.016723633,
 -0.030151367,
 0.015136719,
 0.0015716553,
 -0.029174805,
 0.044677734,
 -0.041748047,
 0.0390625,
 0.03491211,
 0.025512695,
 0.060791016,
 -0.033691406,
 -0.0354003

In [71]:
%store provisioned_model_id

Stored 'provisioned_model_id' (str)
