# Train Custom Model

## SageMaker Roles and Buckets

In [None]:
import sagemaker

# Key prefix under which this notebook keeps its data inside the bucket.
prefix = "sagemaker/gsm8k"

# Session used to resolve the default bucket for this account/region.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

# Execution role that is later handed to the training estimator.
role = sagemaker.get_execution_role()

## Upload Data to SageMaker Bucket

In [None]:
# Push the local dataset file to s3://<bucket>/<prefix>/ and keep the
# returned location so it can be passed to the training job.
inputs = sagemaker_session.upload_data(
    path="gsm8k.jsonl",
    bucket=bucket,
    key_prefix=prefix,
)
print(f"input spec (in this case, just an S3 path): {inputs}")

## Submit a Fine-tuning Job

In [None]:
from sagemaker.pytorch import PyTorch
from pathlib import Path

# Estimator describing the fine-tuning job: what to run, on which runtime
# and hardware, and with which hyperparameters.
estimator = PyTorch(
    # Training script and the local directory it lives in.
    source_dir=f"{Path.cwd()}/src",
    entry_point="fine_tune.py",
    # Framework / Python versions of the training runtime.
    framework_version="2.3.0",
    py_version="py311",
    # Compute and permissions.
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    role=role,
    # Hyperparameters for fine_tune.py; "data-file" names the same file
    # that was uploaded as the training input.
    hyperparameters={
        "epochs": 30,
        "model-id": "unsloth/Llama-3.2-1B-Instruct",
        "lr": 1e-3,
        "data-file": "gsm8k.jsonl",
    },
    # Keep the output artifacts uncompressed (no tarball) in S3.
    disable_output_compression=True,
)

In [None]:
# Start the fine-tuning job, supplying the uploaded S3 location as the
# input named "training".
estimator.fit({"training": inputs})

## Move Model Artifacts for Custom Model Import

In [None]:
# Look up the most recent training job and the S3 location of its artifacts.
last_train_job = estimator.jobs[-1].describe()
artifact_path = last_train_job['ModelArtifacts']['S3ModelArtifacts']

# Derive the object key by stripping the explicit "s3://<bucket>" prefix.
# Searching for the bucket name as a substring (str.find) returns -1 when the
# artifacts live in a different bucket, which silently produces a garbage key
# and makes the later copy step match nothing; fail loudly instead.
expected_bucket_uri = f"s3://{bucket}"
if artifact_path != expected_bucket_uri and not artifact_path.startswith(expected_bucket_uri + "/"):
    raise ValueError(
        f"Model artifacts at {artifact_path!r} are not stored in bucket {bucket!r}"
    )
artifact_key = artifact_path[len(expected_bucket_uri):].lstrip('/')
print(f'Model Artifacts at {artifact_path}')

In [None]:
import boto3
import tarfile

# Ask STS who the current caller is in order to obtain the AWS account id.
sts_client = boto3.client("sts")
account_info = sts_client.get_caller_identity()
account_id = account_info["Account"]

# Destination bucket name is suffixed with the account id.
bucket_name = f"bedrock-custom-model-{account_id}"

In [None]:
# Copy every object stored under the training job's artifact prefix into the
# destination bucket, re-rooted under "fine-tuned-model".
s3 = boto3.resource('s3')
src_bucket = s3.Bucket(bucket)
dst_bucket = s3.Bucket(bucket_name)

# Length of the artifact prefix WITHOUT any trailing "/". If artifact_key is
# reported with a trailing slash (possible for an uncompressed output prefix —
# TODO confirm against the training job description), replacing the whole
# prefix would swallow the separator and yield keys such as
# "fine-tuned-modelconfig.json". Slicing after the slash-less prefix keeps the
# "/" with the remainder and is identical to the old behaviour otherwise.
artifact_root_len = len(artifact_key.rstrip('/'))

for obj in src_bucket.objects.filter(Prefix=artifact_key):
    old_source = {'Bucket': bucket, 'Key': obj.key}
    # Every listed key starts with artifact_key (it is the filter prefix), so
    # the prefix can be swapped by slicing rather than by substring search.
    new_key = 'fine-tuned-model' + obj.key[artifact_root_len:]
    print(f"Copy {obj.key}\n\t-> {new_key}")
    new_obj = dst_bucket.Object(new_key)
    new_obj.copy(old_source)