# Fine-tune Llama 3.2 1B Instruct on GSM8K with Amazon SageMaker

In [None]:
import sagemaker

# Session used for every SageMaker/S3 call below, plus the execution role
# handed to the training job.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Upload location: the session's default bucket, under a GSM8K-specific prefix.
bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/gsm8k"

In [None]:
# Upload the local JSONL file to s3://<bucket>/<prefix>; the value returned by
# upload_data is kept so it can be passed to the estimator as its input.
inputs = sagemaker_session.upload_data(path="gsm8k.jsonl", bucket=bucket, key_prefix=prefix)
print("input spec (in this case, just an S3 path): {}".format(inputs))

In [None]:
# Echo the value returned by upload_data so the uploaded location is visible in the cell output.
inputs

In [None]:
from datasets import load_dataset

# Load the "main" configuration of openai/gsm8k and keep handles to its
# train and test splits.
dataset = load_dataset("openai/gsm8k", "main")
train_set, test_set = dataset["train"], dataset["test"]

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model identifier used to load the tokenizer; the same identifier string is
# also passed to the training job as the "model-id" hyperparameter.
model_id = 'unsloth/Llama-3.2-1B-Instruct'

# Tokenizer for model_id; to_chat() uses its chat template to format examples.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
import json
import pandas as pd

# Parse the local JSONL file: one JSON object per line, each expected to carry
# a 'prompt' and a 'referenceResponse' field.
with open('gsm8k.jsonl', 'r', encoding='utf-8') as f:
    # Stream the file line by line instead of materialising it with readlines(),
    # and skip blank lines (e.g. a trailing newline) that json.loads would reject.
    prompts = [json.loads(line) for line in f if line.strip()]

# Two-column frame: the question text and its reference answer.
df = pd.DataFrame({
    'prompts': [p['prompt'] for p in prompts],
    'answers': [p['referenceResponse'] for p in prompts]
})
df.head()

In [None]:
from datasets import Dataset

# Convert the pandas frame (columns 'prompts' and 'answers') into a `datasets.Dataset`.
training_data = Dataset.from_pandas(df)

def to_chat(prompts):
    """Render a batch of prompt/answer pairs as chat-template text.

    Args:
        prompts: Batch mapping with parallel "prompts" and "answers"
            sequences; element i of each forms one user/assistant exchange.

    Returns:
        A dict with a single "text" key holding one rendered string per pair,
        produced by the module-level tokenizer's chat template (untokenized).
    """
    pairs = zip(prompts["prompts"], prompts["answers"])
    rendered = [
        tokenizer.apply_chat_template(
            [
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ],
            tokenize=False,
        )
        for question, answer in pairs
    ]
    return {"text": rendered}

# Run to_chat over the dataset in batches (batched=True hands it lists of
# prompts/answers); to_chat returns the rendered chats under the "text" key.
training_data = training_data.map(to_chat, batched=True)

In [None]:
from sagemaker.pytorch import PyTorch
from pathlib import Path

# Configure the SageMaker PyTorch training job that runs src/fine_tune.py.
estimator = PyTorch(
    # Training code: script name and the local directory that contains it.
    entry_point='fine_tune.py',
    source_dir=f'{Path.cwd()}/src',
    # Container selection.
    framework_version='2.3.0',
    py_version="py311",
    # Execution role and compute.
    role=role,
    instance_type='ml.g5.2xlarge',
    instance_count=1,
    # Values forwarded to the entry point as hyperparameters.
    hyperparameters={
        "model-id": "unsloth/Llama-3.2-1B-Instruct",
        "data-file": "gsm8k.jsonl",
        "epochs": 10,
        "lr": 1e-3,
    },
    # Ask SageMaker not to compress the job's output artifacts.
    disable_output_compression=True,
)

In [None]:
# Launch the training job, exposing the uploaded S3 data as the "training" input channel.
estimator.fit({"training": inputs})

## Move model artifacts for Custom Model Import

In [None]:
import boto3
import tarfile
from pathlib import Path
from urllib.parse import urlparse

# Look up the current AWS account ID; it is embedded in the bucket name below.
sts_client = boto3.client('sts')
account_info = sts_client.get_caller_identity()
account_id = account_info['Account']

bucket_name = f"bedrock-custom-model-{account_id}"

s3_client = boto3.client('s3')

# S3 URI of the artifacts produced by the training job run by `estimator`.
# Defined here explicitly so the cell works on a fresh kernel instead of
# relying on a variable that no cell in the notebook creates.
artifact_path = estimator.latest_training_job.describe()["ModelArtifacts"]["S3ModelArtifacts"]
parsed_uri = urlparse(artifact_path)
artifact_bucket = parsed_uri.netloc
artifact_prefix = parsed_uri.path.lstrip("/")

# download_file() needs an explicit bucket, key and local filename and fetches
# one object per call. The estimator disables output compression, so the
# artifacts are expected under a prefix rather than in a single archive; listing
# by prefix also covers the single-object (model.tar.gz) case.
# NOTE(review): the local directory name is a choice made here - adjust as needed.
local_model_dir = Path("model_artifacts")
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=artifact_bucket, Prefix=artifact_prefix):
    for obj in page.get("Contents", []):
        key = obj["Key"]
        if key.endswith("/"):
            # Zero-byte "folder" placeholder; nothing to download.
            continue
        # Path relative to the artifact prefix; fall back to the object's base
        # name when the prefix itself is the object key.
        relative_key = key[len(artifact_prefix):].lstrip("/") or Path(key).name
        local_file = local_model_dir / relative_key
        local_file.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(artifact_bucket, key, str(local_file))