### Fine-tune Meta Llama 3.1 8B Instruct

In [None]:
%pip install -q -U datasets==3.1.0

### Prepare the dataset

In [None]:
import boto3
import sagemaker
import pandas as pd

In [None]:
# Base name of the dataset: used below for the local "<name>.json" file and
# as the last path segment of the S3 upload location.
dataset_name = "telco_promotions"

# SageMaker session plus the default bucket and (optional) key prefix it
# reports; these decide where the training data is uploaded.
sagemaker_session = sagemaker.Session()
bucket_name = sagemaker_session.default_bucket()
default_prefix = sagemaker_session.default_bucket_prefix

# Low-level S3 client (not referenced by the other cells visible here).
s3 = boto3.client('s3')

In [None]:
import pandas as pd

# Read the raw dataset from the JSON file that sits next to the notebook.
dataset_path = f"./{dataset_name}.json"
df = pd.read_json(dataset_path)

# Show the first rows as a quick sanity check of the loaded columns.
df.head()

In [None]:
# Llama 3 chat markup, one string per conversation turn. The {system},
# {instruction} and {completion} placeholders are presumably filled from the
# dataset columns of the same names -- verify against the dataset.
system_turn = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system}<|eot_id|>"
user_turn = "<|start_header_id|>user<|end_header_id|>{instruction}<|eot_id|>"
assistant_turn = "<|start_header_id|>assistant<|end_header_id|>{completion}<|eot_id|>"

# "prompt" is the system turn followed by the user turn; "completion" is the
# assistant turn.
template = dict(
    prompt=system_turn + user_turn,
    completion=assistant_turn,
)

In [None]:
import json

# Write the training set as JSON Lines: one JSON object per record, one record
# per line. pandas' default DataFrame.to_json() output is a single
# column-oriented JSON document, which does not match the ".jsonl" file name
# and is not what a JSON Lines reader expects.
df.to_json("train.jsonl", orient="records", lines=True)

# Write the prompt/completion template to the local directory so it can be
# uploaded alongside the training data.
with open("template.json", "w", encoding="utf-8") as f:
    json.dump(template, f)

### Upload to Amazon S3

In [None]:
import sagemaker

In [None]:
# Re-create the session and look up the default bucket / key prefix so this
# section can run without the dataset-preparation cells' state.
sagemaker_session = sagemaker.Session()
# Default S3 bucket reported by the session.
bucket_name = sagemaker_session.default_bucket()
# Optional key prefix inside that bucket; may be empty/None (checked below).
default_prefix = sagemaker_session.default_bucket_prefix

In [None]:
from sagemaker.s3 import S3Uploader

# Bucket-relative base path; the session's default prefix is included only
# when one is configured.
default_path = (
    f"{bucket_name}/{default_prefix}/datasets/workshop-fine-tuning"
    if default_prefix
    else f"{bucket_name}/datasets/workshop-fine-tuning"
)

# Both the training data and its template go under one dataset-specific folder.
train_data_location = f"s3://{default_path}/{dataset_name}"

for local_file in ("train.jsonl", "template.json"):
    S3Uploader.upload(local_file, train_data_location)

print(f"Training data location: {train_data_location}")

In [None]:
import sagemaker
from sagemaker.jumpstart.estimator import JumpStartEstimator

In [None]:
# Session handed to the JumpStartEstimator created below.
sagemaker_session = sagemaker.Session()

In [None]:
# JumpStart model to fine-tune.
model_id = "meta-textgeneration-llama-3-1-8b-instruct"

# Training compute: a single ml.g5.12xlarge instance.
instance_count = 1
instance_type = "ml.g5.12xlarge"

In [None]:
# Fine-tuning hyperparameters, passed as strings. The dataset is declared as
# instruction-tuning data (prompt/completion template) rather than chat data,
# training runs for 2 epochs, and FSDP is enabled.
# NOTE(review): exact semantics of each key are defined by the JumpStart
# training script -- confirm against the model's documentation.
training_hyperparameters = {
    "instruction_tuned": "True",
    "epoch": "2",
    "chat_dataset": "False",
    "enable_fsdp": "True",
}

estimator = JumpStartEstimator(
    model_id=model_id,
    instance_type=instance_type,
    instance_count=instance_count,
    # set "accept_eula": "true" to accept the EULA for gated models
    environment={"accept_eula": "true"},
    hyperparameters=training_hyperparameters,
    disable_output_compression=False,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Start the fine-tuning job, passing the uploaded S3 folder (train.jsonl and
# template.json) as the "training" input channel.
estimator.fit({"training": train_data_location})

### Deploy and invoke the fine-tuned model


In [None]:
import boto3
from datetime import datetime
import pytz
import sagemaker

In [None]:
# Fresh session for the deploy/invoke section of the notebook.
sagemaker_session = sagemaker.Session()

In [None]:
def get_last_job_name(job_name_prefixes, sagemaker_client=None):
    """Return the name of the most recently created completed training job.

    For each entry in ``job_name_prefixes`` the SageMaker Search API is asked
    for the newest training job whose status is ``Completed`` and whose name
    contains that entry. The newest job across all entries is returned.

    Args:
        job_name_prefixes: Iterable of strings to look for in training job
            names. Matching uses the ``Contains`` operator, so an entry does
            not have to be a strict prefix of the job name.
        sagemaker_client: Optional object exposing a boto3-compatible
            ``search`` method. When omitted, a default
            ``boto3.client('sagemaker')`` is created.

    Returns:
        The ``TrainingJobName`` of the newest matching job, or ``None`` when
        no completed job matches any entry.
    """
    if sagemaker_client is None:
        sagemaker_client = boto3.client('sagemaker')

    latest_job = None

    for prefix in job_name_prefixes:
        search_response = sagemaker_client.search(
            Resource='TrainingJob',
            SearchExpression={
                'Filters': [
                    {
                        'Name': 'TrainingJobName',
                        'Operator': 'Contains',
                        'Value': prefix
                    },
                    {
                        'Name': 'TrainingJobStatus',
                        'Operator': 'Equals',
                        'Value': "Completed"
                    }
                ]
            },
            # Newest first, one result: only the latest job per entry matters.
            SortBy='CreationTime',
            SortOrder='Descending',
            MaxResults=1
        )

        results = search_response['Results']
        if not results:
            continue

        candidate = results[0]['TrainingJob']

        # Compare against the best job found so far rather than against a
        # timezone-aware "minimum datetime" sentinel; this keeps the function
        # free of any extra timezone dependency. Strict ">" keeps the first
        # job seen when two creation times are equal.
        if latest_job is None or candidate['CreationTime'] > latest_job['CreationTime']:
            latest_job = candidate

    if latest_job is None:
        return None
    return latest_job['TrainingJobName']

In [None]:
# Name fragments that identify this workshop's fine-tuning jobs.
job_name_prefixes = ["llama-3-1-8b-instruct", "jumpstart-dft-meta-textgeneration-l"]

# Look up the newest completed training job matching any of the fragments.
job_name = get_last_job_name(job_name_prefixes)

# Display the result (None when nothing matched).
job_name

In [None]:
from sagemaker.jumpstart.estimator import JumpStartEstimator

# get_last_job_name() returns None when no completed training job matches.
# Fail here with an actionable message instead of passing None to attach().
if job_name is None:
    raise ValueError(
        "No completed training job found for the configured job_name_prefixes; "
        "run the fine-tuning job to completion or adjust the prefixes before attaching."
    )

# Rebuild an estimator object bound to the existing training job so that its
# model artifacts can be deployed.
estimator = JumpStartEstimator.attach(job_name)

In [None]:
# Hosting compute for the endpoint: a single ml.g5.4xlarge instance. These
# reuse (and overwrite) the variable names used for the training instances.
instance_type = "ml.g5.4xlarge"
instance_count = 1

In [None]:
# Deploy the fine-tuned model to an endpoint and keep the returned predictor
# for the invocation cell below.
deployment_settings = {
    "initial_instance_count": instance_count,
    "instance_type": instance_type,
}
predictor = estimator.deploy(**deployment_settings)

In [None]:
from datetime import date

# Today's date, rendered as "YYYY-MM-DD" for inclusion in the prompt.
current_date = date.today()
date_string = current_date.strftime("%Y-%m-%d")

# Chat layout: system turn, then a user turn that ends with today's date,
# then an open assistant header for the model to continue from.
base_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system}<|eot_id|><|start_header_id|>user<|end_header_id|>{instruction}. Today is {current_date}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

# System message describing the assistant's role.
system_message = "You are a marketing bot for a US-based telecom company called AnyCompany. Based on a customer profile, you choose an appropriate promotion for that customer and write a personalized marketing message to send to that customer."

# User instruction: task steps, the list of approved promotions, and one
# sample customer profile.
customer_instruction = "Choose an appropriate promotion and write a personalized marketing message for a customer by following these steps: 1. Read the customer profile, 2. Think step-by-step to choose an appropriate promotion from the list of approved promotions and enclose it in <promotion> tags, 3. Write a personalized message for the customer based on the chosen promotion, the customer profile, and the time of year and enclose it in <personalized_message> tags. For the next customer, you can choose from the list of approved promotions. <approved_promotions> - $5 monthly winter holiday discount every month for the months of November-January - 10GB extra phone data for winter holidays every month from November to January - 20GB extra internet data for winter holidays every month from November to January - 10 extra minutes for winter holidays every month from November to January - 2GB extra phone data for birthday month - 5GB extra internet data for birthday month - $5 discount for birthday month - 30 extra minutes for birthday month - $2 discount on annual plan for customer with 2+ year tenure - $5 discount on annual plan for customer with 5+ year tenure - 5% discount for 6 months on new internet plan for customer with existing phone plan - 5% discount for 6 months on new phone plan for customer with existing internet plan - $5 voucher to spend on any phone or internet product </approved_promotions> Choose an appropriate promotion and write a personalized message for the customer below. Remember to use <promotion> and <personalized_message> tags. <customer_data> Name: Emily State: California DOB: 1985-11-12 Job: Software Engineer Join Date: 2018-05-15 Internet Service: 200Mbps Fiber Internet Contract: Annual Monthly Internet Costs: $65.99 Phone Service: 100GB Unlimited Minutes SmartPhone Contract: Annual Monthly Phone Costs: $89.99</customer_data>"

prompt = base_prompt.format(
    system=system_message,
    instruction=customer_instruction,
    current_date=date_string,
)

print(prompt)

In [None]:
# Generation settings sent with the request: low temperature, nucleus sampling
# at 0.9, up to 1000 new tokens, only the generated text returned, and
# generation stopped at the end-of-turn / end-of-text markers.
generation_parameters = {
    "max_new_tokens": 1000,
    "temperature": 0.2,
    "top_p": 0.9,
    "return_full_text": False,
    "stop": ['<|eot_id|>', '<|end_of_text|>'],
}

# Invoke the endpoint with the prompt built above; the response is displayed
# as the cell output.
predictor.predict({"inputs": prompt, "parameters": generation_parameters})

In [None]:
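# Clean up: when you are finished with the endpoint, uncomment and run the two
# lines below to delete the model and the endpoint (with its configuration).
# predictor.delete_model()
# predictor.delete_endpoint(delete_endpoint_config=True)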