In [None]:
!pip install -U sagemaker

Collecting sagemaker
  Using cached sagemaker-3.3.0-py3-none-any.whl.metadata (20 kB)
Collecting sagemaker-core<3.0.0,>=2.3.0 (from sagemaker)
  Using cached sagemaker_core-2.3.0-py3-none-any.whl.metadata (5.5 kB)
Collecting sagemaker-train<2.0.0,>=1.3.0 (from sagemaker)
  Using cached sagemaker_train-1.3.0-py3-none-any.whl.metadata (7.8 kB)
Collecting sagemaker-serve<2.0.0,>=1.3.0 (from sagemaker)
  Using cached sagemaker_serve-1.3.0-py3-none-any.whl.metadata (1.6 kB)
Collecting sagemaker-mlops<2.0.0,>=1.3.0 (from sagemaker)
  Using cached sagemaker_mlops-1.3.0-py3-none-any.whl.metadata (5.7 kB)
Collecting boto3<2.0.0,>=1.42.2 (from sagemaker-core<3.0.0,>=2.3.0->sagemaker)
  Using cached boto3-1.42.14-py3-none-any.whl.metadata (6.8 kB)
Collecting rich<14.0.0,>=13.0.0 (from sagemaker-core<3.0.0,>=2.3.0->sagemaker)
  Using cached rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting protobuf<5.0,>=3.12 (from sagemaker-core<3.0.0,>=2.3.0->sagemaker)
  Using cached protobuf-4.25.8-cp37

In [1]:
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
from sagemaker.huggingface import HuggingFace

In [4]:
role = sagemaker.get_execution_role()

In [5]:
role

'arn:aws:iam::404400298070:role/SageMakerLLMRole'

In [6]:
hyperparameters = {
    "model_id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    "epochs": 2,
    "per_device_train_batch_size": 2,
    "lr": 2e-5
}

In [7]:
estimator = HuggingFace(
    entry_point="train.py",
    source_dir="./scripts",
    role=role,
    transformers_version="4.36",
    pytorch_version="2.1",
    py_version="py310",
    instance_type="ml.g5.xlarge",
    instance_count=1,
    output_path="s3://llm-model-artifacts-sun/models/",
    hyperparameters=hyperparameters
)

## Run only for the training

In [8]:
estimator.fit({
    "train": "s3://llm-finetune-dataset-sun/datasets/"
})

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: huggingface-pytorch-training-2025-12-22-14-34-06-719


2025-12-22 14:34:11 Starting - Starting the training job
2025-12-22 14:34:11 Pending - Training job waiting for capacity............
2025-12-22 14:36:13 Downloading - Downloading input data...
2025-12-22 14:36:28 Downloading - Downloading the training image........................
2025-12-22 14:40:30 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2025-12-22 14:40:40,471 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-12-22 14:40:40,490 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-12-22 14:40:40,501 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-12-22 14:40:40,508 sagemaker_pytorch_container.

In [9]:
# estimator.latest_training_job.model_data
estimator.model_data

's3://llm-model-artifacts-sun/models/huggingface-pytorch-training-2025-12-22-14-34-06-719/output/model.tar.gz'

In [None]:
# Use this code to check all the accessible services inside your AWS Sagemaker

# from sagemaker import image_uris

# image_uris.retrieve(
#     framework="huggingface",
#     region="ap-south-1",   # change your region
#     version="4.37.0",
#     image_scope="inference"
# )

In [None]:
 # instance_type="ml.g5.xlarge",

In [None]:
# model = HuggingFaceModel(
#     model_data="s3://bucket/model.tar.gz",
#     role=role,
#     entry_point="inference.py",
#     source_dir="inference",
#     transformers_version="4.36",
#     pytorch_version="2.1",
#     py_version="py310"
# )

In [5]:
## the code for the deployment
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

role = sagemaker.get_execution_role()

model = HuggingFaceModel(
    model_data="s3://llm-model-artifacts-sun/models/huggingface-pytorch-training-2025-12-22-14-34-06-719/output/model.tar.gz",
    role=role,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env={
        "HF_TASK": "text-generation"
    }
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name="live-finetune-endpoint"
)


------!

In [8]:
import sagemaker
sm = sagemaker.Session()

sm.delete_endpoint("live-finetune-endpoint")
sm.delete_endpoint_config("live-finetune-endpoint")


In [9]:
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    endpoint_name="tinyllama-g5-endpoint"
)

----------!

In [10]:
## THIS IS JUST TO VALIDATE WHETHER MODEL WORKING OR NOT IN THE NOTEBOOK ITSELF
predictor.predict({"inputs": "Explain AWS S3"})

[{'generated_text': 'Explain AWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket Permissions\n\nAWS S3 Bucket 

In [None]:
## after the deployment URL will look like this
https://runtime.sagemaker.<region>.amazonaws.com/endpoints/tinyllama-g5-endpoint/invocations

In [None]:
import boto3, json

runtime = boto3.client("sagemaker-runtime", region_name="ap-south-1")

resp = runtime.invoke_endpoint(
    EndpointName="tinyllama-g5-endpoint",
    ContentType="application/json",
    Body=json.dumps({"inputs": "hello"})
)

print(resp["Body"].read().decode())


ClientError: An error occurred (UnrecognizedClientException) when calling the InvokeEndpoint operation: The security token included in the request is invalid.