In [10]:
!pip install sagemaker torch torchvision Pillow pandas numpy


Collecting torch
  Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.



Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl (906.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m906.4/906.4 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)
[2K 

In [1]:
import sagemaker
from sagemaker.pytorch import PyTorch

# SageMaker session
sagemaker_session = sagemaker.Session()

# IAM role
role = sagemaker.get_execution_role()

# S3 bucket and prefix
bucket = 'mnistdataset'  # Your S3 bucket
prefix = 'images'  # Adjusted prefix

# Define the PyTorch estimator
estimator = PyTorch(
    entry_point='mnist_train.py',  
    role=role,
    framework_version='1.9.1',
    py_version='py38',
    instance_count=1,
    instance_type='ml.m4.xlarge',
    hyperparameters={
        'batch-size': 32,
        'epochs': 10,
        'learning-rate': 0.001
    },
    output_path=f's3://{bucket}/{prefix}/output',
    train_use_spot_instances=True,
    train_max_run=1800,
    train_max_wait=1900
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


train_max_run has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_use_spot_instances has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_wait has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [None]:
# # Start training
# estimator.fit({
#     'train_images': f's3://{bucket}/{prefix}/train images',  # Path to training images
#     'train_labels': f's3://{bucket}/{prefix}/train_labels.csv',  # Correct: points to a file, not a directory
#     'test_images': f's3://{bucket}/{prefix}/test images',  # Path to testing images
#     'test_labels': f's3://{bucket}/{prefix}/test_labels.csv'  # Correct: points to a file, not a directory
# })

In [5]:
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, ContinuousParameter

metric_definitions=[
        {'Name': 'training-loss', 'Regex': 'Training Loss:\\s*([0-9\\.]+)'},
        {'Name': 'validation-accuracy', 'Regex': 'Validation Accuracy:\\s*([0-9\\.]+)'}
    ]

# Define the hyperparameter ranges for tuning
hyperparameter_ranges = {
    'batch-size': IntegerParameter(32, 64),
    'learning-rate': ContinuousParameter(0.0001, 0.001)
}

objective_metric_name='validation-accuracy'

# Create the HyperparameterTuner object
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name,
      # Match this with the metric defined in the estimator
    hyperparameter_ranges,
    metric_definitions,
    max_jobs=6,
    max_parallel_jobs=2,
    objective_type='Maximize',
)

# Start the hyperparameter tuning job
# tuner.fit({
#     'train_images': f's3://{bucket}/{prefix}/train images',
#     'train_labels': f's3://{bucket}/{prefix}/train_labels.csv',
#     'test_images': f's3://{bucket}/{prefix}/test images',
#     'test_labels': f's3://{bucket}/{prefix}/test_labels.csv'
# })


In [7]:
from sagemaker.tuner import HyperparameterTuningJobAnalytics

# Specify the tuning job name (replace 'your-tuning-job-name' with the actual name)
tuning_job_name = 'pytorch-training-241111-0027'

# Retrieve tuning job analytics
tuning_job_analytics = HyperparameterTuningJobAnalytics(tuning_job_name)
best_training_job = tuning_job_analytics.dataframe().sort_values('FinalObjectiveValue', ascending=False).iloc[0]

# Display the best training job name
print(f"The best training job: {best_training_job['TrainingJobName']}")


The best training job: pytorch-training-241111-0027-002-1c89fc09


In [9]:
from sagemaker.estimator import Estimator

# Attach to the best training job
best_estimator = Estimator.attach('pytorch-training-241111-0027-002-1c89fc09')



2024-11-11 00:39:45 Starting - Preparing the instances for training
2024-11-11 00:39:45 Downloading - Downloading the training image
2024-11-11 00:39:45 Training - Training image download completed. Training in progress.
2024-11-11 00:39:45 Uploading - Uploading generated training model
2024-11-11 00:39:45 Completed - Training job completed


In [17]:
# Print the best model's hyperparameters for reference
print(f"The best model hyperparameters: {best_estimator.hyperparameters()}")


The best model hyperparameters: {'_tuning_objective_metric': 'validation-accuracy', 'batch-size': '49', 'epochs': '10', 'learning-rate': '0.00041144187286229066', 'sagemaker_container_log_level': '20', 'sagemaker_estimator_class_name': '"PyTorch"', 'sagemaker_estimator_module': '"sagemaker.pytorch.estimator"', 'sagemaker_job_name': '"pytorch-training-2024-11-11-00-27-29-372"', 'sagemaker_program': '"mnist_train.py"', 'sagemaker_region': '"us-east-2"', 'sagemaker_submit_directory': '"s3://mnistdataset/pytorch-training-2024-11-11-00-27-29-372/source/sourcedir.tar.gz"'}


In [53]:
from sagemaker.estimator import Estimator

# Attach to the best training job
best_estimator = Estimator.attach('pytorch-training-241111-0027-002-1c89fc09')

# Retrieve the model data (S3 path of trained model artifacts)
model_data = best_estimator.model_data
print(f"Model Data Path: {model_data}")



2024-11-11 00:39:45 Starting - Preparing the instances for training
2024-11-11 00:39:45 Downloading - Downloading the training image
2024-11-11 00:39:45 Training - Training image download completed. Training in progress.
2024-11-11 00:39:45 Uploading - Uploading generated training model
2024-11-11 00:39:45 Completed - Training job completed
Model Data Path: s3://mnistdataset/images/output/pytorch-training-241111-0027-002-1c89fc09/output/model.tar.gz


In [1]:
# Checking Evaluvate locally

In [5]:
from inference import model_fn, input_fn, predict_fn, output_fn  # Import functions from inference.py
import time
import json

# Load the model using model_fn from inference.py
# Specify the directory where the model will be downloaded and extracted
model_dir = '/home/ec2-user/SageMaker/'  # Adjust as needed for your environment
model = model_fn(model_dir)

# Function to evaluate a single image
def evaluate_image(image_path, model):
    # Preprocess input using input_fn from inference.py
    request_body = json.dumps({"image_path": image_path})  # Create request body with image path
    image = input_fn(request_body, request_content_type='application/json')
    
    # Measure inference time
    start_time = time.time()
    prediction = predict_fn(image, model)
    end_time = time.time()

    inference_time = end_time - start_time
    output = output_fn(prediction, content_type='application/json')  # Format output if needed
    return output, inference_time

# Example Usage
image_path = '/home/ec2-user/SageMaker/1.png'
 # Replace with your actual image path
output, time_taken = evaluate_image(image_path, model)
print(f'Output: {output}')
print(f'Inference time: {time_taken:.4f} seconds')


Starting model_fn.
INFO:inference:Starting model_fn.
Downloading model from S3 bucket mnistdataset with key images/output/pytorch-training-241111-0027-002-1c89fc09/output/model.tar.gz.
INFO:inference:Downloading model from S3 bucket mnistdataset with key images/output/pytorch-training-241111-0027-002-1c89fc09/output/model.tar.gz.
Extracting model tar file.
INFO:inference:Extracting model tar file.
Loading model from /home/ec2-user/SageMaker/extracted_model/final_mnist_digit_classifier.pth.
INFO:inference:Loading model from /home/ec2-user/SageMaker/extracted_model/final_mnist_digit_classifier.pth.
Model loaded successfully.
INFO:inference:Model loaded successfully.
input_fn called with content type: application/json
INFO:inference:input_fn called with content type: application/json
Received image path: /home/ec2-user/SageMaker/1.png
INFO:inference:Received image path: /home/ec2-user/SageMaker/1.png
Image preprocessed successfully.
INFO:inference:Image preprocessed successfully.
Starting

Output: {"predicted_class": 2}
Inference time: 0.0023 seconds


In [4]:
# Model deployment

In [1]:
from sagemaker.pytorch import PyTorchModel
import sagemaker

# Get the SageMaker execution role
role = sagemaker.get_execution_role()

# Define the S3 path for the model.tar.gz file
s3_model_path = 's3://mnistdataset/images/output/pytorch-training-2024-11-12-02-18-34-727/output/model.tar.gz'  # Replace with your S3 path

# Create a PyTorchModel object
pytorch_model = PyTorchModel(
    model_data=s3_model_path,  # S3 path to model artifacts
    role=role,  # Use the retrieved SageMaker execution role
    framework_version='1.12.1',  # Specify the PyTorch version (adjust this based on availability)
    py_version='py38',  # Python version (py38 for Python 3.8)
    entry_point='inference.py'  # Your inference script
)

# Deploy the model as a SageMaker endpoint
predictor = pytorch_model.deploy(
    initial_instance_count=1,  # Number of instances for the endpoint
    instance_type='ml.m4.xlarge'  # Change instance type as needed
)
print(f"Endpoint '{predictor.endpoint_name}' created successfully.")


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
------!Endpoint 'pytorch-inference-2024-11-13-20-47-43-192' created successfully.


In [13]:
import json
from PIL import Image
import numpy as np

# Prepare the image for prediction (ensure it matches the format expected by your model)
image_path = '/home/ec2-user/SageMaker/0.png'  # Update with your image path
image = Image.open(image_path).convert('L')
image = image.resize((28, 28))
image = np.array(image).reshape(1, 1, 28, 28).astype('float32') / 255
payload = json.dumps(image.tolist())

# Make a prediction
response = predictor.predict(payload)
print("Prediction response:", response)


In [12]:
predictor.delete_endpoint()


INFO:sagemaker:Deleting endpoint configuration with name: pytorch-inference-2024-11-12-02-35-40-362
INFO:sagemaker:Deleting endpoint with name: pytorch-inference-2024-11-12-02-35-40-362


In [14]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.40.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting narwhals>=1.5.2 (from altair<6,>=4.0->streamlit)
  Downloading narwhals-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Downloading streamlit-1.40.1-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading altair-5.4.1-py3-none-any.whl (658 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m658.1/658.1 kB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m91.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading narwhals-1.13.3-