# Save sample data for testing

In [1]:
import boto3
import os
from tqdm import tqdm

s3 = boto3.client("s3")

# Internal helper: list every object stored under an S3 prefix
def list_all_objects(bucket, prefix, client=None):
    """Return the metadata of every object stored under an S3 prefix.

    ``list_objects_v2`` results are paginated, so a paginator is used to walk
    through every page and the per-page results are concatenated.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix, passed to ``list_objects_v2`` as ``Prefix``.
        client: Optional object exposing ``get_paginator`` (e.g. a boto3 S3
            client). Defaults to the notebook-level ``s3`` client, so existing
            two-argument calls behave exactly as before.

    Returns:
        list: The ``Contents`` entries of all pages, in page order. Empty
        when no page carries a ``Contents`` key.
    """
    if client is None:
        # Fall back to the client created at the top of the notebook.
        client = s3

    paginator = client.get_paginator('list_objects_v2')

    all_objects = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # A page may lack the 'Contents' key; treat that as "no objects".
        all_objects.extend(page.get('Contents', []))

    return all_objects

# Bucket and prefix that hold the full dataset, plus the prefix under which
# the sample can optionally be mirrored inside the same bucket.
bucket = 'udacity-deeplearning-project'
prefix = 'data'
sample_prefix = 'sample'

# Substring that selects the keys making up the sample.
sample_marker = '001.Affenpinscher'

# When True, every sample object is also copied to
# s3://{bucket}/{sample_prefix}/{key}. Off by default, so this cell only
# downloads the files locally.
copy_to_sample_prefix = False

all_files = list_all_objects(bucket, prefix)

# Keep the sample's objects only. Keys ending in '/' are skipped because they
# cannot be written to a local file path (download_file would target a
# directory).
sample_files = [
    file_meta
    for file_meta in all_files
    if sample_marker in file_meta["Key"] and not file_meta["Key"].endswith('/')
]

for file_meta in tqdm(sample_files):
    key = file_meta["Key"]

    # Download each object to a local path that mirrors its S3 key, creating
    # the parent directories first.
    dirname = os.path.dirname(key)
    if dirname:
        os.makedirs(dirname, exist_ok=True)

    s3.download_file(bucket, key, key)

    if copy_to_sample_prefix:
        # The dict form of CopySource avoids having to URL-encode the key by hand.
        s3.copy_object(
            Bucket=bucket,
            CopySource={"Bucket": bucket, "Key": key},
            Key=f"{sample_prefix}/{key}"
        )

100%|██████████| 80/80 [00:05<00:00, 13.60it/s]


## Testing Estimator Locally Prior to Deployment to ECR

In [2]:
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role
import sagemaker
import subprocess

# Build the training image before testing it. check=True raises
# CalledProcessError if the build script exits non-zero, instead of silently
# carrying on to the training step.
subprocess.run(['sh', 'docker-build.sh'], check=True)

role = get_execution_role()

bucket = 'udacity-deeplearning-project'
sample_prefix = 'sample'
s3_output_location = f"s3://{bucket}/outputs"

# Hyperparameters handed to the Estimator for this run.
hyperparameters = {
    'num-classes': 133,
    'batch-size': 32,
    'lr': 0.005070970373087015
}

# instance_type='local' runs the job in a Docker container on this machine
# using the locally built image.
estimator = Estimator(
    image_uri='udacity-sagemaker-hpo',
    role=role,
    instance_count=1,
    instance_type='local',
    output_path=s3_output_location,
    hyperparameters=hyperparameters
)

# Local copies of the sample data. The "train" channel reads the train split,
# matching the channel layout used for the S3-backed run later on.
model_inputs = {
    "train": "file://./data/train",
    "test": "file://./data/valid"
}

estimator.fit(inputs=model_inputs, logs=True, wait=True)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
sha256:c9d24850568627df8725310d5b6203682c436683eb27c0250a8baadddc216670
REPOSITORY              TAG       IMAGE ID       CREATED                  SIZE
udacity-sagemaker-hpo   latest    c9d248505686   Less than a second ago   3.92GB


INFO:sagemaker:Creating training-job with name: udacity-sagemaker-hpo-2024-11-23-18-24-58-870
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' is not installed. Proceeding to check for 'docker-compose' CLI.
INFO:sagemaker.local.image:'Docker Compose' found using Docker Compose CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.i

 Container yuggp7daal-algo-1-8o3y1  Creating
 Container yuggp7daal-algo-1-8o3y1  Created
Attaching to yuggp7daal-algo-1-8o3y1
yuggp7daal-algo-1-8o3y1  | sed: can't read changehostname.c: No such file or directory
yuggp7daal-algo-1-8o3y1  | [01m[Kgcc:[m[K [01;31m[Kerror: [m[Kchangehostname.c: No such file or directory
yuggp7daal-algo-1-8o3y1  | [01m[Kgcc:[m[K [01;31m[Kfatal error: [m[Kno input files
yuggp7daal-algo-1-8o3y1  | compilation terminated.
yuggp7daal-algo-1-8o3y1  | [01m[Kgcc:[m[K [01;31m[Kerror: [m[Kchangehostname.o: No such file or directory
yuggp7daal-algo-1-8o3y1  | ERROR: ld.so: object '/libchangehostname.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
yuggp7daal-algo-1-8o3y1  | 2024-11-23 18:25:05,426 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
yuggp7daal-algo-1-8o3y1  | 2024-11-23 18:25:05,429 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus 

INFO:sagemaker.local.image:===== Job Complete =====


yuggp7daal-algo-1-8o3y1 exited with code 0
Aborting on container exit...
 Container yuggp7daal-algo-1-8o3y1  Stopping
 Container yuggp7daal-algo-1-8o3y1  Stopped


## Testing Deployed Estimator

In [3]:
# Run the push script the same way the build script is run earlier in the
# notebook; check=True stops the cell if the script exits non-zero.
# NOTE(review): the recorded output shows `docker build` failing inside the
# script while the push still went ahead. check=True only sees the script's
# final exit status, so consider adding `set -e` to push-container.sh.
subprocess.run(['sh', 'push-container.sh'], check=True)

push-container.sh: line 1: fg: no job control
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
[1A[1B[0G[?25l[+] Building 0.0s (0/1)                                          docker:default
[?25h[1A[0G[?25l[+] Building 0.1s (1/1) FINISHED                                 docker:default
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 2B                                         0.0s
[0m[?25hERROR: failed to solve: failed to read dockerfile: open Dockerfile: no such file or directory
The push refers to repository [598308907998.dkr.ecr.us-east-1.amazonaws.com/udacity-sagemaker-hpo]

[1Bbf18a086: Preparing 
[1Bb3b8de47: Preparing 
[1Bc0c5ab6b: Preparing 
[1B95045e04: Preparing 
[1Be9708ca1: Preparing 
[1B8f6060c6: Preparing 
[1Ba3c12226: Preparing 
[1B62daa95e: Preparing 
[1B8fe1cb59: Preparing 
[1B061a5b0d: Preparing 
[1B3ff1bf08: Preparing 
[1Bfc0e

In [21]:
# One TrainingInput per channel, each pointing at a split of the sample data
# stored under the sample prefix in S3.
model_inputs = {
    channel: sagemaker.inputs.TrainingInput(
        s3_data=f"s3://{bucket}/{sample_prefix}/data/{split}/",
        content_type="application/x-image",
    )
    for channel, split in (("train", "train"), ("test", "valid"))
}

In [22]:
# Hyperparameters passed to the Estimator for the remote run; same values as
# the ones used for the local test.
hyperparameters = {
    "num-classes": 133,
    "batch-size": 32,
    "lr": 0.005070970373087015,
}

In [23]:
# Train with the image pushed to ECR. output_path reuses s3_output_location
# (defined in the local-test cell), so artifacts from the local and remote
# runs land under the same S3 location rather than the SDK's default bucket.
estimator = Estimator(
    image_uri='598308907998.dkr.ecr.us-east-1.amazonaws.com/udacity-sagemaker-hpo',
    role=role,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    output_path=s3_output_location,
    hyperparameters=hyperparameters
)

estimator.fit(inputs=model_inputs, logs=True, wait=True)

INFO:sagemaker:Creating training-job with name: udacity-sagemaker-hpo-2024-11-23-17-03-09-761


2024-11-23 17:03:10 Starting - Starting the training job
2024-11-23 17:03:10 Pending - Training job waiting for capacity......
2024-11-23 17:03:57 Pending - Preparing the instances for training...
2024-11-23 17:04:46 Downloading - Downloading input data...
2024-11-23 17:05:06 Downloading - Downloading the training image......
2024-11-23 17:05:57 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34msed: can't read changehostname.c: No such file or directory[0m
[34mgcc: error: changehostname.c: No such file or directory[0m
[34mgcc: fatal error: no input files[0m
[34mcompilation terminated.[0m
[34mgcc: error: changehostname.o: No such file or directory[0m
[34mERROR: ld.so: object '/libchangehostname.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.[0m
[34mERROR: ld.so: object '/libchangehostname.s

# END OF NOTEBOOK