# Save sample data for testing

In [7]:
import boto3
import os
from tqdm import tqdm

# Shared S3 client used by the listing helper and the download loop in this cell.
s3 = boto3.client("s3")

# Helper: list every object stored under an S3 prefix
def list_all_objects(bucket, prefix, client=None):
    """Return the metadata of every object in ``bucket`` whose key starts with ``prefix``.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix that restricts the listing.
        client: Optional object exposing ``get_paginator`` (e.g. a boto3 S3
            client). When omitted, the notebook-level ``s3`` client is used,
            so existing two-argument calls behave exactly as before.

    Returns:
        A list with the ``Contents`` entries of all result pages, in the
        order the paginator yields them. Empty if nothing matched.
    """
    # Fall back to the module-level client only when no client was injected.
    s3_client = s3 if client is None else client

    # list_objects_v2 returns results in pages; the paginator walks all of them.
    paginator = s3_client.get_paginator('list_objects_v2')

    all_objects = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # A page may carry no 'Contents' entry (e.g. nothing matched the prefix).
        all_objects.extend(page.get('Contents', []))

    return all_objects

bucket = 'udacity-deeplearning-project'
prefix = 'data'
sample_prefix = 'sample'

# Substring of the object key that selects the one class folder used as a small sample.
sample_marker = '001.Affenpinscher'

# Set to True to additionally copy each sample object to
# s3://<bucket>/<sample_prefix>/<key>. Off by default: this cell then only
# downloads the sample to local disk.
copy_sample_to_s3 = False

all_files = list_all_objects(bucket, prefix)

# Keep the sample-class objects. Keys ending in "/" are skipped because the
# key doubles as the local file name below and such a key names a directory.
sample_files = [
    file_meta
    for file_meta in all_files
    if sample_marker in file_meta["Key"] and not file_meta["Key"].endswith("/")
]

for file_meta in tqdm(sample_files):
    key = file_meta["Key"]

    # Mirror the S3 key layout on local disk so the relative paths stay the same.
    dirname = os.path.dirname(key)
    if dirname:
        os.makedirs(dirname, exist_ok=True)

    s3.download_file(bucket, key, key)

    if copy_sample_to_s3:
        s3.copy_object(
            Bucket=bucket,
            CopySource={"Bucket": bucket, "Key": key},
            Key=f"{sample_prefix}/{key}"
        )

100%|██████████| 80/80 [00:05<00:00, 13.71it/s]


## Testing Estimator Locally Prior to Deployment to ECR

In [19]:
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role
import sagemaker
import subprocess

# Build the training image locally. check=True raises CalledProcessError when
# the script exits non-zero, so the cell stops instead of training against a
# stale or missing image.
subprocess.run(['sh', 'docker-build.sh'], check=True)

role = get_execution_role()

bucket = 'udacity-deeplearning-project'
sample_prefix = 'sample'
s3_output_location = f"s3://{bucket}/outputs"

# Hyperparameters handed to the training container.
hyperparameters = {
    'num-classes': 133,
    'batch-size': 32,
    'lr': 0.005070970373087015
}

# instance_type='local' runs the container on this machine through SageMaker
# local mode rather than launching a managed training instance.
estimator = Estimator(
    image_uri='udacity-sagemaker-hpo',
    role=role,
    instance_count=1,
    instance_type='local',
    output_path=s3_output_location,
    hyperparameters=hyperparameters
)

# Local channels read from the files downloaded earlier in this notebook.
# NOTE(review): the "train" channel points at ./data/test while the deployed
# run further down uses data/train — confirm this is intentional.
model_inputs = {
    "train": "file://./data/test",
    "test": "file://./data/valid"
}

estimator.fit(inputs=model_inputs, logs=True, wait=True)

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
sha256:8137233df93acf6ea17921a658c318a6609ed3b78312e4b0b6e3f720b5b7a3da
REPOSITORY                                                           TAG       IMAGE ID       CREATED                  SIZE
udacity-sagemaker-hpo                                                latest    8137233df93a   Less than a second ago   3.92GB
598308907998.dkr.ecr.us-east-1.amazonaws.com/udacity-sagemaker-hpo   latest    4d60d007268b   13 minutes ago           3.92GB
<none>                                                               <none>    84ac6c08b65c   13 minutes ago           3.92GB
<none>                                                               <none>    88a9908f82fe   14 minutes ago           3.92GB
<none>                                                               <none>    39c01ad4fbeb   15 minutes ago           3.92GB
<none>                                                               <none>    7798f220cf62   17 minutes ago           3.92GB
<none>                          

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker:Creating training-job with name: udacity-sagemaker-hpo-2024-11-23-17-02-49-059
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' is not installed. Proceeding to check for 'docker-compose' CLI.
INFO:sagemaker.local.image:'Docker Compose' found using Docker Compose CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials f

 Container 5yrr8g7mi5-algo-1-itc0l  Creating
 Container 5yrr8g7mi5-algo-1-itc0l  Created
Attaching to 5yrr8g7mi5-algo-1-itc0l
5yrr8g7mi5-algo-1-itc0l  | sed: can't read changehostname.c: No such file or directory
5yrr8g7mi5-algo-1-itc0l  | [01m[Kgcc:[m[K [01;31m[Kerror: [m[Kchangehostname.c: No such file or directory
5yrr8g7mi5-algo-1-itc0l  | [01m[Kgcc:[m[K [01;31m[Kfatal error: [m[Kno input files
5yrr8g7mi5-algo-1-itc0l  | compilation terminated.
5yrr8g7mi5-algo-1-itc0l  | [01m[Kgcc:[m[K [01;31m[Kerror: [m[Kchangehostname.o: No such file or directory
5yrr8g7mi5-algo-1-itc0l  | ERROR: ld.so: object '/libchangehostname.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
5yrr8g7mi5-algo-1-itc0l  | 2024-11-23 17:02:49,947 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
5yrr8g7mi5-algo-1-itc0l  | 2024-11-23 17:02:49,950 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus 

INFO:sagemaker.local.image:===== Job Complete =====


5yrr8g7mi5-algo-1-itc0l exited with code 0
Aborting on container exit...
 Container 5yrr8g7mi5-algo-1-itc0l  Stopping
 Container 5yrr8g7mi5-algo-1-itc0l  Stopped


## Testing Deployed Estimator

In [20]:
# Run the project's push-container.sh (script not shown in this notebook); the
# recorded output shows a docker login, a docker build and a push to the ECR repository.
# NOTE(review): that output also reports "fg: no job control" on line 1 of the
# script and "failed to read dockerfile: open Dockerfile: no such file or
# directory" — verify the script's first line and its working directory.
!sh push-container.sh

push-container.sh: line 1: fg: no job control
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
[1A[1B[0G[?25l[+] Building 0.0s (0/1)                                          docker:default
 => [internal] load build definition from Dockerfile                       0.0s
[?25h[1A[1A[0G[?25l[+] Building 0.0s (1/1) FINISHED                                 docker:default
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 2B                                         0.0s
[0m[?25hERROR: failed to solve: failed to read dockerfile: open Dockerfile: no such file or directory
The push refers to repository [598308907998.dkr.ecr.us-east-1.amazonaws.com/udacity-sagemaker-hpo]

[1Bbf18a086: Preparing 
[1Bfd2395cf: Preparing 
[1B2c2c2d01: Preparing 
[1B95045e04: Preparing 
[1Be9708ca1: Preparing 
[1B8f6060c6: Preparing 
[1Ba3c12226: Preparing 
[1B62daa95e: Preparing 

In [21]:
# One TrainingInput per channel, each reading image data from the matching
# folder under the sample prefix in S3 ("test" is fed from the "valid" folder).
model_inputs = {
    channel: sagemaker.inputs.TrainingInput(
        s3_data=f"s3://{bucket}/{sample_prefix}/data/{folder}/",
        content_type="application/x-image",
    )
    for channel, folder in (("train", "train"), ("test", "valid"))
}

In [22]:
# Hyperparameters passed to the estimator for the deployed training run.
hyperparameters = {}
hyperparameters['num-classes'] = 133
hyperparameters['batch-size'] = 32
hyperparameters['lr'] = 0.005070970373087015

In [23]:
# Estimator for the image hosted in ECR, configured for one ml.p3.2xlarge instance.
estimator = Estimator(
    role=role,
    image_uri='598308907998.dkr.ecr.us-east-1.amazonaws.com/udacity-sagemaker-hpo',
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    hyperparameters=hyperparameters,
)

# Start training on the S3 sample channels with log output enabled.
estimator.fit(
    inputs=model_inputs,
    logs=True,
)

INFO:sagemaker:Creating training-job with name: udacity-sagemaker-hpo-2024-11-23-17-03-09-761


2024-11-23 17:03:10 Starting - Starting the training job
2024-11-23 17:03:10 Pending - Training job waiting for capacity......
2024-11-23 17:03:57 Pending - Preparing the instances for training...
2024-11-23 17:04:46 Downloading - Downloading input data...
2024-11-23 17:05:06 Downloading - Downloading the training image......
2024-11-23 17:05:57 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34msed: can't read changehostname.c: No such file or directory[0m
[34mgcc: error: changehostname.c: No such file or directory[0m
[34mgcc: fatal error: no input files[0m
[34mcompilation terminated.[0m
[34mgcc: error: changehostname.o: No such file or directory[0m
[34mERROR: ld.so: object '/libchangehostname.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.[0m
[34mERROR: ld.so: object '/libchangehostname.s

# END OF NOTEBOOK