In [None]:
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator

# IAM role returned by get_execution_role(); handed to the Estimator below.
role = get_execution_role()

# Hyperparameters passed through the Estimator to the training job.
hyperparameters = dict(epochs=1)

# Local-mode run (instance_type='local'): the estimator reads its input from
# and writes its output to file:// URIs instead of S3.
# NOTE(review): the image name matches the one produced by the `docker build`
# cell further down; on a fresh kernel that cell presumably has to run first.
local_estimator_kwargs = dict(
    image_uri='pytorch-extended-container-test',
    role=role,
    instance_count=1,
    instance_type='local',
    hyperparameters=hyperparameters,
    output_path='file:///home/ec2-user/SageMaker/en_us/output',
)
estimator = Estimator(**local_estimator_kwargs)

# Train against the local CSV file.
estimator.fit('file:///home/ec2-user/SageMaker/en_us/my-input-csv-file.csv')

In [None]:
%%sh

# Build the Docker image in the current directory and push it to Amazon ECR.

# Specify an algorithm name (used as the local image name and the ECR repository name)
algorithm_name=pytorch-extended-container-test
echo "algorithm_name: ${algorithm_name} ######################"

account=$(aws sts get-caller-identity --query Account --output text)
echo "account: ${account} ######################"

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}
echo "region: ${region} ######################"

# Registry host and fully qualified image reference inside that registry.
registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"
echo "fullname: ${fullname} ######################"

# If the repository doesn't exist in ECR, create it.
# The region is passed explicitly so these calls target the same region as
# the login and push below, even when the default above was applied.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log into Docker. `docker login` takes the registry server, not an image
# reference, so authenticate against the registry host.
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
import pandas as pd
import boto3
import io
import os
from sklearn.model_selection import train_test_split
import boto3

# Look up the AWS account id of the caller via STS.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Region configured for the default boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Image URI in the account's ECR registry (":latest" tag of the algorithm repo).
algorithm_name = "pytorch-extended-container-test"
ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, algorithm_name)
print(ecr_image)

# Load the full dataset from the notebook instance's local disk.
# `sep` must be passed by keyword: pandas 2.x rejects positional arguments
# after the path (it was a deprecation warning in pandas 1.x).
url = "file:///home/ec2-user/SageMaker/en_us/my-input-csv-file.csv"
df_demo = pd.read_csv(url, sep=',')

# S3 destination and object names for the individual splits.
bucket = 'pippo-itsar'
prefix = 'demo'
train_file = 'demo_train.csv'
test_file = 'demo_test.csv'
validate_file = 'demo_validate.csv'
whole_file = 'demo.csv'
s3_resource = boto3.Session().resource('s3')

# 80% train / 20% hold-out, stratified on the 'quality' column.
train, test_and_validate = train_test_split(
    df_demo,
    test_size=0.2,
    random_state=42,
    stratify=df_demo['quality'],
)

# Split the hold-out in half: test and validation, again stratified on 'quality'.
test, validate = train_test_split(
    test_and_validate,
    test_size=0.5,
    random_state=42,
    stratify=test_and_validate['quality'],
)

print(train.shape)
print(test.shape)
print(validate.shape)

def upload_s3_csv(filename, folder, dataframe):
    """Serialize ``dataframe`` to CSV and upload it to S3.

    The CSV is written without a header row and without the index, and is
    stored under the key ``<prefix>/<folder>/<filename>`` in the module-level
    ``bucket`` using the module-level ``s3_resource``.

    Args:
        filename: Object name (last key component).
        folder: Key component placed between ``prefix`` and ``filename``.
        dataframe: Object exposing ``to_csv`` (a pandas DataFrame here).
    """
    # Local import: S3 keys always use '/', so build them with posixpath
    # instead of the platform-dependent os.path (backslashes on Windows).
    import posixpath

    csv_buffer = io.StringIO()
    dataframe.to_csv(csv_buffer, header=False, index=False)

    key = posixpath.join(prefix, folder, filename)
    s3_resource.Bucket(bucket).Object(key).put(Body=csv_buffer.getvalue())

# Upload each split to s3://<bucket>/<prefix>/<split>/<file>.
upload_s3_csv(train_file, 'train', train)
upload_s3_csv(test_file, 'test', test)
upload_s3_csv(validate_file, 'validate', validate)

# The channels point at the S3 *prefixes* (folders), not at individual files,
# so only bucket and prefix are interpolated.
train_channel = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/train/".format(bucket, prefix),
    content_type='text/csv')

validate_channel = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/validate/".format(bucket, prefix),
    content_type='text/csv')

data_channels = {'train': train_channel, 'validation': validate_channel}

# Instance type shared by the training job and the endpoint. The original
# deploy call referenced an undefined `instance_type`; it is defined here.
# NOTE(review): assumes the endpoint should use the same instance type as
# training -- confirm, a cheaper type may be preferable for hosting.
instance_type = 'ml.p2.xlarge'

estimator = Estimator(
    image_uri=ecr_image,
    role=get_execution_role(),
    base_job_name='pytorch-extended-container-test',
    instance_count=1,
    instance_type=instance_type,
    output_path='s3://{}'.format(bucket)
)

# start training
estimator.fit(inputs=data_channels)

# deploy the trained model
predictor = estimator.deploy(initial_instance_count=1, instance_type=instance_type)