In [None]:
import tensorflow as tf
import pandas as pd
import sagemaker

In [5]:
# Load the Iris dataset from Iris.csv (path is relative to the notebook's
# working directory).
df = pd.read_csv('Iris.csv')
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
# Split the data 80-20 by position: the first 120 rows are used for training
# and every remaining row is held out for model inference later.
# The test slice must start at position 120 (where the training slice ends);
# starting at 121 silently drops one row from both sets.
# The 'Id' column is only a row identifier, so it is removed from both splits.
# NOTE(review): the rows previewed above are all Iris-setosa; if the file is
# ordered by species, a positional split leaves the test set with a single
# class. Consider shuffling with a fixed seed before splitting -- confirm.
train = df.iloc[:120, :].drop('Id', axis=1)
test = df.iloc[120:, :].drop('Id', axis=1)

In [8]:
# Persist both splits to CSV in the working directory, leaving out the
# pandas index so the files contain only the data columns.
for split_frame, csv_name in ((train, 'train.csv'), (test, 'test.csv')):
    split_frame.to_csv(csv_name, index=False)

In [9]:
# Create a SageMaker session; it is used below to upload the training data to S3.
# boto3 is not used in this cell itself -- later cells call boto3.Session()
# and boto3.client().
import boto3
sagemaker_session = sagemaker.Session()

In [10]:
# Upload train.csv to S3 under the "tf-iris-data/training" key prefix.
# Note that "tf-iris-data" is a key prefix, not a bucket name: no bucket is
# passed here, so the session chooses the destination bucket (its default
# bucket, per the SageMaker SDK documentation).
# upload_data returns the S3 URI of the uploaded object.
prefix = "tf-iris-data"
training_input_path = sagemaker_session.upload_data(
    'train.csv',
    key_prefix=f"{prefix}/training",
)

In [12]:
# Sanity check: read the uploaded file back from its S3 URI and preview it.
# A comma is pandas' default separator, so it does not need to be spelled out.
training_data = pd.read_csv(training_input_path)
training_data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [13]:
# Where the training job writes its output artifacts.
# Reuse the session created earlier (`sagemaker_session`) rather than
# constructing a second sagemaker.Session() just to look up the bucket; this
# keeps the input upload and the output location tied to the same session.
output_bucket = sagemaker_session.default_bucket()
output_prefix = "sagemaker/custom-tf-iris"
output_bucket_path = f"s3://{output_bucket}"

In [21]:
%%time

import os
import boto3
import re
import sagemaker

from time import gmtime, strftime

# Execution role and region used when submitting the training job.
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# Name the job "iris-<UTC timestamp>" so each run (at one-second resolution)
# gets a distinct name.
job_name = "iris-" + strftime('%Y-%m-%d-%H-%M-%S', gmtime())
print("Training job", job_name)

Training job iris-2021-02-13-00-28-36
CPU times: user 55.1 ms, sys: 12.8 ms, total: 67.9 ms
Wall time: 110 ms


In [24]:
# ECR URI of the custom training container; passed as "TrainingImage" in the
# training job configuration below.
# NOTE(review): the account id and region (us-west-1) are hard-coded in the
# URI, while `region` is read from the boto3 session -- confirm they match.
training_image = "149363165341.dkr.ecr.us-west-1.amazonaws.com/nahid-sagemaker-tf-repo"

In [25]:
# Request payload for the training job; it is unpacked into
# client.create_training_job(**training_config) in the next cell.
training_config = {
    "TrainingJobName": job_name,
    # Custom container image, with input data provided in "File" mode.
    "AlgorithmSpecification": {
        "TrainingImage": training_image,
        "TrainingInputMode": "File",
    },
    "RoleArn": role,
    # Output artifacts go under <bucket>/<output_prefix>/tf2-custom.
    "OutputDataConfig": {
        "S3OutputPath": f"{output_bucket_path}/{output_prefix}/tf2-custom",
    },
    # A single "train" channel pointing at the CSV uploaded earlier.
    "InputDataConfig": [
        {
            "ChannelName": "train",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": training_input_path,
                    "S3DataDistributionType": "FullyReplicated",
                },
            },
            "ContentType": "text/csv",
        },
    ],
    # One ml.m5.xlarge instance with a 75 GB volume.
    "ResourceConfig": {
        "InstanceType": "ml.m5.xlarge",
        "InstanceCount": 1,
        "VolumeSizeInGB": 75,
    },
    # Upper bound on runtime: 86400 seconds = 24 hours.
    "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
}

In [26]:
import time

# Statuses after which the job's state no longer changes. "Stopped" must be
# included: a job that is stopped (manually, or on hitting its stopping
# condition) never reaches "Completed" or "Failed", so polling only for those
# two would loop forever.
TERMINAL_STATUSES = {"Completed", "Failed", "Stopped"}
POLL_INTERVAL_SECONDS = 60

# Submit the training job described by `training_config`.
client = boto3.client("sagemaker", region_name=region)
client.create_training_job(**training_config)

# Poll the job once per interval, printing each observed status, until it
# reaches a terminal state.
while True:
    job_description = client.describe_training_job(TrainingJobName=job_name)
    status = job_description["TrainingJobStatus"]
    print(status)
    if status in TERMINAL_STATUSES:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

# Surface the reason on failure so it is visible without opening the console.
if status == "Failed":
    print("Failure reason:", job_description.get("FailureReason", "<not provided>"))

InProgress
InProgress
InProgress
InProgress
InProgress
Completed
