In [1]:
import sagemaker
import boto3

import numpy as np                                # For performing matrix operations and numerical processing
import pandas as pd                               # For manipulating tabular data
from time import gmtime, strftime
import os

region = boto3.Session().region_name
smclient = boto3.Session().client('sagemaker')

In [2]:
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

arn:aws:iam::597261055787:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole


In [3]:
sess = sagemaker.Session()
bucket = 'hyperparam'                   # Set a default S3 bucket
prefix = '1stTrial'

In [4]:
print(bucket)

hyperparam


In [8]:
# Import raw data 
def import_raw_data():
    raw = pd.read_csv('csv//true_car_listings.csv')
    df = raw[['Price','Mileage','Year','Make']].copy().dropna()
    df['Make'] = df['Make'].rank() 
    train = df.sample(frac=0.8).copy() 
    test = df.loc[~df.index.isin(train.index.values)].copy()    
    return train,test
train,test = import_raw_data()
train.to_csv('csv//train.csv')
test.to_csv('csv//test.csv')

In [9]:
### Upload datasets
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train.csv')).upload_file('csv//train.csv')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test.csv')).upload_file('csv//test.csv')

In [14]:
### Hyperparm metrics

tuning_job_config = {
    "ParameterRanges": {
      "CategoricalParameterRanges": [],
      "ContinuousParameterRanges": [
        {
          "MaxValue": "1",
          "MinValue": "0",
          "Name": "eta"
        },
        {
          "MaxValue": "2",
          "MinValue": "0",
          "Name": "alpha"
        }]
    },
    "ResourceLimits": {
      "MaxNumberOfTrainingJobs": 20,
      "MaxParallelTrainingJobs": 3
    },
    "Strategy": "Bayesian",
    "HyperParameterTuningJobObjective": {
      "MetricName": "validation:auc",
      "Type": "Maximize"
    }
  }

In [19]:
training_image = sagemaker.image_uris.retrieve('xgboost', region, '1.0-1')

s3_input_train = 's3://{}/{}/train.csv'.format(bucket, prefix)
s3_input_validation ='s3://{}/{}/validation.csv'.format(bucket, prefix)

training_job_definition = {
    "AlgorithmSpecification": {
      "TrainingImage": training_image,
      "TrainingInputMode": "File"
    },
    "InputDataConfig": [
      {
        "ChannelName": "train",
        "CompressionType": "None",
        "ContentType": "csv",
        "DataSource": {
          "S3DataSource": {
            "S3DataDistributionType": "FullyReplicated",
            "S3DataType": "S3Prefix",
            "S3Uri": s3_input_train
          }
        }
      },
      {
        "ChannelName": "validation",
        "CompressionType": "None",
        "ContentType": "csv",
        "DataSource": {
          "S3DataSource": {
            "S3DataDistributionType": "FullyReplicated",
            "S3DataType": "S3Prefix",
            "S3Uri": s3_input_validation
          }
        }
      }
    ],
    "OutputDataConfig": {
      "S3OutputPath": "s3://{}/{}/output".format(bucket,prefix)
    },
    "ResourceConfig": {
      "InstanceCount": 2,
      "InstanceType": "ml.p2.xlarge",
      "VolumeSizeInGB": 5
    },
    "RoleArn": role,
    "StaticHyperParameters": {
      "eval_metric": "auc",
      "num_round": "100",
      "objective": "binary:logistic",
      "rate_drop": "0.3",
      "tweedie_variance_power": "1.4"
    },
    "StoppingCondition": {
      "MaxRuntimeInSeconds": 3600
    }
}

In [20]:
tuning_job_name = "hyperparm-1st-example"
smclient.create_hyper_parameter_tuning_job(HyperParameterTuningJobName = tuning_job_name,
                                           HyperParameterTuningJobConfig = tuning_job_config,
                                           TrainingJobDefinition = training_job_definition)

ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateHyperParameterTuningJob operation: The account-level service limit 'ml.p2.xlarge for training job usage' is 2 Instances, with current utilization of 0 Instances and a request delta of 6 Instances. Please contact AWS support to request an increase for this limit.