In [165]:
import os
import boto3
import re
import copy
import time
from time import gmtime, strftime
from sagemaker import get_execution_role

role = get_execution_role()

region = boto3.Session().region_name

bucket='mitesh-sagemaker-11142019' # Replace with your s3 bucket name

bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket) # The URL to access the bucket

In [166]:
%%time 
import pickle, gzip, urllib.request, json
import numpy as np

prefix = 'sagemaker/xgboost-students-grades' 

data_types = ['train', 'verify', 'test']

train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')

validation_data = 's3://{}/{}/{}'.format(bucket, prefix, 'verify')

test_data = 's3://{}/{}/{}'.format(bucket, prefix, 'test')

s3_data_sets = [s3_train_data, s3_validation_data, s3_test_data]

s3_output_location = 's3://{}/{}/{}'.format(bucket, prefix, 'xgboost_model_sdk')
print("S3 location of train data {}".format(train_data))
print("S3 location of validation_data  {}".format(validation_data))

S3 location of train data s3://mitesh-sagemaker-11142019/sagemaker/xgboost-students-grades/train
S3 location of validation_data  s3://mitesh-sagemaker-11142019/sagemaker/xgboost-students-grades/verify
CPU times: user 156 µs, sys: 23 µs, total: 179 µs
Wall time: 131 µs


In [167]:
%matplotlib inline
import matplotlib.pyplot as plt

s3 = boto3.resource('s3')
# Exploring Data and Transforming to load in to S3 for training
for types in data_types:
    print("Reading sample {} data".format(types))
    s3_data_key = "{}/{}/examples".format(prefix,types)
    s3.Bucket(bucket).download_file(s3_data_key, 'raw_data')

    data_from_s3 = genfromtxt('raw_data', delimiter=',')
    labels = []
    
    print(data_from_s3.shape)
    
    for t in data_from_s3:
        labels.append(int(t[0]))
       

    grades = np.delete(train_data_from_s3,[0,1],1)

    for t in range(0, 10):
        print("-->  Grade {} labled as {}".format(grades[t],labels[t]))




Reading sample train data
-->  Grade [100.  90.  94.  97.  99. 100.  79.  75.  79.  62.  71.] labled as 3
-->  Grade [94. 88. 86. 84. 92. 72. 78. 89. 78. 87. 84.] labled as 1
-->  Grade [84. 83. 64. 47. 44. 67. 82. 87. 82. 72. 74.] labled as 3
-->  Grade [46. 45. 61. 57. 44. 59. 49. 62. 49. 44. 48.] labled as 2
-->  Grade [64. 59. 58. 50. 57. 60. 61. 68. 66. 65. 62.] labled as 1
-->  Grade [ 93.  95.  94. 100.  94.  94.  91.  98.  90.  99.  95.] labled as 3
-->  Grade [91. 82. 94. 90. 92. 70. 78. 87. 89. 73. 77.] labled as 2
-->  Grade [ 97.  90.  93. 100.  95.  98.  65.  66.  80.  68.  70.] labled as 3
-->  Grade [83. 81. 61. 72. 62. 45. 65. 78. 84. 90. 79.] labled as 5
-->  Grade [100.  92. 100.  99.  99.  94.  93.  96. 100.  97.  94.] labled as 4
Reading sample verify data
-->  Grade [100.  90.  94.  97.  99. 100.  79.  75.  79.  62.  71.] labled as 2
-->  Grade [94. 88. 86. 84. 92. 72. 78. 89. 78. 87. 84.] labled as 5
-->  Grade [84. 83. 64. 47. 44. 67. 82. 87. 82. 72. 74.] labled 

In [168]:
import sagemaker

from sagemaker.amazon.amazon_estimator import get_image_uri

container = get_image_uri(boto3.Session().region_name, 'xgboost','0.90-1')

In [169]:
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')

validation_data = 's3://{}/{}/{}'.format(bucket, prefix, 'verify')

s3_output_location = 's3://{}/{}/{}'.format(bucket, prefix, 'xgboost_model_sdk')
print(train_data)

s3://mitesh-sagemaker-11142019/sagemaker/xgboost-students-grades/train


In [170]:
xgb_model = sagemaker.estimator.Estimator(container,
                                         role, 
                                         train_instance_count=1, 
                                         train_instance_type='ml.m4.xlarge',
                                         train_volume_size = 5,
                                         output_path=s3_output_location,
                                         sagemaker_session=sagemaker.Session())

In [171]:
xgb_model.set_hyperparameters(max_depth = 5,
                              eta = .2,
                              gamma = 4,
                              min_child_weight = 6,
                              silent = 0,
                              objective = "multi:softmax",
                              num_class = 10,
                              num_round = 10)

In [172]:
train_channel = sagemaker.session.s3_input(train_data, content_type='text/csv')
valid_channel = sagemaker.session.s3_input(validation_data, content_type='text/csv')

data_channels = {'train': train_channel, 'validation': valid_channel}

In [173]:
xgb_model.fit(inputs=data_channels,  logs=True)

2019-11-28 17:08:16 Starting - Starting the training job...
2019-11-28 17:08:18 Starting - Launching requested ML instances......
2019-11-28 17:09:25 Starting - Preparing the instances for training......
2019-11-28 17:10:31 Downloading - Downloading input data...
2019-11-28 17:11:15 Training - Training image download completed. Training in progress..[31mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[31mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[31mReturning the value itself[0m
[31mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[31mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[31mINFO:root:Determined delimiter of CSV input is ','[0m
[31mINFO:root:Determined delimiter of CSV input is ','[0m
[31mINFO:root:Determined delimiter of CSV input is ','[0m
[31m[17:11:17] 500x14 matrix with 7000 entries loaded from

In [174]:
xgb_predictor = xgb_model.deploy(initial_instance_count=1,
                                instance_type='ml.m4.xlarge',
                                )

---------------------------------------------------------------------------------------------------!

In [175]:


test_key = "{}/test/examples".format(prefix)

s3.Bucket(bucket).download_file(test_key, 'test_data')

print(test_key)
      
            

sagemaker/xgboost-students-grades/test/examples


In [182]:
%matplotlib inline
from numpy import genfromtxt


test_data = "{}/test/examples".format(prefix)

s3.Bucket(bucket).download_file(test_data, 'test_data')


test_data = genfromtxt('test_data', delimiter=',')
test_lables = []
test_features = []

for t in test_data:
    test_lables.append(t[0])

    
test_data = np.delete(test_data,0,1)
        
# print(test_lables)
# print(test_data)

np.savetxt('test_data.csv', test_data, delimiter=',')


    
for i in range (0, 10):
    grades = test_data[i]
    label = test_lables[i]
#     img_reshape = img.reshape((28,28))
#     imgplot = plt.imshow(img, cmap='gray')
   
    print('Student grade {} with lable {}'.format(grades,label))
   

Student grade [70. 76. 84. 95. 91. 82. 95. 90. 87. 76. 76. 84. 86. 80.] with lable 2.0
Student grade [100.  95.  94.  95.  92.  99.  90.  96.  97.  73.  74.  60.  60.  70.] with lable 5.0
Student grade [81. 87. 85. 86. 85. 62. 75. 62. 54. 83. 88. 87. 77. 77.] with lable 4.0
Student grade [80. 78. 86. 94. 87. 82. 94. 95. 75. 71. 80. 90. 77. 77.] with lable 2.0
Student grade [89. 90. 82. 82. 84. 41. 56. 54. 45. 78. 86. 87. 83. 78.] with lable 4.0
Student grade [52. 58. 79. 46. 50. 61. 41. 47. 57. 63. 69. 65. 60. 59.] with lable 3.0
Student grade [44. 43. 62. 56. 61. 63. 54. 69. 57. 70. 55. 63. 58. 58.] with lable 3.0
Student grade [71. 75. 83. 95. 81. 93. 92. 91. 84. 73. 84. 74. 74. 77.] with lable 2.0
Student grade [56. 58. 51. 47. 64. 42. 43. 65. 68. 61. 59. 49. 60. 58.] with lable 3.0
Student grade [ 97.  92.  93.  92.  91.  97.  96.  99.  94. 100.  93.  91.  93.  92.] with lable 1.0


In [199]:
from sagemaker.predictor import csv_serializer
xgb_predictor.content_type = 'text/csv'
xgb_predictor.serializer = csv_serializer
xgb_predictor.deserializer = None

sum = 0
with open('test_data.csv', 'r') as f:
    
    for j in range(0,10):
        single_test = f.readline()
        result = xgb_predictor.predict(single_test)
       
        print("Data Expected: {} -- Model result {}".format(test_lables[j],result))
        if test_lables[j] == result:
            sum = sum + 1
        


        
print(sum)
print(len(test_lables))



Data Expected: 2.0 -- Model result b'2.0'
Data Expected: 5.0 -- Model result b'5.0'
Data Expected: 4.0 -- Model result b'4.0'
Data Expected: 2.0 -- Model result b'2.0'
Data Expected: 4.0 -- Model result b'4.0'
Data Expected: 3.0 -- Model result b'3.0'
Data Expected: 3.0 -- Model result b'3.0'
Data Expected: 2.0 -- Model result b'2.0'
Data Expected: 3.0 -- Model result b'3.0'
Data Expected: 1.0 -- Model result b'1.0'
0
500


NameError: name 'endpoint_name' is not defined