In [166]:
# --- SageMaker / S3 context for the whole notebook ---
import re
import boto3
from sagemaker import get_execution_role

# S3 bucket that holds the input CSV and receives training data / model output.
bucket = 'nba-game-prediction-smayne'
# Key prefix (inside `bucket`) under which train/ and output/ objects are written.
# NOTE(review): the prefix repeats the bucket name, so objects land under
# s3://<bucket>/<bucket>/DEMO-linear-dm/... -- confirm this is intentional.
prefix = 'nba-game-prediction-smayne/DEMO-linear-dm'

# IAM role of the current execution environment; handed to the estimator later.
role = get_execution_role()

In [157]:
import numpy as np                                # For matrix operations and numerical processing
import pandas as pd                               # For munging tabular data
import matplotlib.pyplot as plt                   # For charts and visualizations
from IPython.display import Image                 # For displaying images in the notebook
from IPython.display import display               # For displaying outputs in the notebook
from time import gmtime, strftime                 # For labeling SageMaker models, endpoints, etc.
import sys                                        # For writing outputs to notebook
import math                                       # For ceiling function
import json                                       # For parsing hosting outputs
import os                                         # For manipulating filepath names
import io
import sagemaker.amazon.common as smac
import sagemaker                                  # Amazon SageMaker's Python SDK provides many helper functions
from sagemaker.predictor import csv_serializer    # Converts strings for HTTP POST requests on inference

In [158]:
# Handle to the raw predictions CSV stored at the root of `bucket`.
# NOTE: `object` shadows the Python builtin; the name is kept because the
# next cell reads from it.
s3 = boto3.resource('s3')
object = s3.Object(bucket_name=bucket, key='basketball_predictions_112119.csv')

In [167]:
# Stream the CSV body straight from S3 into a DataFrame.
csv_body = object.get()['Body']
data = pd.read_csv(csv_body)

# Display limits for rendered frames in this notebook.
pd.set_option('display.max_columns', 7)   # Make sure we can see all of the columns
pd.set_option('display.max_rows', 15)     # Keep the output on one page
data

Unnamed: 0,point_diff (N),team1_elo (N),team2_elo (N),team1_away_percentage (N),team1_last10 (N),team2_home_percentage (N),team2_last10 (N)
0,12,1493,1482,0.333,0.8,0.550,0.3
1,-19,1434,1464,0.434,0.5,0.541,0.8
2,19,1397,1319,0.333,0.3,0.380,0.3
3,-10,1449,1667,0.166,0.3,0.840,0.8
4,-14,1463,1505,0.318,0.3,0.652,0.7
5,18,1271,1225,0.240,0.1,0.217,0.1
6,18,1654,1270,0.560,0.5,0.210,0.1
...,...,...,...,...,...,...,...
45,-32,1285,1654,0.238,0.5,0.850,0.8
46,-17,1429,1470,0.437,0.5,0.667,0.4


In [160]:
# Cast every column to 64-bit float so the whole frame has one numeric dtype.
data = data.astype('float64')
data

Unnamed: 0,point_diff (N),team1_elo (N),team2_elo (N),team1_away_percentage (N),team1_last10 (N),team2_home_percentage (N),team2_last10 (N)
0,12.0,1493.0,1482.0,0.333,0.8,0.550,0.3
1,-19.0,1434.0,1464.0,0.434,0.5,0.541,0.8
2,19.0,1397.0,1319.0,0.333,0.3,0.380,0.3
3,-10.0,1449.0,1667.0,0.166,0.3,0.840,0.8
4,-14.0,1463.0,1505.0,0.318,0.3,0.652,0.7
5,18.0,1271.0,1225.0,0.240,0.1,0.217,0.1
6,18.0,1654.0,1270.0,0.560,0.5,0.210,0.1
...,...,...,...,...,...,...,...
45,-32.0,1285.0,1654.0,0.238,0.5,0.850,0.8
46,-17.0,1429.0,1470.0,0.437,0.5,0.667,0.4


In [161]:
# Convert categorical variables to sets of indicators.
# The displayed result matches the all-float input frame, so this step appears
# to be a pass-through for the current data.
model_data = pd.get_dummies(data=data)
model_data

Unnamed: 0,point_diff (N),team1_elo (N),team2_elo (N),team1_away_percentage (N),team1_last10 (N),team2_home_percentage (N),team2_last10 (N)
0,12.0,1493.0,1482.0,0.333,0.8,0.550,0.3
1,-19.0,1434.0,1464.0,0.434,0.5,0.541,0.8
2,19.0,1397.0,1319.0,0.333,0.3,0.380,0.3
3,-10.0,1449.0,1667.0,0.166,0.3,0.840,0.8
4,-14.0,1463.0,1505.0,0.318,0.3,0.652,0.7
5,18.0,1271.0,1225.0,0.240,0.1,0.217,0.1
6,18.0,1654.0,1270.0,0.560,0.5,0.210,0.1
...,...,...,...,...,...,...,...
45,-32.0,1285.0,1654.0,0.238,0.5,0.850,0.8
46,-17.0,1429.0,1470.0,0.437,0.5,0.667,0.4


In [162]:
# Row offsets for a 60/20/20 train/validation/test split. They are computed but
# not applied below: the full frame is currently used for training.
n_rows = len(model_data)
split_train = int(n_rows * 0.6)
split_test = int(n_rows * 0.8)

# Target is the point differential; every remaining column is a feature.
target_col = 'point_diff (N)'
train_y = model_data[target_col]
train_X = model_data.drop(columns=target_col)

# Disabled split code, kept for reference:
#validation_y = model_data['point_diff (N)'][split_train:split_test]
#validation_X = model_data.drop('point_diff (N)', axis=1).iloc[split_train:split_test, ].as_matrix()
#test_y = model_data['point_diff (N)'][split_test:]
#test_X = model_data.drop('point_diff (N)', axis=1).iloc[split_test:, ].as_matrix()

In [163]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the 'linear-learner' algorithm image for the region of the current
# boto3 session.
region = boto3.Session().region_name
container = get_image_uri(region, 'linear-learner')

In [164]:
# Serialize features and labels (as float32) into an in-memory buffer using
# SageMaker's dense-tensor writer, then upload that buffer to S3.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, np.array(train_X).astype('float32'), np.array(train_y).astype('float32'))
buf.seek(0)  # rewind so the upload reads the buffer from the beginning

key = 'linear_train.data'
# S3 keys always use '/' as the separator, so build the key explicitly rather
# than with os.path.join (whose separator depends on the host OS). The URI
# below is derived from the same key so the two can never diverge.
train_key = '{}/train/{}'.format(prefix, key)
boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, train_key)

# Validation upload is disabled while the full dataset is used for training:
#buf = io.BytesIO()
#smac.write_numpy_to_dense_tensor(buf, np.array(validation_X).astype('float32'), np.array(validation_y).astype('float32'))
#buf.seek(0)
#key = 'linear_validation.data'
#validation_key = '{}/validation/{}'.format(prefix, key)
#boto3.resource('s3').Bucket(bucket).Object(validation_key).upload_fileobj(buf)
#s3_validation_data = 's3://{}/{}'.format(bucket, validation_key)

In [207]:
# Configure and launch a SageMaker training job for the linear-learner image.
sess = sagemaker.Session()

linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       train_instance_count=1,
                                       train_instance_type='ml.m5.large',
                                       output_path='s3://{}/{}/output'.format(bucket, prefix),
                                       sagemaker_session=sess)

# feature_dim must equal the number of feature columns in the uploaded training
# data; derive it from train_X instead of hard-coding it so the two cannot drift
# apart when columns are added or removed.
linear.set_hyperparameters(feature_dim=train_X.shape[1],
                           mini_batch_size=1,
                           predictor_type='regressor',
                           epochs=3,
                           loss='squared_loss')

# Train on the single 'train' channel uploaded earlier.
linear.fit({'train': s3_train_data})

INFO:sagemaker:Creating training-job with name: linear-learner-2019-01-22-19-08-39-271


2019-01-22 19:08:39 Starting - Starting the training job...
2019-01-22 19:08:42 Starting - Launching requested ML instances......
2019-01-22 19:10:08 Starting - Preparing the instances for training......
2019-01-22 19:11:07 Downloading - Downloading input data...
2019-01-22 19:11:35 Training - Training image download completed. Training in progress..
[31mDocker entrypoint called with argument(s): train[0m
[31m[01/22/2019 19:11:37 INFO 139640663308096] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_l


2019-01-22 19:11:49 Uploading - Uploading generated training model
2019-01-22 19:11:49 Completed - Training job completed
Billable seconds: 43


In [206]:
# Host the trained model behind a real-time endpoint on one ml.t2.medium instance.
lin_predictor = linear.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)

INFO:sagemaker:Creating model with name: linear-learner-2019-01-22-19-02-17-848
INFO:sagemaker:Creating endpoint with name linear-learner-2019-01-22-18-56-24-284


---------------------------------------------------------------------------!

In [237]:
# Feature vector for a single matchup. Values are assumed to follow the training
# feature order (verify against train_X.columns):
#   team1_elo, team2_elo, team1_away_percentage, team1_last10,
#   team2_home_percentage, team2_last10
matchup = [1471, 1328, 0.272, 0.5, 0.291, 0.2]


def _csv_predictor(endpoint):
    """Return a predictor that sends CSV-serialized data to the named endpoint.

    Parameters
    ----------
    endpoint : str
        Name of an existing SageMaker endpoint.
    """
    return sagemaker.predictor.RealTimePredictor(endpoint=endpoint,
                                                 serializer=sagemaker.predictor.csv_serializer,
                                                 content_type='text/csv')


# Endpoints from three separate training/deployment runs, compared side by side.
endpoint_name = 'linear-learner-2019-01-22-18-30-14-101'
endpoint_name_2 = 'linear-learner-2019-01-15-00-14-22-386'
endpoint_name_3 = 'linear-learner-2019-01-22-18-56-24-284'

predictor = _csv_predictor(endpoint_name)
predictor_2 = _csv_predictor(endpoint_name_2)
predictor_3 = _csv_predictor(endpoint_name_3)

# Score the same matchup against each endpoint.
response = predictor.predict(matchup)
response_2 = predictor_2.predict(matchup)
response_3 = predictor_3.predict(matchup)

In [238]:
# Show the raw prediction payload returned by `predictor` for `matchup`.
response

b'{"predictions": [{"score": 2.607135772705078}]}'

In [239]:
# Show the raw prediction payload returned by `predictor_2` for `matchup`.
response_2

b'{"predictions": [{"score": 1.9811859130859375}]}'

In [240]:
# Show the raw prediction payload returned by `predictor_3` for `matchup`.
response_3

b'{"predictions": [{"score": 0.8397684097290039}]}'