In [None]:
# Name of the S3 bucket used by this notebook -- replace the placeholder before running.
bucket = 'YOUR BUCKET NAME HERE'
# Key prefix under which the training data and model output are stored.
prefix = '{}/DEMO-linear-dm'.format(bucket)
 
# Define IAM role
# `role` is passed to the SageMaker Estimator further down in this notebook.
import boto3
import re
from sagemaker import get_execution_role

# Presumably resolves the role attached to the current notebook instance --
# confirm against the SageMaker SDK documentation.
role = get_execution_role()

In [None]:
import numpy as np                                # For matrix operations and numerical processing
import pandas as pd                               # For munging tabular data
import matplotlib.pyplot as plt                   # For charts and visualizations
from IPython.display import Image                 # For displaying images in the notebook
from IPython.display import display               # For displaying outputs in the notebook
from time import gmtime, strftime                 # For labeling SageMaker models, endpoints, etc.
import sys                                        # For writing outputs to notebook
import math                                       # For ceiling function
import json                                       # For parsing hosting outputs
import os                                         # For manipulating filepath names
import io
import sagemaker.amazon.common as smac
import sagemaker                                  # Amazon SageMaker's Python SDK provides many helper functions
from sagemaker.predictor import csv_serializer    # Converts strings for HTTP POST requests on inference

In [None]:
# Local CSV file; the same name is reused as the S3 object key.
filename = 'basketball_predictions_112119.csv'

# Uploads the given file using a managed uploader, which will split up large
# files automatically and upload parts in parallel.
boto3.client('s3').upload_file(Filename=filename, Bucket=bucket, Key=filename)

# Handle to the object that was just uploaded.
# NOTE: `object` shadows the Python builtin; the name is kept because the
# next cell reads it.
s3 = boto3.resource('s3')
object = s3.Object(bucket, filename)


In [None]:
# Limit how much of a DataFrame the notebook renders.
pd.set_option('display.max_columns', 7)   # Make sure we can see all of the columns
pd.set_option('display.max_rows', 15)     # Keep the output on one page

# Read the uploaded CSV straight from the S3 response body.
csv_body = object.get()['Body']
data = pd.read_csv(csv_body)
data

In [None]:
# Cast every column to float and rebind `data` to the converted frame.
# NOTE(review): presumably every column in the CSV is numeric; a non-numeric
# column would make this cast fail -- confirm against the data file.
data = data.astype(float)
data

In [None]:
# Convert categorical variables to sets of indicators.
# NOTE(review): `data` was cast to float in the previous cell, so there are
# presumably no categorical columns left for get_dummies to encode -- confirm
# whether this step is still needed.
model_data = pd.get_dummies(data) # Convert categorical variables to sets of indicators
model_data

In [None]:
# Row offsets at 60% and 80% of the data. They are only referenced by the
# disabled validation/test split kept in the comments below.
n_rows = len(model_data)
split_train = int(n_rows * 0.6)
split_test = int(n_rows * 0.8)

# The label is the 'point_diff (N)' column; every other column is a feature.
# The whole dataset is currently used for training (no rows are held out).
target_col = 'point_diff (N)'
train_y = model_data[target_col]
train_X = model_data.drop(target_col, axis='columns')

# Disabled train/validation/test split:
#train_y = model_data['point_diff (N)'][:split_train]
#validation_y = model_data['point_diff (N)'][split_train:split_test]
#validation_X = model_data.drop('point_diff (N)', axis=1).iloc[split_train:split_test, ].as_matrix()
#test_y = model_data['point_diff (N)'][split_test:]
#test_X = model_data.drop('point_diff (N)', axis=1).iloc[split_test:, ].as_matrix()

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the 'linear-learner' image for the region of the current boto3 session.
region = boto3.Session().region_name
container = get_image_uri(region, 'linear-learner')

In [None]:
# Serialize the features and labels into an in-memory buffer as float32.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,
                                 np.array(train_X).astype('float32'),
                                 np.array(train_y).astype('float32'))
buf.seek(0)  # Rewind so the upload starts from the first byte

# Build the S3 key once, with '/' separators, and derive both the upload
# target and the training URI from it. S3 keys always use '/', whereas
# os.path.join uses the OS separator, so the uploaded key and the URI handed
# to the estimator could otherwise disagree.
key = 'linear_train.data'
train_key = '/'.join([prefix, 'train', key])
boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, train_key)

#buf = io.BytesIO()
#smac.write_numpy_to_dense_tensor(buf, np.array(validation_X).astype('float32'), np.array(validation_y).astype('float32'))
#buf.seek(0)
#key = 'linear_validation.data'
#boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'validation', key)).upload_fileobj(buf)
#s3_validation_data = 's3://{}/{}/validation/{}'.format(bucket, prefix, key)

In [None]:
sess = sagemaker.Session()

# Estimator for the linear-learner container; model artifacts are written
# under the notebook's S3 prefix.
linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       train_instance_count=1,
                                       train_instance_type='ml.m5.large',
                                       output_path='s3://{}/{}/output'.format(bucket, prefix),
                                       sagemaker_session=sess)

# feature_dim is taken from the training matrix instead of being hard-coded,
# so it stays correct if the number of feature columns changes.
linear.set_hyperparameters(feature_dim=int(train_X.shape[1]),
                           mini_batch_size=1,
                           predictor_type='regressor',
                           epochs=3,
                           loss='squared_loss')

# Launch the training job on the data uploaded to the 'train' channel.
linear.fit({'train': s3_train_data})

In [None]:
# Deploy the trained model to an endpoint backed by one ml.t2.medium instance.
lin_predictor = linear.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
)

In [None]:
# Feature values for a single matchup, sent to the endpoint as one CSV row.
matchup = [1471,1328,0.272,0.5,0.291,0.2]

# Target the endpoint deployed above rather than a hard-coded endpoint name
# from an earlier run, so a fresh "Run All" queries the model that was just
# trained.
endpoint_name = lin_predictor.endpoint
predictor = sagemaker.predictor.RealTimePredictor(endpoint=endpoint_name,   #create predictor to send serialized data to sagemaker
                                                serializer=sagemaker.predictor.csv_serializer,
                                                content_type='text/csv')

response = predictor.predict(matchup)

In [None]:
# Show the raw response returned by predictor.predict for the matchup above.
response