In [1]:
import boto3
import math
import dateutil
import json
import re

In [28]:
# Load KEY=VALUE settings (ENDPOINT_NAME, PROFILE_NAME, ...) from the local
# .env file into os.environ so later cells can read them.

import os

# Plain file read instead of the `!cat` shell magic: works without a shell and
# outside IPython, and a missing file raises FileNotFoundError right here
# instead of an unrelated unpacking error further down.
with open('./.env') as env_file:
    env_vars = env_file.read().splitlines()

for var in env_vars:
    var = var.strip()
    # Blank lines and '#' comments are legal in .env files; skip them.
    if not var or var.startswith('#'):
        continue
    # Split on the first '=' only, so values that contain '=' stay intact.
    key, value = var.split('=', 1)
    os.environ[key.strip()] = value.strip()

In [29]:
# Name of the endpoint to invoke and the AWS profile name handed to
# boto3.Session; both are required environment variables (KeyError if missing).
endpoint_name = os.environ['ENDPOINT_NAME']
profile_name = os.environ['PROFILE_NAME']

In [5]:
# Session bound to the configured credentials profile; the region is fixed to us-east-1.
boto_session = boto3.Session(profile_name=profile_name, region_name='us-east-1')

In [25]:
# SageMaker runtime client created from the session; used below for invoke_endpoint calls.
client = boto_session.client(service_name='sagemaker-runtime', region_name='us-east-1')

In [7]:
# Raw data samples.
# Column layout of the raw data:
#datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
# Each sample below carries only the first nine columns (datetime .. windspeed).
# "Actual" is presumably the observed `count` for that row — confirm against the dataset.
# Actual=562
sample_one = ['2012-12-19 17:00:00',4,0,1,1,16.4,20.455,50,26.0027]
# Actual=569
sample_two = ['2012-12-19 18:00:00',4,0,1,1,15.58,19.695,50,23.9994]
# Actual=4
sample_three = ['2012-12-10 01:00:00',4,0,1,2,14.76,18.94,100,0]

In [9]:
# Single observation: one instance wrapping the raw feature list.
request = {'instances': [{'features': sample_one}]}


In [10]:
request

{'instances': [{'features': ['2012-12-19 17:00:00',
    4,
    0,
    1,
    1,
    16.4,
    20.455,
    50,
    26.0027]}]}

In [12]:
print(json.dumps(request))

{"instances": [{"features": ["2012-12-19 17:00:00", 4, 0, 1, 1, 16.4, 20.455, 50, 26.0027]}]}


In [13]:
# Multiple observations as JSON: one instance per raw sample, in order.
request = {
    'instances': [
        {'features': observation}
        for observation in (sample_one, sample_two, sample_three)
    ]
}

In [14]:
print(json.dumps(request))

{"instances": [{"features": ["2012-12-19 17:00:00", 4, 0, 1, 1, 16.4, 20.455, 50, 26.0027]}, {"features": ["2012-12-19 18:00:00", 4, 0, 1, 1, 15.58, 19.695, 50, 23.9994]}, {"features": ["2012-12-10 01:00:00", 4, 0, 1, 2, 14.76, 18.94, 100, 0]}]}


In [17]:
# Raw Data Structure: 
# datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count

# Model expects data in this format (it was trained with these features):
# season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,dayofweek,hour

def transform_data(data):
    """Convert one raw observation into the CSV row sent to the model.

    Args:
        data: Sequence whose first element is a datetime string and whose
            remaining elements are the raw feature values (season, holiday,
            workingday, weather, temp, atemp, humidity, windspeed). The
            input is not modified.

    Returns:
        str: The raw features without the datetime column, followed by
        year, month, day, dayofweek (Monday=0) and hour, joined with commas.

    Raises:
        ValueError: If the first element cannot be parsed as a date/time.
        IndexError: If ``data`` is empty.
    """
    # Import the submodule explicitly. On older python-dateutil releases a bare
    # `import dateutil` does not load `dateutil.parser`, so the attribute lookup
    # only works when some other library has already imported it as a side effect.
    import dateutil.parser

    # list() copies the input and also accepts tuples and other sequences.
    raw = list(data)

    dt = dateutil.parser.parse(raw[0])
    derived = [dt.year, dt.month, dt.day, dt.weekday(), dt.hour]

    # Drop the datetime column (index 0); the model only sees the derived parts.
    return ','.join(str(feature) for feature in raw[1:] + derived)

In [18]:
print('Raw Data:', sample_one)
print('Transfored Data:', transform_data(sample_one))

Raw Data: ['2012-12-19 17:00:00', 4, 0, 1, 1, 16.4, 20.455, 50, 26.0027]
Transfored Data: 4,0,1,1,16.4,20.455,50,26.0027,2012,12,19,2,17


In [19]:
# Single observation with invalid input: the first element is not a
# parseable datetime, so transforming it is expected to fail.
request = {'instances': [{'features': ['hi there', 0, 2]}]}


In [20]:
request['instances']

[{'features': ['hi there', 0, 2]}]

In [22]:
try:
    # Keep the rows under their own name. Assigning the list to `transform_data`
    # would replace the function whenever the transformation succeeds, and every
    # later call to transform_data(...) would then fail.
    transformed_data = [transform_data(instance['features']) for instance in request['instances']]
except Exception as err:
    # Deliberately broad: this cell demonstrates how invalid input surfaces.
    print('Error when transforming: {0}'.format(err))

Error when transforming: Unknown string format: hi there


In [23]:
# Single observation: reset `request` to a valid payload before invoking the endpoint.
request = {'instances': [{'features': sample_one}]}

In [26]:
# Invoke prediction

client

<botocore.client.SageMakerRuntime at 0x7efd88df8c40>

In [30]:
# Send the first instance as a single CSV row and keep the raw response.
result = client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=transform_data(request['instances'][0]['features']).encode('utf-8'),
    ContentType='text/csv',
)

In [31]:
# Read and decode the response body. This rebinds `result` from the response
# dict to a str, so re-running this cell without re-invoking the endpoint fails.
result = result['Body'].read().decode('utf-8')

In [33]:
# expm1 reverses a log1p transform; presumably the model predicts log1p(count) — confirm against training.
print('Predicted Count',math.expm1(float(result)))

Predicted Count 571.092597122044


In [34]:
# Multiple observations: first, second and third instance, in that order.
request = {
    "instances": [
        {"features": raw_sample}
        for raw_sample in [sample_one, sample_two, sample_three]
    ]
}

In [38]:
# Show every raw instance next to the CSV row that will be sent for it.
for instance in request['instances']:
    print(instance, 'Transformed', sep='\n')
    csv_row = transform_data(instance['features'])
    print(' ', csv_row)

{'features': ['2012-12-19 17:00:00', 4, 0, 1, 1, 16.4, 20.455, 50, 26.0027]}
Transformed
  4,0,1,1,16.4,20.455,50,26.0027,2012,12,19,2,17
{'features': ['2012-12-19 18:00:00', 4, 0, 1, 1, 15.58, 19.695, 50, 23.9994]}
Transformed
  4,0,1,1,15.58,19.695,50,23.9994,2012,12,19,2,18
{'features': ['2012-12-10 01:00:00', 4, 0, 1, 2, 14.76, 18.94, 100, 0]}
Transformed
  4,0,1,2,14.76,18.94,100,0,2012,12,10,0,1


In [57]:
# Send all observations in one request: one CSV row per line.
result = client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body='\n'.join(
        transform_data(instance['features']) for instance in request['instances']
    ).encode('utf-8'),
    ContentType='text/csv',
)

In [58]:
result

{'ResponseMetadata': {'RequestId': '452d1c9a-5730-470a-b616-b8b7fe97c6df',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '452d1c9a-5730-470a-b616-b8b7fe97c6df',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Tue, 07 May 2024 11:36:06 GMT',
   'content-type': 'text/csv; charset=utf-8',
   'content-length': '54',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'text/csv; charset=utf-8',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x7efd88b3db20>}

In [59]:
# Read and decode the batch response body. `result` is rebound from the
# response dict to the decoded text, so this cell is not re-runnable on its own.
result = result['Body'].read().decode('utf-8')

In [60]:
result

'6.349300861358643\n6.321451187133789\n2.441441059112549\n'

In [61]:
print(result)

6.349300861358643
6.321451187133789
2.441441059112549



In [62]:
# Splitting the response text into individual predictions.
#
# Split on separators (commas / whitespace) only. The previous pattern
# r'[^0-9.]+' split on every character that is not a digit or a dot, which
# includes '-' and 'e': a negative value lost its sign and a value in
# scientific notation such as '1e-05' was torn into two bogus numbers.
pattern = r'[,\s]+'
result = re.split(pattern, result.strip())
# expm1 reverses a log1p transform; presumably the model predicts log1p(count) — confirm.
# Empty tokens (e.g. from an empty response) are skipped.
predictions = [math.expm1(float(r)) for r in result if r != ""]

In [63]:
predictions

[571.092597122044, 555.3798181158465, 10.489585991183136]