In [1]:
%load_ext autotime

In [2]:
import boto3
import datetime
import logging
import json
import pickle
import pytest
import sys
import time

from etltools import s3

from lambda_client import (
    ClaimsClient,
    BenefitsClient,
    CalculatorClient,
)

# `reload` is the Python 2 builtin; this notebook runs under Python 2.7.
# NOTE(review): presumably the kernel has already attached handlers to the root logger, which
# would turn the logging.basicConfig() call in the next cell into a no-op; reloading the module
# resets that state -- confirm.
reload(logging)  # get around notebook problem

<module 'logging' from '/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.pyc'>

time: 512 ms


In [3]:
# Root-logger setup for the whole notebook: INFO and above, every record prefixed with its
# timestamp, {file:line} of the call site and the level name.
LOG_FORMAT = '[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s'

# Explicit handlers (log file + stdout) are intentionally left disabled:
#     handlers=[
#         logging.FileHandler(filename='mylog.log', mode='w'),
#         logging.StreamHandler(sys.stdout),
#     ]
# NOTE(review): basicConfig only accepts `handlers` from Python 3.3 on, which is presumably
# why it is switched off in this Python 2.7 notebook -- confirm.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

time: 1.75 ms


In [4]:
# Smoke-test the logging setup: the root logger should emit one INFO record in the configured format.
logger = logging.getLogger()
logger.info('TEST INFO')

[2018-01-14 21:14:09,170] {<ipython-input-4-efd2929c73d3>:3} INFO - TEST INFO


time: 1.96 ms


In [5]:
# Shared inputs for every test section below.

# Keyword arguments for the AWS session; every client in this notebook receives them.
aws_info = {'profile_name': 'sandbox'}

# Sample of member uids stored as JSON on S3.
uids = s3.read_json('s3://picwell.sandbox.medicare/samples/philadelphia-2015-1k-sample')

# The two plan ids (picwell_id values) used by the cost-breakdown tests.
pids = ['2820028008119', '2820088001036']

time: 248 ms


# Test ConfigInfo

In [None]:
from lambda_client.config_info import ConfigInfo

# Parse the client configuration file and echo the settings used by the tests below.
configs = ConfigInfo('lambda_client/lambda.cfg')

# Claims bucket and path:
print(configs.claims_bucket)
print(configs.claims_path)
print('')
# Benefits bucket and path:
print(configs.benefits_bucket)
print(configs.benefits_path)
print('')
# Claims table name:
print(configs.claims_table)

In [None]:
# Show how many states the configuration exposes, then the states themselves.
all_states = configs.all_states

print('{} states'.format(len(all_states)))
print(all_states)

# Test ClaimsClient

In [None]:
# ClaimsClient with the S3 location (bucket + path) passed explicitly:
client = ClaimsClient(
    aws_info,
    s3_bucket=configs.claims_bucket,
    s3_path=configs.claims_path,
)

# Fetch the claims of the first sampled uid only.
people = client.get(uids[:1])
print('claims of {} people retrieved'.format(len(people)))

In [None]:
# Peek at the first record: its keys, then (as the cell's displayed value) the uid together
# with only the first five medical claims so the output stays short.
person = people[0]
print(person.keys())
{'uid': person['uid'], 'medical_claims': person['medical_claims'][:5]}

In [None]:
# ClaimsClient with the DynamoDB table name passed explicitly (instead of an S3 location):
client = ClaimsClient(
    aws_info,
    table_name=configs.claims_table,
)

# Fetch the claims of the first sampled uid only.
people = client.get(uids[:1])
print('claims of {} people retrieved'.format(len(people)))

In [None]:
# ClaimsClient built from the AWS settings alone (tests the configuration file), this time
# retrieving several people in one call:
client = ClaimsClient(aws_info)

people = client.get(uids[:5])
print('claims of {} people retrieved'.format(len(people)))

In [None]:
# Scale up: retrieve claims for every uid in the sample with the same client.
people = client.get(uids)
print('claims of {} people retrieved'.format(len(people)))

In [None]:
# Pickling the client must fail: pytest.raises requires an exception whose message matches
# the expected text (the `match` argument is treated as a regular expression).
pickle_error = 'ClaimsClient object cannot be pickled.'
with pytest.raises(Exception, match=pickle_error):
    pickle.dumps(client)

# Test BenefitsClient

In [None]:
# From here on `client` is a BenefitsClient (the name is rebound from the claims tests above).
client = BenefitsClient(aws_info)

print(client.all_states)

In [None]:
# Read plans one state at a time through the client's private per-state reader.
# `plans` is overwritten by each call, so it ends up holding the state '04' result.
plans = client._get_one_state('01')
print('{} plans read for state 01'.format(len(plans)))

plans = client._get_one_state('04')
print('{} plans read for state 04'.format(len(plans)))

In [None]:
# Same two states as above, requested together through the public API.
plans = client.get_by_state(['01', '04'])
print('{} plans read'.format(len(plans)))

In [None]:
# Read every plan the client can serve; `plans` from this cell is what the equality check
# further down compares against the whole-file read.
plans = client.get_all()
print('{} plans read'.format(len(plans)))

In [None]:
# Compare the timing against reading the entire file:
# this cell only prepares a boto3 S3 resource; the actual read happens in the next cell.
from lambda_client.shared_utils import _read_json

# Session built from the same keyword arguments (profile name) that the clients receive.
session = boto3.Session(**aws_info)
resource = session.resource('s3')

In [None]:
# Read the whole benefits file directly with the boto3 resource, as the baseline to compare
# against BenefitsClient.get_all().
# NOTE(review): the two string arguments look like (bucket, key) -- confirm against _read_json.
all_plans = _read_json('picwell.sandbox.medicare', 'ma_benefits/cms_2018_pbps_20171005.json', resource)

# Report the size of what this cell just read. The previous version printed len(plans), i.e.
# the result of the earlier get_all() cell, so the number shown was for the wrong object.
print('{} plans read'.format(len(all_plans)))

In [None]:
# Both read paths must yield exactly the same plans. The two lists are ordered by picwell_id
# before comparing so that retrieval order does not matter.
# Relies on `plans` still holding the get_all() result and `all_plans` the whole-file read.
def sort_key(plan):
    """Return the plan's picwell_id, the key used to order both plan lists."""
    return plan['picwell_id']

assert sorted(all_plans, key=sort_key) == sorted(plans, key=sort_key)

In [None]:
# Pickling the client must fail: pytest.raises requires an exception whose message matches
# the expected text (the `match` argument is treated as a regular expression).
pickle_error = 'BenefitsClient object cannot be pickled.'
with pytest.raises(Exception, match=pickle_error):
    pickle.dumps(client)

# Test Cost Breakdown

In [6]:
# From here on `client` is a CalculatorClient (rebinding the name used by the earlier sections),
# built from the same AWS settings as the other clients.
client = CalculatorClient(aws_info)

time: 882 µs


In [7]:
# Cost breakdown for the first sampled uid against both plan ids, with verbose=True.
# NOTE(review): verbose=True appears to echo the Lambda's own log output into the cell -- confirm.
responses = client.get_breakdown(uids[:1], pids, verbose=True)

print('{} responses returned'.format(len(responses)))

# Display the first response as the cell's value.
responses[0]

[REDACTED: tail of an x-amz-security-token header value echoed by the Lambda debug log]

host;x-amz-content-sha256;x-amz-date;x-amz-security-token
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
[DEBUG]	2018-01-15T02:14:21.722Z	cbae26c3-f999-11e7-aa3b-a5be7356686b	StringToSign:
AWS4-HMAC-SHA256
20180115T021421Z
20180115/us-east-1/s3/aws4_request
136d77f31234b59e8a615f0afb488acbfe151355686ceeb8e735e225a8293411
[DEBUG]	2018-01-15T02:14:21.722Z	cbae26c3-f999-11e7-aa3b-a5be7356686b	Signature:
9a6ca68039acacceffd2367cf1ed63a639cdef1322913376813a018c11e94ea1
[DEBUG]	2018-01-15T02:14:21.722Z	cbae26c3-f999-11e7-aa3b-a5be7356686b	Sending http request: <PreparedRequest [GET]>
[INFO]	2018-01-15T02:14:21.740Z	cbae26c3-f999-11e7-aa3b-a5be7356686b	Starting new HTTPS connection (1): s3.amazonaws.com
[DEBUG]	2018-01-15T02:14:22.75Z	cbae26c3-f999-11e7-aa3b-a5be7356686b	"GET /picwell.sandbo

{u'allowed': 26376.640000000003,
 u'covered_breakdown': {u'categories': {u'0': 0.0,
   u'11': 186.33999999999997,
   u'13': 13.23,
   u'15': 30.79,
   u'16': 0.0,
   u'19': 80.0,
   u'25': 1750.0,
   u'30': 719.1700000000001,
   u'31': 300.0,
   u'44': 0.0,
   u'49': 118.48800000000001,
   u'7': 479.558},
  u'composite': 3677.5759999999996,
  u'in_network': 3677.5759999999996,
  u'out_network': 0.0},
 u'deductible_breakdown': {u'categories': {u'0': 0.0,
   u'11': 0.0,
   u'13': 0.0,
   u'15': 0.0,
   u'16': 0.0,
   u'19': 0.0,
   u'25': 0.0,
   u'30': 0.0,
   u'31': 0.0,
   u'44': 0.0,
   u'49': 0.0,
   u'7': 0.0},
  u'composite': 0.0,
  u'in_network': 0.0,
  u'out_network': 0.0},
 u'oop': 3677.5759999999996,
 u'picwell_id': u'2820028008119',
 u'uid': u'1302895801',
 u'uncovered': 0.0,
 u'uncovered_breakdown': {u'categories': {u'0': 0.0,
   u'11': 0.0,
   u'13': 0.0,
   u'15': 0.0,
   u'16': 0.0,
   u'19': 0.0,
   u'25': 0.0,
   u'30': 0.0,
   u'31': 0.0,
   u'44': 0.0,
   u'49': 0.0,


time: 2.24 s


In [14]:
# Exercise the recursive-call path: ten uids with max_calculated_uids=10, verbose logging on.
responses = client.get_breakdown(
    uids[:10],
    pids,
    max_calculated_uids=10,
    verbose=True,
)

print('{} responses returned'.format(len(responses)))

START RequestId: b77f7522-f99b-11e7-a442-2f62625222a6 Version: $LATEST
[INFO]	2018-01-15T02:28:05.419Z	b77f7522-f99b-11e7-a442-2f62625222a6	Clock started at 2018-01-15 02:28:05.419389.
[INFO]	2018-01-15T02:28:05.589Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:05.611Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:05.830Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:05.867Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:06.127Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:06.513Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:06.933Z	b77f7522-f99b-11e7-a442-2f62625222a6	Found credentials in environment variables.
[INFO]	2018-01-15T02:28:

In [22]:
# Same ten uids, now with max_lambda_calls=2 (the captured log reports the 10 uids being
# broken into 2 groups), verbose logging on.
responses = client.get_breakdown(
    uids[:10],
    pids,
    max_lambda_calls=2,
    verbose=True,
)

print('{} responses returned'.format(len(responses)))

START RequestId: b413934a-f99e-11e7-bb1a-779ece4c41ff Version: $LATEST
[INFO]	2018-01-15T02:49:28.213Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Clock started at 2018-01-15 02:49:28.213821.
[INFO]	2018-01-15T02:49:28.214Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	10 uids are broken into 2 groups
[INFO]	2018-01-15T02:49:28.214Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Distribution took 1.3e-05 seconds.
[INFO]	2018-01-15T02:49:28.230Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Found credentials in environment variables.
[INFO]	2018-01-15T02:49:28.549Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Starting new HTTPS connection (1): lambda.us-east-1.amazonaws.com
[INFO]	2018-01-15T02:49:28.549Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Starting new HTTPS connection (1): lambda.us-east-1.amazonaws.com
[INFO]	2018-01-15T02:49:34.441Z	b413934a-f99e-11e7-bb1a-779ece4c41ff	Clock stopped at 2018-01-15 02:49:34.441748 (elapsed: 6.227932 seconds)
END RequestId: b413934a-f99e-11e7-bb1a-779ece4c41ff
REPORT RequestId: b413934a-f99e-1

In [23]:
# Same ten uids with default settings and no verbose logging; the captured run returned
# 20 responses for these 10 uids and 2 plan ids.
responses = client.get_breakdown(uids[:10], pids)

print('{} responses returned'.format(len(responses)))

20 responses returned
time: 3.43 s


In [25]:
# Scale up: request the breakdown for every uid in the sample against both plan ids.
responses = client.get_breakdown(uids, pids)

print('{} responses returned'.format(len(responses)))

2000 responses returned
time: 17.6 s


In [None]:
# unique_uids = {cost['uid'] for cost in responses}
# len(unique_uids)

In [None]:
# Run the calculations locally for comparison:
from lambda_package.calc.calculator import calculate_oop

# Inputs: the claims of every sampled person and the plans for the selected plan ids.
claims_client = ClaimsClient(aws_info)
people = claims_client.get(uids)

benefits_client = BenefitsClient(aws_info)
plans = benefits_client.get_by_pid(pids)

# One cost record per (person, plan) pair, tagged with the ids that identify the pair.
costs = []
for person in people:
    claims = person['medical_claims']

    for plan in plans:
        cost = calculate_oop(claims, plan)
        cost.update({'uid': person['uid'], 'picwell_id': str(plan['picwell_id'])})
        costs.append(cost)

print('{} costs calculated'.format(len(costs)))

In [None]:
# benefits_client = BenefitsClient()
# plans_CA = benefits_client.get_by_state(['06'])
# pids_CA = [plan['picwell_id'] for plan in plans_CA]

# print '{} plans identified'.format(len(pids_CA))

In [None]:
# A sample size closer to the commercial use case: the first 300 uids against both plan ids.
responses = client.get_breakdown(uids[:300], pids)

print('{} responses returned'.format(len(responses)))

# Test Batch Calculation

In [None]:
# uids = s3.read_json('s3n://picwell.sandbox.medicare/samples/philadelphia-2015')

# print '{} uids read'.format(len(uids))

In [None]:
# uids[:10]

In [None]:
# requests_per_second = 100

# for uid in uids:
# #     client.calculate_async(uid, months=['01'])
#     client.calculate_async(uid)
#     time.sleep(1.0/requests_per_second)  