In [1]:
# Load the autotime extension, which reports the execution time of every cell
# (the "time: ..." line at the end of each recorded output).
%load_ext autotime

In [2]:
import boto3
import datetime
import logging
import json
import pickle
import pytest
import sys
import time

from etltools import s3

from lambda_client import (
    ClaimsClient,
    BenefitsClient,
    CalculatorClient,
)

# Re-import the logging module so that the basicConfig() call in the next cell takes
# effect: basicConfig() does nothing when the root logger already has handlers
# (presumably attached by the notebook kernel -- TODO confirm).
reload(logging)

<module 'logging' from '/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.pyc'>

time: 514 ms


In [3]:
# Configure the root logger: INFO and above, each record prefixed with its timestamp,
# source file:line and level name.
logging.basicConfig(
    level=logging.INFO, 
    format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    # NOTE: the disabled `handlers` argument below (log file + stdout) is only accepted
    # by basicConfig() from Python 3.3 on; this notebook runs on Python 2.7.
#     handlers=[
#         logging.FileHandler(filename='mylog.log', mode='w'),
#         logging.StreamHandler(sys.stdout),
#     ]
)

time: 1.45 ms


In [4]:
# Smoke test: an INFO record sent to the root logger should appear in the configured format.
logger = logging.getLogger()
logger.info('TEST INFO')

[2018-01-14 22:42:08,043] {<ipython-input-4-efd2929c73d3>:3} INFO - TEST INFO


time: 2.21 ms


In [5]:
# Session settings shared by the whole notebook: passed to every client constructor and
# expanded as keyword arguments to boto3.Session further down.
aws_info = {
    'profile_name': 'sandbox',
}

# Sample of person identifiers, stored as JSON on S3 (1000 entries, judging by the
# full-sample runs later in the notebook).
uids = s3.read_json('s3://picwell.sandbox.medicare/samples/philadelphia-2015-1k-sample')
# Two plan identifiers (picwell_id values) used by the cost-breakdown tests.
pids = ['2820028008119', '2820088001036']

time: 226 ms


# Test ConfigInfo

In [6]:
from lambda_client.config_info import ConfigInfo

configs = ConfigInfo('lambda_client/lambda.cfg')

print configs.claims_bucket
print configs.claims_path
print
print configs.benefits_bucket
print configs.benefits_path
print
print configs.claims_table

picwell.sandbox.analytics
junghoon/lambda_calculator

picwell.sandbox.analytics
junghoon/lambda_calculator_benefits

ma_claims
time: 3.29 ms


In [7]:
# State codes exposed by the configuration: two-digit strings that look like FIPS state
# codes -- TODO confirm.
all_states = configs.all_states

print '{} states'.format(len(all_states))
print all_states

51 states
['01', '04', '05', '06', '08', '09', '10', '11', '12', '13', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '50', '51', '53', '54', '55', '56', '72']
time: 1.33 ms


# Test ClaimsClient

In [8]:
# Test S3:
# Point the client explicitly at the bucket and path from the configuration file, then
# fetch the claims of the first sampled uid only.
client = ClaimsClient(aws_info, 
                      s3_bucket=configs.claims_bucket,
                      s3_path=configs.claims_path)

people = client.get(uids[:1])
print 'claims of {} people retrieved'.format(len(people))

claims of 1 people retrieved
time: 199 ms


In [9]:
person = people[0]
print person.keys()
{
    'uid': person['uid'],
    'medical_claims': person['medical_claims'][:5]
}

[u'medical_claims', u'uid']


{'medical_claims': [{u'admitted': u'2014-04-03',
   u'benefit_category': 16,
   u'cost': u'148.0',
   u'discharged': u'2014-04-03',
   u'length_of_stay': 1},
  {u'admitted': u'2014-05-02',
   u'benefit_category': 16,
   u'cost': u'74.55',
   u'discharged': u'2014-05-02',
   u'length_of_stay': 1},
  {u'admitted': u'2014-05-05',
   u'benefit_category': 16,
   u'cost': u'104.39',
   u'discharged': u'2014-05-05',
   u'length_of_stay': 1},
  {u'admitted': u'2014-05-13',
   u'benefit_category': 16,
   u'cost': u'210.12',
   u'discharged': u'2014-05-13',
   u'length_of_stay': 1},
  {u'admitted': u'2014-05-19',
   u'benefit_category': 11,
   u'cost': u'442.2',
   u'discharged': u'2014-05-19',
   u'length_of_stay': 1}],
 'uid': u'1302895801'}

time: 5.25 ms


In [10]:
# Test DynamoDB:
# Same single-uid retrieval, but with the client pointed at the configured claims table
# instead of an S3 bucket/path.
client = ClaimsClient(aws_info,
                      table_name=configs.claims_table)

people = client.get(uids[:1])
print 'claims of {} people retrieved'.format(len(people))

claims of 1 people retrieved
time: 228 ms


In [11]:
# Test configuration file and retrieving multiple people:
# No storage arguments are passed, so the client presumably falls back to the settings
# in its configuration file -- TODO confirm against ClaimsClient.
client = ClaimsClient(aws_info)

people = client.get(uids[:5])
print 'claims of {} people retrieved'.format(len(people))

claims of 5 people retrieved
time: 1.47 s


In [12]:
# Let's try something larger:
# the complete uid sample (1000 people; about 2.5 minutes in the recorded run).
people = client.get(uids)
print 'claims of {} people retrieved'.format(len(people))

claims of 1000 people retrieved
time: 2min 28s


In [13]:
# The object should not be pickled.
# pytest.raises() applies `match` as a regular expression (re.search), so the final
# period is escaped to require a literal '.' instead of any character.
with pytest.raises(Exception, match=r'ClaimsClient object cannot be pickled\.'):
    pickle.dumps(client)

time: 1.71 ms


# Test BenefitsClient

In [14]:
# Rebind `client` to a BenefitsClient (configured from aws_info only) and print its
# all_states list.
client = BenefitsClient(aws_info)

print client.all_states

['01', '04', '05', '06', '08', '09', '10', '11', '12', '13', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '50', '51', '53', '54', '55', '56', '72']
time: 2.17 ms


In [15]:
plans = client._get_one_state('01')
print '{} plans read for state 01'.format(len(plans))

plans = client._get_one_state('04')
print '{} plans read for state 04'.format(len(plans))

47 plans read for state 01
75 plans read for state 04
time: 947 ms


In [16]:
# Fetch the plans of both states with a single call; the count should equal the sum of
# the two per-state reads (47 + 75 = 122).
plans = client.get_by_state(['01', '04'])
print '{} plans read'.format(len(plans))

122 plans read
time: 415 ms


In [17]:
# Fetch the plans of every state; `plans` is compared against the single-file read
# further down.
plans = client.get_all()
print '{} plans read'.format(len(plans))

3558 plans read
time: 9.71 s


In [18]:
# Compare the timing against reading the entire file:
# build a boto3 S3 resource from the same profile to hand to the low-level JSON reader.
from lambda_client.shared_utils import _read_json

session = boto3.Session(**aws_info)
resource = session.resource('s3')

time: 61.8 ms


In [19]:
all_plans = _read_json('picwell.sandbox.medicare', 'ma_benefits/cms_2018_pbps_20171005.json', resource)

print '{} plans read'.format(len(plans))

3558 plans read
time: 3.53 s


In [20]:
# Ensure that the same plans are read:
def sort_key(plan):
    """Return the plan's picwell_id, used to put both plan lists in the same order."""
    return plan['picwell_id']


plans_from_file = sorted(all_plans, key=sort_key)
plans_from_client = sorted(plans, key=sort_key)
assert plans_from_file == plans_from_client

time: 160 ms


In [21]:
# The object should not be pickled.
# pytest.raises() applies `match` as a regular expression (re.search), so the final
# period is escaped to require a literal '.' instead of any character.
with pytest.raises(Exception, match=r'BenefitsClient object cannot be pickled\.'):
    pickle.dumps(client)

time: 1.6 ms


# Test Cost Breakdown

In [22]:
# Rebind `client` to a CalculatorClient; the cost cells below all go through it.
client = CalculatorClient(aws_info)

time: 705 µs


In [23]:
# Request the cost breakdown of one person for both plans.  verbose=True apparently
# echoes the Lambda execution log into the cell output -- TODO confirm.
responses = client.get_breakdown(uids[:1], pids, verbose=True)

print '{} responses returned'.format(len(responses))
# Display the first response as the cell's result.
responses[0]

START RequestId: 728e1231-f9a6-11e7-b8ee-6d4ab47091e8 Version: $LATEST
[INFO]	2018-01-15T03:44:54.50Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Clock started at 2018-01-15 03:44:54.049968.
[INFO]	2018-01-15T03:44:54.51Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Thread initialization took 0.000215 seconds.
[INFO]	2018-01-15T03:44:54.169Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Found credentials in environment variables.
[INFO]	2018-01-15T03:44:54.892Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Starting new HTTPS connection (1): s3.amazonaws.com
[INFO]	2018-01-15T03:44:55.19Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Joining all threads took 0.968574 seconds.
[INFO]	2018-01-15T03:44:55.19Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Combining all results took 2.7e-05 seconds.
[INFO]	2018-01-15T03:44:55.19Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Claim retrieval for [u'1302895801'] took 0.969172 seconds.
[INFO]	2018-01-15T03:44:55.49Z	728e1231-f9a6-11e7-b8ee-6d4ab47091e8	Thread initialization took 0.029602 seconds.
[INFO]

{u'allowed': 26376.640000000003,
 u'covered_breakdown': {u'categories': {u'0': 0.0,
   u'11': 186.33999999999997,
   u'13': 13.23,
   u'15': 30.79,
   u'16': 0.0,
   u'19': 80.0,
   u'25': 1750.0,
   u'30': 719.1700000000001,
   u'31': 300.0,
   u'44': 0.0,
   u'49': 118.48800000000001,
   u'7': 479.558},
  u'composite': 3677.5759999999996,
  u'in_network': 3677.5759999999996,
  u'out_network': 0.0},
 u'deductible_breakdown': {u'categories': {u'0': 0.0,
   u'11': 0.0,
   u'13': 0.0,
   u'15': 0.0,
   u'16': 0.0,
   u'19': 0.0,
   u'25': 0.0,
   u'30': 0.0,
   u'31': 0.0,
   u'44': 0.0,
   u'49': 0.0,
   u'7': 0.0},
  u'composite': 0.0,
  u'in_network': 0.0,
  u'out_network': 0.0},
 u'oop': 3677.5759999999996,
 u'picwell_id': u'2820028008119',
 u'uid': u'1302895801',
 u'uncovered': 0.0,
 u'uncovered_breakdown': {u'categories': {u'0': 0.0,
   u'11': 0.0,
   u'13': 0.0,
   u'15': 0.0,
   u'16': 0.0,
   u'19': 0.0,
   u'25': 0.0,
   u'30': 0.0,
   u'31': 0.0,
   u'44': 0.0,
   u'49': 0.0,


time: 4.09 s


In [24]:
# Test recursive call:
# 10 uids x 2 plans should give 20 responses.  NOTE(review): how max_calculated_uids
# leads to recursion is not visible here -- see CalculatorClient.get_breakdown.
responses = client.get_breakdown(uids[:10], pids, max_calculated_uids=10)

print '{} responses returned'.format(len(responses))

20 responses returned
time: 14.3 s


In [25]:
# Same 10 x 2 request, this time passing max_lambda_calls=2.
responses = client.get_breakdown(uids[:10], pids, max_lambda_calls=2)

print '{} responses returned'.format(len(responses))

20 responses returned
time: 8.18 s


In [26]:
# Same 10 x 2 request with the client's default limits.
responses = client.get_breakdown(uids[:10], pids)

print '{} responses returned'.format(len(responses))

20 responses returned
time: 6.52 s


In [27]:
# Let's try something larger:
# every sampled uid against both plans (1000 x 2 = 2000 responses expected).
responses = client.get_breakdown(uids, pids)

print '{} responses returned'.format(len(responses))

2000 responses returned
time: 18.5 s


In [28]:
# Run calculcations locally for comparison:
from lambda_package.calc.calculator import calculate_oop

claims_client = ClaimsClient(aws_info)
people = claims_client.get(uids)

benefits_client = BenefitsClient(aws_info)
plans = benefits_client.get_by_pid(pids)

costs = []
for person in people:
    claims = person['medical_claims']
    
    for plan in plans:
        cost = calculate_oop(claims, plan)
        cost.update({
            'uid': person['uid'],
            'picwell_id': str(plan['picwell_id']),
        })
        
        costs.append(cost)
        
print '{} costs calculated'.format(len(costs))

2000 costs calculated
time: 2min 35s


In [29]:
# Collect the picwell_id of every plan in state '06' (California, going by the _CA
# suffix) to build a larger plan set for the next request.
benefits_client = BenefitsClient(aws_info)
plans_CA = benefits_client.get_by_state(['06'])
pids_CA = [plan['picwell_id'] for plan in plans_CA]

print '{} plans identified'.format(len(pids_CA))

268 plans identified
time: 1.01 s


In [30]:
# Try a sample size more relevant to commercial:
# 300 people x 268 state-'06' plans = 80400 (uid, plan) pairs, requested through get_oop
# on the CalculatorClient still bound to `client`.
responses = client.get_oop(uids[:300], pids_CA)

print '{} responses returned'.format(len(responses))

80400 responses returned
time: 20.2 s


# Test Batch Calculation

In [31]:
# uids = s3.read_json('s3n://picwell.sandbox.medicare/samples/philadelphia-2015')

# print '{} uids read'.format(len(uids))

time: 591 µs


In [32]:
# uids[:10]

time: 444 µs


In [33]:
# requests_per_second = 100

# for uid in uids:
# #     client.calculate_async(uid, months=['01'])
#     client.calculate_async(uid)
#     time.sleep(1.0/requests_per_second)  

time: 698 µs
