In [1]:
%load_ext autotime

In [2]:
import boto3
import datetime
import logging
import json
import pickle
import pytest
import sys
import time

from etltools import s3

from lambda_client import (
    ClaimsClient,
    BenefitsClient,
    CalculatorClient,
)

reload(logging)  # get around notebook problem

<module 'logging' from '/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.pyc'>

time: 556 ms


In [3]:
# Configure the root logger: emit INFO and above, prefixing every record with
# its timestamp, the originating file name and line number, and the level name.
logging.basicConfig(
    level=logging.INFO, 
    format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    # NOTE: basicConfig() only accepts the `handlers` keyword on Python 3.3+,
    # so the block below cannot simply be uncommented on a Python 2.7 kernel.
#     handlers=[
#         logging.FileHandler(filename='mylog.log', mode='w'),
#         logging.StreamHandler(sys.stdout),
#     ]
)

time: 1.33 ms


In [4]:
# Test whether logging works:
logger = logging.getLogger()
logger.info('TEST INFO')

[2018-01-18 22:09:55,520] {<ipython-input-4-efd2929c73d3>:3} INFO - TEST INFO


time: 2.04 ms


In [5]:
# Shared inputs for the cells below.
# aws_info is passed to every lambda_client client and is also unpacked as
# keyword arguments into boto3.Session(**aws_info).
aws_info = {
    'profile_name': 'sandbox',
}

# User ids read from a sample file on S3; later cells slice this sequence
# (uids[:1], uids[:10], ...) to control how many people are processed.
uids = s3.read_json('s3://picwell.sandbox.medicare/samples/philadelphia-2015-1k-sample')
# Plan ids used for the cost-breakdown calls and for BenefitsClient.get_by_pid().
pids = ['2820028008119', '2820088001036']

time: 307 ms


# Test ConfigInfo

In [None]:
from lambda_client.config_info import ConfigInfo

configs = ConfigInfo('lambda_client/lambda.cfg')

print configs.claims_bucket
print configs.claims_path
print
print configs.benefits_bucket
print configs.benefits_path
print
print configs.claims_table

In [None]:
all_states = configs.all_states

print '{} states'.format(len(all_states))
print all_states

# Test ClaimsClient

In [None]:
# Test S3:
client = ClaimsClient(aws_info, 
                      s3_bucket=configs.claims_bucket,
                      s3_path=configs.claims_path)

people = client.get(uids[:1])
print 'claims of {} people retrieved'.format(len(people))

In [None]:
person = people[0]
print person.keys()
{
    'uid': person['uid'],
    'medical_claims': person['medical_claims'][:5]
}

In [None]:
# Let's try something larger:
people = client.get(uids)
print 'claims of {} people retrieved'.format(len(people))

In [None]:
# Test DynamoDB:
client = ClaimsClient(aws_info,
                      table_name=configs.claims_table)

people = client.get(uids[:1])
print 'claims of {} people retrieved'.format(len(people))

In [None]:
person = people[0]
print person.keys()
{
    'uid': person['uid'],
    'medical_claims': person['medical_claims'][:5]
}

In [None]:
# Let's try something larger:
people = client.get(uids)
print 'claims of {} people retrieved'.format(len(people))

In [None]:
# Test configuration file and retrieving multiple people:
client = ClaimsClient(aws_info)

people = client.get(uids[:5])
print 'claims of {} people retrieved'.format(len(people))

In [None]:
# The client must refuse to be pickled. `match` is applied as a regular
# expression (re.search), so the trailing period is escaped to require a
# literal '.' instead of matching any character.
with pytest.raises(Exception, match=r'ClaimsClient object cannot be pickled\.'):
    pickle.dumps(client)

# Test BenefitsClient

In [None]:
client = BenefitsClient(aws_info)

print client.all_states

In [None]:
plans = client._get_one_state('01')
print '{} plans read for state 01'.format(len(plans))

plans = client._get_one_state('04')
print '{} plans read for state 04'.format(len(plans))

In [None]:
plans = client.get_by_state(['01', '04'])
print '{} plans read'.format(len(plans))

In [None]:
plans = client.get_all()
print '{} plans read'.format(len(plans))

In [None]:
# Compare the timing against reading the entire file:
from lambda_client.shared_utils import _read_json

session = boto3.Session(**aws_info)
resource = session.resource('s3')

In [None]:
all_plans = _read_json('picwell.sandbox.medicare', 'ma_benefits/cms_2018_pbps_20171005.json', resource)

print '{} plans read'.format(len(plans))

In [None]:
# Both read paths must return exactly the same plans. Order is normalised by
# sorting each list on picwell_id before comparing.
def sort_key(plan):
    return plan['picwell_id']

assert sorted(all_plans, key=sort_key) == sorted(plans, key=sort_key)

In [None]:
# The client must refuse to be pickled. `match` is applied as a regular
# expression (re.search), so the trailing period is escaped to require a
# literal '.' instead of matching any character.
with pytest.raises(Exception, match=r'BenefitsClient object cannot be pickled\.'):
    pickle.dumps(client)

# Test Cost Breakdown

In [None]:
client = CalculatorClient(aws_info)

In [None]:
responses = client.get_breakdown(uids[:1], pids, verbose=True)

print '{} responses returned'.format(len(responses))
responses[0]

In [None]:
responses = client.get_breakdown(uids[:1], pids, use_s3_for_claims=False, verbose=True)

print '{} responses returned'.format(len(responses))

In [None]:
# Test recursive call:
responses = client.get_breakdown(uids[:10], pids, max_calculated_uids=10)

print '{} responses returned'.format(len(responses))

In [None]:
responses = client.get_breakdown(uids[:10], pids, max_lambda_calls=2)

print '{} responses returned'.format(len(responses))

In [None]:
responses = client.get_breakdown(uids[:10], pids)

print '{} responses returned'.format(len(responses))

In [None]:
# Check whether DynamoDB reduces latency:
responses = client.get_breakdown(uids[:10], pids, use_s3_for_claims=False, max_calculated_uids=10)

print '{} responses returned'.format(len(responses))

In [None]:
# Let's try something larger:
responses = client.get_breakdown(uids, pids)

print '{} responses returned'.format(len(responses))

In [None]:
responses = client.get_breakdown(uids, pids, use_s3_for_claims=False)

print '{} responses returned'.format(len(responses))

In [None]:
# Runs into a memory issue if all 1000 people are calculated at once:
responses = client.get_breakdown(uids, pids, use_s3_for_claims=False, max_calculated_uids=100, max_lambda_calls=10)

print '{} responses returned'.format(len(responses))

In [None]:
from lambda_package.calc.calculator import calculate_oop

def run_locally(people, plans, oop_only):
    """Compute costs in-process for every (person, plan) combination.

    calculate_oop is called once per pair, with people in the outer loop and
    plans in the inner loop, so results are grouped by person.

    Args:
        people: iterable of dicts, each providing 'medical_claims' and 'uid'.
        plans: iterable of plan dicts, each providing 'picwell_id'; it is
            iterated once per person.
        oop_only: when truthy, keep only the 'oop' entry of each result;
            otherwise the dict returned by calculate_oop is used as-is
            (and is updated in place with the identifying keys).

    Returns:
        A list with one dict per pair, each carrying 'uid' and 'picwell_id'
        (the latter converted to a string) alongside the cost data.
    """
    def _tag(individual, benefit_plan, breakdown):
        # Either reduce the breakdown to its 'oop' entry or keep it whole.
        record = {'oop': breakdown['oop']} if oop_only else breakdown
        record.update({
            'uid': individual['uid'],
            'picwell_id': str(benefit_plan['picwell_id']),
        })
        return record

    results = []
    for individual in people:
        # Looked up once per person, before any plan is processed.
        claim_history = individual['medical_claims']
        results.extend(
            _tag(individual, benefit_plan, calculate_oop(claim_history, benefit_plan))
            for benefit_plan in plans
        )
    return results

In [None]:
# Run calculations locally for comparison:
# claims_client = ClaimsClient(aws_info, 
#                              s3_bucket=configs.claims_bucket,
#                              s3_path=configs.claims_path)
claims_client = ClaimsClient(aws_info, 
                             table_name=configs.claims_table)
people = claims_client.get(uids)

benefits_client = BenefitsClient(aws_info)
plans = benefits_client.get_by_pid(pids)

costs = run_locally(people, plans, False)

print '{} costs calculated'.format(len(costs))

In [None]:
# benefits_client = BenefitsClient()
benefits_client = BenefitsClient(aws_info)
plans_CA = benefits_client.get_by_state(['06'])
pids_CA = [plan['picwell_id'] for plan in plans_CA]

print '{} plans identified'.format(len(pids_CA))

In [None]:
# Try a sample size more relevant to commercial:
responses = client.get_oop(uids[:300], pids_CA)

print '{} responses returned'.format(len(responses))

In [None]:
responses = client.get_oop(uids[:300], pids_CA, use_s3_for_claims=False)

print '{} responses returned'.format(len(responses))

In [None]:
# Increasing the amount of computation at the terminal nodes increases the time. This is probably
# because there are many plans.
responses = client.get_oop(uids[:300], pids_CA, use_s3_for_claims=False, max_calculated_uids=3)

print '{} responses returned'.format(len(responses))

In [None]:
claims_client = ClaimsClient(aws_info, 
                             table_name=configs.claims_table)
people = claims_client.get(uids[:300])

benefits_client = BenefitsClient(aws_info)
plans = benefits_client.get_by_pid(pids_CA)

costs = run_locally(people, plans, True)

print '{} costs calculated'.format(len(costs))

# Test Batch Calculation

In [6]:
uids = s3.read_json('s3n://picwell.sandbox.medicare/samples/philadelphia-2015')

print '{} uids read'.format(len(uids))

1352473 uids read
time: 6.04 s


In [7]:
client = CalculatorClient(aws_info)

time: 760 µs


In [8]:
response = client.run_batch(uids[:1], months=['01', '02', '03'], states=['01', '06'], verbose=True)

print response

START RequestId: 4023eddb-fcc6-11e7-8d83-67d313b0b20e Version: $LATEST
[INFO]	2018-01-19T03:10:06.809Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Clock started at 2018-01-19 03:10:06.809577.
[INFO]	2018-01-19T03:10:06.822Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Thread initialization took 0.00018 seconds.
[INFO]	2018-01-19T03:10:06.983Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Found credentials in environment variables.
[INFO]	2018-01-19T03:10:07.384Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Starting new HTTPS connection (1): s3.amazonaws.com
[INFO]	2018-01-19T03:10:07.540Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Joining all threads took 0.717462 seconds.
[INFO]	2018-01-19T03:10:07.540Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Combining all results in order took 2.4e-05 seconds.
[INFO]	2018-01-19T03:10:07.540Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Claim retrieval for [u'31970436701'] took 0.718297 seconds.
[INFO]	2018-01-19T03:10:07.540Z	4023eddb-fcc6-11e7-8d83-67d313b0b20e	Processing state 01:
[INFO]	2018-01-

In [17]:
response = client.run_batch(uids[:2], states=['01', '06', '36'], max_calculated_uids=2)

print response

[[u'31970436701', 36], [u'1674435601', 36]]
time: 41.4 s


In [24]:
# Test recursive call:
response = client.run_batch(uids[:2], states=['01', '06', '36'])

print response

[[u'1674435601', 36], [u'31970436701', 36]]
time: 38.7 s


In [29]:
# This only runs on a large enough Lambda (e.g. 512 MB); it fails on a 256 MB Lambda.
# Memory determines how fast one can run.
response = client.run_batch(uids[:100], months=['01'], states=['01', '06', '36'])

print response

[[u'31970436701', 3], [u'1784327003', 3], [u'387247001', 3], [u'600970101', 3], [u'31035001601', 3], [u'291790001', 3], [u'1356374201', 3], [u'1261262001', 3], [u'617277001', 3], [u'3133478301', 3], [u'1674435601', 3], [u'246540301', 3], [u'3625817901', 3], [u'1522346501', 3], [u'31576592001', 3], [u'2038500102', 3], [u'1626625702', 3], [u'615957501', 3], [u'1565379401', 3], [u'608721802', 3], [u'204451602', 3], [u'3154052701', 3], [u'1550622501', 3], [u'1903260402', 3], [u'246223201', 3], [u'1341381501', 3], [u'652143501', 3], [u'1332096401', 3], [u'3177265701', 3], [u'3175398301', 3], [u'3158762201', 3], [u'31917048601', 3], [u'628001201', 3], [u'1305811101', 3], [u'3627934702', 3], [u'1565383201', 3], [u'3164462801', 3], [u'3130762002', 3], [u'68296201', 3], [u'1300582601', 3], [u'595989402', 3], [u'617921001', 3], [u'368519101', 3], [u'3145195601', 3], [u'3138548401', 3], [u'1284553402', 3], [u'2029497801', 3], [u'387418101', 3], [u'635086301', 3], [u'2216424101', 3], [u'894536502'