# Example notebook: using the classify_insurance package to take a fuzzy input string describing an insurance plan and recommend matching plans in the Ribbon Health taxonomy.

In [38]:
import numpy as np
import pandas as pd
import sys
sys.path.append('../')
from classify_insurance.planmapper import PlanComparisonModel
from classify_insurance.inputwithmappings import InputWithMappings
from IPython.core.display import display, HTML

In [39]:
def get_mapped_input(mapper, ix = None):
    """Select an already mapped input string and print its features next to those of its true plan.

    Arguments:
        mapper (PlanComparisonModel obj): model for matching inputs to taxonomy.
        ix (int): index of input string in confirmed mappings df. If None, select random index.
            Index 0 is a valid value and is used as given.
    Returns:
        input_string (str): input text string.
        UUID_true (str): matching plan UUID.
    """

    # Load the confirmed mappings only if the mapper has not loaded them yet.
    if not mapper.mapped_plans_loaded:
        mapper.load_mapped_plans()
    mapped_plans = mapper.mapped_plans

    # Compare against None explicitly: a truthiness test would treat the valid
    # index 0 as "not provided" and silently replace it with a random index.
    if ix is None:
        ix = np.random.choice(mapped_plans.index.tolist())

    input_mapped_plan = mapper.index_to_mapped_input_plan(ix, mapped_plans)
    input_string = input_mapped_plan['input_string']

    print('\nINPUT:')
    print('index {}:'.format(ix))
    print('input string: "{}"; Cleaned: "{}"'.format(input_string, mapper.clean_input_string(input_string)))
    input_feature_dict = input_mapped_plan['input_feature_dict']
    print('Features of input string:')
    for feature in mapper.features:
        print('{}: {}'.format(feature, input_feature_dict[feature]))

    print('\nTRUE MATCHING PLAN:')
    # The 'true_plan' entry holds both the plan UUID and the per-feature values read below.
    true_plan_feature_dict = input_mapped_plan['true_plan']
    UUID_true = true_plan_feature_dict['UUID']
    print('Unique ID: {}'.format(UUID_true))
    print('Features of true plan:')
    for feature in mapper.features:
        print('{}: {}'.format(feature, true_plan_feature_dict[feature]))

    # Score the input against its known true plan.
    sim_vec_true = mapper.similarity_vector(input_feature_dict, true_plan_feature_dict)
    sim_score_true = mapper.sim_vec_to_score.compute_score(sim_vec_true)
    print('similarity vector:', sim_vec_true)
    print('score:', sim_score_true)
    return (input_string, UUID_true)

def get_plan_recommendations(input_string, mapper, margin = 0.2):
    """Find recommended matching plans for an input string.

    Arguments:
        input_string (str): input text string.
        mapper (PlanComparisonModel obj): model for matching inputs to taxonomy.
        margin (flt): recommend all plans within margin of top similarity score.
    Returns:
        plan_info (InputWithMappings obj): contains recommended matches information.
    """
    print('\nCOMPARING INPUT TO INSURANCE PLANS...')
    plan_info = InputWithMappings(input_string)
    plan_info.compute_similarity_vectors_scores(mapper)
    # Forward the caller's margin; it was previously hard-coded to 0.2 here,
    # so the parameter had no effect.
    plan_info.get_recommendations(margin = margin, verbose=True)
    return plan_info

## Load the model that maps input strings to insurance plans

In [40]:
# Build the matching model with its default constructor arguments; the cells below all use this instance.
mapper = PlanComparisonModel()

## Choose from 2 options below:

#### 1. use an already mapped input string (and print lots of information):

In [41]:
# Pick a confirmed mapping at random (no index passed); the helper prints the
# input's features and those of its true plan.
input_string, UUID_true = get_mapped_input(mapper)
print('\nTRUE MATCHING PLAN DESCRIPTION BEFORE DATA CLEANING:')
# Last expression of the cell: the row(s) of the original taxonomy whose UUID is the true plan's.
mapper.taxonomy_orig.loc[mapper.taxonomy_orig['Ribbon Insurance UUID'] == UUID_true]


INPUT:
index 333:
input string: "anthem blue cross - premier plus ppo"; Cleaned: "anthem blue cross premier plus ppo"
Features of input string:
carrier_association: []
carrier_brand: ['anthem', 'blue cross']
carrier_name: ['anthem blue cross', 'blue cross', 'blue']
state: []
plan: ['plus', 'premier', 'blue', 'blue cross', 'premier plus']
type: ['ppo']
metal: []

TRUE MATCHING PLAN:
Unique ID: c20c884e-c070-48b4-a448-c75df5a26365
Features of true plan:
carrier_association: blue cross blue shield association
carrier_brand: anthem
carrier_name: anthem blue cross
state: nan
plan: premier plus
type: ppo
metal: nan
similarity vector: [0.  1.  1.  0.5 1.  1.  0.  1. ]
score: 8.269280828485197

TRUE MATCHING PLAN DESCRIPTION BEFORE DATA CLEANING:


Unnamed: 0,Ribbon Insurance UUID,carrier_association,carrier_brand,carrier_name,state,plan_name,plan_type,metal_level,display_name,network
162,c20c884e-c070-48b4-a448-c75df5a26365,BCBS Association,Anthem,Anthem Blue Cross,,Premier Plus,PPO,,Anthem Blue Cross - Premier Plus - PPO,


#### 2. or directly select your own input string:

In [None]:
# Optional: uncomment both lines to use your own input string instead of the one chosen in option 1.
# With UUID_true = None, the final "known true matching plan" cell prints nothing.
#input_string = 'bcbs - anthem pathway x ind ppo direct access ct'
#UUID_true = None

## Given the input string, find recommended matches in the plan taxonomy

In [42]:
# Compare the chosen input string against the taxonomy using the default margin;
# the helper prints a summary and returns the object holding the recommendations.
plan_info = get_plan_recommendations(input_string, mapper)


COMPARING INPUT TO INSURANCE PLANS...

SINGLE RECOMMENDED MATCH FOUND!
top score = 8.269280828485197


#### The best matching plan(s) in order of ranking (highest similarity score on top):

In [43]:
# Display only the first `num_rec` entries of the recommendations table.
plan_info.recommendations[:plan_info.num_rec]

Unnamed: 0,UUID,carrier_association,carrier_brand,carrier_name,state,plan,type,metal,carrier_association_match,carrier_brand_match,carrier_name_match,state_match,plan_match,type_match,metal_match,input_string_match,similarity_score
162,c20c884e-c070-48b4-a448-c75df5a26365,blue cross blue shield association,anthem,anthem blue cross,,premier plus,ppo,,0.0,1.0,1.0,0.5,1.0,1.0,0.0,1.0,8.269281


In [44]:
# Fallback for inputs without a good match.
# NOTE(review): presumably recommend_taxonomy_entry() proposes a new taxonomy entry
# for this input — confirm in InputWithMappings.
if not plan_info.good_matches_found:
    plan_info.recommend_taxonomy_entry()

## If you started with an already mapped plan, you can check (again) what the true matching plan is

In [45]:
# Show the known true plan's row only when a true UUID is available (it is None for a
# hand-written input) and that UUID appears in the recommendations table.
if isinstance(UUID_true, str) and UUID_true in plan_info.recommendations['UUID'].values:
    print('KNOWN TRUE MATCHING PLAN:')
    display(HTML(plan_info.recommendations.loc[plan_info.recommendations['UUID'] == UUID_true].to_html()))

KNOWN TRUE MATCHING PLAN:


Unnamed: 0,UUID,carrier_association,carrier_brand,carrier_name,state,plan,type,metal,carrier_association_match,carrier_brand_match,carrier_name_match,state_match,plan_match,type_match,metal_match,input_string_match,similarity_score
162,c20c884e-c070-48b4-a448-c75df5a26365,blue cross blue shield association,anthem,anthem blue cross,,premier plus,ppo,,0.0,1.0,1.0,0.5,1.0,1.0,0.0,1.0,8.269281
