# Simulate Plans Data

In [1]:
import numpy as np
import json
import requests
import us
from elasticsearch import Elasticsearch
from faker import Factory

In [2]:
# Elasticsearch client (instance hosted on AWS EC2).
# No hosts are passed, so the client uses its default connection
# (localhost:9200 per the elasticsearch-py docs), the same address the
# curl cells in this notebook talk to.
es = Elasticsearch()

In [3]:
!head -n100 sim_plans_data_v2.json

[
  {
    "state": "SC", 
    "premium": {
      "age_30": 91.30685150355869, 
      "age_50": 111.30685150355869, 
      "age_40": 101.30685150355869
    }, 
    "level": "Bronze", 
    "url": "http://www.feeney-denesik.com/", 
    "providers": [
      {
        "name": "Dr. Bell Lubowitz DDS", 
        "address": "24787 Bogisich Junctions\nLucienport, VI 81392"
      }, 
      {
        "name": "Channie Nolan DDS", 
        "address": "083 Daniel Roads Apt. 396\nKoelpinview, MO 71455-9813"
      }, 
      {
        "name": "Dr. Elza Stokes DDS", 
        "address": "2080 Windler Parkway Apt. 897\nRoscoeborough, GA 41162-4388"
      }, 
      {
        "name": "Lonnie Adams", 
        "address": "3584 Miller Landing Suite 460\nNorth Jesse, AL 11532"
      }, 
      {
        "name": "Corie Rowe PhD", 
        "address": "2869 Collins Oval\nNorth Cecilhaven, RI 36735-2028"
      }, 
      {
        "name": "Miss Erykah Lemke DDS", 
        "address": "3

### Define ES Mapping

In [17]:
# Create Elasticsearch mapping

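Index: data  
Type: plan  
Fields:  
- plan_name (string, analyzed, with a not_analyzed `raw` sub-field)
- premium (nested; float fields age_30, age_40, age_50)
- level (string, analyzed, with a not_analyzed `raw` sub-field)
- url (string, not_analyzed)
- state (string, not_analyzed)

Index: data_children  
Type: providers (child of plan)  
Fields:
- name (string, analyzed, with a not_analyzed `raw` sub-field)
- address (string, not_analyzed)

Note: `define_plan_mappings` below creates both the `plan` and `providers` types inside the `data_children` index.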

In [34]:
# Drop the data_children index so that define_plan_mappings can create it
# again from scratch (that function calls es.indices.create on the same name).
!curl -XDELETE "localhost:9200/data_children"

{"acknowledged":true}

In [12]:
def define_plan_mappings(es, index_name="data_children"):
    """Create the index that stores plans and their child provider documents.

    Two document types are declared in the same index:

    * ``plan``      -- parent documents (plan_name, premium, level, url, state)
    * ``providers`` -- child documents whose ``_parent`` type is ``plan``

    Parameters
    ----------
    es : object
        Client exposing ``es.indices.create(index=..., body=...)``
        (an ``elasticsearch.Elasticsearch`` instance in this notebook).
    index_name : str, optional
        Name of the index to create. Defaults to ``"data_children"``, the
        name that was previously hard-coded.

    Returns
    -------
    None

    Notes
    -----
    Any exception raised by ``es.indices.create`` propagates unchanged; this
    function does not check whether the index already exists.
    """
    def analyzed_with_raw():
        # Full-text (analyzed) string plus a not_analyzed "raw" sub-field.
        # A fresh dict is returned on every call so no mapping fragment is
        # shared between fields.
        return {
            "type": "string",
            "index": "analyzed",
            "fields": {
                "raw": {
                    "type": "string",
                    "index": "not_analyzed"
                }
            }
        }

    def exact_string():
        # String stored as a single un-tokenised term.
        return {
            "type": "string",
            "index": "not_analyzed"
        }

    settings = {
        "settings": {
            "index": {
                "number_of_shards": 5,
                "number_of_replicas": 1
            }
        },

        "mappings": {
            "plan": {
                "properties": {
                    "plan_name": analyzed_with_raw(),

                    "premium": {
                        "type": "nested",
                        "properties": {
                            "age_30": {"type": "float"},
                            "age_40": {"type": "float"},
                            "age_50": {"type": "float"}
                        }
                    },

                    "level": analyzed_with_raw(),
                    "url": exact_string(),
                    "state": exact_string()
                }
            },
            "providers": {
                # Declares providers as children of plan documents.
                "_parent": {
                    "type": "plan"
                },

                "properties": {
                    "name": analyzed_with_raw(),
                    "address": exact_string()
                }
            }
        }
    }

    # Define mappings in ES
    es.indices.create(index=index_name, body=settings)

In [36]:
define_plan_mappings(es)

In [37]:
!curl 'localhost:9200/data_children/_mapping/plan?pretty'

{
  "data_children" : {
    "mappings" : {
      "plan" : {
        "properties" : {
          "level" : {
            "type" : "string",
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed"
              }
            }
          },
          "plan_name" : {
            "type" : "string",
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed"
              }
            }
          },
          "premium" : {
            "type" : "nested",
            "properties" : {
              "age_30" : {
                "type" : "float"
              },
              "age_40" : {
                "type" : "float"
              },
              "age_50" : {
                "type" : "float"
              }
            }
          },
          "state" : {
            "type" : "string",
            "index" : "not_analyzed"

In [38]:
!curl 'localhost:9200/data_children/_mapping/providers?pretty'

{
  "data_children" : {
    "mappings" : {
      "providers" : {
        "_parent" : {
          "type" : "plan"
        },
        "_routing" : {
          "required" : true
        },
        "properties" : {
          "address" : {
            "type" : "string",
            "index" : "not_analyzed"
          },
          "name" : {
            "type" : "string",
            "fields" : {
              "raw" : {
                "type" : "string",
                "index" : "not_analyzed"
              }
            }
          }
        }
      }
    }
  }
}


### Load Data

In [17]:
from collections import OrderedDict

In [30]:
def load_data(input_path, es, index_name="data_children"):
    """Index every plan in a JSON file, plus its providers as child documents.

    The file must contain a JSON list of plan objects. Each plan is indexed
    as a ``plan`` document whose id is its position in the list; the plan's
    ``providers`` entry (if any) is removed from the plan body and each
    provider is indexed as a ``providers`` document with ``parent`` set to
    the plan id.

    Parameters
    ----------
    input_path : str
        Path to the JSON file.
    es : object
        Client exposing ``es.index(index=, doc_type=, id=, body=[, parent=])``.
    index_name : str, optional
        Target index. Defaults to ``"data_children"``, the name that was
        previously hard-coded.

    Returns
    -------
    None
    """
    # Close the file deterministically instead of leaking the handle.
    with open(input_path) as input_file:
        data = json.load(input_file)

    # Provider name -> child id, assigned in order of first appearance.
    # A dict lookup replaces the former ``keys().index(name)`` call, which
    # only worked on Python 2 and rescanned every known provider each time.
    provider_ids = {}

    for plan_id, plan in enumerate(data):
        # Providers are stored as child documents, not inside the plan body.
        # A plan with no (or a null) "providers" entry simply has no children.
        providers_list = plan.pop("providers", None) or []
        es.index(index=index_name, doc_type='plan', id=plan_id, body=plan)

        for provider in providers_list:
            # NOTE(review): the child id depends only on the provider name, so
            # a provider listed by several plans is written repeatedly under
            # one id with a different parent each time. The sample output in
            # this notebook (provider 0 at _version 386 with a single _parent)
            # suggests later writes replace earlier ones -- confirm whether a
            # per-plan child id is wanted.
            child_id = provider_ids.setdefault(provider['name'], len(provider_ids))
            es.index(index=index_name, doc_type='providers', id=child_id,
                     parent=plan_id, body=provider)

In [39]:
load_data("sim_plans_data_v2.json", es)

In [58]:
# Check data

In [40]:
!curl 'localhost:9200/_cat/indices?v'

health status index                  pri rep docs.count docs.deleted store.size pri.store.size 
yellow open   data                     5   1     120003            0     12.5mb         12.5mb 
yellow open   get-together             2   1         20            0     28.4kb         28.4kb 
yellow open   data_children            5   1      20250         1899      4.3mb          4.3mb 
yellow open   myindex                  5   1          0            0       800b           800b 
yellow open   november_2014_invoices   5   1          0            0       800b           800b 
yellow open   december_2014_invoices   5   1          0            0       800b           800b 
yellow open   blog                     5   1          1            0      3.6kb          3.6kb 
yellow open   logs                     5   1          1            0      3.7kb          3.7kb 


In [53]:
# Fetch plan 0 from the `data` index (not `data_children`); in that index the
# providers list is still embedded in the plan's _source, as the output shows.
!curl 'localhost:9200/data/plan/0?pretty'

{
  "_index" : "data",
  "_type" : "plan",
  "_id" : "0",
  "_version" : 1,
  "found" : true,
  "_source" : {
    "premium" : {
      "age_30" : 91.30685150355869,
      "age_50" : 111.30685150355869,
      "age_40" : 101.30685150355869
    },
    "providers" : [ {
      "name" : "Dr. Bell Lubowitz DDS",
      "address" : "24787 Bogisich Junctions\nLucienport, VI 81392"
    }, {
      "name" : "Channie Nolan DDS",
      "address" : "083 Daniel Roads Apt. 396\nKoelpinview, MO 71455-9813"
    }, {
      "name" : "Dr. Elza Stokes DDS",
      "address" : "2080 Windler Parkway Apt. 897\nRoscoeborough, GA 41162-4388"
    }, {
      "name" : "Lonnie Adams",
      "address" : "3584 Miller Landing Suite 460\nNorth Jesse, AL 11532"
    }, {
      "name" : "Corie Rowe PhD",
      "address" : "2869 Collins Oval\nNorth Cecilhaven, RI 36735-2028"
    }, {
      "name" : "Miss Erykah Lemke DDS",
      "address" : "33283 Colette Summit\nMuellerchester, KS 23241"
    }, {

In [65]:
# Fetch child document providers/0, passing parent=0 so the request is routed
# by that parent id. Compare the returned _parent/_routing with the requested one.
!curl 'localhost:9200/data_children/providers/0?parent=0&pretty'

{
  "_index" : "data_children",
  "_type" : "providers",
  "_id" : "0",
  "_version" : 386,
  "_routing" : "9946",
  "_parent" : "9946",
  "found" : true,
  "_source" : {
    "name" : "Dr. Bell Lubowitz DDS",
    "address" : "24787 Bogisich Junctions\nLucienport, VI 81392"
  }
}


In [68]:
# Parent/child search: return `plan` documents that have at least one
# `providers` child whose name matches "dr". With score_mode "max" the plan's
# score is taken from its best-scoring matching child.
!curl 'localhost:9200/data_children/plan/_search?pretty' -d '{\
"query": {\
    "has_child": {\
        "type": "providers",\
        "score_mode": "max",\
        "query": {\
            "match": {\
                "name": "dr"\
            }\
        }\
    }\
}\
}'

{
  "took" : 52,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 30,
    "max_score" : 2.6261177,
    "hits" : [ {
      "_index" : "data_children",
      "_type" : "plan",
      "_id" : "9946",
      "_score" : 2.6261177,
      "_source" : {
        "premium" : {
          "age_30" : 113.25599496903504,
          "age_50" : 133.25599496903504,
          "age_40" : 123.25599496903504
        },
        "url" : "http://dickens.org/",
        "level" : "Silver",
        "state" : "NV",
        "plan_name" : "Keeling and Sons"
      }
    }, {
      "_index" : "data_children",
      "_type" : "plan",
      "_id" : "9949",
      "_score" : 2.6261177,
      "_source" : {
        "premium" : {
          "age_30" : 92.50291222147376,
          "age_50" : 112.50291222147376,
          "age_40" : 102.50291222147376
        },
        "url" : "http://www.stamm.info/",
        "le

In [74]:
# Same has_child search, but matching provider names by prefix ("nol") via a
# phrase_prefix match limited to max_expansions = 1. The empty inner_hits
# object asks Elasticsearch to also return the matching child documents
# alongside each plan hit.
!curl 'localhost:9200/data_children/plan/_search?pretty' -d '{\
"query": {\
    "has_child": {\
        "type": "providers",\
        "score_mode": "max",\
        "query": {\
            "match": {\
                "name": {\
                    "type": "phrase_prefix",\
                    "query": "nol",\
                    "max_expansions": 1\
                }\
            }\
        },\
        "inner_hits": {}\
}}}'

{
  "took" : 32,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 3.6483245,
    "hits" : [ {
      "_index" : "data_children",
      "_type" : "plan",
      "_id" : "9982",
      "_score" : 3.6483245,
      "_source" : {
        "premium" : {
          "age_30" : 97.29442269441574,
          "age_50" : 117.29442269441574,
          "age_40" : 107.29442269441574
        },
        "url" : "http://anderson.org/",
        "level" : "Catastrophic",
        "state" : "OK",
        "plan_name" : "Schaden, Kihn and Jast"
      },
      "inner_hits" : {
        "providers" : {
          "hits" : {
            "total" : 1,
            "max_score" : 7.5219407,
            "hits" : [ {
              "_index" : "data_children",
              "_type" : "providers",
              "_id" : "1",
              "_score" : 7.5219407,
              "_routing" : "9982",
 