In [1]:
# Importing required libraries

import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
# Reading saved models
#
# LMS      - table consumed by return_Charges_pred: one row per cluster, column 0
#            is added as a constant term and the remaining columns are multiplied
#            with the encoded feature vector.
# CLUSTERS - table consumed by return_best_cluster_index: one row per cluster,
#            compared position-by-position with the encoded feature vector.

LMS, CLUSTERS = (
    pd.read_csv(csv_path)
    for csv_path in ('NB/LMSInfo.csv', 'NB/ClusterInfo.csv')
)

In [3]:
#!jupyter nbextension enable --py widgetsnbextension --sys-prefix
#!jupyter serverextension enable voila --sys-prefix

In [4]:
# Input widgets

def _third_width_field(label):
    """Return the keyword arguments every input widget shares.

    Each call builds a new ``Layout`` so that no two widgets share a layout
    object, exactly as when the arguments are spelled out per widget.
    """
    return dict(
        description = label,
        disabled = False,
        layout = widgets.Layout(width = '33%'),
    )


# Numeric inputs
age = widgets.FloatText(**_third_width_field('Age'))
bmi = widgets.FloatText(**_third_width_field('BMI'))
children = widgets.IntText(**_third_width_field('Children'))

# Categorical inputs. The option labels are compared as plain strings in
# return_best_cluster_index, so they must stay in sync with that function.
smoker = widgets.RadioButtons(
    options = ['Yes', 'No'],
    **_third_width_field('Smoker')
)
sex = widgets.RadioButtons(
    options = ['Male', 'Female'],
    **_third_width_field('Sex')
)
region = widgets.RadioButtons(
    options = ['North West', 'North East', 'South West', 'South East'],
    **_third_width_field('Region')
)

# Two centered rows of inputs: numeric fields first, categorical fields second.
# Each row gets its own Layout instance.
inputs1 = widgets.HBox(
    children = [age, bmi, children],
    layout = widgets.Layout(justify_content = 'center', width = '90%'),
)

inputs2 = widgets.HBox(
    children = [smoker, sex, region],
    layout = widgets.Layout(justify_content = 'center', width = '90%'),
)

In [5]:
# Predicting cluster of unseen data

def return_best_cluster_index(clusters, Age, Bmi, Children, Smoker, Sex, Region):
    """Encode one applicant as a feature vector and find the nearest cluster row.

    The feature vector has ten entries, in this order:
    ``[Age, Bmi, Children, smoker flag, 2 sex flags, 4 region flags]``.

    * smoker flag: 1 when ``Smoker == "Yes"``, otherwise 0.
    * sex flags: ``[0, 1]`` when ``Sex == "Male"``, otherwise ``[1, 0]``.
    * region flags: one-hot in the order North East, North West, South East,
      and a final slot used for any other value (i.e. "South West").

    Parameters
    ----------
    clusters : pandas.DataFrame
        One row per cluster. Every column of a row is compared with the entry
        at the same position of the feature vector, so the frame must not have
        more than ten columns (an ``IndexError`` is raised otherwise).
        NOTE(review): the columns are assumed to follow the encoding order
        above - confirm against the code that produced the cluster file.
    Age, Bmi, Children : number
        Raw numeric inputs, stored unchanged in the feature vector.
    Smoker, Sex, Region : str
        Labels as offered by the input widgets.

    Returns
    -------
    tuple (list, int)
        The encoded feature vector and the positional index of the row with
        the smallest Euclidean distance (the first one on ties). The index is
        -1 when ``clusters`` has no rows.
    """
    features = [Age, Bmi, Children]
    features.append(1 if Smoker == "Yes" else 0)
    features.extend([0, 1] if Sex == "Male" else [1, 0])

    region_flags = {
        "North East": [1, 0, 0, 0],
        "North West": [0, 1, 0, 0],
        "South East": [0, 0, 1, 0],
    }
    # Anything that is not one of the three keys falls into the last slot.
    features.extend(region_flags.get(Region, [0, 0, 0, 1]))

    best_index = -1
    # Infinity instead of a large finite sentinel: a cluster is always chosen,
    # however far away the nearest one is.
    best_distance = float("inf")

    # Use the `clusters` argument (not a module-level table) so the function
    # works on whatever frame the caller passes in.
    for row_index in range(len(clusters)):
        centroid = clusters.iloc[row_index, :].values
        squared = [
            (float(centroid[col]) - float(features[col])) ** 2
            for col in range(len(centroid))
        ]
        distance = sum(squared) ** 0.5

        # Strict comparison keeps the earliest row when distances tie.
        if distance < best_distance:
            best_distance = distance
            best_index = row_index

    return features, best_index

In [6]:
# Return charges predicted using appropriate cluster's linear model

def return_Charges_pred(lms, L, c):
    """Evaluate row ``c`` of ``lms`` as a linear model on feature vector ``L``.

    Column 0 of the row is added as a constant term; column ``k`` (k >= 1) is
    multiplied with ``L[k - 1]``. The products are accumulated first and the
    constant term is added last.

    Parameters
    ----------
    lms : pandas.DataFrame
        One row of coefficients per cluster.
    L : sequence of numbers
        Feature vector with at least ``lms.shape[1] - 1`` entries.
    c : int
        Positional row index of the model to apply.

    Returns
    -------
    The predicted charges for that row's model.
    """
    feature_count = lms.shape[1] - 1
    weighted_total = 0.0

    for offset in range(feature_count):
        feature_value = float(L[offset])
        coefficient = float(lms.iloc[c, offset + 1])
        weighted_total = weighted_total + feature_value * coefficient

    # The constant term is added after all products.
    return weighted_total + lms.iloc[c, 0]

In [7]:
# On button click function definition

def _show_message(text):
    """Display ``text`` as a centered level-2 heading in the active output area."""
    display(
        widgets.HTML(value = "<h2><center>" + text + "</center></h2>")
    )


def on_button_clicked(event):
    """Validate the form, predict the charges and show the result.

    Reads the six input widgets, rejects values outside the accepted ranges
    (age 10-100, BMI 18-30, children 0-6) with a message, and otherwise shows
    the prediction of the linear model belonging to the nearest cluster.
    Everything is rendered inside the ``output`` widget, which is cleared first.

    Parameters
    ----------
    event
        Argument supplied by the button's click callback; not used.
    """
    with output:
        clear_output()

        # The age field is a float input; only its integer part is used.
        Age = int(age.value)
        Bmi = float(bmi.value)
        Children = int(children.value)
        Smoker = smoker.value
        Sex = sex.value
        Region = region.value

        # The checks are written as "not inside the range" so that a NaN value
        # is rejected too instead of slipping through both comparisons.
        if not (10 <= Age <= 100):
            _show_message("Age must be between 10 and 100.")
            return

        if not (18 <= Bmi <= 30):
            _show_message("BMI must be between 18 and 30.")
            return

        if not (0 <= Children <= 6):
            _show_message("Children must be between 0 and 6")
            return

        # No separate "age or BMI is zero" case is needed: zero is already
        # outside both accepted ranges and has been rejected above.

        L, Best_C = return_best_cluster_index(CLUSTERS, Age, Bmi, Children, Smoker, Sex, Region)

        Charges_predicted = return_Charges_pred(LMS, L, Best_C)

        _show_message("Charges predicted: $" + str(round(Charges_predicted, 2)))

In [8]:
# Calculate button: spans the full width and runs on_button_clicked when pressed

calculate = widgets.Button(description = 'Calculate', layout = widgets.Layout(width = '100%'))
calculate.on_click(on_button_clicked)

In [9]:
# Output widget: the area the click handler clears and renders into,
# plus a horizontally centered box wrapping it

output = widgets.Output()

OutputHbox = widgets.HBox(
    children = [output],
    layout = widgets.Layout(justify_content = 'center'),
)

In [10]:
# Line breaker: an empty heading reused throughout the page as vertical spacing

text_0 = widgets.HTML(
    value = "<h1></h1>",
    layout = widgets.Layout(align_items = 'center'),
)

In [11]:
# Page title
text_1 = widgets.HTML(
    value = "<h1><b><center>Medical Cost Personal Datasets</center></b></h1>"
)

# Subtitle
text_2 = widgets.HTML(
    value = "<h3><center>Insurance Forecast by using Hybrid Machine Learning.</center></h3>"
)

# Title, subtitle and one spacer stacked vertically
headings = widgets.VBox(children = [text_1, text_2, text_0])

# About dataset
# Static HTML for the page: a bordered box describing the dataset and its
# features, followed by a table of five sample records. The markup is a plain
# string literal rendered as-is by the HTML widget; nothing is computed here.

text_4 = widgets.HTML(value = """
<table style="width: 100%; border-collapse: collapse; border-style: solid; border-color: black;" border="2" cellpadding="25">
<tbody>
<tr>
<td style="width: 100%;">
<h2 style="text-align: justify;">About the Dataset</h2>
<p style="text-align: justify;">This dataset was originally published in book <strong>"Machine Learning with R by Brett Lantz"</strong>. It consists of 1338 records and 7 features for prediction of insurance charges in United States. The 6 independent features can be used to predict the dependent feature 'charges'.</p>
<p style="text-align: justify;"><strong>The dataset contains 6 independent features as listed below:</strong></p>
<ul style="text-align: justify;">
<li>
<p><strong>age:</strong> age of primary beneficiary (10 years - 100 years)</p>
</li>
<li>
<p><strong>sex:</strong> insurance contractor gender, female, male</p>
</li>
<li>
<p><strong>bmi:</strong> Body mass index, providing an understanding of body, weights that are relatively high or low relative to height, objective index of body weight (kg / m ^ 2) using the ratio of height to weight (18 - 30)</p>
</li>
<li>
<p><strong>children:</strong> Number of children covered by health insurance / Number of dependents (0 - 6)</p>
</li>
<li>
<p><strong>smoker:</strong> smoker/non-smoker</p>
</li>
<li>
<p><strong>region:</strong> the beneficiary's residential area in the US, northeast, southeast, southwest, northwest</p>
</li>
<li>
<p><strong>charges:</strong> Individual medical costs billed by health insurance</p>
</li>
</ul>
<p style="text-align: justify;">These features are used to predict the yearly charges incurred (in $) by an individual for availing the insurance facility.</p>
</td>
</tr>
</tbody>
</table>
<h3 style="text-align: justify;">Sample records from the dataset:</h3>
<table style="border-collapse: collapse; width: 455pt; border-style: solid; border-color: black;" border="1" width="455pt" cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 16.0pt;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" height="67">age</td>
<td style="width: 85.2344px; text-align: center;">sex</td>
<td style="width: 85.3125px; text-align: center;">bmi</td>
<td style="width: 85.5312px; text-align: center;">children</td>
<td style="width: 85.3906px; text-align: center;">smoker</td>
<td style="width: 86.1094px; text-align: center;">region</td>
<td style="width: 86.6406px; text-align: center;">charges</td>
</tr>
<tr style="height: 16pt; text-align: center;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" align="right" height="67">19</td>
<td style="width: 85.2344px; text-align: center;">female</td>
<td style="width: 85.3125px; text-align: center;" align="right">27.9</td>
<td style="width: 85.5312px; text-align: center;" align="right">0</td>
<td style="width: 85.3906px; text-align: center;">yes</td>
<td style="width: 86.1094px; text-align: center;">southwest</td>
<td style="width: 86.6406px; text-align: center;" align="right">16884.924</td>
</tr>
<tr style="height: 16pt; text-align: center;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" align="right" height="67">18</td>
<td style="width: 85.2344px; text-align: center;">male</td>
<td style="width: 85.3125px; text-align: center;" align="right">33.77</td>
<td style="width: 85.5312px; text-align: center;" align="right">1</td>
<td style="width: 85.3906px; text-align: center;">no</td>
<td style="width: 86.1094px; text-align: center;">southeast</td>
<td style="width: 86.6406px; text-align: center;" align="right">1725.5523</td>
</tr>
<tr style="height: 16pt; text-align: center;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" align="right" height="67">28</td>
<td style="width: 85.2344px; text-align: center;">male</td>
<td style="width: 85.3125px; text-align: center;" align="right">33</td>
<td style="width: 85.5312px; text-align: center;" align="right">3</td>
<td style="width: 85.3906px; text-align: center;">no</td>
<td style="width: 86.1094px; text-align: center;">southeast</td>
<td style="width: 86.6406px; text-align: center;" align="right">4449.462</td>
</tr>
<tr style="height: 16pt; text-align: center;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" align="right" height="67">33</td>
<td style="width: 85.2344px; text-align: center;">male</td>
<td style="width: 85.3125px; text-align: center;" align="right">22.705</td>
<td style="width: 85.5312px; text-align: center;" align="right">0</td>
<td style="width: 85.3906px; text-align: center;">no</td>
<td style="width: 86.1094px; text-align: center;">northwest</td>
<td style="width: 86.6406px; text-align: center;" align="right">21984.4706</td>
</tr>
<tr style="height: 16pt; text-align: center;">
<td style="height: 16pt; width: 84.4375px; text-align: center;" align="right" height="67">32</td>
<td style="width: 85.2344px; text-align: center;">male</td>
<td style="width: 85.3125px; text-align: center;" align="right">28.88</td>
<td style="width: 85.5312px; text-align: center;" align="right">0</td>
<td style="width: 85.3906px; text-align: center;">no</td>
<td style="width: 86.1094px; text-align: center;">northwest</td>
<td style="width: 86.6406px; text-align: center;" align="right">3866.8552</td>
</tr>
</tbody>
</table>
""")

# Section introducing the hybrid approach and the baseline model comparison.
# Wording fixes only: spelling of "multivariate", the algorithm name
# "Orthogonal Matching Pursuit", and the article before "R2".
text_5 = widgets.HTML(value = """
<h2>Hybrid Approach Used: Clustering + Linear Regression</h2>
<p style="text-align: justify;">A variety of different regression models were tested for the given dataset including Linear, Huber, Orthogonal Matching Pursuit, etc. (refer table below) and linear regression (multivariate) was found to be the best model among all applied models, having an R2 score of 0.7466. In statistics, linear regression is a linear approach for modelling the relationship between a scalar response and one or more explanatory variables.</p>
<p>&nbsp;</p>
""")

# Section explaining how clustering is combined with per-cluster linear models.
# Wording fixes only: "further", "different", "varying", "predictions",
# "R2 score", and subject-verb agreement ("number ... has").
text_7 = widgets.HTML(value = """
<h4>* LM: Linear Model</h4>
<p style="text-align: justify;">Combining with clustering, and applying a hybrid approach (refer figure above) the R2 score is further increased by 12.06% to 0.83671. The approach consists of splitting the original dataset into different clusters and then using a linear model for each cluster. The optimal number of clusters has been identified by varying cluster numbers from 2 to 30, and analysing the R2 score for predictions on test data. For new unseen data, firstly the cluster is predicted and then the linear model corresponding to that particular cluster is applied to obtain the final output.</p>
<p>&nbsp;</p>
""")

# Image 1

# Read the PNG bytes inside a context manager so the file handle is closed
# as soon as the bytes have been handed to the widget.
with open("image1.png", "rb") as image1_file:
    image1 = widgets.Image(
        value = image1_file.read(),
        format = 'png',
        width = 1000,
        height = 600,
    )


# Image 2

# Read the PNG bytes inside a context manager so the file handle is closed
# as soon as the bytes have been handed to the widget.
with open("image2.png", "rb") as image2_file:
    image2 = widgets.Image(
        value = image2_file.read(),
        format = 'png',
        width = 800,
        height = 600,
    )

# Wrap each image in its own horizontally centered box
ImageHbox1 = widgets.HBox(
    children = [image1],
    layout = widgets.Layout(justify_content = 'center'),
)

ImageHbox2 = widgets.HBox(
    children = [image2],
    layout = widgets.Layout(justify_content = 'center'),
)

# Heading shown above the interactive form
text_6 = widgets.HTML(
    value = "<h1><center>Live Demo</center></h1>"
)

In [12]:
# Displaying rendered Web Page
# The page is one vertical stack; text_0 (an empty heading) is repeated
# between sections purely as spacing.

page = widgets.VBox([
    # Title block
    text_0, text_1, text_2, text_0, text_0,
    # Dataset description
    text_4, text_0,
    # Approach description with both images
    text_5, ImageHbox1, text_0, text_0, ImageHbox2, text_7, text_0,
    # Live demo: heading, the two input rows, button and result area
    text_6, text_0, text_0,
    inputs1, text_0, text_0,
    inputs2, text_0,
    calculate, output,
    # Bottom padding
    text_0, text_0, text_0,
])
display(page)

VBox(children=(HTML(value='<h1></h1>', layout=Layout(align_items='center')), HTML(value='<h1><b><center>Medica…