In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from csv import writer

### Initial Set Up of DBs (we use .csv files for simplification in this demo)

In [76]:
db2 = pd.read_csv('db2.csv')
db2

Unnamed: 0,name,age,sex,quote,date,diabetes_prob,cardio_prob
0,Jason,41,0,1500,06/16/2020,1.0,1.0
1,Kevin,35,0,1000,03/02/2019,0.2,0.8
2,Katie,36,1,700,02/28/2019,0.6,0.1


In [7]:
def append_list_as_row(file_name, list_of_elem):
    with open(file_name, 'a+', newline='') as write_obj:
        csv_writer = writer(write_obj)
        csv_writer.writerow(list_of_elem)

### Machine Learning Models

In [79]:
# diabetes prediction

diabetes = pd.read_csv('diabetes.csv')
diabetes = diabetes.dropna()
X_d = diabetes.iloc[:,:8]
y_d = diabetes.iloc[:,8]
X_d_train, X_d_test, y_d_train, y_d_test = train_test_split(X_d, y_d, test_size=0.3, random_state=16)

# the best performance model based on the project part 3
dt_d = DecisionTreeClassifier(max_depth = 8)
dt_d.fit(X_d_train, y_d_train)

DecisionTreeClassifier(max_depth=8)

In [5]:
# cardiovascular prediction

cardio = pd.read_csv('cardiovascular.csv')
cardio = cardio.dropna()
X_c = cardio.iloc[:,:13]
y_c = cardio.iloc[:,13]
X_c_train, X_c_test, y_c_train, y_c_test = train_test_split(X_c, y_c, test_size=0.3, random_state=16)

# the best performance model based on the project part 3
dt_c = DecisionTreeClassifier(max_depth = 8)
dt_c.fit(X_c_train, y_c_train)

DecisionTreeClassifier(max_depth=8)

### User Interface

In [36]:
name = input("Hi, what's your name?")

# diabete-related information request
age = int(input('How old are you?'))
sex = int(input("What's your biological gender (0=M; 1=F)?"))
if sex == 1:
    pregnancies = int(input('How many times have you been pregnanted?'))
else:
    pregnancies = 0
bmi = float(input('Please enter your BMI: '))
glucose = int(input('Please enter your plasma glucose concentration: '))
bloodpressue = int(input('Please enter your diastolic blood pressure (mm Hg): '))
skin = int(input('Please enter your triceps skin fold thickness (mm): '))
insulin = int(input('Please enter your 2-Hour serum insulin (mu U/ml): '))
pedigree = float(input('Please enter your diabetes pedigree function result: '))

# cardiovascular disease-related informaion request
cp = int(input('Please enter your constrictive pericarditis level (0-3): '))
trestbps = int(input('Please enter your resting blood pressure (mm Hg): '))
chol = int(input('Please enter your cholesterol level (mg/dl): '))
fbs = int(input('Is your FBS test positive? (0=N; 1=Y): '))
restecg = int(input('How is your resting electrocardiographic result? (0=normal; 1=having ST-T wave abnormality; 2=showing probable or definite left ventricular hypertrophy by Estes criteria): '))
thalach = int(input('Please enter your maximum heart rate achieved: '))
exang = int(input('Is your EXANG test positive? (0=N; 1=Y): '))
oldpeak = float(input('Please enter your ST depression induced by exercise relative to rest: '))
slope = int(input('Please enter your slope of the peak exercise ST segment (0-2): '))
ca = int(input('Please enter your number of major vessels colored by flourosopy (0-4): '))
thal = int(input('Please enter your thalassemia level (0-3): '))

quote = 'pending'

Hi, what's your name? Clement
How old are you? 41
What's your biological gender (0=M; 1=F)? 0
Please enter your BMI:  34.3
Please enter your plasma glucose concentration:  189
Please enter your diastolic blood pressure (mm Hg):  104
Please enter your triceps skin fold thickness (mm):  25
Please enter your 2-Hour serum insulin (mu U/ml):  0
Please enter your diabetes pedigree function result:  0.435
Please enter your constrictive pericarditis level (0-3):  1
Please enter your resting blood pressure (mm Hg):  105
Please enter your cholesterol level (mg/dl):  198
Is your FBS test positive? (0=N; 1=Y):  0
How is your resting electrocardiographic result? (0=normal; 1=having ST-T wave abnormality; 2=showing probable or definite left ventricular hypertrophy by Estes criteria):  1
Please enter your maximum heart rate achieved:  168
Is your EXANG test positive? (0=N; 1=Y):  0
Please enter your ST depression induced by exercise relative to rest:  0.0
Please enter your slope of the peak exercise 

In [42]:
new_customer = [name, pregnancies, glucose, bloodpressue, skin, insulin, bmi, pedigree, age, sex,
                cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, quote]
append_list_as_row('db3.csv', new_customer)

### Insurance Agent Analysis

#### Predict if this customer will get diabete or cardiovascular diseases

In [40]:
new_customer_d = [[pregnancies, glucose, bloodpressue, skin, insulin, bmi, pedigree, age]]
dt_d.predict_proba(new_customer_d)



array([[0., 1.]])

This new customer has 100% probability of getting diabete according to the machine learning model.

In [41]:
new_customer_c = [[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]]
dt_c.predict_proba(new_customer_c)



array([[0., 1.]])

This new customer has 100% probability of getting cardiovascular disease according to the machine learning model.

#### Compare with the historical completed orders

In [69]:
diabetes_prob = 1
cardio_prob = 1
db2[(db2['age']==age) & (db2['sex']==sex) & (db2['diabetes_prob']==diabetes_prob) & (db2['cardio_prob']==cardio_prob)]

Unnamed: 0,name,age,sex,quote,date,diabetes_prob,cardio_prob
0,Jason,41,0,1500,06/16/2020,1.0,1.0


This new customer Clement is very similar to Jason, whose insurance quote was $1,500 back in 2020. Consider into 2-year inflaion, Clement's quote should be $1,700.

### Update the quote for this new order

In [70]:
db3 = pd.read_csv('db3.csv')
new_quote = 1700
db3.loc[0, 'quote'] = new_quote
db3

Unnamed: 0,name,pregnancies,glucose,bloodpressure,skin,insulin,bmi,pedigree,age,sex,...,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,quote
0,Clement,0,189,104,25,0,34.3,0.435,41,0,...,198,0,1,168,0,0.0,2,1,2,1700


In [68]:
db3.to_csv('db3.csv', index=False)

### If the customer accepts the order and complete the purchase

In [77]:
date = '2022/08/26'
new_order = [name, age, sex, new_quote, date, diabetes_prob, cardio_prob]
db2.loc[len(db2)] = new_order
db2

Unnamed: 0,name,age,sex,quote,date,diabetes_prob,cardio_prob
0,Jason,41,0,1500,06/16/2020,1.0,1.0
1,Kevin,35,0,1000,03/02/2019,0.2,0.8
2,Katie,36,1,700,02/28/2019,0.6,0.1
3,Clement,41,0,1700,2022/08/26,1.0,1.0


In [78]:
db2.to_csv('db2.csv', index=False)