## Import Libraries

In [1]:
import streamlit as st
import random
import pickle
import pandas as pd
import constants
import os
from pathlib import Path

## Set Variables

In [2]:
model_path = 'model/model_chd_prediction.sav'
# s3bucket_url = 'https://phs-chd.s3.amazonaws.com/'
# data_path = s3bucket_url + 'CHDPositiveData.csv'
# model_path = s3bucket_url + 'team134_top10.sav'
heart = 'heart.jpg'

## CHD Class

In [33]:
class CHD:
    def __init__(self):

        self._age_g_dict = {'Age 18 to 24': 1, 
                            'Age 25 to 34': 2, 
                            'Age 35 to 44': 3,
                            'Age 45 to 54': 4,
                            'Age 55 to 64': 5,
                            'Age 65 or older': 6}
        self._sex_dict = {'Male': 1, 
                          'Female': 2}
        self.persdoc3_dict = {'Yes': 1, 
                              'No': 0}
        self._rfhlth_dict = {'Good or Better Health': 1, 
                             'Fair or Poor Health': 2}
        self._rfhype6_dict = {'Yes': 1, 
                              'No': 0}
        self.cholchk3_dict = {'Never': 1, 
                              'Within the past year': 2,
                             'Within the past 2 years': 3,
                             'Within the past 3 years': 4,
                             'Within the past 4 years': 5,
                             'Within the past 5 years': 6,
                             '5 or more years ago': 8}
        self.usenow3_dict = {'Every day': 1, 
                             'Some days': 2,
                             'Not at all': 3}
        self._educag_dict = {'Did not graduate High School': 1, 
                             'Graduated High School': 2,
                             'Attended College or Technical School': 3,
                             'Graduated from College or Technical School': 4}
        self.employ1_dict = {'Employed for wages': 1, 
                              'Self-employed': 2,
                             'Out of work for 1 year or more': 3,
                             'Out of work for < 1 year': 4,
                             'A homemaker': 5,
                             'A student': 6,
                             'Retired': 7,
                            'Unable to work': 8}
        self.pneuvac4_dict = {'Yes': 1, 
                              'No': 0}
 
        self.best_cols_rename = constants.best_cols_rename

        self.model = self.load_model()
        model_cols = self.model.feature_names_in_
        self.df = pd.DataFrame(columns=model_cols)
#         print(model_cols)

#     @st.cache
    
    def load_model(self):
        loaded_model = pickle.load(open(model_path, 'rb'))
        return loaded_model

    def call_predict(self, df):
        print('XXXXXXXXXXXXX')
        res = self.model.predict_proba(df)
        p = round(random.random(), 3)
        original_title = f'<h3 style="color:Grey; font-size: 20px;">The probability of Coronary ' \
                         f'Heart Disease is: <span style=color:Blue>{round(res[0][0], 3)}</span> </h3>'
        st.markdown(original_title, unsafe_allow_html=True)
        st.markdown("""---""")
        chart_title = f'<h2 style="color:Grey; font-size: 30px;">Primary indicator\'s ' \
                      f'contribution to CHD  </h3>'
        st.markdown(chart_title, unsafe_allow_html=True)

        df_chd = self.df.query('hc_chd__michd_cat == 1')
        print('df_chd',df_chd)
        import matplotlib.pyplot as plt
        from matplotlib import gridspec

        fig = plt.figure()
        fig.set_figheight(12)
        fig.set_figwidth(12)
        spec = gridspec.GridSpec(ncols=4, nrows=2,
                                 wspace=.1,
                                 hspace=.1, width_ratios=[1, 1, 1, 1], height_ratios=[1, 1])
        colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']
        for i, col in enumerate(constants.disease_dict.keys()):
            if col != 'hc_chd__michd_cat':
                res = df_chd.groupby(col)['hc_chd__michd_cat'].count().rename_axis(col).reset_index(name='counts')
                resC = res.copy()
                resC[col] = resC[col].map(constants.disease_dict[col])
                # fig,ax = plt.subplots()
                ax = fig.add_subplot(spec[i])
                ax.set_title(f"{col.capitalize()}")
                pathches, texts, autotexts = ax.pie(resC.counts, labels=resC[col], autopct='%1.1f%%', shadow=True,
                                                    startangle=90
                                                    , colors=colors
                                                    )
                for text in texts:
                    text.set_color('black')
                for autotext in autotexts:
                    autotext.set_color('black')

                ax.axis('equal')
        st.pyplot(fig)

    def set_title_header(self):
        c1, c2 = st.columns([1, 5])
        c1.image(heart)
        c2.header("CSE6242 Team 134 - Heart Disease Prediction and Exploration")
        st.write(
            "Check out tableau report for secondary attribute exploration at this link [link]("
            "https://public.tableau.com/app/profile/monika.maingi/viz/CSE6242Team134-HeartDiseaseDetectionandExplorationProjectDataVisualization_16696967627000/Demographics?publish=yes)")
        st.markdown("""---""")

    def get_key(self, val, my_dict):
        print(val, my_dict)
        for key, value in my_dict.items():
            if val == key:
                return value

        return "key doesn't exist"+val

    def set_sidebar(self):

        st.sidebar.header("Press Submit after answering the below questions :")

        self._age_g = st.sidebar.radio("Your age:", tuple(self._age_g_dict.keys()))
        _age_g_val = self.get_key(self._age_g, self._age_g_dict)  
        
        self._sex = st.sidebar.radio("Your sex:", tuple(self._sex_dict.keys()))
        _sex_val = self.get_key(self._sex, self._sex_dict)
        
        self.persdoc3 = st.sidebar.radio("Do you have one person (or a group of doctors) that you think of as your personal health care provider?", tuple(self.persdoc3_dict.keys()))
        persdoc3_val = self.get_key(self.persdoc3, self.persdoc3_dict)
        
        self._rfhlth = st.sidebar.radio("What would consider your general health to be?", tuple(self._rfhlth_dict.keys()))
        _rfhlth_val = self.get_key(self._rfhlth, self._rfhlth_dict)
        
        self._rfhype6 = st.sidebar.radio("Have you been told you have high blood pressure by a doctor, nurse, or other health professional?", tuple(self._rfhype6_dict.keys()))
        _rfhype6_val = self.get_key(self._rfhype6, self._rfhype6_dict)
        
        self.cholchk3 = st.sidebar.radio("About how long has it been since you last had your cholesterol checked?", tuple(self.cholchk3_dict.keys()))
        cholchk3_val = self.get_key(self._rfhype6, self.cholchk3_dict)
                                   
        self.usenow3 = st.sidebar.radio("Do you currently use chewing tobacco, snuff, or snus every day, some days, or not at all?", tuple(self.usenow3_dict.keys()))
        usenow3_val = self.get_key(self.usenow3, self.usenow3_dict )                                  
                                   
        self._educag = st.sidebar.radio("Do you currently use chewing tobacco, snuff, or snus every day, some days, or not at all?", tuple(self._educag_dict.keys()))
        _educag_val = self.get_key(self._educag, self._educag_dict )                           
                                   
        self.employ1 = st.sidebar.radio("What is your employment status?", tuple(self.employ1_dict.keys()))
        employ1_val = self.get_key(self.employ1, self.employ1_dict )
        
        self.pneuvac4 = st.sidebar.radio("Have you ever had a pneumonia shot also known as a pneumococcal vaccine?", tuple(self.pneuvac4_dict.keys()))
        pneuvac4_val = self.get_key(self.pneuvac4, self.pneuvac4_dict )

        respDF = pd.DataFrame(
            data=[[_age_g_val ,_sex_val,persdoc3_val,
                    _rfhlth_val, _rfhype6_val,
                     cholchk3_val, usenow3_val, _educag_val,
                     employ1_val, pneuvac4_val]],
            columns=['_age_g' ,'_sex','persdoc3',
                    '_rfhlth', '_rfhype6',
                     'cholchk3', 'usenow3', '_educag',
                     'employ1', 'pneuvac4'])

        if st.sidebar.button('Submit'):
            self.call_predict(respDF)

## Run App

In [35]:
def main():
    chd = CHD()
    chd.set_title_header()
    chd.set_sidebar()
    
    _age_g_val = 1
    _sex_val =1
    persdoc3_val =1
    _rfhlth_val = 1
    _rfhype6_val = 1
    cholchk3_val = 1
    usenow3_val = 1
    _educag_val =1
    employ1_val =1
    pneuvac4_val =1

    
    respDF = pd.DataFrame(
        data=[[_age_g_val ,_sex_val,persdoc3_val,
                    _rfhlth_val, _rfhype6_val,
                     cholchk3_val, usenow3_val, _educag_val,
                     employ1_val, pneuvac4_val]],
        columns=['_age_g' ,'_sex','persdoc3',
                    '_rfhlth', '_rfhype6',
                     'cholchk3', 'usenow3', '_educag',
                     'employ1', 'pneuvac4'])
    print('respDF',respDF)
    res=chd.model.predict_proba(respDF)
    print("Prob=",round(res[0][0], 3))

    # attrs=vars(chd)
    # st.write(', '.join("%s: %s" % item for item in attrs.items() ))
main()

Age 18 to 24 {'Age 18 to 24': 1, 'Age 25 to 34': 2, 'Age 35 to 44': 3, 'Age 45 to 54': 4, 'Age 55 to 64': 5, 'Age 65 or older': 6}
Male {'Male': 1, 'Female': 2}
Yes {'Yes': 1, 'No': 0}
Good or Better Health {'Good or Better Health': 1, 'Fair or Poor Health': 2}
Yes {'Yes': 1, 'No': 0}
Yes {'Never': 1, 'Within the past year': 2, 'Within the past 2 years': 3, 'Within the past 3 years': 4, 'Within the past 4 years': 5, 'Within the past 5 years': 6, '5 or more years ago': 8}
Every day {'Every day': 1, 'Some days': 2, 'Not at all': 3}
Did not graduate High School {'Did not graduate High School': 1, 'Graduated High School': 2, 'Attended College or Technical School': 3, 'Graduated from College or Technical School': 4}
Employed for wages {'Employed for wages': 1, 'Self-employed': 2, 'Out of work for 1 year or more': 3, 'Out of work for < 1 year': 4, 'A homemaker': 5, 'A student': 6, 'Retired': 7, 'Unable to work': 8}
Yes {'Yes': 1, 'No': 0}
respDF    _age_g  _sex  persdoc3  _rfhlth  _rfhype6  

Feature names unseen at fit time:
- _age_g
- _educag
- _rfhlth
- _rfhype6
- _sex
- ...
Feature names seen at fit time, yet now missing:
- dem_age__age_g_cat
- dem_sex__sex_cat
- gh_careprovider_persdoc3_bin
- gh_healthstatus__rfhlth_cat
- hc_cvd__rfhype6_bin
- ...

