In [1]:
#Misc
import pandas as pd
from matplotlib import pyplot as plt
# Widgets
import ipywidgets as widgets
from IPython.display import display

In [2]:
def shape_raw( raw_data_filled, graph_data_df, value_status):
    '''
    Build one Year-by-answer count table per target column selected by the user.

    Parameters
    ----------
    raw_data_filled : pandas.DataFrame
        Supplies the 'Year_Start' column. It is concatenated column-wise with
        graph_data_df, so both frames are expected to share the same row index.
    graph_data_df : pandas.DataFrame
        Holds the '<target>_unfilled' / '<target>_filled' columns plus the
        filter columns Gender, Education, Race, Age and Poverty_Index.
    value_status : list of [name, value] pairs
        Entry 0 carries the target variable name, entry 1 carries 'Yes'/'No'
        for including the '_filled' column. Any entry whose name matches a
        filter key ('Gender', 'Race', 'Age', 'Poverty_Index', 'Education',
        'Start Year') narrows the rows, unless its value is 'All'.

    Returns
    -------
    list of pandas.DataFrame
        One frame for the '_unfilled' column, followed by one for the
        '_filled' column when entry 1 is 'Yes'. Each frame is indexed by Year
        with one column per answer value; answer codes 1 and 2 are relabelled
        'No' and 'Yes'. Both 'No' and 'Yes' columns are always present
        (zero-filled when the filtered rows contain no such answer), and a
        filter combination that matches nothing yields an empty frame with
        those two columns.

    NOTE(review): the published NHANES codebooks use 1 = Yes / 2 = No for the
    MCQ items -- confirm the upstream data was recoded before relying on the
    1 -> 'No', 2 -> 'Yes' labelling kept here.
    '''
    #setting the target columns based on user input
    target_name = value_status[0][1]
    if value_status[1][1] == 'Yes': #if we want to show predicted values
        loop = [target_name + '_unfilled', target_name + '_filled']
    else:
        loop = [target_name + '_unfilled']

    ##user defined variable filters. If None, then no filter, include all categories
    user_dict = {
        'Gender': None,
        'Race': None,
        'Age': None,
        'Poverty_Index': None, #high = wealthy, low = less wealthy
        'Education': None,
        'Start Year': 1999,
        }

    #looping through widget statuses and updating dict
    #(identical for every target, so it is resolved once, outside the loop)
    for lst in value_status:
        key, value = lst[0], lst[1]
        if key in user_dict and value != 'All':
            user_dict[key] = value

    #year column that gets attached to every per-target frame
    year_df = raw_data_filled[['Year_Start']].rename(columns={'Year_Start': 'Year'})

    user_df_list = []

    for target in loop:
        #target first, followed by the columns the user can filter on
        groupby_cols = [target, 'Gender', 'Education', 'Race', 'Age', 'Poverty_Index']
        user_df = pd.concat([year_df, graph_data_df[groupby_cols]], axis=1)

        #filtering for all of the variables that user defines.
        for k, v in user_dict.items():
            if v is None:
                continue
            if k == 'Start Year':
                user_df = user_df.loc[user_df['Year'] >= v]
            else:
                user_df = user_df.loc[user_df[k] == v]

        counts = user_df.groupby(['Year', target]).size()

        if counts.empty:
            #nothing matched the filters (or every answer was missing):
            #hand back an empty but well-formed table instead of pivoting nothing
            user_df = pd.DataFrame(columns=['No', 'Yes'], dtype=float).rename_axis('Year')
        else:
            user_df = counts.to_frame().reset_index().rename(columns={0: 'Count'})
            user_df = user_df.pivot(columns=target, index='Year')
            user_df.columns = user_df.columns.droplevel()
            user_df = user_df.fillna(0)
            user_df = user_df.rename(columns={1: 'No', 2: 'Yes'})
            #a filter may leave only one of the two answers; callers index both
            for label in ('No', 'Yes'):
                if label not in user_df.columns:
                    user_df[label] = 0

        user_df_list.append(user_df)

    return user_df_list

def plot_chart(raw_data_filled, graph_data_df, value_status):
    '''
    Draw a grouped bar chart of yearly 'No' / 'Yes' counts for the selected variable.

    Parameters
    ----------
    raw_data_filled, graph_data_df : pandas.DataFrame
        Passed straight through to shape_raw.
    value_status : list of [name, value] pairs
        Entry 0 supplies the variable name used in the title; entry 1 is
        'No' to plot only the '_unfilled' counts, anything else to also plot
        the '_filled' counts when shape_raw returned them.

    The function returns nothing; it creates a new matplotlib figure and
    shows it with plt.show().
    '''
    target = value_status[0][1]

    user_df_list = shape_raw(raw_data_filled, graph_data_df, value_status) #user defined df filtered based on user input

    #predicted bars need the second frame; never index past what shape_raw returned
    use_predicted = value_status[1][1] != 'No' and len(user_df_list) > 1
    frames = user_df_list[:2] if use_predicted else user_df_list[:1]

    # x axis values: every year present in any plotted frame, so that all bar
    # series share the same length and position (the two frames can cover
    # different years, e.g. when one column has no answers for a year)
    year_list = sorted(set().union(*(frame.index for frame in frames)))

    #align each frame to the common years and make sure both answer columns exist
    frames = [frame.reindex(index=year_list, columns=['No', 'Yes'], fill_value=0)
              for frame in frames]

    plt.figure(figsize=(12, 4))

    #bar centres, left to right within each year
    #NOTE(review): neighbouring offsets are 0.2 apart but bars are 0.25 wide,
    #so adjacent bars overlap slightly; geometry is left as it was.
    b1 = [i-.35 for i in year_list ]
    b2 = [i-.15 for i in year_list ]
    b3 = [i+.15 for i in year_list ]
    b4 = [i+.35 for i in year_list ]

    #set title and axis labels
    plt.title(target + ' Surveys Through Time', fontsize = 15)
    plt.xlabel('Year')
    plt.ylabel('Count')

    #set x ticks
    plt.xticks([int(i) for i in year_list])

    #set bars
    if use_predicted:
        user_df1, user_df2 = frames
        plt.bar(b1, user_df1['No'], width=0.25, alpha=1, color='peachpuff', zorder = 2)
        plt.bar(b2, user_df1['Yes'], width=0.25, alpha=1, color='sandybrown', zorder = 2)
        plt.bar(b3, user_df2['No'], width=0.25, alpha=1, color='lightskyblue', zorder = 2)
        plt.bar(b4, user_df2['Yes'], width=0.25, alpha=1, color='steelblue', zorder = 2)

        plt.legend(['No','Yes','No w/ Pred','Yes w/ Pred'], loc=(1.01, 0.65))

    else:
        user_df1 = frames[0]
        plt.bar(b2, user_df1['No'], width=0.25, alpha=0.75, color='peachpuff', zorder = 2)
        plt.bar(b3, user_df1['Yes'], width=0.25, alpha=0.75, color='sandybrown', zorder = 2)

        plt.legend(['No','Yes'], loc=(1.01, .8))

    plt.show()

def start_rf_widgets(raw_data_filled, graph_data_df):
    '''
    Create the selection widgets, hook them up to the chart and display everything.

    A radio button picks the variable, a toggle picks whether predicted answers
    are shown, and one dropdown per filter (start year plus the columns of
    graph_data_df listed below) narrows the rows. Every widget change updates
    the matching entry of value_status and redraws the chart through plot_chart.
    '''
    #lists of options for users
    variables = ['Cancer','Stroke', 'Heart Attack']
    filters = [ "Gender", "Age", 'Race', "Poverty_Index", "Education"]

    #current value of every widget; the order must match the widget order below
    #because the event handler looks entries up by position
    value_status = [
        ['Variables','Cancer'],
        ["Predicted",'Yes'],
        ["Start Year", 'All'],
        ["Gender",'All'],
        ["Age",'All'],
        ["Race",'All'],
        ["Poverty_Index",'All'],
        ["Education",'All'],
    ]

    def make_dropdown(description, column):
        #dropdown offering 'All' followed by each distinct value of the column
        choices = ['All'] + list(column.unique())
        return widgets.Dropdown(options = choices, description=description,
                                layout = {'width':'Initial'},
                                style={'description_width': '100px'})

    #left-hand controls
    variable_radio = widgets.RadioButtons(options=variables,
        description='Variables:',disabled=False)
    predicted_toggle = widgets.ToggleButtons(options=['Yes','No'],description='Predicted:',disabled=False,
        tooltips=['Include Random Forest Generated Answers','Do Not Include Generated Answers'])
    h_widget_list = [variable_radio, predicted_toggle]

    #right-hand controls: start year first, then one dropdown per filter column
    v_widget_list = [make_dropdown('Start Year', raw_data_filled['Year_Start'])]
    v_widget_list.extend(make_dropdown(label, graph_data_df[label]) for label in filters)

    #used in the event handler to map a widget back to its value_status entry
    all_widget_list = h_widget_list + v_widget_list

    #layout widget to set spacing
    Vbox_layout = widgets.Layout(margin='0 0 25px 25px')

    #containers for the controls and for the plot
    filter_hbox_output = widgets.VBox(children=h_widget_list)
    filter_vbox_output = widgets.VBox(children=v_widget_list, layout = Vbox_layout)
    all_box_output = widgets.HBox([filter_hbox_output,filter_vbox_output])
    plot_output = widgets.Output() #holds the plot output

    def all_eventhandler(change):
        #record the new value for the widget that fired, then redraw the chart
        position = all_widget_list.index(change.owner)
        value_status[position][1] = change.new
        plot_output.clear_output()
        with plot_output:
            plot_chart(raw_data_filled, graph_data_df, value_status)

    #observation event that triggers event handler
    for widget in all_widget_list:
        widget.observe(all_eventhandler, names='value')

    #initial chart with the default selections
    with plot_output:
        plot_chart(raw_data_filled, graph_data_df, value_status)

    #show the controls, then the plot area
    display(all_box_output)
    display(plot_output)



In [3]:
#download the dataset
raw_data_path = 'https://testgeorgia.blob.core.windows.net/team187project/raw_data_filled.csv'
raw_data_filled = pd.read_csv(raw_data_path)

#survey column codes -> user friendly names
translate_dict = {
    #target variables, with and without filled-in answers
    'MCQ220_filled': 'Cancer_filled',
    'MCQ220_unfilled': 'Cancer_unfilled',
    'MCQ160E_filled': 'Heart Attack_filled',
    'MCQ160E_unfilled': 'Heart Attack_unfilled',
    'MCQ160F_filled': 'Stroke_filled',
    'MCQ160F_unfilled': 'Stroke_unfilled',
    #columns the user can filter on
    'RIAGENDR': 'Gender',
    'DMDEDUC3': 'Education',
    'RIDRETH1': 'Race',
    'RIDAGEYR': 'Age',
    'INDFMPIR': 'Poverty_Index',
}

#df used for the graphs: only the translated columns, under their friendly names
graph_data_df = raw_data_filled[list(translate_dict)].rename(columns = translate_dict)

#build and show the interactive controls and chart
start_rf_widgets(raw_data_filled, graph_data_df)


HBox(children=(VBox(children=(RadioButtons(description='Variables:', options=('Cancer', 'Stroke', 'Heart Attac…

Output()