In [1]:
import pandas as pd

In [2]:
def buildHeatMap(df, title):
    
    from bokeh.io import output_notebook, show
    from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter, HoverTool
    from bokeh.plotting import figure
    from bokeh.transform import transform
    
    if df is None:
        return
    
    dispdf = df
    
    title = title

    #output_notebook()
    source = ColumnDataSource(df)

    # this is the colormap from the original NYTimes plot
    colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
    mapper = LinearColorMapper(palette=colors, low=dispdf.Val.min(), high=dispdf.Val.max())

    p = figure(plot_width=800, plot_height=300, title=title,
               x_range=list(dispdf.X.unique()), y_range=list(dispdf.Y.unique()),
               toolbar_location=None, tools="hover", x_axis_location="above")

    p.rect(x="X", y="Y", width=1, height=1, source=source,
           line_color=None, fill_color=transform('Val', mapper))


    color_bar = ColorBar(color_mapper=mapper, location=(0, 0),
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         formatter=PrintfTickFormatter(format="%d%%"))

    p.select_one(HoverTool).tooltips = [
        ('Percent', '@Val'),
        ('of ', '@X'),
        ('think', '@Y'),

    ]

    p.add_layout(color_bar, 'right')

    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "5pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 1.0

    return(p)

    
    

In [3]:
def buildNotImportantDispDf(df):
    adf = df.copy()
    adf = adf.dropna()
    adf.columns = ["Row","Group","Category","Qualifier","Value"]
    adf = adf.loc[adf['Qualifier'] == "Not Important"]
    dispdf = pd.DataFrame()
    dispdf["X"] = adf['Group']
    dispdf["Y"] = adf['Category']+" " +adf['Qualifier']
    dispdf["Val"] = adf['Value'] * 100
    dispdf = dispdf.dropna()
    
    dispdf.sort_values(by=['X','Y'],inplace=True)
    
    
    return dispdf
    
    
    

In [13]:
def buildMoreImportantDispDf(df):
    adf = df.copy()
    adf = adf.dropna()
    adf.columns = ["Row","Group","Category","Qualifier","Value"]
    adf = adf.loc[adf['Qualifier'] == "Not Important"]
    dispdf = pd.DataFrame()
    dispdf["X"] = adf['Group']
    dispdf["Y"] = adf['Category']+" More Important"
    dispdf["Val"] = (1 - adf['Value']) * 100
    dispdf = dispdf.dropna()
    
    dispdf.sort_values(by=['X','Y'],inplace=True)
    
    
    return dispdf
    

In [14]:
from bokeh.io import output_notebook, show

df = pd.read_csv("./Data/Religion_Data.csv")

dispDf = buildMoreImportantDispDf(df)

relplot = buildHeatMap(dispDf, "Religious Groups")

output_notebook()
show(relplot)



In [15]:
df = pd.read_csv("./Data/State_Data.csv")

dispDf = buildMoreImportantDispDf(df)

stplot = buildHeatMap(dispDf, "States")

output_notebook()
show(stplot)


In [16]:
df = pd.read_csv("./Data/Age Group_Data.csv")

dispDf = buildMoreImportantDispDf(df)

agplot = buildHeatMap(dispDf, "Age Groups")

output_notebook()
show(agplot)


In [17]:
df = pd.read_csv("./Data/MP Constituency Location_Data.csv")
df = df.dropna()

dispDf = buildMoreImportantDispDf(df)

locplot = buildHeatMap(dispDf, "Rural Vs Urban")

output_notebook()
show(locplot)


In [18]:
df = pd.read_csv("./Data/Education Level_Data.csv")
df = df.dropna()

dispDf = buildMoreImportantDispDf(df)

edplot = buildHeatMap(dispDf, "Education Levels")

output_notebook()
show(edplot)


In [19]:
df = pd.read_csv("./Data/Caste_Data.csv")
df = df.dropna()

dispDf = buildMoreImportantDispDf(df)

castplot = buildHeatMap(dispDf, "Castes")

output_notebook()
show(castplot)


In [20]:
df = pd.read_csv("./Data/Respondent Gender_Data.csv")
df = df.dropna()

dispDf = buildMoreImportantDispDf(df)

gendplot = buildHeatMap(dispDf, "Gender")

output_notebook()
show(gendplot)


In [26]:
from bokeh.layouts import gridplot
from bokeh.io import output_file
output_file("cand-party-caste.html")
grid = gridplot([stplot, locplot, agplot, edplot,castplot,relplot,gendplot], ncols=2, plot_width=500, plot_height=250)
show(grid)

W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='295b6a13-7dc3-40d4-b051-3c2c4f52971e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='7130cf02-8525-44a9-a690-b02e585e1855', ...), Figure(id='c4df2865-be27-4a42-be07-f43631677ddb', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='b4cfc050-2bf7-4466-aac9-b4b0bf4e6211', ...), Figure(id='5d22bb72-1e2b-44e8-8968-06deeb750851', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='cff1b833-b7b1-4b41-b537-d358a6dd2cd0', ...), Figure(id='4089a7e3-9971-4a1d-8b6e-88e626685fe2', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='295b6a13-7dc3-40d4-b051-3c2c4f52971e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they ar