In [1]:
from bokeh.plotting import figure, show, output_file#, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, CustomJSHover
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
from bokeh.layouts import column, row, gridplot


In [31]:
import pickle

with open("encoders.pkl","rb") as fin:
    loaded_encoders = pickle.load(fin)

dummy_cols = {}
encoders = {}
for key,val in loaded_encoders.items():
    if type(val) == tuple:
        val,key = val
    encoders[key] = val
    dummy_cols[key] = [key+"_"+str(cls) for cls in range(val.classes_.shape[0])]

# print(dummy_cols)

In [40]:
output_file("dashboard.html")

In [41]:
aggregated = pd.read_hdf("../input/hdfs-dataset/hdfs_aggregated.hd5")
aggregated += 1e-6

AttributeError: ("'float' object has no attribute 'split'", 'occurred at index 2008-11-10 21:00:00')

In [111]:
test_pred = pd.read_hdf("../input/hdfs-dataset/test_pred.hd5")

def map_suspicions(val):
    if type(val) != str:
        return val
    toks = val.split(" ")
    def get_label(tok):
        col,num = tok.split("_")
        if col in ["Component","Level"]:
            return col+" "+encoders[col].classes_[int(num)]
        return tok
    return ";".join([ get_label(tok) for tok in toks])
        

    
aggregated["score"] = test_pred["score"]

def get_warning(x,pos,col):
    which = x["suspicions"].split(" ")[pos]
    
    if col in which:
        return x[which]
    return np.nan

aggregated["suspicions"] = test_pred["suspicions"]

for col in ["TemplateId","Level","Component"]:
    for pos in range(3):
        aggregated["%s_Suspicion_%d" %(col,pos) ] = aggregated[aggregated["score"]>200].apply(get_warning,axis=1,pos=pos,col=col)


In [112]:
aggregated["suspicions"] = test_pred["suspicions"].apply(map_suspicions)
aggregated["score"] = aggregated["score"].fillna(0.)
aggregated["suspicions"] = aggregated["suspicions"].fillna("")

In [113]:
aggregated["time"] = aggregated.index.astype(str)

In [142]:
from matplotlib.colors import to_hex
tab10 = plt.get_cmap("tab10").colors
tab20 = plt.get_cmap("tab20b").colors

allcolors=list(map(to_hex,tab10+tab10+tab20+tab10))


def mk_plot(source,columns,title,x_range=None,y_range=None,add_tooltips=False,
            labels=None,add_susp=False,view=None,warning_cols=None):#"box_select,lasso_select,help"):

    plot = figure(x_axis_type="datetime", title=title, 
                plot_width=1200, plot_height=300,  y_axis_type="log",
                x_range=x_range, y_range=y_range)
    
    plot.xaxis.axis_label = 'Date'
    plot.yaxis.axis_label = 'Event rate'

    tooltips = []
    for column,color in zip(columns,allcolors):
        plot.line("index",column,source=source,color=color,muted_alpha=0.,alpha=1.)
        if view is not None:
            plot.circle("index",column,source=source,color="red",muted_alpha=0.,alpha=1.,view=view,legend="anomaly warning")
        # custom_formatters[column] = HTMLTemplateFormatter(template='<span style="color:%s"><%= value %></span>')
    if warning_cols is not None:
        legend = "anomaly warning"
        for col in warning_cols:
            plot.circle("index",col,source=source,color="red",muted_alpha=0.,alpha=1.,legend=legend)
            legend = None
    
        plot.legend.location = "top_left"
    
    def get_tooltip_key(col):
        if "_" in col:
            key = col.split("_")[1]
        else:
            key = col
        if labels is not None:
            key = labels[int(key)] 
        return key
    
    def get_tooltip_fmt(column,color):
        return '<span style="color:'+color+'">@'+column+'{2e}</span>'

    def add_tooltips(pfx,rng):
        tooltip = []
        key = []
        for column,color in rng:
            key.append( get_tooltip_key(column) )
            tooltip.append(  get_tooltip_fmt(column,color)  ) #"@"+column+"{2e}"
        tooltips.append( ("%s [%s]" % ( pfx, ",".join(key) ), ",".join(tooltip)) )
    
    
    if add_tooltips:
        pairs = list(zip(columns,allcolors))
        pfx = columns[0].split("_")[0]
        if len(columns) < 10:
            tooltips = [ (get_tooltip_key(column),get_tooltip_fmt(column,color)) for column,color in pairs]
        else:
            for icol in range(len(columns) // 5):
                add_tooltips( pfx, pairs[icol*5:(icol+1)*5])
            remind = len(columns) % 5    
            if remind != 0:
                add_tooltips( pfx, pairs[-remind:] )  
        if add_susp:
            tooltips.append( ("anomalous rates", "@{suspicions}") )
        
        hover = HoverTool(tooltips=[("time","@time")]+tooltips)#,formatters=custom_formatters)
        hover.toggleable = True
        plot.add_tools(hover)

    return plot

data = ColumnDataSource(aggregated)

anomalies = aggregated["score"]>200
from bokeh.models import ColumnDataSource, CDSView, BooleanFilter
view = CDSView(source=data,filters=[BooleanFilter(anomalies)])


template_columns = dummy_cols["TemplateId"]
component_columns = dummy_cols["Component"]
level_columns = dummy_cols["Level"]

p_template = mk_plot(data,template_columns,"Rate by event template",
                     y_range=(1,aggregated[["Level_0","Level_1"]].values.ravel().max()),add_tooltips=True,
                     add_susp=True,warning_cols=["TemplateId_Suspicion_0","TemplateId_Suspicion_1","TemplateId_Suspicion_2"]
                    )

p_component = mk_plot(data,component_columns,"Rate by component",
                      x_range=p_template.x_range,y_range=p_template.y_range,add_tooltips=True,add_susp=True,
                      labels=encoders["Component"].classes_,
                       warning_cols=["Component_Suspicion_0","Component_Suspicion_1","Component_Suspicion_2"])

p_level = mk_plot(data,level_columns,"Rate by level",
                  x_range=p_template.x_range,y_range=p_template.y_range,add_tooltips=True,add_susp=True,labels=encoders["Level"].classes_,
                  warning_cols=["Level_Suspicion_0","Level_Suspicion_1","Level_Suspicion_2"])

p_score = mk_plot(data,["score"],"Anomaly score",
                  x_range=p_template.x_range,y_range=p_template.y_range,add_tooltips=True,add_susp=True,view=view)


In [143]:
time_plots = gridplot([p_template,p_component,p_level,p_score],ncols=1)#,toolbar_options=dict(aractive_scroll = "wheel_zoom"))
#show(plot)

In [144]:
# help(CustomJSHover)

In [145]:
templates = pd.read_hdf("../input/hdfs-dataset/hdfs_templates.hd5")


templates["color"] = allcolors[:templates.shape[0]]

In [146]:
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn, HTMLTemplateFormatter

templates_source = ColumnDataSource(templates)

fmt='<p style="color:<%= value %>">&#9608;</p>'

formatter =  HTMLTemplateFormatter(template=fmt)


columns = [
        TableColumn(field="color",title="",width=3,formatter=formatter),
        TableColumn(field="TemplateId", title="TemplateId",width=3),
        TableColumn(field="EventTemplate", title="Description"),
    ]

data_table = DataTable(source=templates_source, columns=columns, width=400, height=280)

In [147]:
plot = gridplot([time_plots,data_table],ncols=2,toolbar_location=None)

In [148]:
show(plot)