In [1]:
import pickle
from math import pi

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

from sklearn import metrics as sk_metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import plot_confusion_matrix

In [17]:
from collections import OrderedDict

from bokeh.io import output_file
import bokeh.plotting as bk
from bokeh.plotting import figure, show, output_file,curdoc
from bokeh.models import HoverTool, ColumnDataSource,CustomJS,Select,PreText,LinearColorMapper,BasicTicker,ColorBar,Div
from bokeh.transform import cumsum
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn,Dropdown
from bokeh.layouts import row,column,gridplot
from bokeh.palettes import Category10,Category20,cividis,inferno,viridis

from math import pi

#bk.output_notebook()

In [18]:
# --- Report constants ------------------------------------------------------
# Class labels, both as an ordered list and as an index -> label lookup.
CLASS_NAME = ['negative', 'neutral', 'positive']
CATEGORY_DICT = {0: 'negative', 1: 'neutral', 2: 'positive'}
N_CLASS = 3

# Names of the ground-truth and predicted-label columns in the test frame.
ACTUAL_LABEL = 'airline_sentiment'
PREDICTED_LABEL = 'predictions'

# N_TOP selects the size of the Category20 palette below; it is also handed
# to the table-selection callback together with COLOR_CODE_LIST.
N_TOP = 15
COLOR_CODE_LIST = Category20[N_TOP]

# Columns kept from X_test.csv (everything else in the file is dropped).
COLUMNS = ['text', 'tokens', 'airline_sentiment', 'predictions', 'negative', 'neutral', 'positive']

# --- Inputs ----------------------------------------------------------------
data = pd.read_csv('X_test.csv')[COLUMNS]
tfidf = pd.read_csv('tfidf.csv')

# Unfiltered copy of the test frame for the browser-side callbacks.
source = ColumnDataSource(dict(data))

In [19]:
# Centered page title rendered at the top of the report layout.
header = Div(
    text='<h1 style="text-align: center">US Airlines Sentiment Analysis ML Model Report</h1>',
)

In [20]:
# Raw counts: rows are actual labels, columns are predicted labels.
CONFUSION_MATRIX = confusion_matrix(data[ACTUAL_LABEL], data[PREDICTED_LABEL])

# Row-normalised percentages (each row sums to 100).
# A label that only ever shows up in the predictions produces an all-zero row;
# dividing by that row total would yield NaN and poison the colour-mapper
# bounds computed from this matrix, so such rows are left at 0 instead.
_row_totals = CONFUSION_MATRIX.sum(axis=1, keepdims=True)
NORMALIZED_CONFUSION_MATRIX = np.divide(
    CONFUSION_MATRIX * 100.0,
    _row_totals,
    out=np.zeros(CONFUSION_MATRIX.shape, dtype=float),
    where=_row_totals != 0,
)

In [21]:
## Heatmap
# Flatten the normalised confusion matrix into one record per cell, walking
# the matrix row by row (row index = actual class, column index = predicted).
cell_index = [(i_row, j_col) for i_row in range(N_CLASS) for j_col in range(N_CLASS)]
x_axis_list = [CATEGORY_DICT[i_row] for i_row, _ in cell_index]   # actual label per cell
y_axis_list = [CATEGORY_DICT[j_col] for _, j_col in cell_index]   # predicted label per cell
total_count = [NORMALIZED_CONFUSION_MATRIX[i_row][j_col] for i_row, j_col in cell_index]

heatmap_source = pd.DataFrame({
    'ActualLabel': x_axis_list,
    'PredictedLabel': y_axis_list,
    'TotalCount': total_count,
})
heatmap_source = ColumnDataSource(dict(heatmap_source))

# Categorical ranges: actual labels on the y axis (reverse alphabetical),
# predicted labels on the x axis (alphabetical).
correct_y_range = sorted(set(x_axis_list), reverse=True)
correct_x_range = sorted(set(y_axis_list))

# Nine-step viridis palette, mapped over the cell values with one unit of
# padding on either side.
colors = viridis(9)
cell_values = heatmap_source.data['TotalCount']
mapper = LinearColorMapper(
    palette=colors,
    low=cell_values.min() - 1,
    high=cell_values.max() + 1,
)

heatmap = figure(
    title="Confusion Matrix",
    x_axis_location="above",
    tools="save",
    plot_width=400,
    plot_height=300,
    tooltips=[
        ('Actual', '@ActualLabel'),
        ('Predicted', '@PredictedLabel'),
        ('Total(%)', '@TotalCount'),
    ],
    y_range=correct_y_range,
    x_range=correct_x_range,
)

# Strip padding, grid lines, axis lines and tick marks.
heatmap.x_range.range_padding = 0
heatmap.y_range.range_padding = 0
heatmap.grid.grid_line_color = None
heatmap.axis.axis_line_color = None
heatmap.axis.major_tick_line_color = None

# Cells are slightly smaller than one category step, leaving a gap between them.
rectwidth = 0.9
heatmap.rect(
    x="PredictedLabel",
    y="ActualLabel",
    width=rectwidth,
    height=rectwidth,
    source=heatmap_source,
    fill_color={'field': 'TotalCount', 'transform': mapper},
    line_color=None,
)

color_bar = ColorBar(
    color_mapper=mapper,
    major_label_text_font_size="5pt",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    label_standoff=6,
    border_line_color=None,
    location=(0, 0),
)
heatmap.add_layout(color_bar, 'right')

In [22]:
def get_dropdown_options(n_class,category_dict,confusionMatrix):
    """Build one dropdown label per confusion-matrix cell.

    Cells are visited row by row. Each label has the form
    ``'Actual : <a>  Predicted : <p>  Total : <count>'``, where ``<a>`` and
    ``<p>`` come from ``category_dict`` and ``<count>`` is
    ``str(confusionMatrix[row, col])``.

    Parameters
    ----------
    n_class : int
        Number of rows/columns to walk.
    category_dict : mapping
        Maps a class index to its (string) label.
    confusionMatrix : object indexable as ``confusionMatrix[row, col]``
        Source of the per-cell totals.

    Returns
    -------
    list of str
        ``n_class * n_class`` labels in row-major order.
    """
    return [
        'Actual : ' + category_dict[actual_idx]
        + '  Predicted : ' + category_dict[predicted_idx]
        + '  Total : ' + str(confusionMatrix[actual_idx, predicted_idx])
        for actual_idx in range(n_class)
        for predicted_idx in range(n_class)
    ]

In [23]:
# One selectable entry per (actual, predicted) cell of the raw confusion matrix.
DROPDOWN_OPTIONS = get_dropdown_options(
    n_class=N_CLASS,
    category_dict=CATEGORY_DICT,
    confusionMatrix=CONFUSION_MATRIX,
)

In [24]:
### Set up widgets
# The report text is produced through the `sk_metrics` module alias rather
# than the bare `classification_report` name: this cell rebinds that name to
# the PreText widget below (the layout cell refers to the widget by that
# name), so calling the bare name would fail with "object is not callable"
# whenever this cell is executed a second time in the same kernel.
Classification_Report = sk_metrics.classification_report(data[ACTUAL_LABEL], data[PREDICTED_LABEL])

# Preformatted text blocks: a caption and the report itself.
classification_report_summary = PreText(text='Classification Report', width=500)
classification_report = PreText(text=Classification_Report, width=500)

# Selector over the confusion-matrix cells; starts with nothing selected.
dropdown = Select(title="Actual Vs Predicted :", value='', options=DROPDOWN_OPTIONS)

In [25]:
## Set up table
# The table gets its own data source (initially the full test frame) so that
# the dropdown callback can replace its contents without touching `source`.
table_source = ColumnDataSource(dict(data))

# Only the 'text' column is displayed; the remaining columns stay in the
# source and are read by the callbacks.
table_columns = [TableColumn(field='text', title='text')]
data_table = DataTable(
    source=table_source,
    columns=table_columns,
    width=800,
    height=280,
    css_classes=["my_table"],
)

# Inline stylesheet targeting the "my_table" CSS class assigned above.
table_style = Div(text="""
<style>
.my_table{
font-weight:bold !important;
border-collapse: collapse;
width: 100%;
}
</style>
""")

In [26]:
# Set up donut plot
# The source starts empty; the JavaScript callbacks fill in the four columns.
donut_source = ColumnDataSource(dict(target_prob=[], probability_score=[], angle=[], color=[]))

donut_chart = figure(
    plot_height=300,
    plot_width=400,
    title="Probability",
    toolbar_location=None,
    tools="hover",
    tooltips="@target_prob: @probability_score",
    x_range=(-.5, .5),
)

# Each wedge starts where the previous one ended: start/end angles are the
# running sum of the 'angle' column.
donut_chart.annular_wedge(
    x=0,
    y=1,
    inner_radius=0.15,
    outer_radius=0.25,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    legend='target_prob',
    source=donut_source,
)

# Hide axes and grid.
donut_chart.grid.grid_line_color = None
donut_chart.axis.axis_label = None
donut_chart.axis.visible = False



In [27]:
# Set up top keywords
# Min-max scale the 'tfidf' column into a new 'normalize_tfidf' column
# (this adds the column to `tfidf` in place).
tfidf_min = tfidf['tfidf'].min()
tfidf_max = tfidf['tfidf'].max()
tfidf['normalize_tfidf'] = (tfidf['tfidf'] - tfidf_min) / (tfidf_max - tfidf_min)

# token -> normalised weight lookup, handed to the table-selection callback.
tfidf_dict = pd.Series(tfidf['normalize_tfidf'].values, index=tfidf['token']).to_dict()

# Starts empty; the table-selection callback fills it in.
tokens_source = ColumnDataSource(dict(tokens=[], weight=[], color=[]))

hbar_plot = figure(
    y_range=tokens_source.data['tokens'],
    x_range=(0, 1),
    plot_width=400,
    plot_height=300,
    title="Top Weighted tokens",
    toolbar_location=None,
    tooltips="@tokens: @weight",
)
hbar_plot.hbar(
    y='tokens',
    right='weight',
    height=0.8,
    source=tokens_source,
    color='color',
)
hbar_plot.grid.grid_line_color = None

In [28]:
# Set up callbacks
#
# Dropdown callback (runs in the browser when `dropdown.value` changes):
#   1. parses the actual / predicted labels out of the selected option text,
#   2. replaces the table contents with the rows of `source` matching both labels,
#   3. redraws the donut from the first matching row, or clears it when the
#      selected confusion-matrix cell has no rows.
#
# Changes from the earlier version of this callback:
#   * an empty match no longer pushes `undefined` probabilities (and NaN
#     angles) into the donut source;
#   * the label column names come from ACTUAL_LABEL / PREDICTED_LABEL instead
#     of being repeated as string literals inside the JavaScript;
#   * the donut data is built as a fresh object rather than mutated in place.
dropdown_on_change_update = """
        // Option text has the form "Actual : <a>  Predicted : <p>  Total : <n>",
        // so splitting on single spaces puts <a> at index 2 and <p> at index 6.
        var parts = (dropdown.value).split(" ");
        var actual_val = parts[2];
        var predict_val = parts[6];
        if (actual_val === undefined || predict_val === undefined) {
            return;
        }

        // Filter the full data set down to the rows of the selected cell.
        var full = source.data;
        var columns = Object.keys(full);
        var filtered = {};
        for (var c = 0; c < columns.length; c++) {
            filtered[columns[c]] = [];
        }
        var n_rows = full[ACTUAL_LABEL].length;
        for (var i = 0; i < n_rows; i++) {
            if (full[ACTUAL_LABEL][i] == actual_val && full[PREDICTED_LABEL][i] == predict_val) {
                for (var j = 0; j < columns.length; j++) {
                    filtered[columns[j]].push(full[columns[j]][i]);
                }
            }
        }
        table_source.data = filtered;

        // Donut: class probabilities of the first matching row (one column
        // per class name). Left empty when nothing matched.
        var palette = ['#1f77b4', '#ff7f0e', '#2ca02c'];
        var donut = {target_prob: [], probability_score: [], angle: [], color: []};
        if (filtered[ACTUAL_LABEL].length > 0) {
            for (var k = 0; k < CLASS_NAME.length; k++) {
                var prob = filtered[CLASS_NAME[k]][0];
                donut['target_prob'].push(CLASS_NAME[k]);
                donut['probability_score'].push(prob);
                donut['angle'].push(prob * 2 * Math.PI);
                donut['color'].push(palette[k % palette.length]);
            }
        }
        donut_source.data = donut;

        donut_source.change.emit();
        table_source.change.emit();
"""


callback_1 = CustomJS(
    args=dict(
        table_source=table_source,
        donut_source=donut_source,
        tokens_source=tokens_source,
        source=source,
        dropdown=dropdown,
        CLASS_NAME=CLASS_NAME,
        CATEGORY_DICT=CATEGORY_DICT,
        ACTUAL_LABEL=ACTUAL_LABEL,
        PREDICTED_LABEL=PREDICTED_LABEL,
    ),
    code=dropdown_on_change_update,
)
dropdown.js_on_change('value', callback_1)



# Table-selection callback (runs in the browser when the selected row changes):
#   1. redraws the donut from the selected row's class-probability columns,
#   2. parses the row's 'tokens' string (assumed to look like a Python list
#      repr, e.g. "['a', 'b']" -- TODO confirm against X_test.csv), looks each
#      token up in `tfidf_dict`, keeps the N_TOP heaviest, and shows them in
#      the bar chart in their order of appearance in the row.
#
# Changes from the earlier version of this callback:
#   * `cb_obj.indices` is an array; the first element is used as the row index
#     and an empty selection is ignored (previously this ended in a TypeError
#     after writing `undefined` values into the donut);
#   * at most N_TOP tokens are kept (the old `N_TOP >= i` test kept N_TOP + 1,
#     one more than the number of available colours);
#   * weights are ranked numerically instead of as strings;
#   * tokens that share a weight are no longer collapsed into one entry;
#   * dictionary lookups use hasOwnProperty, so tokens such as "constructor"
#     do not match inherited object properties;
#   * the redundant 3x loop and the console.log debugging output are gone.
table_on_change_update = """
            var selected = cb_obj.indices;
            if (!selected || selected.length === 0) {
                // Nothing selected: leave both charts as they are.
                return;
            }
            var row = selected[0];
            var data = table_source.data;

            // ---- Donut: one wedge per class, angle proportional to probability.
            var palette = ['#1f77b4', '#ff7f0e', '#2ca02c'];
            var donut = {target_prob: [], probability_score: [], angle: [], color: []};
            for (var k = 0; k < CLASS_NAME.length; k++) {
                var prob = data[CLASS_NAME[k]][row];
                donut['target_prob'].push(CLASS_NAME[k]);
                donut['probability_score'].push(prob);
                donut['angle'].push(prob * 2 * Math.PI);
                donut['color'].push(palette[k % palette.length]);
            }
            donut_source.data = donut;
            donut_source.change.emit();

            // ---- Tokens: strip the surrounding brackets, split on commas,
            // then strip whitespace and the surrounding quotes of each item.
            var raw = data['tokens'][row];
            var pieces = (typeof raw === 'string') ? raw.slice(1, -1).split(',') : [];
            var has = Object.prototype.hasOwnProperty;
            var seen = {};
            var weighted = [];   // unique known tokens, in order of appearance
            for (var t = 0; t < pieces.length; t++) {
                var key = pieces[t].trim().slice(1, -1);
                if (has.call(seen, key) || !has.call(tfidf_dict, key)) {
                    continue;
                }
                var weight = parseFloat(tfidf_dict[key]);
                if (!isFinite(weight)) {
                    continue;
                }
                seen[key] = true;
                weighted.push({token: key, weight: weight, order: weighted.length});
            }

            // Rank by weight (descending); ties keep their order of appearance.
            var ranked = weighted.slice().sort(function (a, b) {
                return (b.weight - a.weight) || (a.order - b.order);
            });
            var keep = {};
            for (var r = 0; r < ranked.length && r < N_TOP; r++) {
                keep[ranked[r].token] = true;
            }

            // Emit the kept tokens in their original order of appearance.
            var top_tokens = [];
            var top_weights = [];
            for (var w = 0; w < weighted.length; w++) {
                if (has.call(keep, weighted[w].token)) {
                    top_tokens.push(weighted[w].token);
                    top_weights.push(weighted[w].weight);
                }
            }

            var update_token_source = {};
            update_token_source['tokens'] = top_tokens;
            update_token_source['weight'] = top_weights;
            update_token_source['color'] = COLOR_CODE_LIST.slice(0, top_tokens.length);

            // Update the categorical range before the data so every bar has a
            // matching factor when the chart re-renders.
            hbar_plot.y_range.factors = top_tokens;
            tokens_source.data = update_token_source;
            tokens_source.change.emit();

        """

callback_2 = CustomJS(
    args=dict(
        table_source=table_source,
        donut_source=donut_source,
        CATEGORY_DICT=CATEGORY_DICT,
        CLASS_NAME=CLASS_NAME,
        tfidf_dict=tfidf_dict,
        tokens_source=tokens_source,
        N_TOP=N_TOP,
        hbar_plot=hbar_plot,
        COLOR_CODE_LIST=COLOR_CODE_LIST,
    ),
    code=table_on_change_update,
)
table_source.selected.js_on_change('indices', callback_2)

In [29]:
# Right-hand side: data table on top, the two charts side by side underneath,
# followed by the Div carrying the table stylesheet.
dashboard_layout = column(
    data_table,
    row(hbar_plot, donut_chart),
    table_style,
)

# Left-hand side: selector, confusion-matrix heatmap and classification report.
control_summary = column(
    dropdown,
    heatmap,
    classification_report_summary,
    classification_report,
)

layout = row(control_summary, dashboard_layout)

# Page = title banner above the two-column body.
final_layout = column(header, layout)

# Register the page as a root of the current Bokeh document.
curdoc().add_root(final_layout)
#show(final_layout)

In [30]:
# Direct Bokeh output to layout.html, then show the assembled report.
output_file(filename="layout.html")
show(final_layout)