In [21]:
import sys
sys.path.append("..")

import dash
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import plotly.graph_objs as go
import pickle
import numpy
import random

import warnings
warnings.filterwarnings('ignore')

from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash import Input, Output, State
from plotly import colors as plotly_colors
from wordcloud import WordCloud
from src.webapp_utility import Loader


In [22]:
PLOTLY_LOGO = "https://images.plot.ly/logo/new-branding/plotly-logomark.png"
TEMPLATE = 'plotly_white'

In [23]:
app_loader = Loader()

Loading full count vectorizers... Done
Loading full lda model... 

FileNotFoundError: [Errno 2] No such file or directory: '../data/webapp/topic_distributions.json'

In [3]:
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

##### Navbar

In [4]:
NAVBAR = dbc.Navbar(
    children=[
        html.A(
            # Use row and col to control vertical alignment of logo / brand
            dbc.Row(
                [
                    dbc.Col(html.Img(src=PLOTLY_LOGO, height="30px")),
                    dbc.Col(
                        dbc.NavbarBrand("Illinois Cases Analysis", className="ml-2"),
                        style={"marginLeft": 10}
                    ),
                ],
                align="center",
                className="g-0",
            ),
            href="https://github.com/tomfran/legal-texts-information-retrieval",
            style={"margin": 10, "textDecoration": "none"}
        )
    ],
    color="dark",
    dark=True,
    sticky="top",
)

##### Searchbox

In [5]:
SEARCH_BOX = dbc.InputGroup(
    [
        dbc.Button("Search", id="search-button", n_clicks=0),
        dbc.Input(id="search-input", placeholder="cocaine, drug - gun, weapon"),
    ],
    style={"marginTop": 20}
)

Word Analysis

In [6]:
WORD_DROPDOWN = dcc.Dropdown(id="words-drop", clearable=False, style={"font-size": 12})
CONTEXT_GRAPH = dcc.Loading(
    id="loading-similar-context-words",
    children=[dcc.Graph(id="similar-context-graph")],
    type="default",
)
GRAMS_GRAPH = dcc.Loading(
    id="loading-grams", 
    children=[dcc.Graph(id="grams-graph")],
    type="default",
)
SEMANTIC_YEAR_SLIDER = dcc.Slider(
    id="semantic-year-slider",
    step=1,
    tooltip={"placement": "bottom", "always_visible": True},
)
SEMANTIC_YEARLY_SHIFT_GRAPH = dcc.Loading(
    id="loading-yearly-semantic", 
    children=[dcc.Graph(id="semantic-yearly-shift-graph")],
    type="default",
)
SEMANTIC_YEARLY_FIRST = dcc.Loading(
    id="loading-semantic-yearly-first",
    children=[dcc.Graph(id="semantic-yearly-first-graph")],
    type="default",
)
SEMANTIC_YEARLY_SECOND = dcc.Loading(
    id="loading-semantic-yearly-second",
    children=[dcc.Graph(id="semantic-yearly-second-graph")],
    type="default",
)

SEMANTIC_EPOCH_SHIFT_GRAPH = dcc.Loading(
    id="loading-epoch-semantic", 
    children=[dcc.Graph(id="semantic-epoch-shift-graph")],
    type="default",
)
SEMANTIC_EPOCH_FIRST = dcc.Loading(
    id="loading-semantic-epoch-first",
    children=[dcc.Graph(id="semantic-epoch-first-graph")],
    type="default",
)
SEMANTIC_EPOCH_SECOND = dcc.Loading(
    id="loading-semantic-epoch-second",
    children=[dcc.Graph(id="semantic-epoch-second-graph")],
    type="default",
)

SEMANTIC_SHIFT_TABS = dcc.Tabs(
    id="word-semantic-shift-tabs",
    value="Epoch",
    children=[
        dcc.Tab(
            label="Epoch",
            value='Epoch',
            children=[
                dbc.Row([dbc.Col(SEMANTIC_EPOCH_SHIFT_GRAPH, md=6), dbc.Col(SEMANTIC_EPOCH_FIRST), dbc.Col(SEMANTIC_EPOCH_SECOND)])
            ]
        ),
        dcc.Tab(
            label="Yearly",
            value='Yeary',
            children=[
                SEMANTIC_YEAR_SLIDER,
                dbc.Row([dbc.Col(SEMANTIC_YEARLY_SHIFT_GRAPH, md=8), dbc.Col(SEMANTIC_YEARLY_FIRST), dbc.Col(SEMANTIC_YEARLY_SECOND)])
            ]
        ),
    ]
)

WORD_GENERIC_TOPIC_DISTRIBUTION_GRAPH = dcc.Loading(
    id="loading-word-topics", 
    children=[dcc.Graph(id="word-topics-graph")],
    type="default",
)

WORD_TOPIC_TABS = dcc.Tabs(
    id="word-topics-tabs",
    value="Generic",
    children=[
        dcc.Tab(
            label="Generic",
            value='Generic'
        ),
        dcc.Tab(
            label="Specific",
            value='Specific'
        ),
    ]
)

In [7]:
WORD_CARD = [
    dbc.CardHeader(html.H5("Word analysis")),
    dbc.Alert(
        "Not enough data to render these plots, please adjust the filters",
        id="no-word-data-alert",
        color="warning",
        style={"display": "none"},
    ),
    dbc.CardBody(
        [
            dbc.Row(
                [
                    dbc.Col([
                        WORD_DROPDOWN,
                        CONTEXT_GRAPH
                    ]),
                    dbc.Col([GRAMS_GRAPH], md=8)
                ]
            ),
            SEMANTIC_SHIFT_TABS,
            dbc.Row([WORD_TOPIC_TABS, WORD_GENERIC_TOPIC_DISTRIBUTION_GRAPH])
        ]
    )
]

Topic Analysis

In [8]:
TOPIC_WORDS_GRAPHS = dbc.Row(
    [
        dbc.Col(
            dcc.Loading(
                id="loading-topic-top-words",
                children=[dcc.Graph(id="topic-top-words-graph")],
                type="default",
            )
        ),
        dbc.Col(
            [
                dcc.Tabs(
                    id="tabs",
                    children=[
                        dcc.Tab(
                            label="Treemap",
                            children=[
                                dcc.Loading(
                                    id="loading-treemap",
                                    children=[dcc.Graph(id="topic-treemap")],
                                    type="default",
                                )
                            ],
                        ),
                        dcc.Tab(
                            label="Wordcloud",
                            children=[
                                dcc.Loading(
                                    id="loading-wordcloud",
                                    children=[
                                        dcc.Graph(id="topic-wordcloud")
                                    ],
                                    type="default",
                                )
                            ],
                        ),
                    ],
                )
            ],
            md=8,
        ),
    ]
)

In [9]:
TOPIC_INFO_GRAPHS = dbc.Row(
    [
        dbc.Col(
            dcc.Loading(
                id="loading-topic-years",
                children=[
                    dcc.Graph(id="topic-years-histogram")
                ],
                type="default",
            )
        ),
        dbc.Col(
            dcc.Loading(
                id="loading-topic-courts",
                children=[
                    dcc.Graph(id="topic-courts-graph")
                ],
                type="default",
            ),
            md=4
        )
    ]
)

In [10]:
TOPIC_CARD = [
    dbc.CardHeader(id="topic-header", children=[html.H5("Topic 5 - Driving Incidents", id="selected_topic_name")]),
    dbc.Alert(
        "Not enough data to render these plots, please adjust the filters",
        id="no-topic-data-alert",
        color="warning",
        style={"display": "none"},
    ),
    dbc.CardBody(
        [TOPIC_WORDS_GRAPHS, TOPIC_INFO_GRAPHS]
    )
]

##### Body

In [11]:
BODY = dbc.Container(
    [
        SEARCH_BOX,
        dbc.Card(WORD_CARD, style={"marginTop": 20}),
        dbc.Card(TOPIC_CARD, style={"marginTop": 20, "marginBottom": 30}),
    ],
    className="mt-12",
)

##### Callbacks

In [12]:
@app.callback(
    [
        Output("words-drop", "options"),
        Output("words-drop", "value"),
    ],
    Input('search-button', 'n_clicks'),
    State('search-input', 'value')
)
def populate_search_dropdown(n_clicks, searches):
    if not searches:
        return [], None
    options = []
    for search in searches.split("-"):
        search.strip()
        options.append({"label": search, "value": search})
    return options, options[0]['value']

In [13]:
@app.callback(
    Output("similar-context-graph", "figure"),
    Input("words-drop", "value")
)
def get_similar_context_graph(word):
    if not word:
        return {}
    word = word.strip()
    sim = app_loader.get_n_similar(word=word, n=15, model_type="full")
    return px.histogram(
        y=[word[0] for word in sim],
        x=[word[1] for word in sim],
        orientation="h",     
        title="Similar context",
        color_discrete_sequence=['darkturquoise']
    ).update_layout(
        template=TEMPLATE,
        xaxis_title='',
        yaxis_title=''
    )

In [14]:
@app.callback(
    Output('grams-graph', 'figure'),
    [
        Input('search-button', 'n_clicks'),
        Input("semantic-year-slider", "value")
    ],
    [State('search-input', 'value')])
def update_output(n_clicks, year, searches):
    if not searches:
        return {}
    
    searches = searches.split("-")
    fig = go.Figure(layout=go.Layout(
        title="Semantic shift - yearly",
        template=TEMPLATE,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(count=5,
                         label="5y",
                         step="year",
                         stepmode="backward"),
                    dict(count=10,
                         label="10y",
                         step="year",
                         stepmode="backward"),
                    dict(count=25,
                         label="25y",
                         step="year",
                         stepmode="backward"),
                    dict(count=50,
                         label="50y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            title="year",
            type="date"
        ),
        yaxis=dict(title="freq")
    ))
    for search in searches:
        search = [word.strip() for word in search.split(",")
        grams = app_loader.get_freq_distribution(grams, interval=10)
        if not semantic_shift:
            continue
        fig.add_trace(go.Scatter(x=[year_perc[0] for year_perc in grams], y=[year_perc[1] for year_perc in grams],
                            mode='lines',
                            name=search))
    return fig

SyntaxError: invalid syntax (<ipython-input-14-d5e3c424f7d0>, line 52)

In [None]:
@app.callback(
    Output('semantic-epoch-shift-graph', 'figure'),
    [
        Input('search-button', 'n_clicks'),
    ],
    [State('search-input', 'value')])
def update_output(n_clicks, searches):
    if not searches:
        return {}
    
    searches = searches.split("-")
    fig = go.Figure(layout=go.Layout(
        title="Semantic shift - epoch",
        template=TEMPLATE,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(count=5,
                         label="5y",
                         step="year",
                         stepmode="backward"),
                    dict(count=10,
                         label="10y",
                         step="year",
                         stepmode="backward"),
                    dict(count=25,
                         label="25y",
                         step="year",
                         stepmode="backward"),
                    dict(count=50,
                         label="50y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            title="year",
            type="date"
        ),
        yaxis=dict(title="freq")
    ))
    for search in searches:
        search = search.strip()
        semantic_shift = app_loader.get_semantic_data(search)['ten_year']
        if not semantic_shift:
            continue
        fig.add_trace(go.Scatter(x=[year_perc[0] for year_perc in semantic_shift], y=[year_perc[1] for year_perc in semantic_shift],
                            mode='lines',
                            name=search))
    return fig

In [None]:
@app.callback(
    [
        Output('semantic-yearly-shift-graph', 'figure'),
        Output("semantic-year-slider", "marks"),
        Output("semantic-year-slider", "min"),
        Output("semantic-year-slider", "max"),
        Output("semantic-year-slider", "value"),
    ],
    [
        Input('search-button', 'n_clicks'),
        Input("semantic-year-slider", "value")
    ],
    [State('search-input', 'value')])
def update_output(n_clicks, year, searches):
    if not searches:
        return {}, {}, 0, 0, 0
    
    searches = searches.split("-")
    fig = go.Figure(layout=go.Layout(
        title="Semantic shift - epoch",
        template=TEMPLATE,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(count=5,
                         label="5y",
                         step="year",
                         stepmode="backward"),
                    dict(count=10,
                         label="10y",
                         step="year",
                         stepmode="backward"),
                    dict(count=25,
                         label="25y",
                         step="year",
                         stepmode="backward"),
                    dict(count=50,
                         label="50y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            title="year",
            type="date"
        ),
        yaxis=dict(title="freq")
    ))
    for search in searches:
        search = search.strip()
        semantic_shift = app_loader.get_semantic_data(search, base_year=year)['one_year']
        if not semantic_shift:
            continue
        min_year, max_year = semantic_shift[0][0], semantic_shift[-1][0]
        fig.add_trace(go.Scatter(x=[year_perc[0] for year_perc in semantic_shift], y=[year_perc[1] for year_perc in semantic_shift],
                            mode='lines',
                            name=search))
    return fig, {min_year: f"{min_year}", max_year: f"{max_year}"}, min_year, max_year, max_year

In [None]:
@app.callback(
    Output("word-topics-graph", "figure"),
    [
        Input('search-button', 'n_clicks'),
        Input('word-topics-tabs', 'value')
    ],
    State('search-input', 'value'))
def get_generic_topics_radar_graph(n_clicks, tab, searches):
    if not searches:
        return {}

    fig = go.Figure(layout=go.Layout(
            title="Topic distribution",
            template=TEMPLATE,
    )               )
    
    searches = searches.split("-")
    for search in searches:
        search = [word.strip() for word in search.split(",")]
        
        topics = app_loader.get_topics_dist(search, model="big" if tab == "Generic" else "small")
        values = numpy.array(list(topics.values()), dtype='f') * 100 / max(topics.values())
        
        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=[str(name) for name in list(topics.keys())],
            fill='toself',
            name=search,
            hoverinfo="text",
            textposition="top center",       
            hovertext=[f"Topic {format(value, '.2f')}%" for topic, value in zip(topics, values)]
        ))

    fig.update_layout(
      polar=dict(
        radialaxis=dict(
          visible=False,
          range=[0, 100]
        )),
      showlegend=True
    )
    return fig

In [None]:
@app.callback(
    [
        Output("topic-header", "children"),
        Output("topic-top-words-graph", "figure"),
        Output("topic-treemap", "figure"),
        Output("topic-wordcloud", "figure")
    ],
    [
        Input('word-topics-graph', 'clickData'),
        Input('word-topics-tabs', 'value')
    ],)
def get_topic_words_radar_graph(selected_topic, tab):
    if not selected_topic:
        return [html.H5("Select a topic")], {}, {}, {}
    topic_id = selected_topic['points'][0]['pointNumber']
    
    sim = app_loader.get_topics_words(n=80, model="big" if tab == "Generic" else "small")[topic_id]
    
    words = [word[0] for word in sim]
    freqs = [word[1] for word in sim]
    
    treemap_trace = go.Treemap(
        labels=words[:40], parents=[""] * len(words[:40]), values=freqs
    )
    treemap_layout = go.Layout({"margin": dict(t=0, b=0, l=0, r=0, pad=0)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}
    
    wc = WordCloud().generate_from_frequencies(frequencies={word[0]: word[1] for word in sim})
    wordcloud = get_wordcloud_graphs_topic_words(wc)
    
    return [html.H5(f"{tab} Topic {topic_id}")],px.histogram(
        y=words[:20][::-1],
        x=freqs[:20][::-1],
        orientation="h",     
        color_discrete_sequence=['darkturquoise']
    ).update_layout(
        template=TEMPLATE,
        xaxis_title='',
        yaxis_title='',
        height=550
    ), treemap_figure, wordcloud

In [15]:
@app.callback(
    Output("topic-years-histogram", "figure"),
    [
        Input('word-topics-graph', 'clickData'),
        Input('word-topics-tabs', 'value')
    ],)
def get_topic_years_histogram(selected_topic, tab):
    if not selected_topic:
        return {}
    topic_id = selected_topic['points'][0]['pointNumber']
    topic_dists = app_loader.get_topics_date_distribution(interval=5)[topic_id] 
    data = [
        {
            "x": list(topic_dists[topic_id].keys()),
            "y": list(topic_dists[topic_id].values()),
            "text": list(topic_dists[topic_id].keys()),
            "type": "bar",
            "name": "",
        }
    ]
    layout = {
        "autosize": True,
        "margin": dict(t=10, b=20, l=40, r=0, pad=4),
        "xaxis": dict(rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(count=5,
                     label="5y",
                     step="year",
                     stepmode="backward"),
                dict(count=10,
                     label="10y",
                     step="year",
                     stepmode="backward"),
                dict(count=25,
                     label="25y",
                     step="year",
                     stepmode="backward"),
                dict(count=50,
                     label="50y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ])
        ),
                       rangeslider=dict(
                            visible=True
                        ),
                       title="year",
                       type="date",
                       showticklabels=True, )
    }
    return {"data": data, "layout": layout}

In [16]:
@app.callback(
    Output("topic-courts-graph", "figure"),
    [
        Input('word-topics-graph', 'clickData'),
        Input('word-topics-tabs', 'value')
    ],)
def get_topic_courts_distribution_radar_graph(selected_topic, tab):
    if not selected_topic:
        return {}

    court_freqs = {}
    
    topic_id = selected_topic['points'][0]['pointNumber']
    
    fig = go.Figure(layout=go.Layout(
            title="Courts distribution",
            template=TEMPLATE,
    )               )
    
    values = [topics[topic_id] * 100 for topics in court_freqs.values()]
    max_value = max(values)
        
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=[court.split("Illinois")[1].strip() for court in list(court_freqs.keys())],
        fill='toself',
        name=str(topic_id),
        hoverinfo="text",
        textposition="top center",       
        hovertext=[f"{round(value, 2)}%" for value in values],
    ))

    fig.update_layout(
      polar=dict(
        radialaxis=dict(
          visible=False,
          range=[0, int(max_value * 1.5) if max_value < 50 else int(max_value + 10)]
        )
      )
    )
    return fig

In [17]:
app.layout = html.Div(children=[NAVBAR, BODY])

app.run_server(mode='jupyterlab', dev_tools_ui=True, #debug=True, 
               dev_tools_hot_reload =True, threaded=True)

In [18]:
def _terminate_server_for_port(host, port):
        shutdown_url = "http://{host}:{port}/_shutdown_{token}".format(
            host=host, port=port, token=JupyterDash._token
        )
        try:
            response = requests.get(shutdown_url)
        except Exception as e:
            pass

In [19]:
# _terminate_server_for_port("localhost", 8050)