# "Computing Koselleck" Dashboard

* Please be patient as this page loads. It may take up to a few minutes. Once it loads you'll see a few graphs and tables about the word "culture".
* You can type in another word and hit "Analyze" to see the same graphs and tables for that word.
* You can also **scroll to the bottom of the page to see a list of the most changing words**. Click on any of those words to see its data.
* Feel free to edit the annotation data in the top right panel. Words that have annotation data and are marked as valid have a <span style="color:lightblue">blue</span> button instead of a gray one at the bottom of the page; words marked as invalid (due to OCR errors, etc.) have a <span style="color:lightcoral">red</span> background.

In [1]:
from ipynb.fs.full.koselleck import *

In [2]:
# Status area shown at the top of the page; later cells print progress
# messages into it and clear it when they are done.
topout=widgets.Output()
display(topout)
with topout:
    # NOTE(review): `timer=` is not a keyword of the builtin print(), so
    # `print` is presumably overridden by the star import above — confirm.
    print('Bitte warten Sie einen Augenblick...',timer=False)

Output()

In [3]:
# Default word, bound to both `word` and `WORD`.
word=WORD='culture'

In [4]:
# Load the annotation sheet and order it by word rank.
with topout:
    print('Loading annotations...')
    dfanno = read_sheet()
    # Both rank columns are coerced to Python ints before sorting.
    for rank_col in ('rank_word', 'rank'):
        dfanno[rank_col] = dfanno[rank_col].apply(int)
    dfanno = dfanno.sort_values('rank_word')
    # A row counts as annotated as soon as its is_valid cell is non-empty.
    print(f'Done. So far {len(dfanno[dfanno.is_valid!=""])} words have been annotated.')

In [5]:
# Words ordered by their mean Foote novelty z-score, highest first.
novelty_means = get_all_novelty_scores().groupby('word').mean().reset_index()
novelty_ranked = novelty_means.sort_values('foote_novelty_z', ascending=False)
all_words_by_nov = list(novelty_ranked.drop_duplicates('word').word)

# Words in the order of the (rank_word-sorted) annotation sheet.
all_words_by_lnm_rank = list(dfanno.index)
okwords = set(dfanno.index) | set(all_words_by_lnm_rank)

# Keep only novelty-ranked words that are in the sheet with a positive rank.
all_words_by_nov = [
    w for w in all_words_by_nov
    if w in okwords and dfanno.loc[w]['rank'] > 0
]
# dfanno

In [None]:
# Word collections used further down to filter the index-button lists.
# Words returned by get_signif_novelty_words at p_peak=0.05, as a set.
signif_words=set(get_signif_novelty_words(p_peak=0.05))
# Used for the "keywords" button column.
keywords=get_keywords()
# Used for the "abstractions" button column.
abswords=get_words_ever_abs()

In [None]:
# define widgets
# Word entry box and its "Analyze" button.
widg_word=widgets.Text(value='culture',layout=Layout(width='auto'))
widg_word_submit = widgets.Button(description='Analyze',layout=Layout(width='125px'))
# Annotation fields; their descriptions match the sheet column names that
# do_save() writes ('Changes', 'From', 'To', 'Notes', 'is_valid').
widg_changes = widgets.Text(value='',description='Changes')
widg_from = widgets.Text(value='',description='From')
widg_to = widgets.Text(value='',description='To')
widg_notes = widgets.Text(value='',description='Notes')
# submit_word() reads this checkbox and uses it as its `force` value.
widg_force = widgets.Checkbox(value=True,description='Overwrite?')
# The value 'n' marks a word as invalid; anything else counts as valid.
widg_valid = widgets.Text(value='',description='is_valid')
savebutton = Button(description='Save annotations')
# NOTE(review): these two checkboxes are shown in the input row, but the
# only code that reads them in the visible cells is commented out.
widg_plot_nbr=widgets.Checkbox(value=True,description='Plot Nbrs?')
widg_plot_dist=widgets.Checkbox(value=True,description='Plot Dists?')
# Top input row and the annotation editing panel.
input_box=HBox([widg_word,widg_word_submit,widg_force,widg_plot_nbr,widg_plot_dist])
annobox=VBox([widg_changes,widg_from,widg_to,widg_notes,widg_valid,savebutton])

In [None]:
### BUTTONS
def make_button(i,w):
    """Return an index button for word `w`.

    The label is the 1-based position (`i+1`) followed by the word, and the
    background colour comes from `get_color(w)`.
    """
    label = f'{i+1}. {w}'
    size = Layout(width='125px', height='25px')
    look = ButtonStyle(button_color=get_color(w))
    return widgets.Button(description=label, layout=size, style=look)
def make_buttons(l):
    """Build one button per `(anything, word)` pair in `l`.

    The first element of each pair is ignored; buttons are numbered by their
    position in `l`.
    """
    made = []
    for pos, pair in enumerate(l):
        _, token = pair
        made.append(make_button(pos, token))
    return made



# define behaviors
def on_widg_word_submit(b):
    """Text-box handler: analyse the value of the widget that fired."""
    submit_word(b.value)


def on_widg_word_button_submit(b):
    """'Analyze' button handler: analyse whatever is in the word text box."""
    submit_word(widg_word.value)


def on_widg_word_button_submit_desc(b):
    """Index-button handler: analyse the last whitespace-separated token of
    the button label (labels look like '12. word')."""
    label_parts = b.description.split()
    submit_word(label_parts[-1])


# set behaviors
widg_word.on_submit(on_widg_word_submit)
widg_word_submit.on_click(on_widg_word_button_submit)


def do_save(b=None):
    """Write the annotation widgets' values for the current word to the sheet.

    The sheet is re-read first so the edit is applied to its latest state,
    and the global `dfanno` is rebound to that reloaded frame (including the
    edit). Index buttons for the word are recoloured with `get_color`, so a
    word saved without any annotation text falls back to the default colour
    instead of being painted blue. All messages go to `output_log`.

    b: the clicked button; unused, present because this is an on_click
       handler.
    """
    global dfanno
    with output_log:
        output_log.clear_output()
        print('Reloading metadata before writing',timer=False)
        dfanno=read_sheet()

        w=widg_word.value
        if w not in dfanno.index:
            # Assigning through .loc would append a row whose rank_word is
            # NaN, and the int() conversion below would then raise.
            print(f'Cannot save: "{w}" is not in the annotation sheet.')
            return

        dfanno.loc[w,'Changes']=widg_changes.value
        dfanno.loc[w,'From']=widg_from.value
        dfanno.loc[w,'To']=widg_to.value
        dfanno.loc[w,'Notes']=widg_notes.value
        dfanno.loc[w,'is_valid']=widg_valid.value

        # Same colour rule as when the buttons were first built.
        color=get_color(w)
        for button in allbuttons:
            if button.description.split()[-1]==w:
                button.style.button_color=color

        dfanno.rank_word=dfanno.rank_word.apply(int)
        print('Saving online')
        # Only the uploaded copy is sorted; `dfanno` keeps the sheet's order.
        write_sheet(dfanno.sort_values('rank_word'))
        print('Finished')
    
    
    

def is_anno(w):
    """Return True if word `w` has any annotation text in `dfanno`.

    A cell counts as annotation when its column is 'is_valid' or its column
    name starts with a character that equals its own upper-case form (e.g.
    'Changes', 'From'), and the cell holds non-blank text. Missing cells
    (None / NaN) and blank strings do not count. Words that are not in the
    sheet return False.
    """
    try:
        row=dfanno.loc[w]
    except KeyError:
        return False

    def _is_anno_column(name):
        # Empty or non-string column names cannot be annotation columns.
        if name=='is_valid':
            return True
        return isinstance(name,str) and bool(name) and name[0]==name[0].upper()

    def _has_text(value):
        # Missing cells come back as None/NaN rather than '', and NaN is the
        # only float that differs from itself.
        if value is None or (isinstance(value,float) and value!=value):
            return False
        return bool(str(value).strip())

    return any(_has_text(v) for k,v in row.items() if _is_anno_column(k))

def is_valid(w):
    """Return False if `w` is missing from `dfanno` or flagged 'n'.

    Any other `is_valid` value, including an empty one, counts as valid.
    """
    try:
        flag = dfanno.loc[w].is_valid
    except KeyError:
        # Words without a row in the sheet are treated as invalid.
        return False
    return flag != 'n'

def get_color(w,color=None):
    """Pick the button colour for word `w`.

    Invalid words are 'lightcoral', annotated valid words are 'lightblue',
    and everything else keeps the `color` passed in (None by default).
    """
    annotated = is_anno(w)
    valid = is_valid(w)
    if not valid:
        return 'lightcoral'
    return 'lightblue' if annotated else color


def do_anno_save(w,key,value):
    """Set column `key` of word `w` in the in-memory `dfanno` and echo the
    stored value. Nothing is written back to the online sheet here."""
    dfanno.loc[w,key]=value
    # NOTE(review): `output` is not defined in the visible cells (only
    # output_log, output_anno, ...); presumably it comes from the star
    # import — confirm, otherwise this raises NameError.
    with output:
        print(f'Saved: {dfanno.loc[w,key]}')

        
savebutton.on_click(do_save)


# buttons

lim = 200

# Invalid words are dropped from both rankings before any button is built.
all_words_by_lnm_rank = list(filter(is_valid, all_words_by_lnm_rank))
all_words_by_nov = list(filter(is_valid, all_words_by_nov))


def _ranked_pairs(ranking, allowed=None):
    """First `lim` (position, word) pairs of `ranking`, optionally keeping
    only words contained in `allowed`."""
    pairs = [
        pair for pair in enumerate(ranking)
        if allowed is None or pair[1] in allowed
    ]
    return pairs[:lim]


buttons_nov = make_buttons(_ranked_pairs(all_words_by_nov, signif_words))
buttons = make_buttons(_ranked_pairs(all_words_by_lnm_rank))
buttons_kw = make_buttons(_ranked_pairs(all_words_by_lnm_rank, keywords))
buttons_abs = make_buttons(_ranked_pairs(all_words_by_lnm_rank, abswords))
buttons_nov_and_ch = make_buttons(_ranked_pairs(all_words_by_lnm_rank, signif_words))


## dirs of change





# Flat list of every index button, so click wiring and recolouring can reach
# all of them in one loop.
allbuttons = [
    *buttons,
    *buttons_nov,
    *buttons_kw,
    *buttons_abs,
    *buttons_nov_and_ch,
]
buttond = {}

lim_dir = 50

# Only these direction-of-change vectors get a button column.
okvecs = {
    '+Abs', '+Conc', '+Ambig', '-Ambig', '+Collective', '+Indiv',
    '+Human', '+Object', '+Judg', '+Perc', '+Pos', '+Neg',
    '+Polit', '+Acad', '+Qual', '+Quant', '+Sing', '+Plural',
    '+Time', '+Space', '+Woman', '+Man',
}
for k, l in tqdm(get_top_changes_stz().items(), desc='Building widgets', position=0, disable=True):
    if k in okvecs:
        # First `lim_dir` valid words for this vector, numbered from 1.
        l = list(filter(is_valid, l))[:lim_dir]
        buttond[k] = make_buttons(list(enumerate(l)))
        allbuttons.extend(buttond[k])

# Every index button analyses the word shown in its own label.
for b in allbuttons:
    b.on_click(on_widg_word_button_submit_desc)

    
#         display(wboxes,output)
# Chunk sizes: buttons per row inside a column (i, i2) and columns per row
# of the direction grid (i3).
i=1
i2=1
i3=6
dirchangebox_l = []
for k,l in get_top_changes_stz().items():
    if k not in buttond: continue
    vec_buttons=buttond[k]
    # range() stops at len(...): the last partial chunk is still included,
    # and no empty trailing HBox rows are created.
    button_rows=[HBox(vec_buttons[n:n+i]) for n in range(0,len(vec_buttons),i)]
    dirchangebox_l.append(
        VBox(
            [markdwn(f'#### Top {lim_dir} most {k} words')] + button_rows,
            layout=Layout(border='1px dotted gray',width='125px')
        ),
    )
# Lay the per-vector columns out in rows of `i3` columns each.
dirchangebox=VBox([markdwn('### Directions of change')] + [
    HBox(dirchangebox_l[n:n+i3])
    for n in range(0,len(dirchangebox_l),i3)
])
# dirchangebox

i=1
wid='125px'


def _button_column(title, column_buttons, per_row):
    """Return a bordered VBox: a markdown `title` followed by
    `column_buttons` laid out `per_row` buttons to a row."""
    # range() stops at len(...), so no empty trailing rows are appended.
    rows=[
        HBox(column_buttons[n:n+per_row])
        for n in range(0,len(column_buttons),per_row)
    ]
    return VBox(
        [markdwn(title)] + rows,
        layout=Layout(border='1px dotted gray',width=wid)
    )


# One column per ranking, left to right.
magnchangebox=HBox([
    _button_column(f'#### Top {lim} most<br/>changing among novel words (p<0.05)',buttons_nov_and_ch,i),
    _button_column(f'#### Top {lim} most changing words',buttons,i),
    _button_column(f'#### Top {lim} most "novel" words (p<0.05)',buttons_nov,i),
    _button_column(f'#### Top {lim} most changing keywords',buttons_kw,i2),
    _button_column(f'#### Top {lim} most changing abstractions',buttons_abs,i2),
])
magnchangebox=VBox([markdwn('### Magnitude of change'), magnchangebox])
# magnchangebox


# Word index shown at the bottom of the page: magnitude columns on the
# left, direction-of-change grid on the right.
hbox_buttons_words=HBox([
    magnchangebox,
    dirchangebox,
])

# word_buttons


# display(hbox_buttons_words)

In [None]:
# One Output widget per dashboard panel; the layout cell arranges them and
# submit_word() clears and refills them for each word.
# NOTE(review): output_changerank is placed in the layout but nothing in the
# visible cells writes to it.
output_changerank=widgets.Output()
output_distmat=widgets.Output()
output_nov=widgets.Output()
output_nbr=widgets.Output()
output_anno=widgets.Output()
output_binaryneighb=widgets.Output()
output_topvecs=widgets.Output()
output_simchange=widgets.Output()
output_psgs=widgets.Output()
# Receives messages from do_save() and the error text from submit_word().
output_log=widgets.Output()
# Heading showing the word currently being analysed.
output_hdr=widgets.Output()

In [None]:
# Page skeleton, top to bottom: heading, input row, log, the two main panels
# side by side, two table panels, passages, then the word index buttons.
layout=VBox([
    output_hdr,
    input_box,
    
    output_log,
    
    HBox([
        # Left half: magnitude-of-change panels.
        VBox([
            HTML('<center><h2><u>Magnitude of Change</u></h2></center>'),
            output_changerank,
            output_distmat,
            output_nov,
            
        ],layout=Layout(width='50%',border='1px dotted gray')),
        # Right half: annotation editor and direction-of-change panels.
        VBox([
            HTML('<center><h2><u>Direction of Change</u></h2></center>'),
            output_anno,
            output_nbr,
            output_binaryneighb,
        ],layout=Layout(width='50%',border='1px dotted gray'))
    ]),
    HBox([
        VBox([output_topvecs],layout=Layout(width='40%',border='1px dotted gray')),
        VBox([output_simchange],layout=Layout(width='60%',border='1px dotted gray')),
    ]),
    HBox([
        VBox([
        output_psgs
        ])
    ]),
    HTML('<center><h2><u>Top Word Index</u></h2></center>'),
    hbox_buttons_words
])
# Bare expression as the last statement of the cell, so the notebook
# renders the widget tree.
layout

In [None]:
# print_img('../figures/fig.abstractifying-vs-changing.v1.png')

In [None]:
# for x in show('culture',force=True): display(x)

In [None]:
# fig_nbrs=plot_nbrs(word,width=8,height=5,ybin=20,ymin=1700,ymax=1900,max_rank1=5,max_rank=20,min_periods=1)
# fig_distmat=plot_historical_semantic_distance_matrix(word)
# fig_nov=plot_novelty_words(word,xlim0=1720,xlim1=1900)
# fig_nbrs

In [None]:
# imgs=show(word)
# for x in imgs: display(x)


In [None]:
# @output.capture(clear_output=True)
def submit_word(word,
        force=True,
        vnum='2021-06-20',
        ymin_nbr=YMIN,
        ymax_nbr=YMAX,
        ybin_nbr=YEARBIN,
        ymin_nov=YMIN,
        ymax_nov=YMAX,
        max_rank1_nbr=5,
        max_rank_nbr=25,
        min_periods_nbr=1,
        save=False,num_runs=5,num_proc=5
        ):
    """Refresh every dashboard panel for `word`.

    Puts `word` into the text box, pre-fills the annotation widgets from
    `dfanno`, then clears and refills the panels in this order: annotations,
    before/after-Sattelzeit neighbourhoods, top vectors of change, words with
    similar change, passages, novelty plot, neighbourhood plot and distance
    matrix. Progress messages go to `topout`, which is cleared at the end.

    If any panel raises, the `is_valid` widget is set to 'n' (not saved), the
    word's index buttons turn 'lightcoral' and the error is printed to
    `output_log`.

    word: the word to analyse.
    force: ignored; the "Overwrite?" checkbox value is used instead.
    vnum, save: forwarded to the plotting helpers.
    ymin_nbr, ymax_nbr, ybin_nbr, max_rank1_nbr, max_rank_nbr,
        min_periods_nbr: forwarded to `iplot_nbrs`.
    ymin_nov, ymax_nov: x-axis limits passed to `plot_novelty_words`.
    num_runs, num_proc: forwarded to `iplot_nbrs` and
        `iplot_historical_semantic_distance_matrix`.
    """
    widg_word.value=w=word
    # The checkbox always wins over the `force` argument.
    force=widg_force.value

    with topout:
        topout.clear_output()
        print(f'Gathering tabular data for "{w}"')
    with output_hdr:
        output_hdr.clear_output()
        printm(f'## "{w}"')

    # Pre-fill the annotation widgets; words without a sheet row start blank.
    try:
        widg_changes.value=dfanno.loc[w,'Changes']
        widg_from.value=dfanno.loc[w,'From']
        widg_to.value=dfanno.loc[w,'To']
        widg_notes.value=dfanno.loc[w,'Notes']
        widg_valid.value=dfanno.loc[w,'is_valid']
    except KeyError:
        widg_changes.value=''
        widg_from.value=''
        widg_to.value=''
        widg_notes.value=''
        widg_valid.value=''

    try:

        with output_anno:
            output_anno.clear_output()
            printm(f'### Annotations for "{w}"')
            display(annobox)

        with output_binaryneighb:
            output_binaryneighb.clear_output()
            printm(f'----\n### Vor vs. nach der Sattelzeit\nNeighborhoods of "{w}" in 1700-1770 models vs. 1830-1900 models')
            # `columns=` instead of a positional axis: pandas >= 2.0 accepts
            # only `labels` positionally in DataFrame.drop.
            stzdf=get_nbr_simple_sattelzeit(w).reset_index().drop(columns='word')
            if len(stzdf):
                stzrank,stzperc=stzdf.iloc[0]['Change Rank'],stzdf.iloc[0]['Change Percentile']
                stzdf=stzdf.drop(columns=['Change Rank', 'Change Percentile'])
                stzdf_md=stzdf.reset_index().drop(columns='index').to_markdown()
                printm(f'* Change Rank: {int(stzrank):,}\n* Change Percentile: {int(round(stzperc,0))}%')
                printm(stzdf_md)

        with output_topvecs:
            output_topvecs.clear_output()
            printm(f'### Top vectors of change for "{w}"')
            tilts_stz_md=format_tilts_sattelzeit_html(w,percs=True)
            printm(tilts_stz_md)

        with output_simchange:
            output_simchange.clear_output()
            printm(f'### Words with similar directions of change to "{w}"')
            simchange_md=info_word_simchange(w,lim=10,k=10)
            printm(simchange_md)

        with output_psgs:
            output_psgs.clear_output()
            dfpsgs=show_sents_word(w,num_proc=4,ybin=50,n=5)
            for col in dfpsgs.columns: dfpsgs[col]=dfpsgs[col].apply(highlight_psg)
            dfpsgs_md=dfpsgs.reset_index().drop(columns='index').to_markdown()
            printm(f'### Passages of "{w}"')
            printm(dfpsgs_md)

        with topout:
            topout.clear_output()
            print(f'Generating plots for "{w}"...',timer=False)

        with output_nov:
            output_nov.clear_output()
            printm(f'### Foote Novelty scores for "{w}"')
            res=plot_novelty_words(
                word,
                xlim0=ymin_nov,xlim1=ymax_nov,
                save=save,vnum=vnum,force=force,
                title=f'Foote Novelty scores for "{word}"'
            )
            if save:
                print_img(res)
            else:
                display(res)

        with output_nbr:
            output_nbr.clear_output()
            printm('----')
            printm(f'### Neighborhoods of "{w}"')
            res = iplot_nbrs(
                fixed(word),
                save=save,
                force=force,
                vnum=vnum,
                ybin=ybin_nbr,
                ymin=ymin_nbr,
                ymax=ymax_nbr,
                max_rank1=max_rank1_nbr,
                max_rank=max_rank_nbr,
                min_periods=min_periods_nbr,
                num_proc=num_proc,num_runs=num_runs
            )
            # A string result is handed to print_img; anything else is
            # displayed directly.
            if isinstance(res,str):
                print_img(res)
            else:
                display(res)

        with output_distmat:
            output_distmat.clear_output()
            printm(f'### Historical-semantic distance matrix for "{w}"')
            res=iplot_historical_semantic_distance_matrix(
                fixed(word),
                save=save,
                vnum=vnum,
                force=force,
                num_proc=num_proc,
                num_runs=num_runs
            )
            if isinstance(res,str):
                print_img(res)
            else:
                display(res)

    except Exception as e:
        # Deliberately broad: any failure is reported as missing data and the
        # word is flagged invalid in the UI (the flag is not saved).
        if widg_valid.value!='n':
            widg_valid.value='n'
        for b in allbuttons:
            if b.description.split()[-1]==w:
                b.style.button_color='lightcoral'
        with output_log:
            print(f'ERROR: Somehow there is insufficient data for this word. Please choose another.\n\n{e}')

    with topout: topout.clear_output()

In [None]:
# !killall voila

In [None]:

# Dotted borders for the markdown tables. The border value must be unquoted:
# a quoted string is not a valid `border` value, so browsers drop the rule.
printm('''
<style type="text/css">
table td {
    border: 1px dotted silver;
}
</style>
''')

In [None]:
# Word shown when the page first loads. The commented-out line below is an
# alternative that picks a random unannotated word ranked in the top 1000.
# random_word=random.choice(dfanno.query('rank_word<=1000 & is_valid==""').index)
random_word='culture'

In [None]:
# Initial render: fill every panel for the start word.
submit_word(random_word)