In [1]:

"""
Same behavior as corr_matrix.ipynb, but in particular for the plots for latex
"""
%load_ext autoreload
%autoreload 2


In [2]:
from lib.common.mlm_singleton import init_singleton_scorer
from lib.distr_diff_fcns import jensen_shannon_divergence
import os
from make_graphics.graphics import make_fig_saver, Colors

# Saver object used by later cells via `fig_saver.save(fig, filename)`.
fig_saver = make_fig_saver()

# make sure we init the scorer for all modules first
# (created once here for 'roberta-large' with attention outputs enabled)
mlm_scorer = init_singleton_scorer('roberta-large', output_attentions=True)

# Explicitly set TOKENIZERS_PARALLELISM to make the warnings shut up
# (likely triggered by the latex call).
os.environ["TOKENIZERS_PARALLELISM"] = "true"

ModuleNotFoundError: No module named 'lib'

In [None]:
from typing import List
from matplotlib import pyplot as plt
from rozlib.libs.plotting.plotting import add_rect_to_grid, add_bottom_edge_to_grid, add_edges_to_grid
from lib.scoring_fns import surprisal
from lib.exp_common.corr_matrix import get_scores
from lib.plotting.plot_corr_matrix import plot_heatmap
from rozlib.libs.plotting.utils_latex_matplot import config_matplot_for_latex, save_fig



In [None]:
from lib.plotting.plots import plot_all_affinities, plot_multiple_global_aff, plot_one_global_aff

"""Green day examples"""
# Two variants of the same sentence; s2 has the extra word "band" at word index 2.
s1 = "My favorite is Green Day."
s2 = "My favorite band is Green Day."

# use save_fig to save these
# The return value is used as a list of per-word values below
# (presumably the global affinities, going by the `gaf` name — TODO confirm).
gaf1 = plot_all_affinities(s1, do_make_local_aff_heatmap=False)
# add a placeholder 0 at index 2 (where s2 has "band") so gaf1 lines up with gaf2
gaf1.insert(2, 0)
gaf2 = plot_all_affinities(s2, do_make_local_aff_heatmap=False)


In [None]:
from lib.plotting.plots import plot_multiple_global_aff

# Relies on s2, gaf1 and gaf2 from the previous cell.
# NOTE(review): `both` is not referenced again in the visible cells.
both = [gaf1, gaf2]

config_matplot_for_latex(14)

# Word labels for the s2 series; the s1 series reuses them with a blank label
# at index 2, the slot of the placeholder inserted into gaf1.
all_words = s2.strip(".").split(" ")
words_1 = all_words.copy()
print(words_1)
# words_1[2] = (r"\dots .")
words_1[2] = (r" ")

# can plot individually
# plot_new([gaf1], words_1)
# plot_new([gaf2], all_words)

# x_locs holds one index list per series; [2] is the placeholder slot in gaf1
# (presumably the positions to mark as "no word" — TODO confirm against
# plot_multiple_global_aff).
fig = plot_multiple_global_aff(
    [
        (gaf1, words_1),
        (gaf2, all_words),
    ],
    overall_fig_size=(3,3),
    x_locs=[[2], []]
)
# save_fig(fig, "gaf_green_day_band.pdf")
fig_saver.save(fig, "fig7_gaf_green_day_band.pdf")



In [None]:
"""plots for kick the bucket examples"""

config_matplot_for_latex(12, )

# Three increasingly long variants; s2 adds "finally" (word index 3) and
# s3 additionally appends five more words.
s1 = "The old man kicked the bucket."
s2 = "The old man finally kicked the bucket."
s3 = "The old man finally kicked the bucket and the funeral is tomorrow."

# NOTE(review): not referenced again in the visible cells.
tgt_length_per_square = 0.6

words_1 = s1.strip(".").split(" ")
# words_1.insert(3, r"\dots.")
# blank label at index 3, where s2/s3 have "finally"
words_1.insert(3, r" ")
# words_1.extend([""]*5)
words_2 = s2.strip(".").split(" ")
# words_2.extend([""]*5)
words_3 = s3.strip(".").split(" ")

# use save_fig to save these
# print(gaf1)
# print(words_1)
gaf1 = plot_all_affinities(
    s1,
    do_make_local_aff_heatmap=False,
    # output_file_for_global_aff="kick1.png"
)
gaf1.insert(3, 0)   # placeholder for "finally" (index 3 in s2/s3)
# pad the two shorter series with 5 entries so all three have 12 positions, like s3
gaf1.extend([0]*5)
words_1.extend(["---"]*5)
words_2.extend(["---"]*5)
gaf2 = plot_all_affinities(
    s2,
    do_make_local_aff_heatmap=False,
    # output_file_for_global_aff="kick2.png"
)
gaf2.extend([0]*5)

gaf3 = plot_all_affinities(
    s3,
    do_make_local_aff_heatmap=False,
    # output_file_for_global_aff="kick3.png"
)

# One index list per series, listing the placeholder/padding slots added above:
# series 1 -> index 3 plus padding 7..11, series 2 -> padding 7..11, series 3 -> none.
x_locs = [
    [3] + list(range(7,12)),
    list(range(7,12)),
    []
]

fig = plot_multiple_global_aff([
    (gaf1, words_1),
    (gaf2, words_2),
    (gaf3, words_3)
],
    overall_fig_size=(5,3),
    x_locs=x_locs
)
fig_saver.save(fig, "kick_the_bucket_new_gray.pdf")
# save_fig(fig, "kick_the_bucket_new_gray.pdf")



In [None]:
def get_sorted_idxs_for_matrix(m: List[List[float]]):
    """Return the flat (row-major) cell indices of ``m`` ordered by ascending value.

    The matrix is flattened row by row; the result lists the positions in that
    flattened list, smallest value first. Equal values keep their original
    (row-major) order.

    Raises:
        AssertionError: if the total number of cells is not ``len(m) ** 2``.
    """
    flat_values = [value for row in m for value in row]
    assert len(flat_values) == len(m)**2

    # sorted() is stable, so ties stay in index order
    return sorted(range(len(flat_values)), key=lambda idx: flat_values[idx])



In [None]:
import torch
from pprint import pp
from pathlib import Path


## Produce fig 1b - the so-that plot with two red rectangles to show aap vs cec that


# def get_color():
#     cmap = plt.cm.Dark2
#
#     return cmap(3)

# todo: note that this code is partially duplicating plot_all_affinities
def make_so_that_plot(
        add_rects = True,
        file_type = "pdf"
):
    """Plot the affinity heatmap for the "so ... that ... that" example sentence.

    Scores come from ``get_scores`` (``subs_method="mask"``, ``surprisal`` as
    score function, Jensen-Shannon divergence as distribution difference) and
    are drawn with ``plot_heatmap`` using the "Greys" colormap and
    ``vmin_max=(0.0, 0.69)``.

    Args:
        add_rects: when True, draw edges around the grid cells addressed by
            (2, 4) and (2, 8); 2, 4 and 8 are the word indices of "so", the
            first "that" and the second "that".
        file_type: extension used to build the output file name. The name is
            only computed here; the save call is commented out.

    Returns:
        Tuple ``(fig, scores)``: the figure returned by ``plot_heatmap`` and
        the scores returned by ``get_scores``.
    """
    sent = "I was so excited that I saw you that I told my Mom."

    sent_word_list = sent.strip(".").split(" ")
    subbed_word_list = sent_word_list.copy()
    # subbed_word_list[2] = "so (cec)"
    # subbed_word_list[4] = r"that (aap)"
    # subbed_word_list[8] = "that (cec)"

    # bold LaTeX labels for the three words of interest
    subbed_word_list[2] = r"\textbf{so}"
    subbed_word_list[4] = r"\textbf{that$_1$}"
    subbed_word_list[8] = r"\textbf{that$_2$}"

    print(sent_word_list)
    print(subbed_word_list)
    print(len(subbed_word_list))

    # note: sent_word_list is rebound to the word list returned by get_scores
    scores, new_sents, multi_tok_indices, sent_word_list, hhis, preds, probs, actual_subs = get_scores(
        sent,
        subs_list = None,
        subs_method="mask",
        score_fn = surprisal ,
        num_preds=2,
        dist_diff_fn=jensen_shannon_divergence
    )
    # if you want to see orders of magnitude diff
    pp(torch.round(scores, decimals=2))
    fig = plot_heatmap(scores,
                       subbed_word_list,
                       # actual_subs,
                       ylabels=None,
                       cmap="Greys",
                       title=None,
                       add_colorbar=False,
                       fig_size=(3,3),
                       return_fig=True,
                       xlabel_rotation=45,
                       vmin_max=(0.0, 0.69)
                       )

    # rectangle annotations on top
    rect_color = Colors.rectangle_color
    ax = fig.axes[0]

    # The "_no_rect" suffix belongs to the variant WITHOUT rectangles
    # (previously the suffix was applied when rectangles were drawn).
    suffix = "" if add_rects else "_no_rect"
    fname = "cec_so_that_affinity" + suffix + "." + file_type
    if add_rects:
        # add_rect_to_grid(ax, len(sent_word_list), 2, 4, 1, 1, rect_color)
        # add_rect_to_grid(ax, len(sent_word_list), 2, 8, 1, 1, rect_color)
        # add_bottom_edge_to_grid(ax, len(sent_word_list), 2, 4, 1, 1, rect_color)
        # add_bottom_edge_to_grid(ax, len(sent_word_list), 2, 8, 1, 1, rect_color)
        add_edges_to_grid(ax, len(sent_word_list), 2, 4, 1, 1, rect_color)
        add_edges_to_grid(ax, len(sent_word_list), 2, 8, 1, 1, rect_color)

    ax.grid(False)

    plt.show()

    # save_fig(fig, Path("."), fname)
    return fig, scores

config_matplot_for_latex(14, dpi=150)

# produce the plot (the call below is active)
# NOTE: this rebinds `s2` — a sentence string in earlier cells — to the scores
# returned by make_so_that_plot.
fig, s2 = make_so_that_plot(
    add_rects=True,
    file_type="png"
)
pp(s2)

In [None]:

# todo: duplicates above, only rewrites the sentence and the labeling
def make_so_that_plot_leonie(
        add_rects = True,
        file_type = "pdf"
):
    """Plot and save the affinity heatmap for "I was so happy that I cried."

    Same pipeline as ``make_so_that_plot`` with a different sentence and
    labels: scores come from ``get_scores`` (``subs_method="mask"``,
    ``surprisal``, Jensen-Shannon divergence) and are drawn with
    ``plot_heatmap`` using the "Greys" colormap. The figure is saved with
    ``save_fig`` into the current directory.

    Args:
        add_rects: when True, draw rectangles on the heatmap grid.
        file_type: extension of the saved file.

    Returns:
        The scores returned by ``get_scores``.
    """
    ###### this is the only part that is diff
    sent = "I was so happy that I cried."
    sent_word_list = sent.strip(".").split(" ")
    subbed_word_list = sent_word_list.copy()
    subbed_word_list[2] = r"\textbf{so}"
    # index -3 is "that" (word index 4 of this 7-word sentence)
    subbed_word_list[-3] = r"\textbf{that}"
    # subbed_word_list[7] = r"\textbf{that$_2$}"
    ###
    # end diff

    print(sent_word_list)
    print(subbed_word_list)
    print(len(subbed_word_list))

    # note: sent_word_list is rebound to the word list returned by get_scores
    scores, new_sents, multi_tok_indices, sent_word_list, hhis, preds, probs, actual_subs = get_scores(
        sent,
        subs_list = None,
        subs_method="mask",
        score_fn = surprisal ,
        num_preds=2,
        dist_diff_fn=jensen_shannon_divergence
    )
    # if you want to see orders of magnitude diff
    pp(torch.round(scores, decimals=2))
    fig = plot_heatmap(scores,
                       subbed_word_list,
                       # actual_subs,
                       ylabels=None,
                       cmap="Greys",
                       title=None,
                       add_colorbar=False,
                       fig_size=(3,3),
                       return_fig=True,
                       xlabel_rotation=45
                       )

    # rectangle annotations on top
    # `get_color()` is not defined in this notebook (its definition is commented
    # out), so use the shared colour constant that make_so_that_plot uses.
    rect_color = Colors.rectangle_color
    ax = fig.axes[0]

    # NOTE(review): the "_no_rect" suffix is applied when rectangles ARE drawn,
    # which looks inverted. Left unchanged because it determines the name of
    # the saved file — confirm before renaming.
    fname = "cec_so_that_affinity." + file_type
    if add_rects:
        add_rect_to_grid(ax, len(sent_word_list), 2, 4, 1, 1, rect_color)
        # NOTE(review): index 8 was copied from the longer so/that/that sentence;
        # this sentence has only 7 words, so this rectangle presumably falls
        # outside the grid — confirm and drop if so.
        add_rect_to_grid(ax, len(sent_word_list), 2, 8, 1, 1, rect_color)
        fname = "cec_so_that_affinity_no_rect." + file_type

    ax.grid(False)

    plt.show()

    save_fig(fig, Path("."), fname)

    return scores

config_matplot_for_latex(14, dpi=300)

# produce the plot (the call below is active)
# NOTE: `s2` is rebound to the scores returned by make_so_that_plot_leonie.
s2 = make_so_that_plot_leonie(
    add_rects=False,
    file_type="pdf"
)


In [None]:
# JSD and euclidean are not monotonic and thus cannot be used interchangeably
# s1_sorted = get_sorted_idxs_for_matrix(s1)
# s2_sorted = get_sorted_idxs_for_matrix(s2)
# print(s1_sorted)
# print(s2_sorted)


# green day examples
1. (todo) fig 4 - My favorite is green day vs My favorite band is green day ->
    - make smaller (move the word labels below the squares, only scores inside the squares)
1. fig 5 - aff matrix - My favorite band is Green Day.
2. fig 6 - aff matrix - I saw my favorite band, Green Day, in concert

## notes
- note that fig 8 in the appendix (heat map for eap/aap) is produced in scoring/exp5_aap_eap_cec/exp5_extract_umap.ipynb (toward bottom)


In [None]:
# fig 4
"""Green day examples"""
# Rebinds s1/s2 to sentence strings (earlier cells bound s2 to score matrices).
s1 = "My favorite is Green Day."
s2 = "My favorite band is Green Day."

# use save_fig to save these
# Return values are discarded here; the calls are made for the plots only.
plot_all_affinities(s1, do_make_local_aff_heatmap=False)
plot_all_affinities(s2, do_make_local_aff_heatmap=False)

None    # suppress output



In [None]:
# use 14 and fig size (3,3) in plot_heatmap to get the same sizing as others for the multifigure
config_matplot_for_latex(14)

def make_green_day_plot(sent: str,
                        filename: str,
                        figsize=(3,3)
                        ):
    """Compute the affinity scores for ``sent`` and return them as a grey heatmap figure.

    Scores come from ``get_scores`` (``subs_method="mask"``, ``surprisal``,
    Jensen-Shannon divergence); the axis labels are the word list that
    ``get_scores`` returns.

    Args:
        sent: sentence passed to ``get_scores``.
        filename: kept for call-site compatibility; not used inside this
            function (callers save the returned figure themselves).
        figsize: forwarded to ``plot_heatmap`` as ``fig_size``.

    Returns:
        The figure returned by ``plot_heatmap`` (called with ``return_fig=True``).
    """
    scores, new_sents, multi_tok_indices, sent_word_list, hhis, preds, probs, actual_subs = get_scores(
        sent,
        subs_list = None,
        subs_method="mask",
        score_fn = surprisal ,
        num_preds=2,
        dist_diff_fn=jensen_shannon_divergence
    )
    fig = plot_heatmap(scores,
                       sent_word_list,
                       # actual_subs,
                       ylabels=None,    # no right hand y axis labels
                       # cmap="Blues",
                       # "Greys" is the canonical matplotlib name (same colormap as the
                       # newer "Grays" alias) and matches the other heatmaps in this notebook.
                       cmap="Greys",
                       title=None,
                       add_colorbar=False,
                       fig_size=figsize,
                       return_fig=True,
                       xlabel_rotation=45
                       )
    return fig




In [None]:

# fig 5

config_matplot_for_latex(14)
s = "My favorite band is Green Day."
sent_word_list = s.strip(".").split(" ")
fname = "aff_green_fave_band_is.pdf"
fig = make_green_day_plot(s, fname)


ax = fig.axes[0]
ax.grid(False)
# `get_color()` is not defined in this notebook (its definition is commented out);
# use the shared rectangle colour that make_so_that_plot uses.
rect_color = Colors.rectangle_color
add_rect_to_grid(ax, len(sent_word_list), 2, 0, 1, 2, rect_color, linewidth=2.5)
add_rect_to_grid(ax, len(sent_word_list), 4, 3, 2, 1, rect_color, linewidth=2.5)
save_fig(fig, fname)





In [None]:

# fig 6
# commas are passed to model but are not plotted
config_matplot_for_latex(14)

s = "I saw my favorite band, Green Day, in concert."
fname = "aff_green_band_concert.pdf"
# splitting on spaces keeps the commas attached to their words; only the
# length of this list (9) is used below
sent_word_list = s.strip(".").split(" ")
fig = make_green_day_plot(s, fname)

ax = fig.axes[0]
ax.grid(False)
# `get_color()` is not defined in this notebook (its definition is commented out);
# use the shared rectangle colour that make_so_that_plot uses.
rect_color = Colors.rectangle_color
add_rect_to_grid(ax, len(sent_word_list), 4, 2, 1, 2, rect_color, linewidth=2)
add_rect_to_grid(ax, len(sent_word_list), 5, 4, 2, 1, rect_color, linewidth=2)
# save_fig(fig, fname)



In [None]:
# appendix fig 10

config_matplot_for_latex(14)
# NOTE(review): make_green_day_plot, as defined in this notebook, does not use
# its filename argument, and the returned figure is discarded here — nothing
# is saved by this cell.
make_green_day_plot("My favorite is Green Day.", "aff_greenday_no_context.pdf")

# note appendix fig 11 is the same as fig 5

None


In [None]:
from rozlib.libs.plotting.plotting import print_color_map_with_hex

# Choose a colormap (e.g., 'Dark2')
cmap = plt.cm.Dark2

# print the color map (with hex values, per the helper's name) for use with external software
print_color_map_with_hex(cmap)



In [None]:


# Only the matplotlib config is applied here; the plot calls below are commented out.
config_matplot_for_latex(14, dpi=1000)      # higher for leonie with png
# make_green_day_plot("Alice went to the hardware store and she bought a hammer.", "aff_matrix_alice_hammer.pdf")
# make_green_day_plot("Alice went to the hardware store and she bought a hammer.", "aff_matrix_alice_hammer_33_.png", (3,3))

None



In [None]:


s = "Alice went to the hardware store and she bought a hammer."
# word labels for the plot in the next cell
words = s.strip(".").split(" ")

# use save_fig to save these
# NOTE: rebinds `gaf1`, which earlier cells used for other sentences.
gaf1 = plot_all_affinities(s, do_make_local_aff_heatmap=False)



In [None]:
# Relies on `gaf1` and `words` from the previous cell.
# Print both lengths so they can be compared before plotting.
print(len(gaf1))
print([round(x,2) for x in gaf1])
print(len(words))
fig = plot_one_global_aff(
    gaf1,
    words,
    figsize_if_plotting_single=(4,2)
)
save_fig(fig, "gaf_alice_hammer.pdf")


# Not reviewed (old)

In [None]:


# sent = "I was so excited that I saw you that I told my Mom."
# sent_word_list = sent.strip(".").split(" ")
# subbed_word_list = sent_word_list.copy()
# # subbed_word_list[2] = "so (cec)"
# subbed_word_list[4] = "that (aap)"
# subbed_word_list[8] = "that (cec)"
#
# print(sent_word_list)
# print(subbed_word_list)
# print(len(subbed_word_list))
#
# scores, new_sents, multi_tok_indices, sent_word_list, hhis, preds, probs, actual_subs = get_scores(
#     sent,
#     subs_list = None,
#     subs_method="mask",
#     score_fn = surprisal ,
#     num_preds=2,
#     dist_diff_fn=jensen_shannon_divergence
# )




In [None]:

# Scratch cell: each assignment overwrites the previous one, so only the last
# `s` is plotted.
# s = "Dana is an Australian paralympic swimmer. She was born in the Queensland town of St. George."
s = "Dana is an Australian paralympic swimmer."
# s = "Her early results led to her being offered one of the first Australian Institute of Sport scholarships for disabled swimmers."
# s = "I heard Red Day recently."
# s = "Green Day is a band I heard recently."
s = "My favorite band that I heard recently in concert is Green Day. They are really good."
s = "I was so sad that you cried."
# NOTE(review): this literal was missing its closing quote (SyntaxError) and the
# sentence looks truncated; the quote is closed here so the cell parses —
# restore the full sentence if it is still needed.
s = "From now on 'BO' means 'so'. Whereever 'so' o"
plot_all_affinities(s, do_make_local_aff_heatmap=False)


In [None]:

# NOTE(review): relies on `s2` left over from an earlier cell. Earlier cells bind
# `s2` both to sentence strings and to score matrices, so the result depends on
# execution order.
plot_all_affinities(s2, use_euclid=True)

In [None]:

# Scratch list of candidate sentences: each assignment overwrites the previous
# one, so only the last `orig_sent` is plotted.
orig_sent = " i was so happy that i saw you that i told my mom."
orig_sent = " i went to the farm and got an apple."
orig_sent = " i am so sad that i might cry."
orig_sent = " he saw a horse at the farm."
orig_sent = " a dog is my favorite animal."
orig_sent = "I was so happy that I saw you that I told my mom."
orig_sent = "It was so big that it fell over."
orig_sent = "I was so sad that it fell over."
orig_sent = "This task was very easy, a piece of cake.".lower()
orig_sent = "My favorite band is Green Day."
orig_sent = "Surprisingly, in Burma, the belief was once so widespread that the Sumatran rhino ate fire."
orig_sent = "For example, Zhu Youliang the Prince of Heng, an older cousin of the emperor's, was so honored at court at that time that even Li Yu's superior, the chief of staff Li Zhen, kneeled to him."
# orig_sent = "It has also been noted that he was so satisfied that he did this without fee or reward and was publicly thanked."
orig_sent = "He was so satisfied that he gave the speech without fee or reward and was publicly thanked."
orig_sent = 'The judges were all so surprised that one of them had a "spasm," one leaned against the wall for support, and the other fell backwards into a barrel of flour!'
orig_sent = 'I did not want to, but a friend was so adamant that I tried it.'
orig_sent = 'There are a couple of false notes along the way, such as a dreadful rendition in front of a room of people of "Youre So Vain," but so many moments are so right that I had no trouble forgiving them the few missteps.'
orig_sent = "By 1844, he was so accomplished that his father gathered together as much money as he could (apparently even selling the family piano) and sent him to study in Rome at the Accademia di San Luca."
plot_all_affinities(orig_sent, None, use_euclid=False, use_probability=True)



In [None]:
# NOTE(review): same call as an earlier scratch cell; relies on `s2` left over
# from a previous cell, so the result depends on execution order.
plot_all_affinities(s2, use_euclid=True)


In [None]:
# NOTE: rebinds `s3`, which the kick-the-bucket cell used for a different sentence.
s3 = "I saw my favorite band, Green Day, in concert."
plot_all_affinities(s3)


In [None]:
## not for latex, just experiments
# Each assignment overwrites the previous one, so only the last `s3` is plotted.
s3 = "Plate by <mask> the chef improved his cooking, and each <mask> was better than the last."
s3 = "Soup by <mask> the chef improved his cooking, and each <mask> was better than the last."
s3 = "Grain by grain the chef improved his cooking, and each <mask> was better than the last."
s3 = "soup by soup the chef improved his cooking, and each <mask> was better than the last."
s3 = "On our backpacking trip, we made the trip eating sandwich by sandwich."
s3 = "The journey was long but sandwich by sandwich we ate our way through it."
s3 = "We moved from <mask> to <mask>, tasting a variety of delicious fillings."
s3 = "We moved from sandwich to sandwich."
s3 = "We moved from sandwich to sandwich, tasting a variety of delicious fillings."
s3 = "In the prison, the inmates were watched guard by guard to ensure maximum security."
s3 = "In the prison system, the inmates were kept under strict surveillance, with guard upon guard watching their every move."
s3 = "He found himself stuck in a cycle, <mask> after <mask>, unable to break free from his unhealthy habits."
s3 = "The older boys help the younger ones."
s3 = "The older you are, the weaker you get."
s3 = "I was so certain "
s3 = "Alice went to the hardware store and she bought a hammer."
plot_all_affinities(s3, use_euclid=False, num_preds=5)
