In [None]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import sys
from IPython.display import display, HTML
from typing import List
from mprompt.modules.emb_diff_module import EmbDiffModule
import numpy as np
import matplotlib
import imodelsx.util
import re
import mprompt.viz
import scipy.special
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
from mprompt.methods.m4_evaluate import D5_Validator

def moving_average(a, n=3):
    """Smooth a 1-D sequence with a forward-looking window of ``n`` values.

    Output position ``k`` holds the mean of ``a[k:k + n]``. Near the end of
    the sequence the window is truncated to the values that remain, so the
    result always has the same length as ``a``.

    Parameters
    ----------
    a : sequence or np.ndarray
        Values to smooth.
    n : int
        Window size; must be odd.

    Returns
    -------
    np.ndarray
        Smoothed values, with any NaN replaced by 0 (via ``np.nan_to_num``).

    Raises
    ------
    AssertionError
        If ``n`` is even.
    """
    assert n % 2 == 1, 'n should be odd'
    half_width = n // 2
    # n is odd, so 2 * half_width + 1 is exactly the requested window size
    window = 2 * half_width + 1
    window_means = [np.mean(a[start: start + window]) for start in range(len(a))]
    return np.nan_to_num(window_means)
# Blank English pipeline whose tokenizer is swapped for one that treats every
# run of non-whitespace characters as a single token (i.e. only splits on whitespace).
nlp = English()
nlp.tokenizer = Tokenizer(
    nlp.vocab,
    token_match=re.compile(r'\S+').match,
)

# Get prompts

In [None]:
# Topics ("explanations"), in story order: one paragraph is written per topic.
expls = [
    'baseball', 'animals', 'water', 'movement',
    'religion', 'technology', 'time',
]

In [None]:
# Template for the opening paragraph, and template for every following paragraph.
prompt_init = 'Write the beginning paragraph of a story about "{expl}". Make sure it contains several references to "{expl}".'
prompt_continue = 'Write the next paragraph of the story, but now make it about "{expl}". Make sure it contains several references to "{expl}".'

# The first topic opens the story; each remaining topic continues it.
prompts = [prompt_init.format(expl=expls[0])]
prompts.extend(prompt_continue.format(expl=topic) for topic in expls[1:])

# Show one prompt per line.
print(*prompts, sep='\n')

In [None]:
# Using chatbot for now
# paragraphs = [
#     "The crack of the bat echoed through the stadium as the pitcher windmilled his arm and fired a fastball down the middle of the plate. The batter's eyes lit up as he swung with all his might, sending the ball sailing high into the sky. The center fielder raced back, tracking the ball's flight with a keen eye, ready to make the catch. The crowd held its breath as the ball descended, coming closer and closer to the fielder's outstretched glove. With a satisfying thud, the ball landed squarely in the pocket, and the center fielder triumphantly jogged off the field. It was just another day at the ballpark, where the crack of the bat and the roar of the crowd were the soundtrack to America's favorite pastime: baseball.",
#     "As the center fielder jogged off the field, a family of ducks waddled onto the grass. The mother duck led her ducklings towards a small pond near the outfield, quacking softly to each other. The players watched in amusement as the ducks made themselves at home, seemingly oblivious to the fact that they were in the middle of a baseball game. Suddenly, a squirrel darted across the field, causing the ducks to scatter in all directions. The players laughed as they watched the animals go about their business, momentarily forgetting about the game they were playing. It was a reminder that, despite all the excitement and drama of the sport, the natural world continued to carry on around them.",
#     "In the distance, the sound of crashing waves could be heard, a reminder that the stadium was located just a stone's throw away from the ocean. The salty sea air mingled with the smell of freshly cut grass, creating a unique aroma that was both refreshing and invigorating. As the game went on, the temperature began to rise, and fans could be seen fanning themselves with whatever they could find. Suddenly, a gust of wind picked up, and a fine mist sprayed over the crowd, providing some much-needed relief. The players on the field looked up as they felt the cool droplets on their skin, grateful for the natural air conditioning that the ocean breeze provided.",
# ]
# Story paragraphs, one per topic in `expls` and in the same order: the cells
# below pair paragraphs[i] with expls[i] and prompts[i].
paragraphs = [
    "It was a sunny day in the middle of summer, and the smell of freshly cut grass lingered in the air. The sound of a ball hitting a mitt echoed throughout the empty stadium, as the players warmed up on the field. The crack of a bat could be heard in the distance as someone took batting practice. Baseball season had arrived once again, and the excitement was palpable. Fans from all over the country were gearing up to cheer on their favorite teams, while players prepared to give it their all on the diamond. This was a sport that had been loved and played for generations, and it was time for another season of America's pastime.",
    "As the baseball players stretched and warmed up, a few curious animals peeked their heads out from behind the bleachers. A mischievous squirrel darted across the field, chattering away to itself. A family of ducks waddled along the edge of the outfield, quacking softly. A group of pigeons cooed and flapped their wings, taking off in a flurry of feathers as a ball came too close. Even the players themselves could be likened to animals, with their powerful swings and swift sprints around the bases. It was as if the entire stadium was alive with a vibrant energy, fueled by the presence of these creatures great and small.",
    "As the game began, the players took their positions on the field, the dirt beneath their cleats still damp from the morning dew. A mist rose up from the grass, creating a hazy atmosphere that made it hard to see. In the distance, a body of water could be seen shimmering under the hot sun, its waves gently lapping against the shore. The crack of the bat echoed across the field, and the ball soared high into the sky, almost seeming to touch the clouds. The outfielder ran towards the ball, his eyes fixed on the trajectory as he chased it down. With a splash, he dove into the water that lay just beyond the fence, emerging victorious with the ball held high above his head. The crowd roared with excitement as the game continued, the water now a defining feature of this legendary match.",
    "Movement was everywhere on the field, as players darted and dashed, leapt and lunged, all in an effort to outmaneuver their opponents. The pitcher wound up his arm, his body twisting with kinetic energy before releasing the ball with a sudden burst of movement. The batter swung his bat with lightning-fast reflexes, sending the ball hurtling through the air with incredible momentum. The fielders raced towards the ball, their feet pounding against the ground as they closed in on their target. Even the crowd was swept up in the movement of the game, jumping to their feet and waving their arms as the tension mounted. Every moment was alive with action, as the players pushed themselves to their limits and the game surged forward with an unstoppable energy.",
    "As the game progressed, some of the players looked up to the sky, silently praying for strength and guidance. The crack of the bat and the cheers of the crowd were punctuated by the sound of hymns being sung from a nearby church. Some of the fans crossed themselves and whispered prayers under their breath, while others wore crosses around their necks or had tattoos of religious symbols on their arms. The game seemed to take on a spiritual dimension, with each play imbued with a deeper meaning. Even the scoreboard seemed to speak to something beyond the mere numbers, as if it were a message from the heavens. For some, baseball was more than just a game - it was a manifestation of their faith, a way to connect with something greater than themselves.",
    "The game was being broadcast live across the world, thanks to the wonders of modern technology. Cameras mounted on cranes and drones captured every angle of the action, while microphones picked up the sounds of the game with incredible clarity. The umpire reviewed a close call using instant replay, with the decision ultimately confirmed by a team of experts watching the game from a control room somewhere far away. Fans at home followed the game on their smartphones, using apps that gave them access to real-time stats and analysis. Even the players themselves were benefiting from technology, using tools like virtual reality simulations and biomechanical sensors to improve their performance. It was a game that was shaped by the latest advancements in science and engineering, a testament to the power of human innovation.",
    "As the innings went on, the players felt the weight of time bearing down upon them. Every moment counted, every second ticking away towards the end of the game. The scoreboard displayed the time of day, reminding the players and the fans that the sun would soon set on this contest. The pitcher watched the clock, knowing that he only had a limited number of pitches left in his arm. The batter tried to slow down time, taking his time in the batter's box, adjusting his helmet and gloves, and studying the pitcher's windup. The fans in the stands were aware of the passage of time too, checking their watches and counting down the outs left in the game. Even the umpire was aware of time, making sure the game was not delayed and that each inning was played within the time limit. Time was a constant presence, a reminder that nothing could last forever, not even a baseball game.",
]

# Visualize data heatmap

### Get per-n-gram scores (validator; the embedding-distance variant is commented out)

In [None]:
# mod = EmbDiffModule()  # alternative scorer (presumably embedding-distance based); currently disabled
# Validator whose validate_w_scores(expl, ngrams) is called in the cells below
# to score a list of n-grams against an explanation.
val = D5_Validator()

In [None]:
# Score each paragraph against its own topic and render it as colour-coded HTML.
# Every word is represented by the n-gram that ends on it: the first two words get
# a unigram and a bigram, and the trigram list (presumably one trigram per
# remaining word -- TODO confirm against generate_ngrams_list) covers the rest.
story_running = ''
for i in range(len(expls)):
    expl = expls[i].lower()
    text = paragraphs[i]
    words = text.split()  # original casing, used only for display
    prompt = prompts[i]

    ngrams = imodelsx.util.generate_ngrams_list(text.lower(), ngrams=3, tokenizer_ngrams=nlp.tokenizer)
    # The trigrams come from lowercased text, so lowercase the two leading
    # n-grams as well; otherwise the validator sees mixed casing for one paragraph.
    first_word, second_word = words[0].lower(), words[1].lower()
    ngrams = [first_word, first_word + ' ' + second_word] + ngrams

    # # embdiff-based viz
    # mod._init_task(expl)    
    # neg_dists = mod(ngrams)
    # assert len(ngrams) == len(words) == len(neg_dists)
    # # neg_dists = scipy.special.softmax(neg_dists)
    # plt.plot(neg_dists)
    # plt.plot(moving_average(neg_dists, n=5))
    # neg_dists = moving_average(neg_dists, n=3)
    # neg_dists = (neg_dists - neg_dists.min()) / (neg_dists.max() - neg_dists.min())
    # neg_dists = neg_dists / 2 + 0.5 # shift to 0.5-1 range
    # s = mprompt.viz.colorize(words, neg_dists, title=expl, subtitle=prompt)

    # validator-based viz: smooth the per-n-gram scores, then compress them
    probs = np.array(val.validate_w_scores(expl, ngrams))
    probs_disp = moving_average(probs, n=3)
    probs_disp = probs_disp / 2 + 0.5  # shift to 0.5-1 range (assumes scores lie in 0-1)
    html_snippet = mprompt.viz.colorize(words, probs_disp, title=expl, subtitle=prompt)

    # show inline and accumulate for the combined HTML file
    display(HTML(html_snippet))
    story_running += ' ' + html_snippet

# Make sure the output directory exists before writing, so a fresh checkout
# does not fail with FileNotFoundError.
story_path = '../results/story_running.html'
os.makedirs(os.path.dirname(story_path), exist_ok=True)
with open(story_path, 'w', encoding='utf-8') as f:
    f.write(story_running)

# Quantify synthetic data
For each explanation–paragraph pair, calculate the fraction of the paragraph's n-grams whose validator score for that explanation exceeds 0.5.

In [None]:
# scores[i, j] = fraction of n-grams of paragraph j that the validator scores
# above 0.5 for explanation i.
n = len(expls)

# The n-grams depend only on the paragraph, so build them once per paragraph
# instead of once per (explanation, paragraph) pair.
ngrams_by_paragraph = []
for j in range(n):
    text = paragraphs[j].lower()
    words = text.split()
    ngrams = imodelsx.util.generate_ngrams_list(text, ngrams=3, tokenizer_ngrams=nlp.tokenizer)
    # prepend a unigram and a bigram for the first two words
    ngrams_by_paragraph.append([words[0], words[0] + ' ' + words[1]] + ngrams)

scores = np.zeros((n, n))
for i in tqdm(range(n)):
    expl = expls[i]
    for j in range(n):
        # an n-gram counts as a match when its validator score exceeds 0.5
        probs = np.array(val.validate_w_scores(expl, ngrams_by_paragraph[j])) > 0.5
        scores[i, j] = probs.mean()

In [None]:
# Heatmap of the explanation-vs-paragraph match matrix.
s = scores
# optional normalisations (disabled):
# s = scipy.special.softmax(scores, axis=1)
# s = (s - s.min()) / (s.max() - s.min())
fig, ax = plt.subplots(figsize=(6, 5))
im = ax.imshow(s)
ax.set_xticks(range(n))
ax.set_xticklabels(expls, rotation=90)
ax.set_yticks(range(n))
ax.set_yticklabels(expls)
# scores[i, j] uses explanation i for matching and paragraph j, which was
# generated for explanation j. imshow puts rows (i) on the y-axis and
# columns (j) on the x-axis, so y is "matching" and x is "generation".
ax.set_ylabel('Explanation for matching')
ax.set_xlabel('Explanation for generation')
fig.colorbar(im, ax=ax, label='Fraction of matching ngrams')
fig.tight_layout()
plt.show()