In [23]:
import json
from pathlib import Path

import pandas as pd
import scipy.stats as stats
from rich.progress import track

# Table Generation

In [24]:
# Directory holding the evaluation outputs: "outputs" next to the current working directory.
root_path = Path.cwd().parent / "outputs"

In [25]:
# Load the objective evaluation results.
# The first CSV column is an unnamed, previously saved row index; reading it as the
# index keeps it from appearing as a spurious "Unnamed: 0" data column.
data = pd.read_csv(root_path / "objective-evaluation-results.csv", index_col=0)

In [26]:
# Order the stories from the highest to the lowest overall average score.
data = data.sort_values("avg_score", ascending=False)

In [27]:
# Show the results table, ordered from highest to lowest avg_score.
data

Unnamed: 0,story_id,title,synopsis,approach,avg_score,std_score,avg_coherence,avg_inspiration,avg_narrative_fluency,avg_readability,avg_word_complexity,std_coherence,std_inspiration,std_narrative_fluency,std_readability,std_word_complexity
15,fff66562-f284-11ee-b947-00155d2625d3,Celestial Odyssey,In a world where magic and technology intertwi...,proposed,6.814534,0.363971,7.197239,6.340504,6.855827,7.219188,6.459911,0.607536,0.560798,0.996028,0.508643,0.578326
11,61b6774a-f487-11ee-8f74-182649966cd4,The Chronicles of Zephyr,In a world where magic and technology intertwi...,proposed,6.762332,0.307734,7.071141,6.24481,6.821667,7.055688,6.618354,0.575162,0.439691,0.850936,0.454643,0.542908
17,6a8b12bd-03d2-11ef-9093-182649966cd4,The Celestial Chronicles,In a world where magic and technology intertwi...,proposed,6.720377,0.337748,7.117082,6.252861,6.662638,7.086998,6.482306,0.563577,0.608376,0.970988,0.54839,0.591979
12,4aa911cb-f78a-11ee-979e-182649966cd4,Echoes of the Celestial Realm,In a world where ancient magic and advanced te...,proposed,6.69218,0.386209,7.09531,6.142679,6.735502,7.113932,6.373476,0.587998,0.551629,0.863932,0.48367,0.532741
9,c185bc6f-f3dd-11ee-9cd1-182649966cd4,Celestial Odyssey,"In a distant future, Zara Quark, a skilled pil...",proposed,6.667909,0.306257,6.91577,6.223764,6.615221,7.089462,6.495326,0.626424,0.441112,0.909606,0.562789,0.546957
10,ea721ae9-033c-11ef-a17c-182649966cd4,Chronicles of the Celestial Realm,In a world where magic and technology intertwi...,proposed,6.611573,0.312366,6.866879,6.17412,6.537452,7.05034,6.429077,0.677729,0.604573,0.920773,0.464877,0.586115
14,3bd1b7aa-051b-11ef-89d5-182649966cd4,Echoes of the Celestial Realm,In a world where magic and technology intertwi...,proposed,6.606084,0.337169,7.003498,6.161393,6.488124,6.992972,6.384434,0.610898,0.409959,1.028726,0.539727,0.501698
16,3859d8cb-f319-11ee-b719-182649966cd4,Echoes of the Celestial Realm,"In a world where magic and technology coexist,...",proposed,6.579677,0.348107,6.794315,6.142403,6.524602,7.122895,6.31417,0.83958,0.599577,0.93952,0.495212,0.554098
13,790a18bb-f56e-11ee-a333-182649966cd4,Chronicles of the Celestial Realm,In a world where advanced technology and ancie...,proposed,6.561804,0.311001,6.785779,6.162755,6.549899,7.013471,6.297115,0.728059,0.524721,0.913267,0.632842,0.491032
0,d979223f-f4e2-11ee-b819-182649966cd4,The Chronicles of Zephyr,In a world where magic and technology intertwi...,baseline,6.531065,0.334788,6.827718,6.017821,6.486688,6.955633,6.367465,0.613461,0.530518,0.942505,0.565736,0.505262


In [28]:
# Highest mean value of each metric across all stories, keyed by the short metric
# name; the LaTeX row formatter compares against these to decide what to bold.
max_scores = {
    key: data[column].max()
    for key, column in (
        ("avg_score", "avg_score"),
        ("coherence", "avg_coherence"),
        ("inspiration", "avg_inspiration"),
        ("narrative_fluency", "avg_narrative_fluency"),
        ("readability", "avg_readability"),
        ("word_complexity", "avg_word_complexity"),
    )
}

In [32]:
# Convert table to LaTeX table row
def to_latex_row(row, max_values=None):
    """Format one results row as a line of a LaTeX table.

    Every metric cell is rendered as the mean, a LaTeX plus-minus sign and the
    standard deviation, both with two decimals. The mean is wrapped in a LaTeX
    bold command when it equals the best value recorded for that metric.

    Args:
        row: Mapping-like record (e.g. a pandas Series or a dict) providing
            "story_id", "approach" and, for every metric, its "avg_*" and
            "std_*" columns.
        max_values: Optional mapping from metric key ("avg_score", "coherence",
            "inspiration", "narrative_fluency", "readability",
            "word_complexity") to the best mean for that metric. When omitted,
            the module-level ``max_scores`` mapping is used.

    Returns:
        The row as a string: the first four characters of the story id, the
        display name of the approach and the six metric cells, separated by
        " & " and terminated by a LaTeX line break.
    """
    # (key in the max-score mapping, mean column, std column), in table column order.
    metric_columns = (
        ("avg_score", "avg_score", "std_score"),
        ("coherence", "avg_coherence", "std_coherence"),
        ("inspiration", "avg_inspiration", "std_inspiration"),
        ("narrative_fluency", "avg_narrative_fluency", "std_narrative_fluency"),
        ("readability", "avg_readability", "std_readability"),
        ("word_complexity", "avg_word_complexity", "std_word_complexity"),
    )

    # Only fall back to the notebook-level mapping when the caller gave none.
    best = max_scores if max_values is None else max_values

    def map_approach(approach: str):
        return "Baseline" if approach == "baseline" else "DCP/P"

    def format_cell(max_key: str, avg_column: str, std_column: str) -> str:
        mean_text = f"{row[avg_column]:.2f}"
        # Exact comparison is intended: the best value is taken from this same column.
        if row[avg_column] == best[max_key]:
            mean_text = f"\\textbf{{{mean_text}}}"
        return f"{mean_text} $\\pm$ {row[std_column]:.2f}"

    story_id = row["story_id"][:4]
    approach = map_approach(row["approach"])
    cells = " & ".join(format_cell(*columns) for columns in metric_columns)
    return f"{story_id} & {approach} & {cells} \\\\"

In [33]:
# Print one LaTeX table line per story, in the current row order of `data`.
for latex_row in (to_latex_row(record) for _, record in data.iterrows()):
    print(latex_row)

fff6 & DCP/P & \textbf{6.81} $\pm$ 0.36 & \textbf{7.20} $\pm$ 0.61 & \textbf{6.34} $\pm$ 0.56 & \textbf{6.86} $\pm$ 1.00 & \textbf{7.22} $\pm$ 0.51 & 6.46 $\pm$ 0.58 \\
61b6 & DCP/P & 6.76 $\pm$ 0.31 & 7.07 $\pm$ 0.58 & 6.24 $\pm$ 0.44 & 6.82 $\pm$ 0.85 & 7.06 $\pm$ 0.45 & \textbf{6.62} $\pm$ 0.54 \\
6a8b & DCP/P & 6.72 $\pm$ 0.34 & 7.12 $\pm$ 0.56 & 6.25 $\pm$ 0.61 & 6.66 $\pm$ 0.97 & 7.09 $\pm$ 0.55 & 6.48 $\pm$ 0.59 \\
4aa9 & DCP/P & 6.69 $\pm$ 0.39 & 7.10 $\pm$ 0.59 & 6.14 $\pm$ 0.55 & 6.74 $\pm$ 0.86 & 7.11 $\pm$ 0.48 & 6.37 $\pm$ 0.53 \\
c185 & DCP/P & 6.67 $\pm$ 0.31 & 6.92 $\pm$ 0.63 & 6.22 $\pm$ 0.44 & 6.62 $\pm$ 0.91 & 7.09 $\pm$ 0.56 & 6.50 $\pm$ 0.55 \\
ea72 & DCP/P & 6.61 $\pm$ 0.31 & 6.87 $\pm$ 0.68 & 6.17 $\pm$ 0.60 & 6.54 $\pm$ 0.92 & 7.05 $\pm$ 0.46 & 6.43 $\pm$ 0.59 \\
3bd1 & DCP/P & 6.61 $\pm$ 0.34 & 7.00 $\pm$ 0.61 & 6.16 $\pm$ 0.41 & 6.49 $\pm$ 1.03 & 6.99 $\pm$ 0.54 & 6.38 $\pm$ 0.50 \\
3859 & DCP/P & 6.58 $\pm$ 0.35 & 6.79 $\pm$ 0.84 & 6.14 $\pm$ 0.60 & 6.52 $\pm

# Basic Stats

In [12]:
# Directory of exported game data: "results/exported-data" next to the current working directory.
root_chunks_path = Path.cwd().parent.joinpath("results", "exported-data")

In [13]:
# Every sub-directory of the export folder is treated as one game.
games = list(filter(Path.is_dir, root_chunks_path.iterdir()))

In [14]:
# Per-approach lists that collect the chunk count of each game.
num_nodes = {approach: [] for approach in ("baseline", "proposed")}

In [15]:
# Count the chunk directories of every game and file the count under the
# approach recorded in that game's data.json.
# Empty the lists first so that re-running this cell does not append the same
# counts a second time and skew the statistics below.
for counts in num_nodes.values():
    counts.clear()

for game in track(games):
    num_chunks = sum(1 for chunk in (game / "chunks").iterdir() if chunk.is_dir())
    # JSON is UTF-8 by specification; do not rely on the platform's default encoding.
    with open(game / "data.json", encoding="utf-8") as f:
        approach = json.load(f)["approach"]
    num_nodes[approach].append(num_chunks)

Output()

In [22]:
# Mean number of chunks per game for each approach and for both combined.
# `stats.sem` returns the standard error of the mean, not the standard deviation,
# so the spread is labelled "sem" rather than "std".
for approach, nodes in num_nodes.items():
    print(f"{approach}: avg={sum(nodes) / len(nodes):.2f}, sem={stats.sem(nodes):.2f}")

# Build the combined list once instead of concatenating it for every use.
all_nodes = num_nodes['baseline'] + num_nodes['proposed']
print(f"Both: avg={sum(all_nodes) / len(all_nodes):.2f}, sem={stats.sem(all_nodes):.2f}")

baseline: avg=577.70, std=53.29
proposed: avg=721.40, std=40.45
Both: avg=649.55, std=36.49
