## Plotting where messenger speeches occur in tragedy

In [None]:
import json

import polars as pl

# Build `messenger_df`: one row per line of every catalogued messenger speech,
# excluding lines whose speaker is "Χορός" (the chorus).
#
# Inputs:
#   ./tragedy.parquet  -- corpus rows; the columns read here are
#                         dramatist, title, n, speaker and text.
#   ./messenger_speeches_updated_no-satyr-play.json -- a list of plays, each
#                         with dramatist, title, year, total_lines and
#                         speeches (pairs of [first_line, last_line]).
df = pl.read_parquet("./tragedy.parquet")
# JSON is UTF-8 by specification; name the encoding explicitly so the
# platform's default text encoding is never used to decode the file.
with open("./messenger_speeches_updated_no-satyr-play.json", encoding="utf-8") as f:
    plays = json.load(f)

messenger_pre_df = []

for play in plays:
    dramatist = play["dramatist"]
    total_lines = play["total_lines"]
    total_messenger_lines = 0

    title = play["title"]

    # Narrow the corpus to this play once, instead of re-scanning the whole
    # corpus for every single line of every speech.
    play_df = df.filter(
        pl.col("dramatist") == dramatist,
        pl.col("title") == title,
    )

    for line_pair in play["speeches"]:
        first_line, last_line = line_pair[0], line_pair[1]

        # Both endpoints belong to the speech (the range below is inclusive),
        # so the speech length is last - first + 1.
        total_messenger_lines += last_line - first_line + 1

        for line in range(first_line, last_line + 1):
            ref = play_df.filter(pl.col("n") == line)

            # Line numbers with no row in the corpus are skipped.
            if ref["text"].is_empty():
                continue

            # When several rows share a line number, the first row decides
            # the speaker and all of their texts are joined below.
            speaker = ref[0].select("speaker").item()

            if speaker == "Χορός":
                continue

            messenger_pre_df.append(
                dict(
                    dramatist=dramatist,
                    title=title,
                    n=line,
                    speaker=speaker,
                    text=ref["text"].str.join(" ").item(),
                    total_lines=total_lines,
                    year=play["year"],
                )
            )

    # Uncomment for a per-play summary of how much of the play is messenger speech:
    # print(f"{play['title']}: messenger_lines: {total_messenger_lines}; total_lines: {total_lines}; pct_messenger_lines: {(total_messenger_lines / total_lines) * 100}")

messenger_df = pl.DataFrame(messenger_pre_df)

In [71]:
# Collapse the per-line records to one row per (dramatist, title, speaker).
# `n` becomes the mean line number of that speaker's messenger lines;
# `total_lines` and `year` are constant within a play, so the first value is taken.
per_speaker = messenger_df.group_by("dramatist", "title", "speaker").agg(
    pl.first("total_lines"),
    pl.first("year"),
    pl.mean("n"),
)

# Order chronologically, then by position within the play, and add:
#   - a combined label column used for chart tooltips
#   - `percent`: mean line number divided by the play's length
#     (a fraction between 0 and 1, not multiplied by 100)
by_play_and_line = per_speaker.sort("year", "n").with_columns(
    pl.concat_str(["dramatist", "title", "speaker"], separator=" ").alias(
        "Dramatist-Title-Speaker"
    ),
    percent=pl.col("n") / pl.col("total_lines"),
)

by_play_and_line

dramatist,title,speaker,total_lines,year,n,Dramatist-Title-Speaker,percent
str,str,str,i64,i64,f64,str,f64
"""Aeschylus""","""Persians""","""Ἄγγελος""",1076,-472,406.644444,"""Aeschylus Persians Ἄγγελος""",0.377922
"""Aeschylus""","""Seven Against Thebes""","""Ἄγγελος""",1084,-467,515.225352,"""Aeschylus Seven Against Thebes…",0.4753
"""Aeschylus""","""Agamemnon""","""Κῆρυξ""",1673,-458,588.732143,"""Aeschylus Agamemnon Κῆρυξ""",0.351902
"""Sophocles""","""Trachiniae""","""Λίχας""",1278,-450,269.0,"""Sophocles Trachiniae Λίχας""",0.210485
"""Sophocles""","""Trachiniae""","""Ἄγγελος""",1278,-450,298.866667,"""Sophocles Trachiniae Ἄγγελος""",0.233855
…,…,…,…,…,…,…,…
"""Euripides""","""Orestes""","""Ἄγγελος""",1693,-408,911.0,"""Euripides Orestes Ἄγγελος""",0.538098
"""Euripides""","""Orestes""","""Φρύξ""",1693,-408,1435.952756,"""Euripides Orestes Φρύξ""",0.848171
"""Euripides""","""Bacchae""","""Ἄγγελος""",1392,-405,922.230769,"""Euripides Bacchae Ἄγγελος""",0.662522
"""Euripides""","""IA""","""Ἄγγελος""",1629,-405,1576.0,"""Euripides IA Ἄγγελος""",0.967465


In [90]:
import altair as alt

# Scatter plot: one circle per (play, speaker), placed by the play's year on
# the x axis and by the speaker's mean relative position in the play on the y axis.
# (An alternative y encoding by raw mean line number would be alt.Y("n", ...).)
year_axis = alt.X(
    "year",
    sort=alt.Sort(order="ascending", field="year"),
    scale=alt.Scale(domain=[-480, -400]),
)
position_axis = alt.Y(
    "percent",
    scale=alt.Scale(domain=[0, 1]),
    title="Percentage through play",
)

base = (
    alt.Chart(by_play_and_line)
    .mark_circle(size=100)
    .encode(
        x=year_axis,
        y=position_axis,
        color=alt.Color("dramatist", title="Dramatist"),
        tooltip="Dramatist-Title-Speaker",
    )
    .properties(
        title="Occurrence of messenger speeches throughout Attic tragedy (by percentage of total lines)",
        height=1000,
        width="container",
    )
)

# Trend lines derived from the same chart. Each one replaces the colour
# encoding with a field-less one (no legend) so the line is not split by dramatist.
loess_trend = (
    base.transform_loess("year", "percent")
    .mark_line(color="cadetblue", strokeDash=[8, 8])
    .encode(color=alt.Color(legend=None, scale=alt.Scale(scheme="accent")))
)
linear_trend = (
    base.transform_regression("year", "percent")
    .mark_line(color="firebrick", strokeDash=[1, 1])
    .encode(color=alt.Color(legend=None, scale=alt.Scale(scheme="blues")))
)

# Messengers become more reliable the later they appear in the play -- especially apparent for plays that have early vs. late messengers
# Trachiniae is an exception -- maybe?
base + loess_trend + linear_trend