In [1]:
# Install the notebook's third-party packages into the kernel's environment.
# NOTE(review): versions are unpinned, and lxml is not imported in the cells
# visible here — confirm it is still needed.
%pip install lxml polars


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Users/pletcher/code/writing/articles/2024-11-28_tragedy-dfs/.venv/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
import csv

import polars as pl

# Show up to 100 rows when a table is printed before polars truncates it.
pl.Config.set_tbl_rows(100)

# Corpus used by every cell below. The cells in this notebook read the columns
# "dramatist", "title", "speaker", "gender" and "tokens".
# NOTE(review): presumably one row per line of verse (per the file name) — confirm.
df = pl.read_parquet("./greek-tragedy-by-line_with-gender.parquet")

In [8]:
def ttr_by_play():
    """Compute the type-token ratio (TTR) of every play.

    All token lists belonging to one (dramatist, title) pair are pooled into a
    single list; the number of distinct tokens (types) is then divided by the
    total number of tokens.

    Returns:
        A frame with the columns ``dramatist``, ``title``, ``n_types``,
        ``n_tokens`` and ``ttr``, sorted by ascending ``ttr``.
    """
    # No pl.Config context is needed here: display settings only affect
    # printing, and nothing is printed inside this function.
    return (
        df.group_by("dramatist", "title")
        # Pool the per-row token lists into one list per play.
        .agg(pl.col("tokens").explode())
        .select(
            "dramatist",
            "title",
            n_types=pl.col("tokens").list.n_unique(),
            n_tokens=pl.col("tokens").list.len(),
        )
        # Derive the ratio from the two counts instead of recomputing them.
        .with_columns(ttr=pl.col("n_types") / pl.col("n_tokens"))
        .sort("ttr")
    )
    
def ttr_by_dramatist():
    """Average the per-play type-token ratios for each dramatist.

    Returns:
        A frame with the columns ``dramatist`` and ``avg_ttr`` (the unweighted
        mean of that dramatist's per-play ``ttr``), sorted by ascending
        ``avg_ttr``.
    """
    per_play = ttr_by_play()
    mean_ttr = pl.col("ttr").mean().alias("avg_ttr")

    # Every play counts equally, regardless of its length.
    per_dramatist = per_play.group_by("dramatist").agg(mean_ttr)

    return per_dramatist.select("dramatist", "avg_ttr").sort("avg_ttr")

In [26]:
def write_speakers_by_dramatist_and_play():
    """Write ``speakers.csv``: one row per distinct speaker of every play.

    The file has the header ``dramatist,title,speaker,gender``; the ``gender``
    field of every row is left empty. Rows are ordered by dramatist, title and
    speaker so that repeated runs produce the same file.

    Fields are written with the ``csv`` module, so a title or speaker that
    contains a comma or quote is quoted instead of corrupting the row, and a
    null speaker is written as an empty field. The file is encoded as UTF-8.

    NOTE(review): the file is opened for writing, so an existing
    ``speakers.csv`` (including any gender values filled in since) is
    overwritten.
    """
    drama_speakers = (
        df.select("dramatist", "title", "speaker")
        .group_by("dramatist", "title")
        # unique() alone returns speakers in no guaranteed order.
        .agg(pl.col("speaker").unique().sort())
        .sort("dramatist", "title")
    )

    # newline="" hands line-ending control to the csv writer; "\n" keeps the
    # line endings the hand-written version produced.
    with open("speakers.csv", "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["dramatist", "title", "speaker", "gender"])

        for dramatist, title, speakers in drama_speakers.iter_rows():
            writer.writerows(
                [dramatist, title, speaker, ""] for speaker in speakers
            )

## Characters by Dramatist and Gender

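How are male, female, and epicene characters represented in each tragedian's plays? The cells below measure this in two ways for every play: by the number of lines spoken and by the number of distinct speakers (personae).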


In [47]:
def n_lines_by_dramatist_and_gender(dramatist: str):
    """Count one dramatist's rows per play, split by speaker gender.

    Args:
        dramatist: Value of the ``dramatist`` column to keep.

    Returns:
        One row per ``title`` with the counts ``# lines male``,
        ``# lines female`` and ``# lines epicene`` (rows whose ``gender`` is
        ``"m"``, ``"f"`` and ``"x"`` respectively) and the matching
        ``pct lines ...`` columns, sorted by ascending ``pct lines female``.
        Each share is the count divided by the sum of the three counts, so
        rows with any other gender value are left out of the denominator.
    """
    gender = pl.col("gender")
    n_male = pl.col("# lines male")
    n_female = pl.col("# lines female")
    n_epicene = pl.col("# lines epicene")
    n_total = n_male + n_female + n_epicene

    counts = [
        (gender == "m").sum().alias("# lines male"),
        (gender == "f").sum().alias("# lines female"),
        (gender == "x").sum().alias("# lines epicene"),
    ]
    shares = [
        (n_male / n_total).alias("pct lines male"),
        (n_female / n_total).alias("pct lines female"),
        (n_epicene / n_total).alias("pct lines epicene"),
    ]

    return (
        df.filter(pl.col("dramatist") == dramatist)
        .group_by("title")
        .agg(counts)
        .with_columns(shares)
        .sort("pct lines female")
    )

In [76]:
def n_personae_by_dramatist_and_gender(dramatist: str):
    """Count one dramatist's distinct speakers per play, split by gender.

    Args:
        dramatist: Value of the ``dramatist`` column to keep.

    Returns:
        One row per ``title`` with the counts ``# personae male``,
        ``# personae female`` and ``# personae epicene`` (speakers whose
        ``gender`` is ``"m"``, ``"f"`` and ``"x"`` respectively) and the
        matching ``pct personae ...`` columns, sorted by ascending
        ``pct personae female``. Each share is the count divided by the sum of
        the three counts.
    """
    n_male = pl.col("# personae male")
    n_female = pl.col("# personae female")
    n_epicene = pl.col("# personae epicene")
    cast_size = n_male + n_female + n_epicene

    # Keep a single row per (title, speaker) so each persona is counted once;
    # the gender is read from whichever row is kept.
    personae = df.filter(pl.col("dramatist") == dramatist).unique(
        ["title", "speaker"]
    )

    tallies = personae.group_by("title").agg(
        (pl.col("gender") == "m").sum().alias("# personae male"),
        (pl.col("gender") == "f").sum().alias("# personae female"),
        (pl.col("gender") == "x").sum().alias("# personae epicene"),
    )

    with_shares = tallies.with_columns(
        (n_male / cast_size).alias("pct personae male"),
        (n_female / cast_size).alias("pct personae female"),
        (n_epicene / cast_size).alias("pct personae epicene"),
    )

    return with_shares.sort("pct personae female")


### Aeschylus

In [58]:
# Per-play line counts and shares by speaker gender for Aeschylus,
# ordered from the lowest to the highest share of female lines.
aeschylus_lines = n_lines_by_dramatist_and_gender("Aeschylus")

print(aeschylus_lines)


shape: (7, 7)
┌────────────────────┬─────────┬─────────┬─────────┬────────────────┬───────────┬───────────┐
│ title              ┆ # lines ┆ # lines ┆ # lines ┆ pct lines male ┆ pct lines ┆ pct lines │
│ ---                ┆ male    ┆ female  ┆ epicene ┆ ---            ┆ female    ┆ epicene   │
│ str                ┆ ---     ┆ ---     ┆ ---     ┆ f64            ┆ ---       ┆ ---       │
│                    ┆ u32     ┆ u32     ┆ u32     ┆                ┆ f64       ┆ f64       │
╞════════════════════╪═════════╪═════════╪═════════╪════════════════╪═══════════╪═══════════╡
│ Πέρσαι             ┆ 887     ┆ 158     ┆ 0       ┆ 0.848804       ┆ 0.151196  ┆ 0.0       │
│ Προμηθεὺς δεσμώτης ┆ 764     ┆ 319     ┆ 0       ┆ 0.705448       ┆ 0.294552  ┆ 0.0       │
│ Ἀγαμέμνων          ┆ 1054    ┆ 511     ┆ 0       ┆ 0.673482       ┆ 0.326518  ┆ 0.0       │
│ Ἑπτὰ ἐπὶ Θήβας     ┆ 472     ┆ 587     ┆ 0       ┆ 0.445703       ┆ 0.554297  ┆ 0.0       │
│ Ἱκέτιδες           ┆ 371     ┆ 572     ┆ 0  

In [77]:
# Per-play counts and shares of distinct speakers by gender for Aeschylus,
# ordered from the lowest to the highest share of female personae.
aeschylus_personae = n_personae_by_dramatist_and_gender("Aeschylus")

print(aeschylus_personae)

shape: (7, 7)
┌───────────┬────────────┬────────────┬────────────┬──────────┬──────────┬──────────────────────┐
│ title     ┆ # personae ┆ # personae ┆ # personae ┆ pct      ┆ pct      ┆ pct personae epicene │
│ ---       ┆ male       ┆ female     ┆ epicene    ┆ personae ┆ personae ┆ ---                  │
│ str       ┆ ---        ┆ ---        ┆ ---        ┆ male     ┆ female   ┆ f64                  │
│           ┆ u32        ┆ u32        ┆ u32        ┆ ---      ┆ ---      ┆                      │
│           ┆            ┆            ┆            ┆ f64      ┆ f64      ┆                      │
╞═══════════╪════════════╪════════════╪════════════╪══════════╪══════════╪══════════════════════╡
│ Πέρσαι    ┆ 5          ┆ 1          ┆ 0          ┆ 0.833333 ┆ 0.166667 ┆ 0.0                  │
│ Ἱκέτιδες  ┆ 4          ┆ 1          ┆ 0          ┆ 0.8      ┆ 0.2      ┆ 0.0                  │
│ Ἀγαμέμνων ┆ 5          ┆ 2          ┆ 0          ┆ 0.714286 ┆ 0.285714 ┆ 0.0                  │
│ Προμ

### Sophocles

In [78]:
# Per-play line counts and shares by speaker gender for Sophocles,
# ordered from the lowest to the highest share of female lines.
sophocles_lines = n_lines_by_dramatist_and_gender("Sophocles")

print(sophocles_lines)

shape: (8, 7)
┌─────────────────────┬─────────┬─────────┬─────────┬────────────────┬───────────┬───────────┐
│ title               ┆ # lines ┆ # lines ┆ # lines ┆ pct lines male ┆ pct lines ┆ pct lines │
│ ---                 ┆ male    ┆ female  ┆ epicene ┆ ---            ┆ female    ┆ epicene   │
│ str                 ┆ ---     ┆ ---     ┆ ---     ┆ f64            ┆ ---       ┆ ---       │
│                     ┆ u32     ┆ u32     ┆ u32     ┆                ┆ f64       ┆ f64       │
╞═════════════════════╪═════════╪═════════╪═════════╪════════════════╪═══════════╪═══════════╡
│ Φιλοκτήτης          ┆ 1465    ┆ 0       ┆ 0       ┆ 1.0            ┆ 0.0       ┆ 0.0       │
│ Οἰδίπους Τύραννος   ┆ 1361    ┆ 121     ┆ 0       ┆ 0.918354       ┆ 0.081646  ┆ 0.0       │
│ Οἰδίπους ἐπὶ Κολωνῷ ┆ 1548    ┆ 267     ┆ 0       ┆ 0.852893       ┆ 0.147107  ┆ 0.0       │
│ Ἀντιγόνη            ┆ 994     ┆ 263     ┆ 0       ┆ 0.790772       ┆ 0.209228  ┆ 0.0       │
│ Αἴας                ┆ 1086    ┆ 28

In [79]:
# Per-play counts and shares of distinct speakers by gender for Sophocles,
# ordered from the lowest to the highest share of female personae.
sophocles_personae = n_personae_by_dramatist_and_gender("Sophocles")

print(sophocles_personae)

shape: (8, 7)
┌───────────────┬───────────────┬──────────────┬──────────────┬──────────┬──────────┬──────────────┐
│ title         ┆ # personae    ┆ # personae   ┆ # personae   ┆ pct      ┆ pct      ┆ pct personae │
│ ---           ┆ male          ┆ female       ┆ epicene      ┆ personae ┆ personae ┆ epicene      │
│ str           ┆ ---           ┆ ---          ┆ ---          ┆ male     ┆ female   ┆ ---          │
│               ┆ u32           ┆ u32          ┆ u32          ┆ ---      ┆ ---      ┆ f64          │
│               ┆               ┆              ┆              ┆ f64      ┆ f64      ┆              │
╞═══════════════╪═══════════════╪══════════════╪══════════════╪══════════╪══════════╪══════════════╡
│ Φιλοκτήτης    ┆ 6             ┆ 0            ┆ 0            ┆ 1.0      ┆ 0.0      ┆ 0.0          │
│ Οἰδίπους      ┆ 8             ┆ 1            ┆ 0            ┆ 0.888889 ┆ 0.111111 ┆ 0.0          │
│ Τύραννος      ┆               ┆              ┆              ┆          ┆   

### Euripides

In [83]:
# Per-play line counts and shares by speaker gender for Euripides,
# ordered from the lowest to the highest share of female lines.
euripides_lines = n_lines_by_dramatist_and_gender("Euripides")

print(euripides_lines)

shape: (19, 7)
┌──────────────────────┬─────────┬─────────┬─────────┬────────────────┬───────────┬───────────┐
│ title                ┆ # lines ┆ # lines ┆ # lines ┆ pct lines male ┆ pct lines ┆ pct lines │
│ ---                  ┆ male    ┆ female  ┆ epicene ┆ ---            ┆ female    ┆ epicene   │
│ str                  ┆ ---     ┆ ---     ┆ ---     ┆ f64            ┆ ---       ┆ ---       │
│                      ┆ u32     ┆ u32     ┆ u32     ┆                ┆ f64       ┆ f64       │
╞══════════════════════╪═════════╪═════════╪═════════╪════════════════╪═══════════╪═══════════╡
│ Κύκλωψ               ┆ 730     ┆ 0       ┆ 0       ┆ 1.0            ┆ 0.0       ┆ 0.0       │
│ Ἄλκηστις             ┆ 1022    ┆ 143     ┆ 0       ┆ 0.877253       ┆ 0.122747  ┆ 0.0       │
│ Ῥῆσος                ┆ 869     ┆ 132     ┆ 0       ┆ 0.868132       ┆ 0.131868  ┆ 0.0       │
│ Ἡρακλῆς              ┆ 1243    ┆ 200     ┆ 0       ┆ 0.8614         ┆ 0.1386    ┆ 0.0       │
│ Ἡρακλεῖδαι           ┆ 

In [82]:
# Per-play counts and shares of distinct speakers by gender for Euripides,
# ordered from the lowest to the highest share of female personae.
euripides_personae = n_personae_by_dramatist_and_gender("Euripides")

print(euripides_personae)

shape: (19, 7)
┌───────────────┬───────────────┬──────────────┬──────────────┬──────────┬──────────┬──────────────┐
│ title         ┆ # personae    ┆ # personae   ┆ # personae   ┆ pct      ┆ pct      ┆ pct personae │
│ ---           ┆ male          ┆ female       ┆ epicene      ┆ personae ┆ personae ┆ epicene      │
│ str           ┆ ---           ┆ ---          ┆ ---          ┆ male     ┆ female   ┆ ---          │
│               ┆ u32           ┆ u32          ┆ u32          ┆ ---      ┆ ---      ┆ f64          │
│               ┆               ┆              ┆              ┆ f64      ┆ f64      ┆              │
╞═══════════════╪═══════════════╪══════════════╪══════════════╪══════════╪══════════╪══════════════╡
│ Κύκλωψ        ┆ 9             ┆ 0            ┆ 0            ┆ 1.0      ┆ 0.0      ┆ 0.0          │
│ Ῥῆσος         ┆ 11            ┆ 2            ┆ 0            ┆ 0.846154 ┆ 0.153846 ┆ 0.0          │
│ Ἄλκηστις      ┆ 8             ┆ 2            ┆ 0            ┆ 0.8      ┆ 0