In [1]:
# Change the working directory two levels up; -q suppresses the echo of the new path.
# NOTE(review): presumably this lands on the repository root — confirm against
# where this notebook lives.
%cd -q ../..

# autoreload mode 2: re-import all modules before each cell executes, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
from collections import Counter
from pathlib import Path

from dotenv import load_dotenv
from matplotlib import pyplot as plt

from scotus_metalang.diachronic_analysis import authors, summary_graphing
from scotus_metalang.diachronic_analysis.graphing import save_and_show

# Load environment variables (python-dotenv reads a .env file if one is found),
# then require the data root; a missing variable raises KeyError immediately.
load_dotenv()
data_path = os.environ["SCOTUS_METALANG_DATA_PATH"]

# Turn off matplotlib's interactive mode so figures are not drawn implicitly.
plt.ioff()

# Load every opinion JSON found one directory below <data_path>/cap/known_authors.
# sorted() makes the load order deterministic (directory listing order depends on
# the filesystem), and the explicit UTF-8 encoding keeps decoding independent of
# the platform's locale default.
opinions = []
for filepath in sorted(Path(f"{data_path}/cap/known_authors").glob("*/*.json")):
    with open(filepath, "r", encoding="utf-8") as f:
        opinion = json.load(f)
    opinions.append(opinion)

## Counts

#### How many cases total?

In [3]:
# Number of distinct cases: opinions that share an "scdb_id" collapse to one entry.
distinct_case_ids = {op["scdb_id"] for op in opinions}
num_case_ids = len(distinct_case_ids)
num_case_ids

2778

#### How many opinions total?

In [4]:
# `opinions` holds one parsed JSON document per loaded file, so its length is the opinion count.
len(opinions)

6484

## Graphs


In [5]:
# NOTE(review): disabled cell. `df` is not defined in any visible cell of this
# notebook, so re-enabling these lines as-is would fail on a fresh kernel.
# Either build the input from `opinions` or delete the cell.
# fig = summary_graphing.plot_cases_per_term(df)
# save_and_show(fig, "cases_per_term")

#### Opinion Types by Term

In [6]:
# Build the per-term data once (see summary_graphing.get_opinion_types_by_term)
# and feed the same object to both plotting helpers below.
opinion_types_by_term = summary_graphing.get_opinion_types_by_term(opinions)
# "abs" variant, handed to save_and_show under the name "opinion_types_by_term_absolute".
fig = summary_graphing.plot_opinion_types_by_term_abs(opinion_types_by_term)
save_and_show(fig, "opinion_types_by_term_absolute", prefix="summary_stats")
# "norm" variant of the same data; `fig` is deliberately reused for the second figure.
fig = summary_graphing.plot_opinion_types_by_term_norm(opinion_types_by_term)
save_and_show(fig, "opinion_types_by_term_normalized", prefix="summary_stats")

#### Opinion Types by Author

In [7]:
# Build the per-author data once (see summary_graphing.get_opinion_types_by_author)
# and feed the same object to both plotting helpers below.
opinion_types_by_author = summary_graphing.get_opinion_types_by_author(opinions)
# "abs" variant, handed to save_and_show under the name "opinion_types_by_author_absolute".
fig = summary_graphing.plot_opinion_types_abs(opinion_types_by_author)
save_and_show(fig, "opinion_types_by_author_absolute", prefix="summary_stats")
# "norm" variant of the same data; `fig` is deliberately reused for the second figure.
fig = summary_graphing.plot_opinion_types_norm(opinion_types_by_author)
save_and_show(fig, "opinion_types_by_author_normalized", prefix="summary_stats")

## Tables

#### Author opinion counts and term start/end

In [16]:
# Scratch check: str.capitalize() lowercases everything after the first character,
# so it cannot produce "O'Connor" (see the output below). The table cell that
# follows uses str.title() instead.
"O'Connor".capitalize()

"O'connor"

In [19]:
# Print one LaTeX table row per justice: name & opinion count & first term & last term.
# Counts come from the already-loaded `opinions` list; the earlier per-author
# filesystem glob was never read and has been dropped.

# End-year sentinel in authors.ORDERED_JUSTICES that is rendered as an em dash.
# NOTE(review): presumably marks justices with no end of tenure yet — confirm in authors.py.
OPEN_ENDED_TERM_END = 2100
# Author keys whose printed form cannot be derived from the key with str.title() alone.
DISPLAY_NAME_OVERRIDES = {"oconnor": "o'connor"}

author_counts = Counter(opinion["author"] for opinion in opinions)
for author in authors.ORDERED_JUSTICES:
    # Counter returns 0 for a missing key, so justices without loaded opinions still get a row.
    num_opinions = author_counts[author]
    tenure = authors.ORDERED_JUSTICES[author]
    term_start, term_end = tenure[0], tenure[1]
    if term_end == OPEN_ENDED_TERM_END:
        term_end = "—"
    # Use a separate display name rather than rebinding the loop variable.
    display_name = DISPLAY_NAME_OVERRIDES.get(author, author).title()
    print(f"{display_name} & {num_opinions} & {term_start} & {term_end}\\\\")

Brennan & 175 & 1956 & 1990\\
White & 226 & 1962 & 1993\\
Marshall & 157 & 1967 & 1991\\
Blackmun & 250 & 1970 & 1994\\
Powell & 41 & 1972 & 1987\\
Rehnquist & 323 & 1972 & 2005\\
Stevens & 888 & 1975 & 2010\\
O'Connor & 453 & 1981 & 2006\\
Scalia & 848 & 1986 & 2016\\
Kennedy & 523 & 1988 & 2018\\
Souter & 359 & 1990 & 2009\\
Thomas & 628 & 1991 & —\\
Ginsburg & 439 & 1993 & 2020\\
Breyer & 499 & 1994 & 2022\\
Roberts & 160 & 2005 & —\\
Alito & 245 & 2006 & —\\
Sotomayor & 155 & 2009 & —\\
Kagan & 81 & 2010 & —\\
Gorsuch & 24 & 2017 & —\\
Kavanaugh & 10 & 2018 & —\\


#### Cases with n opinions

In [9]:
# Tally opinions per case, then count how many cases have each tally,
# printing one "n & count" row per n followed by a total row.
scdb_ids = [entry["scdb_id"] for entry in opinions]
# Every opinion must be linked to a case id.
assert all(case_id is not None for case_id in scdb_ids)
opinions_per_case = Counter(scdb_ids)

num_cases_with_n_opinions = Counter(opinions_per_case.values())
for n_opinions in sorted(num_cases_with_n_opinions):
    n_cases = num_cases_with_n_opinions[n_opinions]
    print(f"{n_opinions} & {n_cases}")
total_cases = sum(num_cases_with_n_opinions.values())
print(f"Total & {total_cases}")


1 & 740
2 & 986
3 & 637
4 & 268
5 & 100
6 & 41
7 & 5
8 & 1
Total & 2778
