In [1]:
# Move the working directory up two levels; -q suppresses the directory echo.
# NOTE(review): presumably this lands on the repository root so that the
# package imports below resolve -- confirm.
# NOTE(review): the path is relative, so re-running this cell in a live
# kernel climbs two more levels each time.
%cd -q ../..

# Load the autoreload extension in mode 2 so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

from dotenv import load_dotenv
from matplotlib import pyplot as plt


from scotus_metalang.diachronic_analysis import authors, prediction_graphing
from scotus_metalang.diachronic_analysis.graphing import save_and_show

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Root directory of the data. Indexing os.environ directly means a missing
# variable fails right here with a KeyError.
data_path = os.environ["SCOTUS_METALANG_DATA_PATH"]
# Name of the model whose predictions are analysed; it is used as a
# directory component when prediction paths are built.
model_name = "binary_token_model_bert_large_8_epochs"

# Switch matplotlib's interactive mode off. The trailing semicolon stops the
# object returned by ioff() from being echoed as this cell's output.
plt.ioff();

<contextlib.ExitStack at 0x7f3acc1a2510>

### Read prediction data


In [3]:
# Pair each opinion JSON file (per justice, in ORDERED_JUSTICES order) with the
# prediction .txt file that has the same stem under the model's directory.
op_paths_to_pred_paths = {
    opinion_path: Path(
        f"{data_path}/predictions", model_name, author, f"{opinion_path.stem}.txt"
    )
    for author in authors.ORDERED_JUSTICES
    for opinion_path in Path(f"{data_path}/cap/known_authors/{author}").glob("*.json")
}

# Load everything through the project helper and preview the first rows.
df = prediction_graphing.load_data(op_paths_to_pred_paths)
df.head()

Unnamed: 0,docket_number,author,opinion_type,term,tokens,ft,mc,dq,les
0,86-1088,brennan,concurrence,1988,37,0,0,0,3
1,86-39,brennan,majority,1986,10770,26,110,1989,2780
2,88-515,brennan,concurring-in-part-and-dissenting-in-part,1988,849,3,10,155,243
3,87-548,brennan,dissent,1988,4067,17,11,568,543
4,88-2031,brennan,dissent,1989,11876,24,32,2652,2798


In [4]:
# Column names of the per-category counts in df that the plots below iterate over.
categories = [
    "ft",
    "mc",
    "dq",
    "les",
]

In [5]:
# Opinion length per term, saved without being displayed inline.
length_fig = prediction_graphing.plot_opinion_length_per_term(df)
save_and_show(
    length_fig,
    "opinion_length_by_term",
    prefix="predictions",
    show=False,
)

In [None]:
# One plot_frequency_by_author figure per category, each saved under its own name.
for cat in categories:
    author_fig = prediction_graphing.plot_frequency_by_author(df, cat)
    save_and_show(
        author_fig,
        f"frequency_by_author_{cat}",
        prefix="predictions",
        show=False,
    )

In [7]:
# One plot_frequency_by_term figure per category, each saved under its own name.
for cat in categories:
    term_fig = prediction_graphing.plot_frequency_by_term(df, cat)
    save_and_show(
        term_fig,
        f"frequency_by_term_{cat}",
        prefix="predictions",
        show=False,
    )

In [8]:
# Flag passed to the plotting helper as `ci`; it also decides the filename suffix.
ci = True
trend_suffix = "_trend_ci" if ci else "_trend"

for category in categories:
    trend_fig = prediction_graphing.plot_frequency_line_with_trend(df, category, ci=ci)
    save_and_show(
        trend_fig,
        f"rate_of_{category}{trend_suffix}",
        prefix="predictions",
        show=False,
    )

In [11]:
# All-category line graphs from the ideology-aware helper, saved under the
# "by_party" filename without being displayed inline.
ideology_fig = prediction_graphing.plot_frequency_line_all_cats_ideology(df)
save_and_show(
    ideology_fig,
    "line_graphs_with_trends_by_party",
    prefix="predictions",
    show=False,
)

In [9]:
# Draw the all-category line graphs twice: first with the helper's default
# arguments, then with ci=True. Each figure is saved under its own name.
for plot_kwargs, out_name in (
    ({}, "line_graphs_with_trends"),
    ({"ci": True}, "line_graphs_with_trends_ci"),
):
    fig = prediction_graphing.plot_frequency_line_all_cats(df, **plot_kwargs)
    save_and_show(fig, out_name, prefix="predictions")

In [10]:
# Frequencies by opinion type: one all-category line graph per opinion type,
# drawn from only the rows of df whose opinion_type matches.
opinion_types = [
    "concurrence",
    "majority",
    "concurring-in-part-and-dissenting-in-part",
    "dissent",
]
for op_type in opinion_types:
    type_mask = df["opinion_type"] == op_type
    type_fig = prediction_graphing.plot_frequency_line_all_cats(df[type_mask])
    save_and_show(
        type_fig,
        f"frequency_in_{op_type}",
        prefix="predictions",
        show=False,
    )

In [11]:
# Frequencies by justice ideology: for each ideology label, keep only the
# opinions written by justices mapped to that label and draw the all-category
# line graph for that subset.
for ideology in ["liberal", "conservative"]:
    # Walk the mapping once with .items() instead of iterating keys and
    # indexing back into it.
    authors_of_interest = [
        author
        for author, author_ideology in authors.JUSTICE_TO_IDEOLOGY.items()
        if author_ideology == ideology
    ]
    df_sample = df[df["author"].isin(authors_of_interest)]
    fig = prediction_graphing.plot_frequency_line_all_cats(df_sample)
    # Pass prefix by keyword, as every other save_and_show call in this
    # notebook does, so the call does not depend on positional order.
    save_and_show(fig, f"frequencies_{ideology}", prefix="predictions", show=False)

TODO: get sets of predicted focal terms by year

Thoughts on line graphs and confidence intervals:

Orange is the per-term average of opinion-level category rates (but these rates are themselves predictions).
Light blue/purple is 95% confidence interval.
Blue is least squares trendline.

This is a measurement issue (both measurement uncertainty and sampling uncertainty).

We don't have to diagnose or solve the potential issue that the trend is an artifact of predicting on data earlier than the training data.

### Resources

- Trendline: https://www.statology.org/matplotlib-trendline/ (adapted to new numpy.polynomial)
- Confidence interval: https://www.pythoncharts.com/python/line-chart-with-confidence-interval/#matplotlib