# Analysis of majority opinions vs dissenting opinions

## Load opinions and separate majority, concurrence, and dissent

In [1]:
# Move the working directory up two levels (quietly) so the relative "data/..." path
# used below resolves -- presumably to the repository root; confirm.
%cd -q ../..

In [2]:
import math
from pathlib import Path

from curiam.preprocessing import inception_tsv
from curiam import categories

In [3]:
opinions_dir = Path("data/main/annotated/updated_names")


def load_opinions(pattern):
    """Parse every file in ``opinions_dir`` whose name matches ``pattern``.

    Files are processed in filename order so that the resulting list is
    deterministic regardless of the order in which the filesystem yields them.

    Args:
        pattern: Glob pattern relative to ``opinions_dir`` (e.g. ``"*.tsv"``).

    Returns:
        A list with one ``inception_tsv.process_opinion_file`` result per
        matching file.
    """
    paths = sorted(opinions_dir.glob(pattern), key=lambda path: path.name)
    return [inception_tsv.process_opinion_file(path, path.name) for path in paths]


# Every annotated opinion, then the three sub-groups selected by filename.
# ("ootc" presumably stands for "opinion of the court" -- confirm.)
opinions = load_opinions("*.tsv")
majority = load_opinions("*ootc*.tsv")
concurrence = load_opinions("*concurrence*.tsv")
dissent = load_opinions("*dissent*.tsv")






In [4]:
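# Optional (currently disabled): reload the data without the Kavanaugh outlier,
# i.e. without the files whose names contain "18_6662".
# NOTE: if this cell is re-enabled, build the filtered lists with a comprehension instead of
# calling pop() inside enumerate() -- removing items while iterating skips the element that
# follows each removal.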

# opinions_dir = Path("data/main/annotated/updated_names")


# op_files = list(sorted(opinions_dir.glob("*.tsv"), key= lambda path: path.name))
# for i, name in enumerate(op_files):
#     if "18_6662" in name.name:
#         op_files.pop(i)
#         print("REMOVED FILE")


# opinions = [inception_tsv.process_opinion_file(opinion_path, opinion_path.name)
#             for opinion_path in op_files]

# majority = [inception_tsv.process_opinion_file(opinion_path, opinion_path.name)
#             for opinion_path in sorted(opinions_dir.glob("*ootc*.tsv"), key= lambda path: path.name)]


# conc_files = list(sorted(opinions_dir.glob("*concurrence*.tsv"), key= lambda path: path.name))
# for i, name in enumerate(conc_files):
#     if "18_6662" in name.name:
#         conc_files.pop(i)
#         print("REMOVED FILE")

# concurrence = [inception_tsv.process_opinion_file(opinion_path, opinion_path.name)
#             for opinion_path in conc_files]

# dissent = [inception_tsv.process_opinion_file(opinion_path, opinion_path.name)
#             for opinion_path in sorted(opinions_dir.glob("*dissent*.tsv"), key= lambda path: path.name)]


In [5]:
# Number of opinions loaded from files matching the "*ootc*.tsv" pattern.
len(majority)

18

In [6]:
# Number of opinions loaded from files matching the "*concurrence*.tsv" pattern.
len(concurrence)

11

In [7]:
# Number of opinions loaded from files matching the "*dissent*.tsv" pattern.
len(dissent)

12

In [8]:
# Mean summed sentence length per majority opinion (len(sentence) is presumably its
# token count -- confirm). Divide by the actual number of opinions loaded rather than
# a hard-coded count, so the mean stays correct if the set of input files changes.
token_total = sum(len(sentence) for opinion in majority for sentence in opinion)
token_total / len(majority)

5292.611111111111

In [9]:
# Mean summed sentence length per concurrence (len(sentence) is presumably its
# token count -- confirm). Divide by the actual number of opinions loaded rather than
# a hard-coded count, so the mean stays correct if the set of input files changes.
token_total = sum(len(sentence) for opinion in concurrence for sentence in opinion)
token_total / len(concurrence)

1239.4545454545455

In [10]:
# Mean summed sentence length per dissent (len(sentence) is presumably its
# token count -- confirm). Divide by the actual number of opinions loaded rather than
# a hard-coded count, so the mean stays correct if the set of input files changes.
token_total = sum(len(sentence) for opinion in dissent for sentence in opinion)
token_total / len(dissent)

5899.083333333333

In [11]:
# Length of every loaded opinion (number of sentences it contains), smallest first.
sorted(map(len, opinions))

[6,
 10,
 10,
 12,
 23,
 26,
 30,
 44,
 95,
 95,
 96,
 96,
 101,
 101,
 102,
 105,
 113,
 126,
 141,
 155,
 171,
 174,
 175,
 176,
 176,
 179,
 213,
 215,
 225,
 227,
 227,
 244,
 245,
 247,
 257,
 289,
 332,
 334,
 337,
 518,
 620]

In [12]:
# Total number of sentences across all loaded opinions.
sentence_total = sum(map(len, opinions))
sentence_total

7068

In [13]:
def get_cat_freqs(annotations):
    """Tally how often each annotation category occurs and how long its spans are.

    Args:
        annotations: Iterable of per-sentence annotation collections. Each
            annotation must expose ``category``, ``start`` and ``end``.

    Returns:
        Dict mapping category -> ``{"count": int, "lengths": list}``, where
        ``lengths`` holds ``end - start + 1`` for every annotation of that
        category, in encounter order. Categories that never occur get no key.
    """
    tallies = {}
    for per_sentence in annotations:
        for item in per_sentence:
            label, span_start, span_end = item.category, item.start, item.end
            if label not in tallies:
                tallies[label] = {"count": 0, "lengths": []}
            entry = tallies[label]
            entry["count"] += 1
            entry["lengths"].append((span_end - span_start) + 1)
    return tallies

In [14]:
def summarize_annotations(group):
    """Collect annotations, category frequencies and total length for a group of opinions.

    Args:
        group: Iterable of opinions; each opinion is an iterable of sentences
            that support ``len()`` and ``get_annotations()``.

    Returns:
        A ``(annotations, cat_freqs, tok)`` tuple: the non-empty per-sentence
        annotation collections, the ``get_cat_freqs`` tally over them, and the
        sum of ``len(sentence)`` over every sentence in the group.
    """
    annotations = [sentence.get_annotations()
                   for opinion in group for sentence in opinion]
    # Sentences without any annotation contribute nothing to the tallies.
    annotations = [x for x in annotations if len(x) > 0]
    tok = sum(len(sentence) for opinion in group for sentence in opinion)
    return annotations, get_cat_freqs(annotations), tok


# Same statistics for each sub-group and for the full set of opinions.
maj_annotations, maj_cat_freqs, maj_tok = summarize_annotations(majority)
conc_annotations, conc_cat_freqs, conc_tok = summarize_annotations(concurrence)
diss_annotations, diss_cat_freqs, diss_tok = summarize_annotations(dissent)
op_annotations, op_cat_freqs, op_tok = summarize_annotations(opinions)

In [15]:
# Categories that occur at least once in the concurrences; a category that never
# occurs has no entry in conc_cat_freqs.
conc_cat_freqs.keys()

dict_keys(['Legal Source', 'Direct Quote', 'Metalinguistic Cue', 'Focal Term', 'Appeal to Meaning', 'Named Interpretive Rule', 'Definition', 'Language Source'])

In [16]:
def get_cell_color(val):
    r"""Format a ratio as a LaTeX table cell with a diverging background colour.

    Values in [0, 1) blend from blue towards white, values in [1, 2] blend from
    white towards orange, and anything outside [0, 2] gets a fixed gray cell.

    Args:
        val: The number to display.

    Returns:
        A string ``\cellcolor{<mix>}<val>`` with ``val`` shown to two decimals.
    """
    lower, middle, upper = 0, 1, 2
    opacity = 70

    # Out-of-range values cannot be placed on the scale; flag them in gray.
    if val < lower or val > upper:
        return rf"\cellcolor{{gray!50!white!{opacity}}}{val:.2f}"

    # Pick the half of the scale the value falls in: (mixed-in colour, base colour, bounds).
    if val < middle:
        blend, base, start, stop = "white", "blue", lower, middle
    else:
        blend, base, start, stop = "orange", "white", middle, upper

    # Percentage of the mixed-in colour: linear position of val within its half.
    strength = round(100 * (val - start) / (stop - start))
    return rf"\cellcolor{{{blend}!{strength}!{base}!{opacity}}}{val:.2f}"


# Spot check: 0.8 is below the midpoint of 1, so it lands in the white/blue half of the scale.
print(get_cell_color(.8))

\cellcolor{white!80!blue!70}0.80


In [17]:
def get_count_color(val, max_val=1990):
    r"""Format a raw count as a LaTeX table cell shaded on a logarithmic scale.

    The shade runs from white (count of 0 or 1) to full orange (``max_val``).

    Args:
        val: The count to display. ``0`` is treated as log-value 0.
        max_val: Count that maps to 100% orange. Defaults to 1990, the value
            previously hard-coded in this function.

    Returns:
        A string ``\cellcolor{orange!<pct>!white!70}<val>``. ``pct`` is clamped
        to 0-100, so counts above ``max_val`` render as full orange instead of
        producing a mix percentage outside the range xcolor accepts.

    Raises:
        ValueError: If ``max_val`` is not greater than 1 (the log scale would
            have zero or negative width), or if ``val`` is negative
            (``math.log`` domain error).
    """
    if max_val <= 1:
        raise ValueError("max_val must be greater than 1")

    mid_color = "white"
    high_color = "orange"
    opacity = 70

    log_max = math.log(max_val)
    # log(0) is undefined; an absent category is shown with the lightest shade.
    log_val = 0 if val == 0 else math.log(val)

    high_strength = round(100 * log_val / log_max)
    high_strength = max(0, min(100, high_strength))
    return fr"\cellcolor{{{high_color}!{high_strength}!{mid_color}!{opacity}}}{val}"

In [18]:
def get_relative_frequencies(conc, diss, conc_tok, diss_tok):
    r"""Print one LaTeX table row per category with relative frequencies of two groups.

    For every category in ``categories.ORDERED_CATEGORIES`` the per-token rate
    of the category in each group is divided by the per-token rate in the
    majority opinions and in all opinions. Each ratio is rounded to two
    decimals and rendered with ``get_cell_color``.

    Reads the notebook-level names ``maj_cat_freqs``, ``maj_tok``,
    ``op_cat_freqs`` and ``op_tok`` as the two baselines.

    Args:
        conc: Category tally (as returned by ``get_cat_freqs``) of the first group.
        diss: Category tally of the second group.
        conc_tok: Total length used to normalise ``conc``.
        diss_tok: Total length used to normalise ``diss``.

    Returns:
        None. Rows are printed as
        ``<category> & <conc/maj> & <conc/all> & <diss/maj> & <diss/all>\\``.

    Raises:
        KeyError: If a category present in a group is absent from
            ``maj_cat_freqs`` or ``op_cat_freqs``.
    """
    # Sentinel for "category absent from this group". It lies outside the 0-2
    # range handled by get_cell_color, so it is rendered as a gray cell.
    missing = -9999

    def ratios(freqs, tok, category):
        """Return (ratio vs. majority, ratio vs. all opinions) for one group."""
        if category not in freqs:
            return missing, missing
        rate = freqs[category]["count"] / tok
        maj_rate = maj_cat_freqs[category]["count"] / maj_tok
        op_rate = op_cat_freqs[category]["count"] / op_tok
        return rate / maj_rate, rate / op_rate

    for k in categories.ORDERED_CATEGORIES:
        # Both groups are handled identically, so a category missing from the
        # second group no longer leaves its ratios undefined or stale.
        maj_ratio, op_ratio = ratios(conc, conc_tok, k)
        diss_maj_ratio, diss_op_ratio = ratios(diss, diss_tok, k)

        maj_ratio = get_cell_color(round(maj_ratio, 2))
        op_ratio = get_cell_color(round(op_ratio, 2))
        diss_maj_ratio = get_cell_color(round(diss_maj_ratio, 2))
        diss_op_ratio = get_cell_color(round(diss_op_ratio, 2))
        print(f"{k} & {maj_ratio} & {op_ratio} & {diss_maj_ratio} & {diss_op_ratio}\\\\")


In [19]:
# One LaTeX row per category: raw counts for majority / concurrence / dissent, the
# category name, then each group's per-token rate relative to the rate in all opinions.
for k in categories.ORDERED_CATEGORIES:
    op_rate = op_cat_freqs[k]["count"] / op_tok
    maj_count = maj_cat_freqs[k]["count"]
    diss_count = diss_cat_freqs[k]["count"]

    maj_ratio = (maj_count / maj_tok) / op_rate
    diss_ratio = (diss_count / diss_tok) / op_rate

    if k in conc_cat_freqs:
        conc_count = conc_cat_freqs[k]["count"]
        conc_ratio = (conc_count / conc_tok) / op_rate
    else:
        # Category never occurs in a concurrence: show a count of 0 (not the previous
        # row's count) and a sentinel ratio that get_cell_color renders in gray.
        conc_count = 0
        conc_ratio = -9999

    maj_ratio = get_cell_color(maj_ratio)
    conc_ratio = get_cell_color(conc_ratio)
    diss_ratio = get_cell_color(diss_ratio)
    print(f"{get_count_color(maj_count)} & {get_count_color(conc_count)} & {get_count_color(diss_count)} & {k} & {maj_ratio} & {conc_ratio} & {diss_ratio}\\\\")

\cellcolor{orange!80!white!70}442 & \cellcolor{orange!48!white!70}37 & \cellcolor{orange!83!white!70}564 & Focal Term & \cellcolor{white!80!blue!70}0.80 & \cellcolor{white!47!blue!70}0.47 & \cellcolor{orange!37!white!70}1.37\\
\cellcolor{orange!62!white!70}114 & \cellcolor{orange!29!white!70}9 & \cellcolor{orange!66!white!70}150 & Definition & \cellcolor{white!79!blue!70}0.79 & \cellcolor{white!43!blue!70}0.43 & \cellcolor{orange!39!white!70}1.39\\
\cellcolor{orange!87!white!70}758 & \cellcolor{orange!63!white!70}121 & \cellcolor{orange!90!white!70}905 & Metalinguistic Cue & \cellcolor{white!80!blue!70}0.80 & \cellcolor{white!89!blue!70}0.89 & \cellcolor{orange!29!white!70}1.29\\
\cellcolor{orange!94!white!70}1247 & \cellcolor{orange!70!white!70}198 & \cellcolor{orange!93!white!70}1132 & Direct Quote & \cellcolor{white!91!blue!70}0.91 & \cellcolor{orange!1!white!70}1.01 & \cellcolor{orange!12!white!70}1.12\\
\cellcolor{orange!100!white!70}1990 & \cellcolor{orange!76!white!70}323 & \cel

In [None]:
# Print the LaTeX rows: the first two ratio columns describe the concurrences,
# the last two the dissents (each vs. majority opinions and vs. all opinions).
get_relative_frequencies(conc_cat_freqs, diss_cat_freqs, conc_tok, diss_tok)

Focal Term & \cellcolor{white!58!blue!70}0.58 & \cellcolor{white!47!blue!70}0.47 & \cellcolor{orange!72!white!70}1.72 & \cellcolor{orange!37!white!70}1.37\\
Definition & \cellcolor{white!55!blue!70}0.55 & \cellcolor{white!43!blue!70}0.43 & \cellcolor{orange!77!white!70}1.77 & \cellcolor{orange!39!white!70}1.39\\
Metalinguistic Cue & \cellcolor{orange!12!white!70}1.12 & \cellcolor{white!89!blue!70}0.89 & \cellcolor{orange!61!white!70}1.61 & \cellcolor{orange!29!white!70}1.29\\
Direct Quote & \cellcolor{orange!11!white!70}1.11 & \cellcolor{orange!1!white!70}1.01 & \cellcolor{orange!22!white!70}1.22 & \cellcolor{orange!12!white!70}1.12\\
Legal Source & \cellcolor{orange!13!white!70}1.13 & \cellcolor{orange!15!white!70}1.15 & \cellcolor{white!94!blue!70}0.94 & \cellcolor{white!95!blue!70}0.95\\
Language Source & \cellcolor{white!76!blue!70}0.76 & \cellcolor{white!71!blue!70}0.71 & \cellcolor{orange!20!white!70}1.2 & \cellcolor{orange!13!white!70}1.13\\
Named Interpretive Rule & \cellcolor{

In [None]:
# FIXME: stale call. get_relative_frequencies is defined in this notebook with four
# required parameters (conc, diss, conc_tok, diss_tok); this two-argument call raises
# TypeError on a fresh run. The two-column output shown for this cell does not match
# what the current definition prints, so it presumably predates that signature.
print("Dissents")
get_relative_frequencies(diss_cat_freqs, diss_tok)

Dissents
Focal Term & 1.72 & 1.37\\
Definition & 1.77 & 1.39\\
Metalinguistic Cue & 1.61 & 1.29\\
Direct Quote & 1.22 & 1.12\\
Legal Source & 0.94 & 0.95\\
Language Source & 1.20 & 1.13\\
Named Interpretive Rule & 1.28 & 0.95\\
Example Use & 1.75 & 1.43\\
Appeal to Meaning & 1.39 & 1.23\\


In [None]:
# FIXME: stale call. get_relative_frequencies is defined in this notebook with four
# required parameters (conc, diss, conc_tok, diss_tok); this two-argument call raises
# TypeError on a fresh run.
get_relative_frequencies(conc_cat_freqs, conc_tok)