I sourced data from CAP based on super-scotus docket numbers. Do these docket numbers match up with scdb?

In [2]:
%cd -q ../..

In [None]:
import jsonlines
from collections import Counter, defaultdict

import pandas as pd

In [110]:
convokit = []
with open("data/convokit/cases.jsonl", "r") as f:
    reader = jsonlines.Reader(f)
    for row in reader:
        if row["year"] >= 1986:
            convokit.append(row)

In [111]:
convokit[0]

{'id': '1986_84-2022',
 'year': 1986,
 'citation': '479 US 335',
 'title': '324 Liquor Corporation v. Duffy',
 'petitioner': '324 Liquor Corporation',
 'respondent': 'Duffy',
 'docket_no': '84-2022',
 'court': 'Rehnquist Court',
 'decided_date': 'Jan 13, 1987',
 'url': 'https://www.oyez.org/cases/1986/84-2022',
 'transcripts': [{'name': 'Oral Argument - November 03, 1986',
   'url': 'https://apps.oyez.org/player/#/rehnquist1/oral_argument_audio/19212',
   'id': 19212,
   'case_id': '1986_84-2022'}],
 'adv_sides_inferred': False,
 'known_respondent_adv': True,
 'advocates': {'W. Stephen Cannon': {'id': 'w_stephen_cannon',
   'name': 'W. Stephen Cannon',
   'role': 'on behalf of the United States as amicus curiae supporting appellant',
   'side': 2},
  'Bertram K. Kantor': {'id': 'bertram_k_kantor',
   'name': 'Bertram K. Kantor',
   'role': 'on behalf of the appellant',
   'side': 1},
  'Christopher Keith Hall': {'id': 'christopher_keith_hall',
   'name': 'Christopher Keith Hall',
   'r

In [112]:
convokit_ids = [r["docket_no"] for r in convokit]

In [122]:
superscotus = []
with open("data/super_scotus/case_with_all_sources_with_companion_cases_tag.jsonl", "r") as f:
    reader = jsonlines.Reader(f)
    for row in reader:
        row.pop("utterances", None)
        row.pop("justia_sections", None)
        superscotus.append(row)

In [123]:
superscotus[0]

{'id': '1955_71',
 'year': 1955,
 'citation': '350 US 79',
 'title': 'Affronti v. United States',
 'petitioner': 'Affronti',
 'respondent': 'United States',
 'docket_no': '71',
 'court': 'Warren Court',
 'decided_date': 'Dec 5, 1955',
 'url': 'https://www.oyez.org/cases/1955/71',
 'transcripts': [{'name': 'Oral Argument - November 15, 1955',
   'url': 'https://apps.oyez.org/player/#/warren3/oral_argument_audio/13127',
   'id': 13127,
   'case_id': '1955_71'}],
 'adv_sides_inferred': True,
 'known_respondent_adv': True,
 'advocates': {'Harry F. Murphy': {'id': 'harry_f_murphy',
   'name': 'Harry F. Murphy',
   'side': 1},
  'John V. Lindsay': {'id': 'john_v_lindsay',
   'name': 'John V. Lindsay',
   'side': 0}},
 'win_side': 0.0,
 'win_side_detail': 2.0,
 'scdb_docket_id': '1955-009-01',
 'votes': {'j__john_m_harlan2': 2.0,
  'j__hugo_l_black': 2.0,
  'j__william_o_douglas': 2.0,
  'j__earl_warren': 2.0,
  'j__tom_c_clark': 2.0,
  'j__felix_frankfurter': 2.0,
  'j__harold_burton': 2.0,


In [56]:
superscotus_to_scdb_id = {}
for row in superscotus:
    if row["year"] >=1986:
        superscotus_to_scdb_id[row["id"]] = row["scdb_elements"]["caseId"]

KeyError: 'scdb_elements'

In [66]:
superscotus_cases_by_year = defaultdict(set)
for row in superscotus:
    if row["year"] >= 1986:
        superscotus_cases_by_year[row["year"]].add(row["docket_no"])
superscotus_year_counts = {k: len(v) for k, v in superscotus_cases_by_year.items()}

In [63]:
superscotus_year_counts = Counter([row["year"] for row in superscotus if row["year"] >=1986])
sorted(superscotus_year_counts.items())

[(1986, 146),
 (1987, 142),
 (1988, 136),
 (1989, 127),
 (1990, 115),
 (1991, 108),
 (1992, 106),
 (1993, 82),
 (1994, 80),
 (1995, 76),
 (1996, 81),
 (1997, 93),
 (1998, 79),
 (1999, 74),
 (2000, 77),
 (2001, 77),
 (2002, 76),
 (2003, 75),
 (2004, 75),
 (2005, 75),
 (2006, 71),
 (2007, 69),
 (2008, 78),
 (2009, 77),
 (2010, 78),
 (2011, 68),
 (2012, 73),
 (2013, 66),
 (2014, 53),
 (2015, 68),
 (2016, 62),
 (2017, 63),
 (2018, 70),
 (2019, 58)]

In [None]:
scdb = pd.read_csv("data/scdb/SCDB_2023_01_justiceCentered_Docket.csv", encoding="cp1252")
scdb_86_onward = scdb[(scdb.term >= 1986) & (scdb.term <= 2019)]

In [58]:
scdb_cases_by_term = defaultdict(set)
for i, row in scdb_86_onward.iterrows():
    scdb_cases_by_term[row["term"]].add(row.docket)
scdb_year_counts = {k: len(v) for k, v in scdb_cases_by_term.items()}

In [62]:
scdb_year_counts

{1986: 182,
 1987: 165,
 1988: 177,
 1989: 154,
 1990: 140,
 1991: 149,
 1992: 125,
 1993: 123,
 1994: 101,
 1995: 106,
 1996: 106,
 1997: 108,
 1998: 104,
 1999: 92,
 2000: 94,
 2001: 95,
 2002: 92,
 2003: 95,
 2004: 91,
 2005: 100,
 2006: 84,
 2007: 80,
 2008: 94,
 2009: 96,
 2010: 94,
 2011: 88,
 2012: 82,
 2013: 85,
 2014: 83,
 2015: 95,
 2016: 76,
 2017: 80,
 2018: 76,
 2019: 76}

In [11]:
sum([1 for row in superscotus if row["year"] >=1986])

2854

In [101]:
list(superscotus_cases_by_year[2014])[:10]

['14-15',
 '13-485',
 '13-553',
 '14-6368',
 '13-1032',
 '14-7955',
 '13-1175',
 '13-1041',
 '13-433',
 '14-114']

In [131]:
pd.set_option('display.max_columns', None)

In [134]:
display(scdb_86_onward[scdb_86_onward["docket"] == "13-895"])

Unnamed: 0,caseId,docketId,caseIssuesId,voteId,dateDecision,decisionType,usCite,sctCite,ledCite,lexisCite,term,naturalCourt,chief,docket,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,threeJudgeFdc,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,authorityDecision1,authorityDecision2,lawType,lawSupp,lawMinor,majOpinWriter,majOpinAssigner,splitVote,majVotes,minVotes,justice,justiceName,vote,opinion,direction,majority,firstAgreement,secondAgreement
89827,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-01,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,111,JGRoberts,2.0,1.0,1.0,1.0,105.0,
89828,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-02,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,105,AScalia,2.0,2.0,1.0,1.0,,
89829,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-03,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,106,AMKennedy,1.0,1.0,2.0,2.0,,
89830,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-04,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,108,CThomas,2.0,2.0,1.0,1.0,105.0,
89831,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-05,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,109,RBGinsburg,1.0,1.0,2.0,2.0,,
89832,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-06,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,110,SGBreyer,1.0,2.0,2.0,2.0,,
89833,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-07,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,112,SAAlito,2.0,1.0,1.0,1.0,105.0,
89834,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-08,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,113,SSotomayor,1.0,1.0,2.0,2.0,,
89835,2014-017,2014-017-01,2014-017-01-01,2014-017-01-01-01-09,3/25/2015,1,575 U.S. 254,135 S. Ct. 1257,191 L. Ed. 2d 314,2015 U.S. LEXIS 2122,2014,1704,Roberts,13-895,ALA. LEGISLATIVE BLACK CAUCUS v. ALABAMA,11/12/2014,,19.0,1.0,28.0,1.0,2.0,,,1.0,41.0,,41.0,,1.0,1.0,,1.0,1.0,5.0,0.0,1.0,0.0,0.0,20090.0,2.0,2.0,0.0,1.0,,2.0,231.0,,110.0,106.0,1,5,4,114,EKagan,1.0,1.0,2.0,2.0,,


In [None]:
display(scdb_86_onward[scdb_86_onward["docket"] == "13-1138"])

In [None]:
scdb_86_onward[scdb_86_onward["usCite"] == "575 U.S. 600"]

https://www.oyez.org/cases/2014/13-1433 not in super-scotus but https://www.oyez.org/cases/2014/14-7955 (which looks very similar) is

In [128]:
"13-1421" in convokit_ids

True

'126, ORIG.', original docket
 '13-1052', consolidated
 '13-1075', consolidated
 '13-1138', consolidated
 '13-1412', SF vs Sheehan MISSING
 '13-1421', BOA v Caulkett MISSING
 '13-1433', Brumfield v cain MISSING
 '13-534',
 '13-720',
 '13-7211',
 '13-896',
 '13-946',
 '13–1318',
 '14-116',
 '14-163',
 '14-212',
 '14-275',
 '14-378',
 '14-400',
 '14-46',
 '14-47',
 '14-49',
 '14-562',
 '14-571',
 '14-574',
 '14-593',
 '14-618',
 '14-6873',
 '14-939',
 '14–95'

In [141]:
# Cases that are in SCDB that aren't in superscotus
scdb_cases_by_term[2014].difference(superscotus_cases_by_year[2014])

{'126, ORIG.',
 '13-1052',
 '13-1075',
 '13-1138',
 '13-1412',
 '13-1421',
 '13-1433',
 '13-534',
 '13-720',
 '13-7211',
 '13-896',
 '13-946',
 '13–1318',
 '14-116',
 '14-163',
 '14-212',
 '14-275',
 '14-378',
 '14-400',
 '14-46',
 '14-47',
 '14-49',
 '14-562',
 '14-571',
 '14-574',
 '14-593',
 '14-618',
 '14-6873',
 '14-939',
 '14–95'}

In [None]:
# Cases that are in ConvoKit but not in Super-SCOTUS
missing = Counter()
for year, docket_numbers in scdb_cases_by_term.items():
    for d in docket_numbers:
        if d not in superscotus_cases_by_year[year]:
            if d in convokit_ids:
                missing[year] += 1

sorted(missing.items())

In [None]:
pd.DataFrame(zip(superscotus_year_counts.values(), scdb_year_counts.values()),
             columns=["Super-SCOTUS", "SCDB"], index=superscotus_year_counts.keys())