In [1]:
import string
import pandas as pd
import numpy as np
from nltk import word_tokenize
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
# Read the justice-centred docket metadata into a DataFrame; the handle is
# closed as soon as pandas has finished parsing.
with open('./corpora/washington-law/justice_centered_docket_post2000.csv') as metadataFile:
    dfAllMetadata = pd.read_csv(metadataFile)

In [3]:
# List every column available in the raw docket metadata.
dfAllMetadata.columns

Index([u'caseId', u'docketId', u'caseIssuesId', u'voteId', u'dateDecision',
       u'decisionType', u'usCite', u'sctCite', u'ledCite', u'lexisCite',
       u'term', u'naturalCourt', u'chief', u'docket', u'caseName',
       u'dateArgument', u'dateRearg', u'petitioner', u'petitionerState',
       u'respondent', u'respondentState', u'jurisdiction', u'adminAction',
       u'adminActionState', u'threeJudgeFdc', u'caseOrigin',
       u'caseOriginState', u'caseSource', u'caseSourceState',
       u'lcDisagreement', u'certReason', u'lcDisposition',
       u'lcDispositionDirection', u'declarationUncon', u'caseDisposition',
       u'caseDispositionUnusual', u'partyWinning', u'precedentAlteration',
       u'voteUnclear', u'issue', u'issueArea', u'decisionDirection',
       u'decisionDirectionDissent', u'authorityDecision1',
       u'authorityDecision2', u'lawType', u'lawSupp', u'lawMinor',
       u'majOpinWriter', u'majOpinAssigner', u'splitVote', u'majVotes',
       u'minVotes', u'justice', u'jus

In [4]:
# Columns of the docket metadata kept for the analysis; everything else in
# dfAllMetadata is left out of dfMetadata.
relevantColumns = [
    'caseIssuesId',
    'decisionType',
    'petitioner',
    'petitionerState',
    'respondent',
    'respondentState',
    'jurisdiction',
    'caseOrigin',
    'caseSource',
    'lcDisagreement',
    'lcDisposition',
    'lcDispositionDirection',
    'partyWinning',
    'precedentAlteration',
    'decisionDirection',
    'decisionDirectionDissent',
    'authorityDecision1',
    'lawType',
    'majVotes',
    'minVotes',
    'justiceName',
    'vote',
    'opinion',
    'direction',
]

dfMetadata = dfAllMetadata[relevantColumns]

In [5]:
# Confirm that the reduced frame carries exactly the selected columns.
dfMetadata.columns

Index([u'caseIssuesId', u'decisionType', u'petitioner', u'petitionerState',
       u'respondent', u'respondentState', u'jurisdiction', u'caseOrigin',
       u'caseSource', u'lcDisagreement', u'lcDisposition',
       u'lcDispositionDirection', u'partyWinning', u'precedentAlteration',
       u'decisionDirection', u'decisionDirectionDissent',
       u'authorityDecision1', u'lawType', u'majVotes', u'minVotes',
       u'justiceName', u'vote', u'opinion', u'direction'],
      dtype='object')

In [14]:
# Load the per-speaker word statistics (has at least the Role, Name, Words and
# Turns columns used further down).
with open('./report-data/words.csv') as wordsFile:
    dfWords = pd.read_csv(wordsFile)

In [15]:
# Load the turn statistics from the report data.
with open('./report-data/turns.csv') as turnsFile:
    dfTurns = pd.read_csv(turnsFile)

In [16]:
# Keep only the rows spoken by justices, then show how many rows each justice
# contributes.
isJustice = dfWords['Role'] == 'justice'
dfWordsJustices = dfWords[isJustice]
dfWordsJustices['Name'].value_counts()

ROBERTS      198
SOTOMAYOR    197
GINSBURG     196
KAGAN        195
KENNEDY      193
BREYER       188
ALITO        181
SCALIA       166
THOMAS         1
Name: Name, dtype: int64

In [17]:
# Rows whose Role is 'other', i.e. every speaker not tagged as a justice above.
isOther = dfWords['Role'] == 'other'
dfWordsOthers = dfWords[isOther]

In [27]:
# One zeroed accumulator per justice, keyed by the justice's name; filled in by
# gatherStatistics.
wordTurnsJustices = {
    justice: {
        'name': justice,
        'words': 0,
        'turns': 0,
        'dockets': 0,
    }
    for justice in dfWordsJustices['Name'].value_counts().index
}

In [28]:
def gatherStatistics(row, stats):
    """Fold one row into the running totals of the speaker it belongs to.

    row   -- mapping with 'Name', 'Words' and 'Turns' entries.
    stats -- dict keyed by speaker name; the entry for row['Name'] must
             already exist and hold 'words', 'turns' and 'dockets' counters.

    The matching entry is updated in place: words and turns grow by the row's
    values and dockets grows by one. Nothing is returned.
    """
    speakerTotals = stats[row['Name']]
    speakerTotals['words'] += row['Words']
    speakerTotals['turns'] += row['Turns']
    speakerTotals['dockets'] += 1

In [29]:
# Recompute the per-justice totals from scratch: zero the accumulators first so
# that re-running this cell does not add the same rows a second time.
for justiceTotals in wordTurnsJustices.values():
    justiceTotals.update(words=0, turns=0, dockets=0)

# Walk the rows explicitly instead of going through DataFrame.apply: the call
# is needed only for its side effect on wordTurnsJustices, and apply's `reduce`
# argument no longer exists in current pandas releases.
for rowLabel, row in dfWordsJustices.iterrows():
    gatherStatistics(row, wordTurnsJustices)
wordTurnsJustices

{'ALITO': {'dockets': 181,
  'name': 'ALITO',
  'turns': 1953.0,
  'words': 109523.0},
 'BREYER': {'dockets': 188,
  'name': 'BREYER',
  'turns': 3498.0,
  'words': 216658.0},
 'GINSBURG': {'dockets': 196,
  'name': 'GINSBURG',
  'turns': 2024.0,
  'words': 85567.0},
 'KAGAN': {'dockets': 195,
  'name': 'KAGAN',
  'turns': 2494.0,
  'words': 145995.0},
 'KENNEDY': {'dockets': 193,
  'name': 'KENNEDY',
  'turns': 2151.0,
  'words': 78200.0},
 'ROBERTS': {'dockets': 198,
  'name': 'ROBERTS',
  'turns': 3659.0,
  'words': 145197.0},
 'SCALIA': {'dockets': 166,
  'name': 'SCALIA',
  'turns': 3439.0,
  'words': 122246.0},
 'SOTOMAYOR': {'dockets': 197,
  'name': 'SOTOMAYOR',
  'turns': 3593.0,
  'words': 120623.0},
 'THOMAS': {'dockets': 1, 'name': 'THOMAS', 'turns': 11.0, 'words': 353.0}}

In [73]:
# Unpack the per-justice totals into four parallel lists (same order in each)
# so they can be fed to DataFrame constructors.
justiceRecords = list(wordTurnsJustices.values())
indJustice = [rec['name'] for rec in justiceRecords]
docketsJustice = [rec['dockets'] for rec in justiceRecords]
turnsJustice = [rec['turns'] for rec in justiceRecords]
wordsJustice = [rec['words'] for rec in justiceRecords]

In [74]:
# Single-column frame of docket counts, indexed by justice name.
dfDocket = pd.DataFrame({'Dockets': docketsJustice}, index=indJustice)

In [75]:
# Order justices from most to fewest dockets. Rebinding the name (rather than
# sorting in place) keeps the cell free of hidden mutation of an existing frame.
dfDocket = dfDocket.sort_values(by='Dockets', ascending=False)

In [76]:
# Show the docket counts per justice after sorting.
dfDocket

Unnamed: 0,Dockets
ROBERTS,198
SOTOMAYOR,197
GINSBURG,196
KAGAN,195
KENNEDY,193
BREYER,188
ALITO,181
SCALIA,166
THOMAS,1


In [135]:
# Bar chart of the number of dockets per justice, with the exact count printed
# above each bar.
fig, ax = plt.subplots()
dfDocket['Dockets'].plot.bar(ax=ax, rot=45, fontsize=14)
ax.set_xlabel('Justices')
ax.set_ylabel('Number of dockets', fontsize=18)

# The y-range is derived from dfDocket, the frame actually plotted here; the
# extra 10% at the top leaves room for the count labels.
ax.set_ylim([dfDocket['Dockets'].min(), dfDocket['Dockets'].max() * 1.1])

# Bars sit at integer x positions 0, 1, 2, ...; shift each label slightly left
# so it is roughly centred over its bar.
for barPos, docketCount in enumerate(dfDocket['Dockets']):
    ax.text(barPos - 0.14, docketCount + 5, str(docketCount),
            color='black', fontsize=20, fontweight='bold')

# Dashed vertical marker between the 5th and 6th bar. It is named through the
# legend because a free-text label needs a y position inside the visible range.
# NOTE(review): 4.5 is a hand-picked position; confirm it is the intended cut-off.
cutOffLine = ax.axvline(4.5, color='b', linestyle='--', label='Cut-off boundary')
ax.legend(handles=[cutOffLine], fontsize=14)
plt.show()

In [131]:
# Rebuild the four parallel lists, this time leaving out THOMAS.
justiceRecords = [rec for rec in wordTurnsJustices.values()
                  if rec['name'] != 'THOMAS']
indJustice = [rec['name'] for rec in justiceRecords]
docketsJustice = [rec['dockets'] for rec in justiceRecords]
turnsJustice = [rec['turns'] for rec in justiceRecords]
wordsJustice = [rec['words'] for rec in justiceRecords]

In [None]:
def averageVal(row, val1, val2, colResult):
    """Store row[val1] / row[val2] under row[colResult] and return the row.

    row       -- mapping (e.g. a DataFrame row) that is modified in place.
    val1      -- key of the numerator.
    val2      -- key of the denominator; converted to float before dividing so
                 the result is never an integer division.
    colResult -- key under which the quotient is written.
    """
    numerator = row[val1]
    denominator = float(row[val2])
    row[colResult] = numerator / denominator
    return row

In [96]:
# Per-justice summary: number of dockets, total turns and average turns per
# docket, ordered by total turns (highest first).
# NOTE(review): this rebinds `dfTurns`, replacing the frame that was read from
# ./report-data/turns.csv earlier in the notebook.
dfTurns = pd.DataFrame({'Dockets': docketsJustice, 'Turns': turnsJustice},
                       index=indJustice, columns=['Dockets', 'Turns'])
dfTurns = dfTurns.sort_values(by='Turns', ascending=False)

# Column-wise division instead of a row-by-row apply; the float cast keeps the
# result a true division even when both columns hold integers.
dfTurns['Turns per Docket'] = dfTurns['Turns'] / dfTurns['Dockets'].astype(float)

In [124]:
# Show the turn summary per justice.
dfTurns

Unnamed: 0,Dockets,Turns,Turns per Docket
ROBERTS,198.0,3659.0,18.479798
SOTOMAYOR,197.0,3593.0,18.238579
BREYER,188.0,3498.0,18.606383
SCALIA,166.0,3439.0,20.716867
KAGAN,195.0,2494.0,12.789744
KENNEDY,193.0,2151.0,11.145078
GINSBURG,196.0,2024.0,10.326531
ALITO,181.0,1953.0,10.790055


In [139]:
# Total turns (left y-axis) next to average turns per docket (right y-axis),
# drawn as paired bars for each justice.
fig, ax = plt.subplots()
ax2 = ax.twinx()

# position=0 / position=1 place the two narrow bars on either side of each tick
# so they do not overlap.
dfTurns['Turns'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfTurns['Turns per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

# Label through the axes objects: the pyplot helpers act on whichever axes is
# current, and the twin created by twinx() has its x-axis hidden.
ax.set_xlabel('Justices', fontsize=16)
ax.set_ylabel('Turns', fontsize=18)
ax2.set_ylabel('Turns per Docket', fontsize=18)

# Dashed separators after the 1st and the 4th justice, drawn on the twin axes
# so they stay on top of both bar series.
# NOTE(review): the positions are hand-picked; confirm the grouping they mark.
ax2.axvline(0.5, color='r', linestyle='--', linewidth=2)
ax2.axvline(3.5, color='b', linestyle='--')
plt.show()

In [127]:
# Per-justice summary: number of dockets, total words and average words per
# docket, ordered by total words (highest first).
dfWordsByDocket = pd.DataFrame({'Dockets': docketsJustice, 'Words': wordsJustice},
                               index=indJustice, columns=['Dockets', 'Words'])
dfWordsByDocket = dfWordsByDocket.sort_values(by='Words', ascending=False)

# Column-wise division instead of a row-by-row apply; the float cast keeps the
# result a true division even when both columns hold integers.
dfWordsByDocket['Words per Docket'] = (
    dfWordsByDocket['Words'] / dfWordsByDocket['Dockets'].astype(float)
)

In [141]:
# Total words (left y-axis) next to average words per docket (right y-axis),
# drawn as paired bars for each justice.
fig, ax = plt.subplots()
ax2 = ax.twinx()

# position=0 / position=1 place the two narrow bars on either side of each tick
# so they do not overlap.
dfWordsByDocket['Words'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfWordsByDocket['Words per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

# Label through the axes objects: the pyplot helpers act on whichever axes is
# current, and the twin created by twinx() has its x-axis hidden.
ax.set_xlabel('Justices', fontsize=16)
ax.set_ylabel('Words', fontsize=18)
ax2.set_ylabel('Words per Docket', fontsize=18)

# Dashed separator after the 1st justice, drawn on the twin axes so it stays on
# top of both bar series.
# NOTE(review): the position is hand-picked; confirm the grouping it marks.
ax2.axvline(0.5, color='r', linestyle='--', linewidth=2)

plt.show()

In [145]:
# Number of rows per non-justice speaker as a bare array (names dropped),
# largest counts first.
dfWordsOthers['Name'].value_counts().values

array([14, 11, 11, 10, 10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  7,  7,  7,
        7,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1