In [2]:
import string
import pandas as pd
import numpy as np
from nltk import word_tokenize
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [3]:
# Read the docket metadata CSV into a DataFrame.
metadataPath = './corpora/washington-law/justice_centered_docket_post2000.csv'
with open(metadataPath) as fMetadata:
    dfAllMetadata = pd.read_csv(fMetadata)

In [21]:
# Rewrite the docket label '126, ORIG.' as '126 orig' wherever it occurs.
isOriginalDocket = dfAllMetadata['docket'] == '126, ORIG.'
dfAllMetadata.loc[isOriginalDocket, 'docket'] = '126 orig'

In [38]:
# Columns kept from the full metadata table, in the order they appear in dfMetadata.
relevantColumns = [
    'docket',
    'petitioner', 'petitionerState',
    'respondent', 'respondentState',
    'jurisdiction', 'caseOrigin', 'caseSource',
    'lcDisagreement', 'caseDisposition', 'declarationUncon',
    'caseDispositionUnusual', 'lcDisposition', 'lcDispositionDirection',
    'partyWinning', 'precedentAlteration',
    'decisionDirection', 'decisionDirectionDissent',
    'authorityDecision1', 'lawType',
    'majVotes', 'minVotes',
    'justiceName', 'vote', 'direction',
]

# Column subset of the metadata used by the rest of the notebook.
dfMetadata = dfAllMetadata[relevantColumns]

In [39]:
# Display the column labels of the trimmed metadata frame to confirm the selection.
dfMetadata.columns

Index([u'docket', u'petitioner', u'petitionerState', u'respondent',
       u'respondentState', u'jurisdiction', u'caseOrigin', u'caseSource',
       u'lcDisagreement', u'caseDisposition', u'declarationUncon',
       u'caseDispositionUnusual', u'lcDisposition', u'lcDispositionDirection',
       u'partyWinning', u'precedentAlteration', u'decisionDirection',
       u'decisionDirectionDissent', u'authorityDecision1', u'lawType',
       u'majVotes', u'minVotes', u'justiceName', u'vote', u'direction'],
      dtype='object')

In [90]:
# Load the words table from the report data.
wordsPath = './report-data/words.csv'
with open(wordsPath) as f:
    dfWords = pd.read_csv(f)

In [91]:
# Load the turns table from the report data.
turnsPath = './report-data/turns.csv'
with open(turnsPath) as f:
    dfTurns = pd.read_csv(f)

In [92]:
# Keep only the rows whose Role is 'justice', then count rows per justice name.
isJustice = dfWords['Role'] == 'justice'
dfWordsJustices = dfWords[isJustice]
dfWordsJustices['Name'].value_counts()

ROBERTS      198
SOTOMAYOR    197
GINSBURG     196
KAGAN        195
KENNEDY      193
BREYER       188
ALITO        181
SCALIA       166
THOMAS         1
Name: Name, dtype: int64

In [93]:
def giveJusticeId(row, col):
    """Add a ``justiceId`` entry to ``row`` based on the name stored in ``row[col]``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row); modified in place.
        col: Key of the entry in ``row`` that holds the justice's surname.

    Returns:
        The same ``row`` object, with ``row['justiceId']`` set to the mapped
        identifier string, or to ``''`` when the name is not in the table.
    """
    justiceIds = {
        'ROBERTS': '111',
        'SOTOMAYOR': '113',
        'GINSBURG': '109',
        'KAGAN': '114',
        'KENNEDY': '106',
        'BREYER': '110',
        'ALITO': '112',
        'SCALIA': '105',
        'THOMAS': '108',
    }
    # Unknown names fall back to the empty string.
    row['justiceId'] = justiceIds.get(row[col], '')
    return row

In [94]:
# Rows of the words table whose Role is 'other'.
isOther = dfWords['Role'] == 'other'
dfWordsOthers = dfWords[isOther]

In [95]:
# One zeroed counter record per justice name present in dfWordsJustices.
wordTurnsJustices = {
    justiceName: {
        'name': justiceName,
        'words': 0,
        'turns': 0,
        'dockets': 0,
    }
    for justiceName in dfWordsJustices['Name'].value_counts().index
}

In [96]:
def gatherStatistics(row, stats):
    """Fold one row into the running per-name totals held in ``stats``.

    Args:
        row: Record providing ``'Name'``, ``'Words'`` and ``'Turns'``.
        stats: Dict keyed by name; each value is a dict with ``'words'``,
            ``'turns'`` and ``'dockets'`` counters. Updated in place.

    Returns:
        None. ``'dockets'`` is incremented by one for every row processed.
    """
    totals = stats[row['Name']]
    totals['words'] += row['Words']
    totals['turns'] += row['Turns']
    totals['dockets'] += 1

In [97]:
# Zero the counters first so that re-running this cell does not add the same
# rows to the totals a second time.
for justiceStats in wordTurnsJustices.values():
    justiceStats['words'] = 0
    justiceStats['turns'] = 0
    justiceStats['dockets'] = 0

# gatherStatistics is called purely for its side effect on wordTurnsJustices,
# so iterate the rows directly instead of using DataFrame.apply (whose
# `reduce` keyword is no longer accepted by current pandas releases).
for _, justiceRow in dfWordsJustices.iterrows():
    gatherStatistics(justiceRow, wordTurnsJustices)

wordTurnsJustices

{'ALITO': {'dockets': 181, 'name': 'ALITO', 'turns': 1953.0, 'words': 93456.0},
 'BREYER': {'dockets': 188,
  'name': 'BREYER',
  'turns': 3499.0,
  'words': 210777.0},
 'GINSBURG': {'dockets': 196,
  'name': 'GINSBURG',
  'turns': 2024.0,
  'words': 81916.0},
 'KAGAN': {'dockets': 195,
  'name': 'KAGAN',
  'turns': 2494.0,
  'words': 143651.0},
 'KENNEDY': {'dockets': 193,
  'name': 'KENNEDY',
  'turns': 2151.0,
  'words': 71101.0},
 'ROBERTS': {'dockets': 198,
  'name': 'ROBERTS',
  'turns': 3666.0,
  'words': 123689.0},
 'SCALIA': {'dockets': 166,
  'name': 'SCALIA',
  'turns': 3440.0,
  'words': 111427.0},
 'SOTOMAYOR': {'dockets': 197,
  'name': 'SOTOMAYOR',
  'turns': 3593.0,
  'words': 116935.0},
 'THOMAS': {'dockets': 1, 'name': 'THOMAS', 'turns': 11.0, 'words': 353.0}}

In [98]:
# Unpack the per-justice records into parallel lists (same order in each list).
justiceRecords = list(wordTurnsJustices.values())
indJustice = [rec['name'] for rec in justiceRecords]
docketsJustice = [rec['dockets'] for rec in justiceRecords]
turnsJustice = [rec['turns'] for rec in justiceRecords]
wordsJustice = [rec['words'] for rec in justiceRecords]

In [99]:
# Single-column frame of docket counts, indexed by justice name.
dfDocket = pd.DataFrame({'Dockets': docketsJustice}, index=indJustice)

In [100]:
# Order justices from most to fewest dockets. Rebinding the name (instead of
# inplace=True) avoids mutating a frame that other cells may still reference.
dfDocket = dfDocket.sort_values(by='Dockets', ascending=False)

In [101]:
# Display the index of dfDocket (the justice names) in its current row order.
dfDocket.index

Index([u'ROBERTS', u'SOTOMAYOR', u'GINSBURG', u'KAGAN', u'KENNEDY', u'BREYER',
       u'ALITO', u'SCALIA', u'THOMAS'],
      dtype='object')

In [135]:
# Bar chart of the docket count per justice, with each bar's value written above it.
plt.figure()
plt.xlabel('Justices')
plt.ylabel('Number of dockets', fontsize=18)
# Fixed: the limits previously read from an undefined name `df`, which raises
# NameError on a fresh kernel; the data being plotted lives in `dfDocket`.
plt.ylim([dfDocket['Dockets'].min(), dfDocket['Dockets'].max() * 1.1])
dfDocket['Dockets'].plot.bar(rot=45, fontsize=14)

# Bars are drawn at x = 0, 1, 2, ...; the -0.14 offset shifts each label left
# of the bar centre and +5 lifts it above the bar top.
for barPos, justice in enumerate(dfDocket.index):
    docketCount = dfDocket['Dockets'][justice]
    plt.text(barPos - 0.14, docketCount + 5, str(docketCount),
             color='black', fontsize=20, fontweight='bold')

plt.axvline(4.5, color='b', linestyle='--')
# NOTE(review): y=1500 is in data coordinates and lies far above the y-limit
# set above (max docket count * 1.1), so this label is drawn outside the
# axes - confirm the intended position.
plt.text(5, 1500, 'Cut-off boundary', fontsize=14)
plt.show()

In [131]:
# Rebuild the parallel per-justice lists, this time leaving out THOMAS.
justiceRecords = [rec for rec in wordTurnsJustices.values()
                  if rec['name'] != 'THOMAS']
indJustice = [rec['name'] for rec in justiceRecords]
docketsJustice = [rec['dockets'] for rec in justiceRecords]
turnsJustice = [rec['turns'] for rec in justiceRecords]
wordsJustice = [rec['words'] for rec in justiceRecords]

In [None]:
def averageVal(row, val1, val2, colResult):
    """Store ``row[val1] / row[val2]`` in ``row[colResult]`` and return ``row``.

    Args:
        row: Mapping-like record; modified in place.
        val1: Key of the numerator.
        val2: Key of the denominator (converted with ``float`` before dividing).
        colResult: Key under which the quotient is written.

    Returns:
        The same ``row`` object with the quotient added.
    """
    numerator = row[val1]
    denominator = float(row[val2])
    row[colResult] = numerator / denominator
    return row

In [96]:
# Per-justice table of docket count, total turns and average turns per docket.
# NOTE: this rebinds `dfTurns`, replacing the frame read from turns.csv earlier
# in the notebook.
dfTurns = pd.DataFrame({'Dockets': docketsJustice, 'Turns': turnsJustice},
                       index=indJustice, columns=['Dockets', 'Turns'])
dfTurns = dfTurns.sort_values(by='Turns', ascending=False)
# Vectorised division replaces the row-wise apply(..., reduce=False): the
# `reduce` keyword is no longer accepted by current pandas, and a column-wise
# expression needs no placeholder column or per-row Python call.
dfTurns['Turns per Docket'] = dfTurns['Turns'] / dfTurns['Dockets'].astype(float)

In [124]:
# Display the per-justice turns table.
dfTurns

Unnamed: 0,Dockets,Turns,Turns per Docket
ROBERTS,198.0,3659.0,18.479798
SOTOMAYOR,197.0,3593.0,18.238579
BREYER,188.0,3498.0,18.606383
SCALIA,166.0,3439.0,20.716867
KAGAN,195.0,2494.0,12.789744
KENNEDY,193.0,2151.0,11.145078
GINSBURG,196.0,2024.0,10.326531
ALITO,181.0,1953.0,10.790055


In [139]:
# Two bar series on one figure: total turns on the left y-axis (ax) and turns
# per docket on a second y-axis (ax2) that shares the same x-axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
# pyplot call: applies to whichever axes is current at this point.
plt.xlabel('Justices', fontsize=16)

# position=0 / position=1 with width=0.2 place the two bars of each justice on
# opposite sides of the tick, so they sit side by side rather than overlapping.
dfTurns['Turns'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfTurns['Turns per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

# Vertical reference lines after the 1st and after the 4th bar group.
# NOTE(review): the meaning of these two boundaries is not stated here - confirm.
plt.axvline(0.5, color='r', linestyle='--', linewidth=2)
plt.axvline(3.5, color='b', linestyle='--')

ax.set_ylabel('Turns', fontsize=18)
ax2.set_ylabel('Turns per Docket', fontsize=18)
plt.show()

In [127]:
# Per-justice table of docket count, total words and average words per docket.
dfWordsByDocket = pd.DataFrame({'Dockets': docketsJustice, 'Words': wordsJustice},
                               index=indJustice, columns=['Dockets', 'Words'])
dfWordsByDocket = dfWordsByDocket.sort_values(by='Words', ascending=False)
# Vectorised division replaces the row-wise apply(..., reduce=False): the
# `reduce` keyword is no longer accepted by current pandas, and a column-wise
# expression needs no placeholder column or per-row Python call.
dfWordsByDocket['Words per Docket'] = (
    dfWordsByDocket['Words'] / dfWordsByDocket['Dockets'].astype(float)
)

In [141]:
# Two bar series on one figure: total words on the left y-axis (ax) and words
# per docket on a second y-axis (ax2) that shares the same x-axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
# pyplot call: applies to whichever axes is current at this point.
plt.xlabel('Justices', fontsize=16)

# position=0 / position=1 with width=0.2 place the two bars of each justice on
# opposite sides of the tick, so they sit side by side rather than overlapping.
dfWordsByDocket['Words'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfWordsByDocket['Words per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

ax.set_ylabel('Words', fontsize=18)
ax2.set_ylabel('Words per Docket', fontsize=18)
# Vertical reference line after the first bar group.
# NOTE(review): the meaning of this boundary is not stated here - confirm.
plt.axvline(0.5, color='r', linestyle='--', linewidth=2)

plt.show()

In [153]:
# How many non-justice names occur in exactly one row of the words table.
res = dfWordsOthers['Name'].value_counts()
appearsOnce = res == 1
res[appearsOnce].shape

(171L,)

In [7]:
# Load the questions-asked table from the report data.
questionsPath = './report-data/questions_asked.csv'
with open(questionsPath) as f:
    dfQuestions = pd.read_csv(f)

In [58]:
# List the columns available in the questions table.
dfQuestions.columns

Index([u'Role', u'Name', u'Turn', u'Count', u'Position', u'idx', u'docket',
       u'justiceId'],
      dtype='object')

In [40]:
# Dockets that appear in the questions table.
selected_dockets = dfQuestions['docket'].unique()
# Call form of print: identical output on Python 2 for a single argument, and
# not a SyntaxError on Python 3 like the statement form.
print(len(selected_dockets))
# Keep only the metadata rows belonging to those dockets.
dfMetadataFilter = dfMetadata[dfMetadata['docket'].isin(selected_dockets)]
# Add/refresh the justiceId column from the speaker name, row by row.
dfQuestions = dfQuestions.apply(giveJusticeId, axis=1, args=('Name',))

198


In [41]:
# Distinct dockets left in the filtered metadata; computed once and reused for
# the count. print() call form works on both Python 2 and Python 3.
otherUnique = dfMetadataFilter['docket'].unique()
print(len(otherUnique))

198


In [45]:
# Count rows per partyWinning value. This frame has not been de-duplicated by
# docket at this point, so the totals count rows rather than dockets.
dfMetadataFilter['partyWinning'].value_counts()

1.0    1094
0.0     633
Name: partyWinning, dtype: int64

In [46]:
# Columns used to build the docket-level table.
docketRelevantColumns = [
    'docket',
    'petitioner',
    'respondent',
    'partyWinning',
    'precedentAlteration',
    'decisionDirection',
    'majVotes',
    'minVotes',
    'lcDisagreement',
    'lcDisposition',
    'lcDispositionDirection',
]

In [50]:
# Project onto the docket-level columns, then collapse identical rows.
docketLevelRows = dfMetadataFilter[docketRelevantColumns]
dfMetadataDockets = docketLevelRows.drop_duplicates()

In [57]:
# Count rows of the de-duplicated docket table per precedentAlteration value.
dfMetadataDockets['precedentAlteration'].value_counts()

0.0    197
1.0      1
Name: precedentAlteration, dtype: int64

In [56]:
# precedentAlteration counts restricted to rows where partyWinning equals 0.
partyWinningIsZero = dfMetadataDockets['partyWinning'] == 0
dfMetadataDockets.loc[partyWinningIsZero, 'precedentAlteration'].value_counts()

0.0    73
Name: precedentAlteration, dtype: int64

In [64]:
# Accumulators keyed '<winner>-<role>-<position>'; each bucket starts with a
# zero question count and its own empty set of dockets.
data = {
    '{}-{}-{}'.format(winner, role, position): {
        'questions': 0,
        'dockets': set(),
    }
    for winner in ('pet', 'res')
    for role in ('justice', 'other')
    for position in ('pet', 'res')
}
def collectQuestionData(row):
    """Add one questions row to the matching bucket of the module-level ``data``.

    The bucket key is ``'<winner>-<role>-<position>'`` where winner is ``'pet'``
    when the docket's ``partyWinning`` (looked up in ``dfMetadataDockets``)
    equals 1 and ``'res'`` otherwise, role is ``row['Role']``, and position is
    ``'pet'`` when ``row['Position']`` equals 0 and ``'res'`` otherwise.

    Args:
        row: Record with ``'docket'``, ``'Role'``, ``'Position'`` and ``'Count'``.

    Returns:
        ``row`` unchanged.
    """
    # Only the first matching metadata row is consulted.
    docketRows = dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]
    petitionerWon = docketRows['partyWinning'].values[0] == 1
    winner = 'pet' if petitionerWon else 'res'
    positionDialog = 'pet' if row['Position'] == 0 else 'res'

    bucket = data['{}-{}-{}'.format(winner, row['Role'], positionDialog)]
    bucket['questions'] += row['Count']
    bucket['dockets'].add(row['docket'])
    return row

# Start every bucket from scratch so re-running this cell does not double-count
# (and so a 'dockets' entry replaced by a count elsewhere becomes a set again).
for phaseStats in data.values():
    phaseStats['questions'] = 0
    phaseStats['dockets'] = set()

# collectQuestionData is used only for its side effect on `data`; iterate the
# rows directly instead of DataFrame.apply(..., reduce=False), whose `reduce`
# keyword is no longer accepted by current pandas releases.
for _, questionRow in dfQuestions.iterrows():
    collectQuestionData(questionRow)

# Show a small sample of the (unmodified) input rather than the whole frame.
dfQuestions.head(10)

Unnamed: 0,Role,Name,Turn,Count,Position,idx,docket,justiceId
0,justice,ROBERTS,0.0,0.0,0.0,0.0,11-681,111
1,other,MESSENGER,1.0,0.0,0.0,1.0,11-681,
2,justice,GINSBURG,2.0,2.0,0.0,2.0,11-681,109
3,other,MESSENGER,3.0,0.0,0.0,3.0,11-681,
4,justice,GINSBURG,4.0,2.0,0.0,4.0,11-681,109
5,other,MESSENGER,5.0,0.0,0.0,5.0,11-681,
6,justice,SOTOMAYOR,6.0,1.0,0.0,6.0,11-681,113
7,other,MESSENGER,7.0,0.0,0.0,7.0,11-681,
8,justice,KAGAN,8.0,1.0,0.0,8.0,11-681,114
9,other,MESSENGER,9.0,0.0,0.0,9.0,11-681,


In [68]:
# Average number of questions per docket for every bucket.
for key in data:
    phaseStats = data[key]
    # Fixed: 'dockets' is a set of docket ids, so dividing by it raised
    # TypeError. Replace the set with its size; the isinstance guard keeps the
    # cell safe to re-run once the value is already a count.
    if isinstance(phaseStats['dockets'], set):
        phaseStats['dockets'] = len(phaseStats['dockets'])
    docketCount = phaseStats['dockets']
    if docketCount:
        phaseStats['questionsDocket'] = float(phaseStats['questions']) / docketCount
    else:
        # A bucket with no dockets has no meaningful average.
        phaseStats['questionsDocket'] = 0.0

In [69]:
# Display the aggregated question statistics per bucket.
data

{'pet-justice-pet': {'dockets': 123,
  'questions': 4256.0,
  'questionsDocket': 34.60162601626016},
 'pet-justice-res': {'dockets': 123,
  'questions': 4504.0,
  'questionsDocket': 36.61788617886179},
 'pet-other-pet': {'dockets': 125,
  'questions': 345.0,
  'questionsDocket': 2.76},
 'pet-other-res': {'dockets': 123,
  'questions': 497.0,
  'questionsDocket': 4.040650406504065},
 'res-justice-pet': {'dockets': 72,
  'questions': 2482.0,
  'questionsDocket': 34.47222222222222},
 'res-justice-res': {'dockets': 70,
  'questions': 2520.0,
  'questionsDocket': 36.0},
 'res-other-pet': {'dockets': 72,
  'questions': 229.0,
  'questionsDocket': 3.1805555555555554},
 'res-other-res': {'dockets': 70,
  'questions': 267.0,
  'questionsDocket': 3.8142857142857145}}

In [70]:
# One [bucket key, total questions, questions per docket] row per bucket.
flatData = [
    [phase, phaseStats['questions'], phaseStats['questionsDocket']]
    for phase, phaseStats in data.items()
]

In [78]:
# Tabular form of flatData; rows keep the default integer index.
flatDataColumns = ['Phase', 'Questions', 'Questions per Docket']
dtFlatData = pd.DataFrame(flatData, columns=flatDataColumns)

In [79]:
# Bar colour for each plotted column of dtFlatData.
colorsGraph = dict([
    ('Questions', 'orange'),
    ('Questions per Docket', '#25e6e9'),
])

In [87]:
# Grouped bars per bucket: total questions on the left y-axis and questions
# per docket on a second y-axis sharing the same x-axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
plt.xlabel('Dialog Phases', fontsize=16)

# Which axes each column is drawn on.
axis = {'Questions': ax, 'Questions per Docket': ax2}
barWidth = 0.6 / 2

# `position` alternates 0, 1 so the two series sit on either side of each tick.
for idx, (variant, barColor) in enumerate(colorsGraph.items()):
    dtFlatData[variant].plot.bar(rot=45, ax=axis[variant], position=idx,
                                 color=barColor, width=barWidth,
                                 fontsize=14, label=variant)

# Replace the default integer tick labels with the bucket keys.
phaseNames = dtFlatData['Phase'].values
plt.xticks(range(len(phaseNames)), phaseNames)
ax.set_ylabel('# of Questions', fontsize=18)
ax2.set_ylabel('# of Questions per Docket', fontsize=18)
ax.legend(loc='upper left', fontsize=16)
ax2.legend(loc='upper right', fontsize=16)
plt.show()

In [130]:
# Per-justice accumulators, split by winning side ('pet' / 'res'); THOMAS is
# left out. Each side starts with a zero question count and an empty docket set.
dataJustices = {
    str(name): {
        'pet': {
            'questions': 0,
            'dockets': set(),
        },
        'res': {
            'questions': 0,
            'dockets': set(),
        },
    }
    for name in dfDocket.index
    if name != 'THOMAS'
}
dataJustices

{'ALITO': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'BREYER': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'GINSBURG': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'KAGAN': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'KENNEDY': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'ROBERTS': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'SCALIA': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'SOTOMAYOR': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}}}

In [131]:
def collectQuestionDataJustices(row):
    """Add one questions row to the per-justice totals in ``dataJustices``.

    Rows whose ``Role`` is not ``'justice'`` and rows for ``'THOMAS'`` are
    skipped. Otherwise the row's ``Count`` and ``docket`` are added under the
    justice's name and the winning side: ``'pet'`` when the docket's
    ``partyWinning`` (looked up in ``dfMetadataDockets``) equals 1, else ``'res'``.

    Args:
        row: Record with ``'Role'``, ``'Name'``, ``'docket'`` and ``'Count'``.

    Returns:
        ``row`` unchanged.
    """
    isCountedJustice = row['Role'] == 'justice' and row['Name'] != 'THOMAS'
    if not isCountedJustice:
        return row

    # Only the first matching metadata row is consulted.
    docketRows = dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]
    petitionerWon = docketRows['partyWinning'].values[0] == 1
    winner = 'pet' if petitionerWon else 'res'

    winnerStats = dataJustices[row['Name']][winner]
    winnerStats['questions'] += row['Count']
    winnerStats['dockets'].add(row['docket'])
    return row

# Start every per-justice bucket from scratch so re-running this cell does not
# double-count (and so a 'dockets' entry that was replaced by a count becomes a
# set again).
for winnerBuckets in dataJustices.values():
    for winnerStats in winnerBuckets.values():
        winnerStats['questions'] = 0
        winnerStats['dockets'] = set()

# collectQuestionDataJustices is used only for its side effect on
# `dataJustices`; iterate the rows directly instead of
# DataFrame.apply(..., reduce=False), whose `reduce` keyword is no longer
# accepted by current pandas releases.
for _, questionRow in dfQuestions.iterrows():
    collectQuestionDataJustices(questionRow)

# Show a small sample of the (unmodified) input rather than the whole frame.
dfQuestions.head(10)

Unnamed: 0,Role,Name,Turn,Count,Position,idx,docket,justiceId
0,justice,ROBERTS,0.0,0.0,0.0,0.0,11-681,111
1,other,MESSENGER,1.0,0.0,0.0,1.0,11-681,
2,justice,GINSBURG,2.0,2.0,0.0,2.0,11-681,109
3,other,MESSENGER,3.0,0.0,0.0,3.0,11-681,
4,justice,GINSBURG,4.0,2.0,0.0,4.0,11-681,109
5,other,MESSENGER,5.0,0.0,0.0,5.0,11-681,
6,justice,SOTOMAYOR,6.0,1.0,0.0,6.0,11-681,113
7,other,MESSENGER,7.0,0.0,0.0,7.0,11-681,
8,justice,KAGAN,8.0,1.0,0.0,8.0,11-681,114
9,other,MESSENGER,9.0,0.0,0.0,9.0,11-681,


In [132]:
# Turn each docket set into a count and derive the average questions per docket.
modes = ['res', 'pet']
for key in dataJustices.keys():
    for mode in modes:
        winnerStats = dataJustices[key][mode]
        # Only convert while the value is still a set: a second run of this
        # cell would otherwise call len() on an int and raise TypeError.
        if isinstance(winnerStats['dockets'], set):
            winnerStats['dockets'] = len(winnerStats['dockets'])
        if winnerStats['dockets'] == 0:
            # No dockets for this justice/side: report 0 instead of dividing by zero.
            winnerStats['average'] = 0
        else:
            winnerStats['average'] = float(winnerStats['questions']) / winnerStats['dockets']

In [133]:
# Flatten dataJustices into [justice, side, questions, average, dockets] rows,
# two rows per justice ('res' first, then 'pet'); THOMAS is skipped if present.
modes = ['res', 'pet']
dtFlatJusticesData = [
    [
        key,
        mode,
        dataJustices[key][mode]['questions'],
        dataJustices[key][mode]['average'],
        dataJustices[key][mode]['dockets'],
    ]
    for key in dataJustices.keys()
    if key != 'THOMAS'
    for mode in modes
]

In [134]:
# Tabular form of the flattened per-justice statistics.
justiceColumns = ['Justice', 'Winning Party', 'Questions', 'Average', 'Dockets']
dtFinal = pd.DataFrame(dtFlatJusticesData, columns=justiceColumns)

In [136]:
# Grouped bars: average questions per docket for each justice, one bar per
# winning side. NOTE: this rebinds `colorsGraph` with side keys ('res'/'pet').
colorsGraph = dict([
    ('res', 'orange'),
    ('pet', '#25e6e9'),
])
labels = dict([
    ('res', 'Respondent wins'),
    ('pet', 'Petitioner wins'),
])
fig = plt.figure()
ax = fig.add_subplot(111)
plt.xlabel('Justices', fontsize=16)
barWidth = 0.6 / 2

# `position` alternates 0, 1 so the two sides sit on either side of each tick.
for idx, (variant, barColor) in enumerate(colorsGraph.items()):
    sideAverages = dtFinal[dtFinal['Winning Party'] == variant]['Average']
    sideAverages.plot.bar(rot=45, ax=ax, position=idx,
                          color=barColor, width=barWidth,
                          fontsize=14, label=labels[variant])

# Replace the default tick labels with the justice names.
justiceNames = dataJustices.keys()
plt.xticks(range(len(justiceNames)), justiceNames)
ax.set_ylabel('Average # of Questions', fontsize=18)
ax.legend(loc='upper left', fontsize=16)
plt.show()

In [137]:
# Display the per-justice, per-winning-side question statistics.
dtFinal

Unnamed: 0,Justice,Winning Party,Questions,Average,Dockets
0,ALITO,res,669.0,10.292308,65
1,ALITO,pet,1127.0,9.715517,116
2,KENNEDY,res,460.0,6.571429,70
3,KENNEDY,pet,820.0,6.666667,123
4,GINSBURG,res,441.0,6.041096,73
5,GINSBURG,pet,819.0,6.658537,123
6,BREYER,res,809.0,11.897059,68
7,BREYER,pet,1494.0,12.45,120
8,ROBERTS,res,593.0,8.123288,73
9,ROBERTS,pet,927.0,7.416,125
