In [1]:
import string
import pandas as pd
import numpy as np
from nltk import word_tokenize
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
with open('./corpora/washington-law/justice_centered_docket_post2000.csv') as fMetadata:
    dfAllMetadata = pd.read_csv(fMetadata)

In [3]:
dfAllMetadata.columns
dfAllMetadata.loc[dfAllMetadata['docket'] == '126, ORIG.', 'docket'] = '126 orig'

In [4]:
relevantColumns = ['docket', 'petitioner', 'petitionerState', 'respondent', 'respondentState', 'jurisdiction',
                  'caseOrigin', 'caseSource', 'lcDisagreement', 'caseDisposition', 'declarationUncon', 'caseDispositionUnusual',
                   'lcDisposition', 'lcDispositionDirection', 'partyWinning', 'precedentAlteration',
                  'decisionDirection', 'decisionDirectionDissent', 'authorityDecision1', 'lawType', 'majVotes', 'minVotes', 'justiceName', 'vote',
                  'direction']

dfMetadata = dfAllMetadata[relevantColumns]

In [5]:
dfMetadata.columns

Index([u'docket', u'petitioner', u'petitionerState', u'respondent',
       u'respondentState', u'jurisdiction', u'caseOrigin', u'caseSource',
       u'lcDisagreement', u'caseDisposition', u'declarationUncon',
       u'caseDispositionUnusual', u'lcDisposition', u'lcDispositionDirection',
       u'partyWinning', u'precedentAlteration', u'decisionDirection',
       u'decisionDirectionDissent', u'authorityDecision1', u'lawType',
       u'majVotes', u'minVotes', u'justiceName', u'vote', u'direction'],
      dtype='object')

In [6]:
with open('./report-data/words.csv') as f:
    dfWords = pd.read_csv(f)

In [7]:
with open('./report-data/turns.csv') as f:
    dfTurns = pd.read_csv(f)

In [8]:
dfWordsJustices = dfWords[dfWords['Role'] == 'justice']
dfWordsJustices['Name'].value_counts()

ROBERTS      198
SOTOMAYOR    197
GINSBURG     196
KAGAN        195
KENNEDY      193
BREYER       188
ALITO        181
SCALIA       166
THOMAS         1
Name: Name, dtype: int64

In [9]:
def giveJusticeId(row, col):
    mapping = {
        'ROBERTS': '111',
        'SOTOMAYOR': '113',
        'GINSBURG': '109',
        'KAGAN': '114',
        'KENNEDY': '106',
        'BREYER': '110',
        'ALITO': '112',
        'SCALIA': '105',
        'THOMAS': '108',
    }
    if row[col] in mapping.keys():
        row['justiceId'] = mapping[row[col]]
    else:
        row['justiceId'] = ''

    return row

In [10]:
dfWordsOthers = dfWords[dfWords['Role'] == 'other']

In [11]:
wordTurnsJustices = {}
for record in dfWordsJustices['Name'].value_counts().index:
    wordTurnsJustices[record] = {
        'name': record,
        'words': 0,
        'turns': 0,
        'dockets': 0,
    }

In [12]:
def gatherStatistics(row, stats):
    stats[row['Name']]['words'] += row['Words']
    stats[row['Name']]['turns'] += row['Turns']
    stats[row['Name']]['dockets'] += 1

In [13]:
dfWordsJustices.apply(lambda row: gatherStatistics(row, wordTurnsJustices),
                      axis=1, reduce=False)
wordTurnsJustices

{'ALITO': {'dockets': 181, 'name': 'ALITO', 'turns': 1953.0, 'words': 93456.0},
 'BREYER': {'dockets': 188,
  'name': 'BREYER',
  'turns': 3499.0,
  'words': 210777.0},
 'GINSBURG': {'dockets': 196,
  'name': 'GINSBURG',
  'turns': 2024.0,
  'words': 81916.0},
 'KAGAN': {'dockets': 195,
  'name': 'KAGAN',
  'turns': 2494.0,
  'words': 143651.0},
 'KENNEDY': {'dockets': 193,
  'name': 'KENNEDY',
  'turns': 2151.0,
  'words': 71101.0},
 'ROBERTS': {'dockets': 198,
  'name': 'ROBERTS',
  'turns': 3666.0,
  'words': 123689.0},
 'SCALIA': {'dockets': 166,
  'name': 'SCALIA',
  'turns': 3440.0,
  'words': 111427.0},
 'SOTOMAYOR': {'dockets': 197,
  'name': 'SOTOMAYOR',
  'turns': 3593.0,
  'words': 116935.0},
 'THOMAS': {'dockets': 1, 'name': 'THOMAS', 'turns': 11.0, 'words': 353.0}}

In [14]:
indJustice = []
docketsJustice = []
turnsJustice = []
wordsJustice = []
for rec in wordTurnsJustices.values():
    docketsJustice.append(rec['dockets'])
    indJustice.append(rec['name'])
    turnsJustice.append(rec['turns'])
    wordsJustice.append(rec['words'])

In [15]:
dfDocket = pd.DataFrame(docketsJustice, index=indJustice, columns=['Dockets'])

In [16]:
dfDocket.sort_values(by='Dockets', inplace=True, ascending=False)

In [17]:
dfDocket.index

Index([u'ROBERTS', u'SOTOMAYOR', u'GINSBURG', u'KAGAN', u'KENNEDY', u'BREYER',
       u'ALITO', u'SCALIA', u'THOMAS'],
      dtype='object')

In [18]:
plt.figure()
plt.xlabel('Justices')
plt.ylabel('Number of dockets', fontsize=18)
plt.ylim([df['Dockets'].min(), df['Dockets'].max()*1.1])
dfDocket['Dockets'].plot.bar(rot=45, fontsize=14)
idxPos = -0.14
for idx in dfDocket.index:
    plt.text(idxPos, dfDocket['Dockets'][idx]+5, str(dfDocket['Dockets'][idx]),
             color='black', fontsize=20, fontweight='bold')
    idxPos += 1
plt.axvline(4.5, color='b', linestyle='--')
plt.text(5, 1500, 'Cut-off boundary', fontsize=14)
plt.show()

NameError: name 'df' is not defined

In [22]:
indJustice = []
docketsJustice = []
turnsJustice = []
wordsJustice = []
for rec in wordTurnsJustices.values():
    if rec['name'] == 'THOMAS':
        continue
    docketsJustice.append(rec['dockets'])
    indJustice.append(rec['name'])
    turnsJustice.append(rec['turns'])
    wordsJustice.append(rec['words'])

In [23]:
def averageVal(row, val1, val2, colResult):
    row[colResult] = row[val1] / float(row[val2])
    return row

In [24]:
dfTurns = pd.DataFrame(zip(docketsJustice, turnsJustice), index=indJustice,
                       columns=['Dockets', 'Turns'])
dfTurns['Turns per Docket'] = 0
dfTurns.sort_values(by='Turns', ascending=False, inplace=True)
dfTurns = dfTurns.apply(lambda row: averageVal(row, 'Turns', 'Dockets',
                                               'Turns per Docket'),
                        axis=1, reduce=False)

In [25]:
dfTurns

Unnamed: 0,Dockets,Turns,Turns per Docket
ROBERTS,198.0,3666.0,18.515152
SOTOMAYOR,197.0,3593.0,18.238579
BREYER,188.0,3499.0,18.611702
SCALIA,166.0,3440.0,20.722892
KAGAN,195.0,2494.0,12.789744
KENNEDY,193.0,2151.0,11.145078
GINSBURG,196.0,2024.0,10.326531
ALITO,181.0,1953.0,10.790055


In [26]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
plt.xlabel('Justices', fontsize=16)

dfTurns['Turns'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfTurns['Turns per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

plt.axvline(0.5, color='r', linestyle='--', linewidth=2)
plt.axvline(3.5, color='b', linestyle='--')

ax.set_ylabel('Turns', fontsize=18)
ax2.set_ylabel('Turns per Docket', fontsize=18)
plt.show()

In [27]:
dfWordsByDocket = pd.DataFrame(zip(docketsJustice, wordsJustice), index=indJustice,
                       columns=['Dockets', 'Words'])
dfWordsByDocket['Words per Docket'] = 0
dfWordsByDocket.sort_values(by='Words', ascending=False, inplace=True)
dfWordsByDocket = dfWordsByDocket.apply(lambda row: averageVal(row, 'Words', 'Dockets',
                                               'Words per Docket'),
                        axis=1, reduce=False)

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
plt.xlabel('Justices', fontsize=16)

dfWordsByDocket['Words'].plot.bar(rot=45, ax=ax, position=0, width=0.2, color='#25e6e9')
dfWordsByDocket['Words per Docket'].plot.bar(ax=ax2, position=1, width=0.2, color='yellow')

ax.set_ylabel('Words', fontsize=18)
ax2.set_ylabel('Words per Docket', fontsize=18)
plt.axvline(0.5, color='r', linestyle='--', linewidth=2)

plt.show()

In [28]:
res = dfWordsOthers['Name'].value_counts()
res[res == 1].shape

(164L,)

In [29]:
with open('./report-data/questions_asked.csv') as f:
    dfQuestions = pd.read_csv(f)

In [30]:
dfQuestions.columns

Index([u'Role', u'Name', u'Turn', u'Count', u'Position', u'idx', u'docket'], dtype='object')

In [31]:
selected_dockets = dfQuestions['docket'].unique()
print len(selected_dockets)
dfMetadataFilter = dfMetadata[dfMetadata['docket'].isin(selected_dockets)]
dfQuestions = dfQuestions.apply(lambda row: giveJusticeId(row, 'Name'), axis=1)

198


In [32]:
print len(dfMetadataFilter['docket'].unique())
otherUnique = dfMetadataFilter['docket'].unique()

198


In [33]:
dfMetadataFilter['partyWinning'].value_counts()

1.0    1094
0.0     633
Name: partyWinning, dtype: int64

In [34]:
docketRelevantColumns = ['docket', 'petitioner', 'respondent', 'partyWinning', 'precedentAlteration',
                         'decisionDirection', 'majVotes', 'minVotes', 'lcDisagreement', 'lcDisposition', 'lcDispositionDirection']

In [35]:
dfMetadataDockets = dfMetadataFilter[docketRelevantColumns].drop_duplicates()

In [36]:
dfMetadataDockets['precedentAlteration'].value_counts()

0.0    197
1.0      1
Name: precedentAlteration, dtype: int64

In [37]:
dfMetadataDockets[dfMetadataDockets['partyWinning'] == 0]['precedentAlteration'].value_counts()

0.0    73
Name: precedentAlteration, dtype: int64

In [38]:
data = {
    'pet-justice-pet': {
        'questions': 0,
        'dockets': set(),
    },
    'pet-justice-res': {
        'questions': 0,
        'dockets': set(),
    },
    'pet-other-pet': {
        'questions': 0,
        'dockets': set(),
    },
    'pet-other-res': {
        'questions': 0,
        'dockets': set(),
    },
    
    'res-justice-pet': {
        'questions': 0,
        'dockets': set(),
    },
    'res-justice-res': {
        'questions': 0,
        'dockets': set(),
    },
    'res-other-pet': {
        'questions': 0,
        'dockets': set(),
    },
    'res-other-res': {
        'questions': 0,
        'dockets': set(),
    },
}
def collectQuestionData(row):
    winner = 'pet' if dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]['partyWinning'].values[0] == 1 else 'res'
    positionDialog = 'pet' if row['Position'] == 0 else 'res'
    key = '{}-{}-{}'.format(winner, row['Role'], positionDialog)
    data[key]['questions'] += row['Count']
    data[key]['dockets'].add(row['docket'])
    return row

dfQuestions.apply(lambda row: collectQuestionData(row), axis=1, reduce=False)

Unnamed: 0,Role,Name,Turn,Count,Position,idx,docket,justiceId
0,justice,ROBERTS,0.0,0.0,0.0,0.0,11-681,111
1,other,MESSENGER,1.0,0.0,0.0,1.0,11-681,
2,justice,GINSBURG,2.0,2.0,0.0,2.0,11-681,109
3,other,MESSENGER,3.0,0.0,0.0,3.0,11-681,
4,justice,GINSBURG,4.0,2.0,0.0,4.0,11-681,109
5,other,MESSENGER,5.0,0.0,0.0,5.0,11-681,
6,justice,SOTOMAYOR,6.0,1.0,0.0,6.0,11-681,113
7,other,MESSENGER,7.0,0.0,0.0,7.0,11-681,
8,justice,KAGAN,8.0,1.0,0.0,8.0,11-681,114
9,other,MESSENGER,9.0,0.0,0.0,9.0,11-681,


In [39]:
for key in data.keys():
    data[key]['questionsDocket'] = float(data[key]['questions']) / len(data[key]['dockets'])
#    data[key]['dockets'] = len(data[key]['dockets'])

In [40]:
data

{'pet-justice-pet': {'dockets': {'11-681',
   '11-965',
   '12-1036',
   '12-1038',
   '12-10882',
   '12-1117',
   '12-1128',
   '12-1163',
   '12-414',
   '12-462',
   '12-5196',
   '12-536',
   '12-562',
   '12-574',
   '12-609',
   '12-696',
   '12-751',
   '12-761',
   '12-786',
   '12-794',
   '12-815',
   '12-8561',
   '12-895',
   '12-929',
   '12-930',
   '12-992',
   '126 orig',
   '13-1010',
   '13-1019',
   '13-1032',
   '13-1034',
   '13-1041',
   '13-1067',
   '13-1080',
   '13-115',
   '13-1174',
   '13-132',
   '13-1339',
   '13-1352',
   '13-1402',
   '13-1421',
   '13-1428',
   '13-1433',
   '13-1487',
   '13-1496',
   '13-193',
   '13-301',
   '13-317',
   '13-339',
   '13-352',
   '13-433',
   '13-435',
   '13-461',
   '13-502',
   '13-550',
   '13-553',
   '13-6827',
   '13-684',
   '13-719',
   '13-7211',
   '13-7451',
   '13-854',
   '13-895',
   '13-896',
   '13-935',
   '13-975',
   '13-983',
   '13-9972',
   '14-10154',
   '14-1175',
   '14-1209',
   '14-1280'

In [41]:
flatData = [[key, data[key]['questions'], data[key]['questionsDocket']] for key in data.keys()]

In [42]:
flatData
dtFlatData = pd.DataFrame(flatData, columns=['Phase', 'Questions', 'Questions per Docket'])
#dtFlatData.index = dtFlatData['Phase'].values

In [43]:
colorsGraph = {
    'Questions':'orange',
    'Questions per Docket': '#25e6e9',
}

In [87]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
plt.xlabel('Dialog Phases', fontsize=16)
axis = {
    'Questions':ax,
    'Questions per Docket': ax2,
}
barWidth = 0.6 / 2
idx = 0
for variant in colorsGraph.keys():
    dtFlatData[variant].plot.bar(rot=45, ax=axis[variant], position=idx,
                                   color=colorsGraph[variant], width=barWidth,
                                   fontsize=14,
                             label=variant)
    idx += 1
    
plt.xticks(range(len(dtFlatData['Phase'].values)), dtFlatData['Phase'].values)
ax.set_ylabel('# of Questions', fontsize=18)
ax2.set_ylabel('# of Questions per Docket', fontsize=18)
ax.legend(loc='upper left', fontsize=16)
ax2.legend(loc='upper right', fontsize=16)
plt.show()

In [44]:
dfDocket.index
dataJustices = {}
for name in dfDocket.index:
    if name == 'THOMAS':
        continue
    dataJustices[str(name)] = {
        'pet': {
            'questions': 0,
            'dockets': set(),            
        },
        'res': {
            'questions': 0,
            'dockets': set(),            
        },        
    }
dataJustices

{'ALITO': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'BREYER': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'GINSBURG': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'KAGAN': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'KENNEDY': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'ROBERTS': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'SCALIA': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}},
 'SOTOMAYOR': {'pet': {'dockets': set(), 'questions': 0},
  'res': {'dockets': set(), 'questions': 0}}}

In [45]:
def collectQuestionDataJustices(row):
    if row['Role'] != 'justice' or row['Name'] == 'THOMAS':
        return row
    winner = 'pet' if dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]['partyWinning'].values[0] == 1 else 'res'
    dataJustices[row['Name']][winner]['questions'] += row['Count']
    dataJustices[row['Name']][winner]['dockets'].add(row['docket'])
    return row

dfQuestions.apply(lambda row: collectQuestionDataJustices(row), axis=1, reduce=False)

Unnamed: 0,Role,Name,Turn,Count,Position,idx,docket,justiceId
0,justice,ROBERTS,0.0,0.0,0.0,0.0,11-681,111
1,other,MESSENGER,1.0,0.0,0.0,1.0,11-681,
2,justice,GINSBURG,2.0,2.0,0.0,2.0,11-681,109
3,other,MESSENGER,3.0,0.0,0.0,3.0,11-681,
4,justice,GINSBURG,4.0,2.0,0.0,4.0,11-681,109
5,other,MESSENGER,5.0,0.0,0.0,5.0,11-681,
6,justice,SOTOMAYOR,6.0,1.0,0.0,6.0,11-681,113
7,other,MESSENGER,7.0,0.0,0.0,7.0,11-681,
8,justice,KAGAN,8.0,1.0,0.0,8.0,11-681,114
9,other,MESSENGER,9.0,0.0,0.0,9.0,11-681,


In [46]:
modes = ['res', 'pet']
for key in dataJustices.keys():
    for mode in modes:        
        dataJustices[key][mode]['dockets'] = len(dataJustices[key][mode]['dockets'])
        if dataJustices[key][mode]['dockets'] == 0:
            dataJustices[key][mode]['average'] = 0
        else:
            dataJustices[key][mode]['average'] = float(dataJustices[key][mode]['questions']) / dataJustices[key][mode]['dockets']

In [47]:
modes = ['res', 'pet']
dtFlatJusticesData = []
for key in dataJustices.keys():
    if key == 'THOMAS':
        continue
    for mode in modes:        
        dtFlatJusticesData.append([key, mode, dataJustices[key][mode]['questions'], dataJustices[key][mode]['average'], dataJustices[key][mode]['dockets']])

In [48]:
dtFinal = pd.DataFrame(dtFlatJusticesData, columns=['Justice', 'Winning Party', 'Questions', 'Average', 'Dockets'])

In [136]:
colorsGraph = {
    'res':'orange',
    'pet': '#25e6e9',
}
labels = {
    'res': 'Respondent wins',
    'pet': 'Petitioner wins',
}
fig = plt.figure()
ax = fig.add_subplot(111)
plt.xlabel('Justices', fontsize=16)
barWidth = 0.6 / 2
idx = 0
for variant in colorsGraph.keys():
    dtFinal[dtFinal['Winning Party'] == variant]['Average'].plot.bar(rot=45, ax=ax, position=idx,
                                   color=colorsGraph[variant], width=barWidth,
                                   fontsize=14,
                             label=labels[variant])
    idx += 1
    
plt.xticks(range(len(dataJustices.keys())), dataJustices.keys())
ax.set_ylabel('Average # of Questions', fontsize=18)
ax.legend(loc='upper left', fontsize=16)
plt.show()

In [49]:
dtFinal

Unnamed: 0,Justice,Winning Party,Questions,Average,Dockets
0,ALITO,res,669.0,10.292308,65
1,ALITO,pet,1127.0,9.715517,116
2,KENNEDY,res,460.0,6.571429,70
3,KENNEDY,pet,820.0,6.666667,123
4,GINSBURG,res,441.0,6.041096,73
5,GINSBURG,pet,819.0,6.658537,123
6,BREYER,res,809.0,11.897059,68
7,BREYER,pet,1494.0,12.45,120
8,ROBERTS,res,593.0,8.123288,73
9,ROBERTS,pet,927.0,7.416,125


In [50]:
with open('./report-data/interruptions.csv') as f:
    dfInterruptions = pd.read_csv(f)

In [51]:
dfInterruptions

Unnamed: 0,Name,Count,Position,idx,docket
0,SMITH,30.0,1.0,0.0,11-681
1,MESSENGER,-26.0,0.0,1.0,11-681
2,VERRILLI,8.0,1.0,2.0,11-681
3,RUSSELL,30.0,1.0,0.0,11-965
4,DUPREE,-20.0,0.0,1.0,11-965
5,KNEEDLER,-8.0,0.0,2.0,11-965
6,MASSEY,-27.0,0.0,0.0,12-1036
7,CURRAN,15.0,1.0,1.0,12-1036
8,CHEMERINSKY,29.0,1.0,0.0,12-1038
9,HORWICH,-14.0,0.0,1.0,12-1038


In [52]:
dataInterruptions = {
    'res': 0,
    'pet': 0,
}
dfMetadataDockets['overall-interruption'] = 0
def collectDataInterruptionsOverall(row):
    winner = 'pet' if dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]['partyWinning'].values[0] == 1 else 'res'
    dfMetadataDockets.loc[dfMetadataDockets['docket'] == row['docket'], 'overall-interruption'] += row['Count']
    return row


In [53]:
dfInterruptions.apply(lambda row: collectDataInterruptionsOverall(row), axis=1)

Unnamed: 0,Name,Count,Position,idx,docket
0,SMITH,30.0,1.0,0.0,11-681
1,MESSENGER,-26.0,0.0,1.0,11-681
2,VERRILLI,8.0,1.0,2.0,11-681
3,RUSSELL,30.0,1.0,0.0,11-965
4,DUPREE,-20.0,0.0,1.0,11-965
5,KNEEDLER,-8.0,0.0,2.0,11-965
6,MASSEY,-27.0,0.0,0.0,12-1036
7,CURRAN,15.0,1.0,1.0,12-1036
8,CHEMERINSKY,29.0,1.0,0.0,12-1038
9,HORWICH,-14.0,0.0,1.0,12-1038


In [54]:
dfInterruptions['Count'].groupby(dfInterruptions['docket']).cumsum()

0      30.0
1       4.0
2      12.0
3      30.0
4      10.0
5       2.0
6     -27.0
7     -12.0
8      29.0
9      15.0
10     30.0
11      8.0
12     40.0
13     46.0
14     42.0
15     23.0
16     21.0
17     32.0
18     15.0
19      5.0
20      7.0
21      3.0
22      1.0
23    -26.0
24     -4.0
25     15.0
26     30.0
27     16.0
28    -19.0
29     -3.0
       ... 
488     0.0
489   -21.0
490    -6.0
491     1.0
492   -14.0
493    -4.0
494    31.0
495    20.0
496   -20.0
497    -8.0
498    -5.0
499    35.0
500     4.0
501    39.0
502    19.0
503    21.0
504    66.0
505    57.0
506   -39.0
507   -19.0
508    15.0
509     8.0
510     5.0
511   -48.0
512   -16.0
513     0.0
514    11.0
515   -20.0
516   -10.0
517     0.0
Name: Count, dtype: float64

In [55]:
dataInterruptions['pet-avg'] = dataInterruptions['pet'] / dfMetadataDockets[(dfMetadataDockets['partyWinning'] == 1)].shape[0]
dataInterruptions['res-avg'] = dataInterruptions['res'] / dfMetadataDockets[(dfMetadataDockets['partyWinning'] == 0)].shape[0]

In [56]:
dfMetadataDockets[(dfMetadataDockets['partyWinning'] == 1) & (dfMetadataDockets['overall-interruption'] < 1)].shape

(46, 12)

In [57]:
dfMetadataDockets[(dfMetadataDockets['partyWinning'] == 0)].shape

(73, 12)

In [58]:
with open('./report-data/mostfollow.csv') as f:
    dfMostFollow = pd.read_csv(f)

In [59]:
justices = sorted([str(w) for w in dfDocket.index if not w == 'THOMAS'])

In [60]:
dataMostFollow = {}
for justice in justices:
    dataMostFollow[justice] = {
        'pet': {
        },
        'res': {
        },        
    }
    print justice
    print dfMostFollow[dfMostFollow['Name'] == justice]['Follower Name'].value_counts()

ALITO
ROBERTS      47
SCALIA       34
KENNEDY      27
BREYER       25
SOTOMAYOR    23
KAGAN        14
GINSBURG     11
Name: Follower Name, dtype: int64
BREYER
SCALIA       54
ROBERTS      36
ALITO        29
KENNEDY      28
SOTOMAYOR    19
KAGAN        13
GINSBURG      9
Name: Follower Name, dtype: int64
GINSBURG
ROBERTS      57
KENNEDY      40
ALITO        24
SCALIA       22
SOTOMAYOR    21
KAGAN        17
BREYER       15
Name: Follower Name, dtype: int64
KAGAN
SCALIA       35
ALITO        33
BREYER       28
KENNEDY      28
ROBERTS      26
SOTOMAYOR    25
GINSBURG     20
Name: Follower Name, dtype: int64
KENNEDY
ROBERTS      58
GINSBURG     32
SCALIA       30
ALITO        28
BREYER       20
KAGAN        13
SOTOMAYOR    12
Name: Follower Name, dtype: int64
ROBERTS
SCALIA       40
SOTOMAYOR    33
ALITO        30
BREYER       28
KENNEDY      26
KAGAN        21
GINSBURG     20
Name: Follower Name, dtype: int64
SCALIA
ROBERTS      55
BREYER       31
ALITO        21
KENNEDY      17
SOTOMAYOR

In [61]:
def collectDataMostFollow(row):
    if row['Name'] == 'THOMAS' or row['Follower Name'] == '---':
        return row
    winner = 'pet' if dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]['partyWinning'].values[0] == 1 else 'res'
    if not row['Follower Name'] in dataMostFollow[row['Name']][winner].keys():
        dataMostFollow[row['Name']][winner][row['Follower Name']] = {
            'name': row['Follower Name'],
            'count': 1,
        }
    else:
        dataMostFollow[row['Name']][winner][row['Follower Name']]['count'] += 1
        
    return row


In [62]:
dfMostFollow.apply(lambda row: collectDataMostFollow(row), axis =1, reduce=False)

Unnamed: 0,Role,Name,Follower Role,Follower Name,Ratio,idx,docket
0,justice,ALITO,justice,ROBERTS,0.222222,0.0,11-681
1,justice,KENNEDY,justice,KAGAN,0.600000,1.0,11-681
2,justice,BREYER,justice,ROBERTS,0.666667,2.0,11-681
3,justice,ROBERTS,justice,ALITO,0.222222,3.0,11-681
4,justice,SCALIA,justice,SOTOMAYOR,0.416667,4.0,11-681
5,justice,SOTOMAYOR,justice,KENNEDY,0.250000,5.0,11-681
6,justice,GINSBURG,justice,ALITO,0.333333,6.0,11-681
7,justice,KAGAN,justice,SCALIA,0.750000,7.0,11-681
8,justice,ALITO,justice,KAGAN,0.333333,0.0,11-965
9,justice,KENNEDY,justice,GINSBURG,1.000000,1.0,11-965


In [63]:
dataMostFollow

{'ALITO': {'pet': {'BREYER': {'count': 18, 'name': 'BREYER'},
   'GINSBURG': {'count': 6, 'name': 'GINSBURG'},
   'KAGAN': {'count': 7, 'name': 'KAGAN'},
   'KENNEDY': {'count': 18, 'name': 'KENNEDY'},
   'ROBERTS': {'count': 31, 'name': 'ROBERTS'},
   'SCALIA': {'count': 19, 'name': 'SCALIA'},
   'SOTOMAYOR': {'count': 17, 'name': 'SOTOMAYOR'}},
  'res': {'BREYER': {'count': 7, 'name': 'BREYER'},
   'GINSBURG': {'count': 5, 'name': 'GINSBURG'},
   'KAGAN': {'count': 7, 'name': 'KAGAN'},
   'KENNEDY': {'count': 9, 'name': 'KENNEDY'},
   'ROBERTS': {'count': 16, 'name': 'ROBERTS'},
   'SCALIA': {'count': 15, 'name': 'SCALIA'},
   'SOTOMAYOR': {'count': 6, 'name': 'SOTOMAYOR'}}},
 'BREYER': {'pet': {'ALITO': {'count': 18, 'name': 'ALITO'},
   'GINSBURG': {'count': 7, 'name': 'GINSBURG'},
   'KAGAN': {'count': 6, 'name': 'KAGAN'},
   'KENNEDY': {'count': 16, 'name': 'KENNEDY'},
   'ROBERTS': {'count': 22, 'name': 'ROBERTS'},
   'SCALIA': {'count': 37, 'name': 'SCALIA'},
   'SOTOMAYOR': {'

In [64]:
modes = ['pet', 'res']
for justice in dataMostFollow:
    for mode in modes:
        records = [(dataMostFollow[justice][mode][key]['name'], dataMostFollow[justice][mode][key]['count'])
                   for key in dataMostFollow[justice][mode].keys()]
        dataMostFollow[justice][mode]['---'] = sorted(records, key=lambda rec: rec[1], reverse=True)

In [65]:
splitDataMostFollow = []
for justice in dataMostFollow:
    for mode in modes:
        splitDataMostFollow.append([justice, mode, dataMostFollow[justice][mode]['---'][0][1], dataMostFollow[justice][mode]['---'][0][0]])

In [66]:
splitDataMostFollow

[['ALITO', 'pet', 31, 'ROBERTS'],
 ['ALITO', 'res', 16, 'ROBERTS'],
 ['KENNEDY', 'pet', 36, 'ROBERTS'],
 ['KENNEDY', 'res', 22, 'ROBERTS'],
 ['GINSBURG', 'pet', 40, 'ROBERTS'],
 ['GINSBURG', 'res', 17, 'ROBERTS'],
 ['BREYER', 'pet', 37, 'SCALIA'],
 ['BREYER', 'res', 17, 'SCALIA'],
 ['ROBERTS', 'pet', 26, 'SCALIA'],
 ['ROBERTS', 'res', 15, 'SOTOMAYOR'],
 ['SOTOMAYOR', 'pet', 40, 'ROBERTS'],
 ['SOTOMAYOR', 'res', 29, 'ROBERTS'],
 ['KAGAN', 'pet', 25, 'SCALIA'],
 ['KAGAN', 'res', 13, 'SOTOMAYOR'],
 ['SCALIA', 'pet', 32, 'ROBERTS'],
 ['SCALIA', 'res', 23, 'ROBERTS']]

In [67]:
with open('./report-data/tci_3.csv') as f:
    dfTci = pd.read_csv(f)
dfTci = dfTci[dfTci['Count'] > 2]

dfTci['NounLen'] = dfTci['Noun'].map(lambda cell: len(cell))
dfTci = dfTci[dfTci['NounLen'] > 2]

In [70]:
dfTci['punct'] = dfTci['Noun'].map(lambda cell: any([(c in string.punctuation) for c in cell]))

In [71]:
dfTci['Count'].value_counts()

3.0      1955
4.0      1755
5.0      1403
6.0      1244
7.0       982
8.0       917
9.0       749
10.0      599
11.0      475
12.0      404
13.0      350
14.0      344
15.0      293
16.0      275
17.0      266
19.0      235
18.0      234
20.0      212
21.0      183
22.0      176
24.0      152
26.0      140
27.0      139
23.0      137
25.0      124
28.0      119
30.0      112
29.0      104
32.0      102
31.0       91
         ... 
164.0       1
215.0       1
169.0       1
130.0       1
99.0        1
173.0       1
176.0       1
181.0       1
185.0       1
190.0       1
205.0       1
213.0       1
93.0        1
158.0       1
72.0        1
159.0       1
153.0       1
151.0       1
113.0       1
145.0       1
142.0       1
117.0       1
140.0       1
139.0       1
138.0       1
119.0       1
122.0       1
123.0       1
124.0       1
250.0       1
Name: Count, dtype: int64

In [72]:
dfTci['Noun'].value_counts()

brief           133
thank           133
court           118
case            111
respect         110
state           109
question        106
congress        105
fact            102
federal          99
example          99
right            97
sense            91
argument         83
okay             80
chief            79
difference       76
course           76
people           76
years            75
point            74
government       72
laughter         68
something        66
context          66
breyer           66
one              64
statute          64
united           63
position         62
               ... 
o'bannon          1
token             1
piotrowski        1
labeling          1
occasions         1
designation       1
ifp               1
moratorium        1
walden            1
randolph          1
dapa              1
perry             1
mdls              1
patentholder      1
one-vote          1
wisconsin         1
gases             1
pun               1
highway           1


In [73]:
top10 = dfTci.ix[dfTci.groupby('docket')['Count'].nlargest(10).reset_index()['level_1']]
low10 = dfTci.ix[dfTci.groupby('docket')['Count'].nsmallest(10).reset_index()['level_1']]

In [74]:
top10['Count'].value_counts()

32.0     78
28.0     77
30.0     74
34.0     71
29.0     71
27.0     70
33.0     69
31.0     64
35.0     64
38.0     54
26.0     54
37.0     53
36.0     51
39.0     49
45.0     45
25.0     45
41.0     45
24.0     44
40.0     38
42.0     37
44.0     36
47.0     34
50.0     34
46.0     31
48.0     28
23.0     28
43.0     27
51.0     26
49.0     25
57.0     21
         ..
72.0      1
215.0     1
213.0     1
205.0     1
190.0     1
185.0     1
181.0     1
176.0     1
173.0     1
169.0     1
164.0     1
159.0     1
158.0     1
153.0     1
151.0     1
145.0     1
142.0     1
140.0     1
139.0     1
138.0     1
130.0     1
124.0     1
123.0     1
122.0     1
119.0     1
117.0     1
113.0     1
99.0      1
93.0      1
15.0      1
Name: Count, dtype: int64

In [75]:
low10['Count'].value_counts()

3.0    1699
4.0     274
5.0       5
6.0       2
Name: Count, dtype: int64

In [76]:
len(dfTci['docket'].value_counts().values)

198

In [77]:
dfTci.describe()

Unnamed: 0,Count,Start,Last,Length,idx,NounLen
count,15629.0,15629.0,15629.0,15629.0,15629.0,15629.0
mean,13.920404,0.149634,0.87734,0.727706,42.148826,6.879391
std,17.428153,0.186389,0.160613,0.251593,26.230686,2.409265
min,3.0,0.0,0.03937,0.006472,0.0,3.0
25%,5.0,0.009091,0.84186,0.570552,20.0,5.0
50%,8.0,0.073593,0.945098,0.809783,40.0,7.0
75%,16.0,0.222222,0.983333,0.939891,62.0,8.0
max,250.0,0.97235,0.997512,0.997512,134.0,23.0


In [78]:
dfTci[dfTci['Length'] < 0.1].shape

(320, 9)

In [79]:
len(dfTci[dfTci['Length'] < 0.4]['docket'].value_counts().values)

198

In [80]:
dfTci[dfTci['Length'] > 0.8]

Unnamed: 0,Noun,Count,Start,Last,Length,idx,docket,NounLen,punct
0,part,124.0,0.003704,0.992593,0.988889,0.0,11-681,4,False
1,union,107.0,0.003704,0.992593,0.988889,1.0,11-681,5,False
2,respect,5.0,0.011111,0.992593,0.981481,2.0,11-681,7,False
3,employees,25.0,0.022222,0.992593,0.970370,3.0,11-681,9,False
4,nothing,19.0,0.014815,0.981481,0.966667,4.0,11-681,7,False
6,thousands,4.0,0.003704,0.966667,0.962963,6.0,11-681,9,False
7,first,32.0,0.000000,0.962963,0.962963,7.0,11-681,5,False
8,public,47.0,0.014815,0.962963,0.948148,8.0,11-681,6,False
9,extent,9.0,0.048148,0.992593,0.944444,9.0,11-681,6,False
10,kinds,6.0,0.022222,0.966667,0.944444,10.0,11-681,5,False


In [105]:
with open('./parsed-data/dialog.csv') as f:
    dfSentiment = pd.read_csv(f)

In [106]:
dfTest = dfSentiment.groupby('docket')['sentiment']

In [107]:
results = dfTest.sum()

In [108]:
dfSentiment[dfSentiment['role'] == 'justice']['sentiment'].value_counts()

 0.0    15273
-1.0     3938
 1.0     3620
Name: sentiment, dtype: int64

In [109]:
dataSentiment = {'res': {-1: 0, 0: 0, 1: 0 }, 'pet': {-1: 0, 0: 0, 1: 0 }}

def calcSentiment(row):
    if row['name'] == 'THOMAS':
        return row
    winner = 'pet' if dfMetadataDockets[dfMetadataDockets['docket'] == row['docket']]['partyWinning'].values[0] == 1 else 'res'
    dataSentiment[winner][row['sentiment']] += 1
    return row

In [None]:
dfSentiment.apply(lambda row: calcSentiment(row), axis=1, reduce=False)

In [104]:
dataSentiment

{'pet': {-1: 4583, 0: 18952, 1: 4190}, 'res': {-1: 2481, 0: 11332, 1: 2365}}