# Notebook for testing hypotheses

## Load enriched case log

In [17]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, confusion_matrix

In [18]:
INPUT_FILE_NAME = 'cases_rtfm_full.csv'

# Read these two columns as generic Python objects instead of letting pandas
# infer a dtype for them.
DATA_TYPES = {
    'Create Fine.article': object,
    'Create Fine.org:resource': object
    }

# low_memory=False makes pandas infer the dtype of every remaining column from
# the whole file rather than chunk by chunk. Chunk-wise inference can leave a
# single column holding a mix of numbers and strings (and emits a DtypeWarning),
# which would make value comparisons on such a column unreliable.
df = pd.read_csv(INPUT_FILE_NAME, dtype=DATA_TYPES, low_memory=False)
print('Read', len(df), 'rows from', INPUT_FILE_NAME)
df = df.dropna(axis=1, how='all')  # drop all completely empty columns
df.describe()

  df = pd.read_csv(INPUT_FILE_NAME, dtype=DATA_TYPES)


Read 150370 rows from cases_rtfm_full.csv


Unnamed: 0,Create Fine.count,event_count,start_time_rel,duration,Create Fine.start,Create Fine.amount,Create Fine.totalPaymentAmount,Create Fine.points,Create Fine.amount::last,Final.amount::last,...,Add penalty:Insert Fine Notification.delay,Add penalty:Send Fine.delay,Add penalty:Payment.delay,Add penalty:Create Fine.delay,Insert Fine Notification:Send Fine.delay,Insert Fine Notification:Payment.delay,Insert Fine Notification:Create Fine.delay,Send Fine:Payment.delay,Send Fine:Create Fine.delay,Payment:Create Fine.delay
count,150370.0,150370.0,150370.0,150370.0,150370.0,150370.0,150370.0,150370.0,150370.0,150370.0,...,79860.0,79860.0,19739.0,79860.0,79860.0,19739.0,79860.0,23332.0,103987.0,69715.0
mean,1.0,3.733923,2203.246592,341.670845,0.0,44.71538,0.0,0.078879,44.71538,71.41678,...,-60.0,-77.385287,134.40154,-163.422878,-17.385287,194.40154,-103.422878,183.459583,-87.507333,-97.51415
std,0.0,1.641017,1310.467092,347.051441,0.0,49.416618,0.0,0.577687,49.416618,100.537082,...,0.0,14.959963,193.884228,43.721888,14.959963,193.884228,43.721888,191.987395,42.486759,171.582472
min,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-60.0,-492.0,-212.0,-792.0,-432.0,-152.0,-732.0,-144.0,-732.0,-2086.0
25%,1.0,2.0,991.0,12.0,0.0,32.8,0.0,0.0,32.8,35.0,...,-60.0,-82.0,0.0,-196.0,-22.0,60.0,-136.0,58.0,-121.0,-132.0
50%,1.0,5.0,2204.0,198.0,0.0,35.0,0.0,0.0,35.0,62.59,...,-60.0,-75.0,32.0,-164.0,-15.0,92.0,-104.0,90.0,-88.0,-8.0
75%,1.0,5.0,3252.0,605.0,0.0,38.0,0.0,0.0,38.0,71.5,...,-60.0,-69.0,269.0,-132.0,-9.0,329.0,-72.0,314.0,-54.0,-3.0
max,1.0,20.0,4917.0,4372.0,0.0,4351.0,0.0,10.0,4351.0,8000.0,...,-60.0,-60.0,1940.0,-60.0,0.0,2000.0,0.0,2027.0,0.0,0.0


## Evaluate an explanation against the enriched case log

### Set outcome and explanation

In [19]:
# Each of the three callables below receives one row of the case log and
# returns a truth value; they are applied row-wise with df.apply(..., axis=1).

# Set baseline to relativize metrics to that baseline
baseline = lambda row: row['paid_full'] or row['dismissed']

outcome = lambda row: row['Send for Credit Collection.count'] <= 0.0
# outcome = lambda row: row['Insert Fine Notification.count'] <= 0.0

# Keep exactly ONE `explanation` uncommented. A later assignment silently
# overwrites an earlier one, so leaving several active hides which hypothesis
# is actually being scored.
# explanation = lambda row: row['duration'] <= 269.0
# explanation = lambda row: row['Insert Fine Notification.count'] <= 0.0
explanation = lambda row: row['Add penalty:Payment.delay'] <= 3.0 and row['Final.outstanding_balance_without_penalty'] <= 0.01
# explanation = lambda row: row['Add penalty:Payment.delay'] <= 3.0 and row['Final.outstanding_balance_without_penalty'] <= 0.01 and row['Payment.count'] == 1
# explanation = lambda row: row['Add penalty:Payment.delay'] <= 3.0 and row['Final.outstanding_balance_without_penalty'] <= 0.01 and row['Payment.count'] >= 2
# explanation = lambda row: row['start_time_rel'] >= 4481.0
# explanation = lambda row: row['start_time_rel'] >= 4401.0 and row['Payment.count'] >= 1.0
# explanation = lambda row: row['Insert Date Appeal to Prefecture.count'] >= 1.0 and row['Notify Result Appeal to Offender.count'] <= 0.0
# explanation = lambda row: row['Final.outstanding_balance'] <= 10.00
# explanation = lambda row: row['appeal'] and row['Final.outstanding_balance_without_penalty'] <= 0.01
# special_dismissal_codes = ['2', '3', '5', 'A', 'B', 'E', 'F', 'I', 'J', 'K', 'M', 'N', 'Q', 'R', 'T', 'U', 'V']
# explanation = lambda row: row['Create Fine.dismissal'] in special_dismissal_codes
# explanation = lambda row: row['Create Fine.count'] == 1.0 and row['Send Fine.count'] == 1.0 and row['event_count'] == 2.0



### Baseline check

In [20]:
# Sanity check: measured over the entire log, the baseline is expected to
# explain the outcome with high precision.

# Evaluate both row-wise predicates once and store them as columns.
df['outcome'] = df.apply(outcome, axis='columns')
df['baseline'] = df.apply(baseline, axis='columns')

y_true = df['outcome']
print('Outcome distribution:', y_true.value_counts(), sep='\n')

# Precision of "baseline holds" as a predictor of "outcome holds".
y_pred = df['baseline']
precision = precision_score(y_true=y_true, y_pred=y_pred, pos_label=True, zero_division=1)
print('Precision of baseline on case log:', round(precision, 3), precision)


Outcome distribution:
outcome
True     91357
False    59013
Name: count, dtype: int64
Precision of baseline on case log: 1.0 0.9999518845228549


### Get scores

In [21]:
# Score the active explanation twice: on the full case log, and on the subset
# of cases for which the baseline does NOT hold.
df['explanation'] = df.apply(explanation, axis=1)

df_baseline = df[~df['baseline']]
print('Baseline filter: {} out of {} rows remain.'.format(len(df_baseline),len(df)))
y_true_baseline = df_baseline['outcome']
print('Outcome distribution on baseline:')
print(y_true_baseline.value_counts())

y_pred = df['explanation']
y_pred_baseline = df_baseline['explanation']
precision = precision_score(y_true, y_pred, pos_label=True, zero_division=1)
precision_baseline = precision_score(y_true_baseline, y_pred_baseline, pos_label=True, zero_division=1)
recall = recall_score(y_true, y_pred, pos_label=True)
recall_baseline = recall_score(y_true_baseline, y_pred_baseline, pos_label=True)

print('Precision on case log:', round(precision, 3), precision)
print('Precision on baseline:', round(precision_baseline, 3), precision_baseline)
print('Recall on case log:', round(recall, 3), recall)
print('Recall on baseline:', round(recall_baseline, 3), recall_baseline)

# Pin the label order so the matrix is always 2x2. Without `labels`, sklearn
# builds the matrix only from the classes that actually occur, so a subset in
# which outcome and explanation both take a single value yields a 1x1 matrix
# and the four-way unpacking below raises ValueError.
# With labels=[False, True], ravel() yields (TN, FP, FN, TP).
nof_true_neg, nof_false_pos, nof_false_neg, nof_true_pos = confusion_matrix(
    y_true, y_pred, labels=[False, True]).ravel()
nof_true_neg_b, nof_false_pos_b, nof_false_neg_b, nof_true_pos_b = confusion_matrix(
    y_true_baseline, y_pred_baseline, labels=[False, True]).ravel()

print('True positives (baseline, log):', nof_true_pos_b , nof_true_pos)
print('False positives (baseline, log):', nof_false_pos_b, nof_false_pos)
print('True negatives (baseline, log):', nof_true_neg_b, nof_true_neg)
print('False negatives (baseline, log):', nof_false_neg_b, nof_false_neg)

Baseline filter: 88020 out of 150370 rows remain.
Outcome distribution on baseline:
outcome
False    59010
True     29010
Name: count, dtype: int64
Precision on case log: 0.999 0.9991558331926389
Precision on baseline: 0.999 0.9991310392770246
Recall on case log: 0.065 0.06477883468152414
Recall on baseline: 0.198 0.1981730437780076
True positives (baseline, log): 5749 5918
False positives (baseline, log): 5 5
True negatives (baseline, log): 59005 59008
False negatives (baseline, log): 23261 85439


### Show true positives

In [22]:
# Rows the explanation flags, narrowed to those whose outcome also holds.
pos = df.loc[df['explanation']]
true_pos = pos.loc[pos['outcome']]
print(true_pos.shape[0], 'true positives')
true_pos.head(5)

5918 true positives


Unnamed: 0,case_id,Create Fine.count,event_count,start_time,start_time_rel,duration,Create Fine.start,Create Fine.amount,Create Fine.org:resource,Create Fine.dismissal,...,credit_collection,unresolved,appeal,overturned_judge,overturned_prefecture,overturned,upheld,outcome,baseline,explanation
2,S49055,1,6,2000-01-02,1,239,0,31.3,35,NIL,...,False,True,False,False,False,False,False,True,False,True
10,S44571,1,6,2000-01-03,2,252,0,31.3,36,NIL,...,False,True,False,False,False,False,False,True,False,True
19,N22019,1,6,2000-01-03,2,256,0,31.3,536,NIL,...,False,True,False,False,False,False,False,True,False,True
27,N21197,1,6,2000-01-03,2,169,0,31.3,538,NIL,...,False,True,False,False,False,False,False,True,False,True
28,N22415,1,6,2000-01-03,2,138,0,18.78,550,NIL,...,False,True,False,False,False,False,False,True,False,True


### Show false positives

In [23]:
# Rows the explanation flags although the outcome does not hold.
false_pos = pos.loc[~pos['outcome']]
print(false_pos.shape[0], 'false positives')
false_pos.head(5)

5 false positives


Unnamed: 0,case_id,Create Fine.count,event_count,start_time,start_time_rel,duration,Create Fine.start,Create Fine.amount,Create Fine.org:resource,Create Fine.dismissal,...,credit_collection,unresolved,appeal,overturned_judge,overturned_prefecture,overturned,upheld,outcome,baseline,explanation
142703,S175263,1,6,2011-08-27,4256,606,0,80.0,852,NIL,...,True,False,False,False,False,False,False,False,False,True
143121,S174598,1,6,2011-09-10,4270,592,0,39.0,861,NIL,...,True,False,False,False,False,False,False,False,False,True
143848,S176634,1,6,2011-10-08,4298,564,0,80.0,852,NIL,...,True,False,False,False,False,False,False,False,False,True
143947,P3198,1,6,2011-10-17,4307,555,0,24.0,538,NIL,...,True,False,False,False,False,False,False,False,False,True
144993,S178168,1,6,2012-01-24,4406,456,0,80.0,49,NIL,...,True,False,False,False,False,False,False,False,False,True


### Show false negatives

In [24]:
# Rows the explanation does not flag, narrowed to those whose outcome holds.
neg = df.loc[~df['explanation']]
false_neg = neg.loc[neg['outcome']]
print(false_neg.shape[0], 'false negatives')
false_neg.head(5)

85439 false negatives


Unnamed: 0,case_id,Create Fine.count,event_count,start_time,start_time_rel,duration,Create Fine.start,Create Fine.amount,Create Fine.org:resource,Create Fine.dismissal,...,credit_collection,unresolved,appeal,overturned_judge,overturned_prefecture,overturned,upheld,outcome,baseline,explanation
9,S44572,1,2,2000-01-03,2,1,0,31.3,36,NIL,...,False,False,False,False,False,False,False,True,True,False
14,S49387,1,2,2000-01-03,2,0,0,31.3,34,NIL,...,False,False,False,False,False,False,False,True,True,False
16,N22010,1,3,2000-01-03,2,60,0,31.3,536,NIL,...,False,True,False,False,False,False,False,True,False,False
18,N22015,1,2,2000-01-03,2,1,0,31.3,536,NIL,...,False,False,False,False,False,False,False,True,True,False
26,N21196,1,9,2000-01-03,2,366,0,31.3,538,NIL,...,False,False,True,False,False,False,True,True,True,False
