In [1]:
import sys
sys.path.append('../')

from model import *
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import pickle as pkl

# Load child welfare data

This notebook runs the analysis on the childwelfare data by leveraging experts' agreement
1. Explore a model build on data ignoring experts 
2. Compute agreement between experts using influence function
3. Retrain the model on the set of labels for which experts strongly agree

The current analysis uses multi layer perceptrons in a single train / test split.

### Data

Reopen the data created with the notebook in `data/`

In [2]:
with open('../../../data/ChildWelfare/X_preprocess.pkl', 'rb') as handle:
    X, screener_ids, refer_ids, Y_observed, Y_human, Y_serv, Y_sub, colnames = pkl.load(handle)

In [3]:
Y_sub = np.reshape(Y_sub,(Y_sub.shape[0],1))

In [4]:
target = np.concatenate((Y_human, Y_observed, Y_sub),axis=1)

In [5]:
target =pd.DataFrame(target, columns = ['D', 'Y1', 'Y2'])

In [6]:

#covariates, target, nurses = triage.drop(columns = ['D', 'Y1', 'Y2', 'YC', 'acuity', 'nurse']), triage[['D', 'Y1', 'Y2', 'YC']], triage['nurse']

In [7]:
#convert screener ids to integers
screener_ids = [int(i[2:]) for i in screener_ids]

Split data in a 80% train, 20% test

In [14]:
cov_train, cov_test, tar_train, tar_test, nur_train, nur_test = train_test_split(pd.DataFrame(X), target, pd.Series(screener_ids), test_size = 0.2, random_state = 42)

### Modelling

In [15]:
# Model's characteristics
params = {'layers': []} # If = [] equivalent to a simple logistic regression

# Amalgation parameters
rho = 0.05 # Control which point to consider from a confience point of view
pi_1 = 4.0 # Control criterion on centre mass metric
pi_2 = 0.8 # Control criterion on opposing metric
tau = 1.0  # Balance between observed and expert labels

##### 1. Train on decision

This model models the nurse decision based on covariates

In [16]:
model = BinaryMLP(**params)
model = model.fit(cov_train, tar_train['D'], nur_train)


  0%|          | 0/1000 [00:00<?, ?it/s][A
Loss: 0.595:   0%|          | 0/1000 [00:00<?, ?it/s][A
Loss: 0.595:   0%|          | 1/1000 [00:00<06:13,  2.67it/s][A
Loss: 0.569:   0%|          | 1/1000 [00:00<06:13,  2.67it/s][A
Loss: 0.569:   0%|          | 2/1000 [00:00<06:25,  2.59it/s][A
Loss: 0.553:   0%|          | 2/1000 [00:01<06:25,  2.59it/s][A
Loss: 0.553:   0%|          | 3/1000 [00:01<06:28,  2.57it/s][A
Loss: 0.542:   0%|          | 3/1000 [00:01<06:28,  2.57it/s][A
Loss: 0.542:   0%|          | 4/1000 [00:01<06:17,  2.64it/s][A
Loss: 0.536:   0%|          | 4/1000 [00:01<06:17,  2.64it/s][A
Loss: 0.536:   0%|          | 5/1000 [00:01<06:39,  2.49it/s][A
Loss: 0.529:   0%|          | 5/1000 [00:02<06:39,  2.49it/s][A
Loss: 0.529:   1%|          | 6/1000 [00:02<08:05,  2.05it/s][A
Loss: 0.525:   1%|          | 6/1000 [00:03<08:05,  2.05it/s][A
Loss: 0.525:   1%|          | 7/1000 [00:03<08:45,  1.89it/s][A
Loss: 0.521:   1%|          | 7/1000 [00:03<08:45,  1

Loss: 0.500:   6%|▌         | 62/1000 [00:27<07:17,  2.14it/s][A
Loss: 0.500:   6%|▌         | 62/1000 [00:27<07:17,  2.14it/s][A
Loss: 0.500:   6%|▋         | 63/1000 [00:27<07:22,  2.12it/s][A
Loss: 0.500:   6%|▋         | 63/1000 [00:28<07:22,  2.12it/s][A
Loss: 0.500:   6%|▋         | 64/1000 [00:28<07:17,  2.14it/s][A
Loss: 0.500:   6%|▋         | 64/1000 [00:28<07:17,  2.14it/s][A
Loss: 0.500:   6%|▋         | 65/1000 [00:28<07:05,  2.20it/s][A
Loss: 0.500:   6%|▋         | 65/1000 [00:28<07:05,  2.20it/s][A
Loss: 0.500:   7%|▋         | 66/1000 [00:28<06:50,  2.27it/s][A
Loss: 0.500:   7%|▋         | 66/1000 [00:29<06:50,  2.27it/s][A
Loss: 0.500:   7%|▋         | 67/1000 [00:29<06:35,  2.36it/s][A
Loss: 0.500:   7%|▋         | 67/1000 [00:29<06:35,  2.36it/s][A
Loss: 0.500:   7%|▋         | 68/1000 [00:29<07:09,  2.17it/s][A
Loss: 0.499:   7%|▋         | 68/1000 [00:30<07:09,  2.17it/s][A
Loss: 0.499:   7%|▋         | 69/1000 [00:30<07:24,  2.10it/s][A
Loss: 0.50

Loss: 0.500:  12%|█▏        | 123/1000 [00:53<05:20,  2.73it/s][A
Loss: 0.500:  12%|█▏        | 124/1000 [00:53<06:46,  2.15it/s][A
Loss: 0.500:  12%|█▏        | 124/1000 [00:54<06:46,  2.15it/s][A
Loss: 0.500:  12%|█▎        | 125/1000 [00:54<06:40,  2.19it/s][A
Loss: 0.499:  12%|█▎        | 125/1000 [00:54<06:40,  2.19it/s][A
Loss: 0.499:  13%|█▎        | 126/1000 [00:54<06:06,  2.39it/s][A
Loss: 0.500:  13%|█▎        | 126/1000 [00:54<06:06,  2.39it/s][A
Loss: 0.500:  13%|█▎        | 127/1000 [00:54<05:35,  2.61it/s][A
Loss: 0.499:  13%|█▎        | 127/1000 [00:55<05:35,  2.61it/s][A
Loss: 0.499:  13%|█▎        | 128/1000 [00:55<05:30,  2.64it/s][A
Loss: 0.500:  13%|█▎        | 128/1000 [00:55<05:30,  2.64it/s][A
Loss: 0.500:  13%|█▎        | 129/1000 [00:55<05:57,  2.44it/s][A
Loss: 0.500:  13%|█▎        | 129/1000 [00:56<05:57,  2.44it/s][A
Loss: 0.500:  13%|█▎        | 130/1000 [00:56<05:31,  2.62it/s][A
Loss: 0.501:  13%|█▎        | 130/1000 [00:56<05:31,  2.62it/s

In [17]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

0.6924127916055088

In [18]:
# Yc performance
roc_auc_score(tar_test['Y2'], model.predict(cov_test))

0.9727039767223464

##### 2. Agreement computation 

Measure of agreeability are estimated in a cross validation fashion on the train set.

In [19]:
# Fold evaluation of influences
folds, predictions, influence = influence_cv(BinaryMLP, cov_train, tar_train['D'], nur_train, params = params)



  0%|          | 0/1000 [00:00<?, ?it/s][A[A

Loss: 0.607:   0%|          | 0/1000 [00:00<?, ?it/s][A[A

Loss: 0.607:   0%|          | 1/1000 [00:00<03:35,  4.63it/s][A[A

Loss: 0.581:   0%|          | 1/1000 [00:00<03:35,  4.63it/s][A[A

Loss: 0.581:   0%|          | 2/1000 [00:00<03:37,  4.60it/s][A[A

Loss: 0.566:   0%|          | 2/1000 [00:00<03:37,  4.60it/s][A[A

Loss: 0.566:   0%|          | 3/1000 [00:00<03:35,  4.63it/s][A[A

Loss: 0.557:   0%|          | 3/1000 [00:00<03:35,  4.63it/s][A[A

Loss: 0.557:   0%|          | 4/1000 [00:00<03:27,  4.80it/s][A[A

Loss: 0.549:   0%|          | 4/1000 [00:01<03:27,  4.80it/s][A[A

Loss: 0.549:   0%|          | 5/1000 [00:01<03:25,  4.83it/s][A[A

Loss: 0.542:   0%|          | 5/1000 [00:01<03:25,  4.83it/s][A[A

Loss: 0.542:   1%|          | 6/1000 [00:01<03:25,  4.83it/s][A[A

Loss: 0.537:   1%|          | 6/1000 [00:01<03:25,  4.83it/s][A[A

Loss: 0.537:   1%|          | 7/1000 [00:01<03:22,  4.90it/s]

Loss: 0.499:   6%|▌         | 58/1000 [00:13<03:37,  4.33it/s][A[A

RuntimeError: linalg_solve: The diagonal element 117 is zero, the solve could not be completed because the input matrix is singular.

In [9]:
# Compute metrics agreeability
center_metric, opposing_metric = compute_agreeability(influence)

  center = np.dot(inf_sorted, np.arange(len(influence_point))) / np.sum(inf_sorted)
  opposing = np.max([inf_pos.sum(), - inf_neg.sum()]) / total


In [10]:
# Apply criteria on amalgamation
high_conf = (predictions > (1 - rho)) | (predictions < rho)
high_agr = (center_metric > pi_1) & (opposing_metric > pi_2) & high_conf
high_agr_correct = ((predictions - tar_train['D']).abs() < rho) & high_agr

In [11]:
# Create amalgamated labels
tar_train['Ya'] = tar_train['Y1'].copy()
tar_train['Ya'][high_agr_correct] = (1 - tau) * tar_train['Y1'][high_agr_correct] \
                                    + tau * tar_train['D'][high_agr_correct]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tar_train['Ya'] = tar_train['Y1'].copy()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


In [12]:
index_amalg = tar_train['D'] | high_agr_correct

##### 3. Updated model

In [20]:
model = BinaryMLP(**params)
model = model.fit(cov_train[index_amalg], tar_train[index_amalg]['Ya'], nur_train[index_amalg])

Loss: 0.623:   2%|▏         | 23/1000 [00:08<06:13,  2.62it/s]


In [21]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

0.6490108254809583

In [22]:
# Yc performance
roc_auc_score(tar_test['YC'],model.predict(cov_test))

0.5658628634431628

##### 4. Train on observed data

In [16]:
model = BinaryMLP(**params)
model = model.fit(cov_train, tar_train['Y1'], nur_train)

Loss: 0.687:  13%|█▎        | 128/1000 [01:55<13:04,  1.11it/s]


In [17]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

0.6532088012868359

In [18]:
# Yc performance
roc_auc_score(tar_test['YC'],model.predict(cov_test))

0.5464182542438537