In [111]:
import sys
sys.path.append('../')

from model import *
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import pickle as pkl
import matplotlib.pyplot as plt

from utils_synthetic import *

In [112]:
import sklearn
sklearn.__version__

'1.0.2'

# Load child welfare synthetic data

This notebook runs the analysis on the childwelfare data by leveraging experts' agreement
1. Explore a model build on data ignoring experts 
2. Compute agreement between experts using influence function
3. Retrain the model on the set of labels for which experts strongly agree

The current analysis uses multi layer perceptrons in a single train / test split.

### Data

Reopen the data created with the notebook in `data/`

In [158]:
data_file = '../../../data/semi_synthetic/Data_semisynthetic_v1.pkl'

In [159]:
with open(data_file, 'rb') as handle:
    X,Y_1,Y_2,Y,D_0,refer_ids,screener_ids,coef_pred_y = pkl.load(handle)

In [160]:
X = np.append(X,np.ones((X.shape[0],1)),axis=1)

In [161]:
#drop instances if expert assessed a single case

drop_experts = []
for num in screener_ids:

    if screener_ids.count(num) < 10:

        drop_experts.append(num)

In [162]:
drop_idx = []
for index, elem in enumerate(screener_ids):
    if elem in drop_experts:
        drop_idx.append(index)

In [163]:
X = np.delete(X,drop_idx,axis=0)
Y_1 = np.delete(Y_1,drop_idx,axis=0)
Y_2 = np.delete(Y_2,drop_idx,axis=0)
Y = np.delete(Y,drop_idx,axis=0)
D_0 = np.delete(D_0,drop_idx,axis=0)
refer_ids = np.delete(refer_ids,drop_idx,axis=0)
screener_ids = np.delete(screener_ids,drop_idx,axis=0)

In [164]:
selective_labels = True
#noise = True
opb = True
opb_blind = False

unobservables = False
unobs_k = 5 #number of unobsevables, k features with largest coefficient

change_some_coef = False #resample some coefficients for each human?
change_same = False
n=44#how many coefficients to change if change_some_coef == True
shared_bias = False
bias_opposite = False #if shared_bias true, should the bias overestimate the importance of use it in the opposite direction?

bias_assignment = False
change_all_coef = False#resample all non-zero coefficients?

random_if_not_good = False

#If opb_out, modeled as a business rule?
business_rule = False

#selective labels? Do we only observe label when D=1?


#HUMAN DECISIONS MODEL PARAMETERS

rand = False #are decisions made by humans random?

In [165]:
if not opb:
        Y = Y_1
elif opb_blind and not business_rule:
    Y = np.array([((Y_1[i]==1)&(D_0[i]==0)) for i in np.arange(len(Y_1))])
    Y_2 = 1-D_0
    logit = linear_model.LogisticRegression(penalty = 'l1', C=0.01, random_state=42, fit_intercept=False)
    clf = logit.fit(X, Y)
    Y_pred = clf.predict_proba(X)
    fpr, tpr,thres = sklearn.metrics.roc_curve(Y, Y_pred[:,1])
    roc_auc = sklearn.metrics.auc(fpr, tpr)
    #print(roc_auc)
    coef_pred_y = clf.coef_
    #print(sum(coef_pred_y[0]!=0))
    #plt.plot(fpr, tpr, color='darkorange',label='ROC curve (area = %0.2f)' % roc_auc)
elif opb_blind and business_rule:
    Y_2 = 1-D_0



if unobservables: #delete one of the variables that receive a lot of weight
    X_obs = np.delete(X,np.argsort(coef_pred_y[0])[-(unobs_k+2):-2],1)
else:
    X_obs = X

if bias_assignment:
    screener_ids = np.array(screener_ids)
    screener_ids[X[:,-2] == max(X[:,-2])] = 'TNew'
    screener_ids=list(screener_ids)

screener_set = np.array([x for x in set(screener_ids) if str(x)!='nan'])

D, alphas = decision_model(X, screener_ids, screener_set, coef_pred_y[0], change_coef = change_some_coef, change_same = change_same, change_all=change_all_coef, n=n,  shared_bias=shared_bias, rand= rand, bias_opposite=bias_opposite, bias_assignment= bias_assignment, random_if_not_good = random_if_not_good)

if opb and opb_blind and business_rule:
    D[D_0] = 0
    Y[D_0] = 0

#     with open('../../data/semi_synthetic/Y_human_'+setting+'.pkl', 'wb') as file:
#         pkl.dump([X,Y_1,Y_2,Y,D_0,refer_ids,screener_ids,coef_pred_y,D],file)
print(sum(D)/len(D))   
print(sum(D==Y)/len(D)   )

ValueError: shapes (4137,218) and (217,) not aligned: 218 (dim 1) != 217 (dim 0)

In [None]:
Y_2 = Y_2*1

In [None]:
YC = [max(Y_1[i], Y_2[i]) for i in np.arange(len(Y_1))]

In [None]:
target = pd.DataFrame({'D': D, 'Y1': Y_1, 'Y2': Y_2, 'YC': YC})

In [None]:
print("D!=Y1: ",sum(target['D']!=target['Y1']))
print("Y2!=Y1: ",sum(target['Y2']!=target['Y1']))

In [None]:

#covariates, target, nurses = triage.drop(columns = ['D', 'Y1', 'Y2', 'YC', 'acuity', 'nurse']), triage[['D', 'Y1', 'Y2', 'YC']], triage['nurse']

In [None]:
# ids_map = {}
# screener_ids
# for i in range(len(set(screener_ids))):
#     ids_map[]

In [None]:
#convert screener ids to integers
#screener_ids = [int(i[2:]) for i in screener_ids]

Split data in a 80% train, 20% test

In [None]:
cov_train, cov_test, tar_train, tar_test, nur_train, nur_test = train_test_split(pd.DataFrame(X), target, pd.Series(screener_ids), test_size = 0.2, random_state = 42)

### Modelling

In [None]:
# Model's characteristics
params = {'layers': []} # If = [] equivalent to a simple logistic regression

# Amalgation parameters
rho = 0.05 # Control which point to consider from a confience point of view
pi_1 = 4.0 # Control criterion on centre mass metric
pi_2 = 0.8 # Control criterion on opposing metric
tau = 1.0  # Balance between observed and expert labels

##### 1. Train on decision

This model models the nurse decision based on covariates

In [None]:
for l1_penalty in [1.0, 0.1, 0.001, 0.01]:
    try:
        model = BinaryMLP(**params)
        model = model.fit(cov_train, tar_train['D'], nur_train, l1_penalty = l1_penalty)
        break
    except Exception as e:
        print(e, l1_penalty)
        pass

In [None]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

In [None]:
# Yc performance
roc_auc_score(tar_test['Y2'], model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['YC'], model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['D'], model.predict(cov_test))

##### 2. Agreement computation 

Measure of agreeability are estimated in a cross validation fashion on the train set.

In [None]:
# Fold evaluation of influences
folds, predictions, influence = influence_cv(BinaryMLP, cov_train, tar_train['D'], nur_train, params = params)

In [None]:
# Compute metrics agreeability
center_metric, opposing_metric = compute_agreeability(influence)

In [None]:
import matplotlib.pyplot as plt
plt.scatter(center_metric, opposing_metric)

In [None]:
# Apply criteria on amalgamation
high_conf = (predictions > (1 - rho)) | (predictions < rho)
high_agr = (center_metric > pi_1) & (opposing_metric > pi_2) & high_conf
high_agr_correct = ((predictions - tar_train['D']).abs() < rho) & high_agr

In [None]:
plt.hist(predictions)

In [None]:
print('high_conf:', sum(high_conf))
print('high_agr:', sum(high_agr))
print('high_agr_correct:', sum(high_agr_correct))

In [None]:
from sklearn.calibration import calibration_curve
prob_true, prob_pred = calibration_curve(tar_train['D'], predictions, n_bins=7)
plt.plot(prob_true,prob_pred, marker='o', linewidth=1, label='logreg')


In [None]:
# Create amalgamated labels
tar_train['Ya'] = tar_train['Y1'].copy()
tar_train['Ya'][high_agr_correct] = (1 - tau) * tar_train['Y1'][high_agr_correct] \
                                    + tau * tar_train['D'][high_agr_correct]

In [None]:
sum(tar_train['D']!=tar_train['Y1'])

In [None]:
tar_train['Y1'][high_agr_correct]

In [None]:
sum(tar_train['Ya']!=tar_train['Y1'])

In [None]:
#index for selective labels
index_amalg = [i==1.0 for i in tar_train['D']] | high_agr_correct

##### 3. Updated model

In [None]:
#model with selective labels
model = BinaryMLP(**params)
model = model.fit(cov_train[index_amalg], tar_train[index_amalg]['Ya'], nur_train[index_amalg])

In [None]:
#model without selective labels
model = BinaryMLP(**params)
model = model.fit(cov_train, tar_train['Ya'], nur_train[index_amalg])

In [None]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

In [None]:
# Yc performance
roc_auc_score(tar_test['YC'],model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['Y2'],model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['D'],model.predict(cov_test))

##### 4. Train on observed data

In [None]:
model = BinaryMLP(**params)
model = model.fit(cov_train, tar_train['Y1'], nur_train)

In [None]:
# Naive performance
roc_auc_score(tar_test['Y1'], model.predict(cov_test))

In [None]:
# Yc performance
roc_auc_score(tar_test['YC'],model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['Y2'],model.predict(cov_test))

In [None]:
roc_auc_score(tar_test['D'],model.predict(cov_test))