### Evaluation

Three configuration dictionaries are set: one for the predictions (`c_p`), one for the referrals (`c_r`), and one for the experiment (`c_e`).

I've generated some fake predictions so that, for the selected evaluation time period, the results are initially 100% correct.

In [1]:
%reload_ext autoreload
%autoreload 2
# Silence warnings: swap warnings.warn for a function that does nothing.
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for warnings.warn: accepts any arguments, returns None."""
    return None


warnings.warn = warn

In [2]:
# Imports and notebook-wide setup used by the configuration and cells below.
import sys, datetime, os, importlib
from pathlib import Path
import pandas as pd
# Raise the column display limit so wide result tables are shown in full.
pd.set_option('display.max_columns', 9999)
# Add <parent of the working directory>/modules to the import path so that
# the `evaluation` module imported on the next line can be found.
sys.path.append(os.path.join(Path.cwd().parent, 'modules'))
import evaluation as ev
# Reload so that edits made to the module since it was first imported are picked up.
importlib.reload(ev)
# Configuration for the prediction files: location, file name, and the
# names/format of the person and date columns.
c_p = {
    'dir': '../data/predictions/tests/',
    'file': 'tests_75_shift0.csv',
    'per_col': 'person_id',
    'date_col': 'yyyymm',
    'date_for': '%Y%m',
}

# Configuration for the referral files: the incoming date column/format, the
# period column/format derived from it, and the column holding the referral reason.
c_r = {
    'dir': '../data/referrals/',
    'file': 'test.csv',
    'per_col': 'person_id',
    'date_col_in': 'date',
    'date_for_in': '%m/%d/%Y',
    'date_col_out': 'yyyymm',
    'date_for_out': '%Y%m',
    'reason_col': 'class',
}

# Configuration for the experiment.
c_e = {
    'experiment': 'Initial Tests',
    'dir': '../results/',
    'file': 'results.csv',
    'save': True,
    'append': False,
    'thresholds': [0.5, 0.6],
    # k is used to set the conversion from probability to class prediction.
    'k': [5, 10],
    # Each landmark is a [start, end] pair of month offsets from eval_date.
    'landmarks': [[0, 3], [0, 6], [0, 12]],
    'ref_target': 'ref',
    'pred_target': 'pref',
    'eval_date': pd.to_datetime(datetime.date(2017, 1, 1)),
}

<H3> PLAYGROUND - Test functions of evaluation.py module sequentially </H3>

In [3]:
"""
preprocess_referrals(c_r, drop_duplicates=True)
"""

import numpy as np

# Playground version of the referral preprocessing: turns the raw referral file
# into one row per (person, period) with a 0/1 indicator column per referral reason.

# Load the raw referrals; the location comes from the referral config.
ref = pd.read_csv(c_r['dir'] + c_r['file'])

# Parse the incoming date and derive the integer period column from it
# (strftime with date_for_out, then cast to int).
ref['datetime'] = pd.to_datetime(ref[c_r['date_col_in']], format=c_r['date_for_in'])
ref[c_r['date_col_out']] = ref['datetime'].dt.strftime(c_r['date_for_out']).astype(int)

# Encode each distinct referral reason as an integer label 1..n,
# numbered in the order returned by unique().
trans = ref[c_r['reason_col']].unique()
translate = dict(zip(trans, range(1, len(trans) + 1)))

# Keep only the columns used below. The explicit copy makes the column
# assignments that follow act on an independent frame rather than a slice.
ref = ref.loc[:, [c_r['per_col'], c_r['date_col_out'], c_r['reason_col'], 'datetime']].copy()

# One indicator column per label: lab_1, lab_2, ...
ref['label'] = ref[c_r['reason_col']].map(translate)
lab_dum = pd.get_dummies(ref['label'], prefix='lab')
ref = pd.concat([ref, lab_dum], axis=1)

# Collapse to one row per (person, period), summing the indicators per label.
# The aggregation is given by name ('sum') rather than as the np.sum callable,
# which matches the pivot in the score_times cell and avoids pandas'
# deprecation warning for callable aggregators.
ref = pd.pivot_table(
    ref,
    values=list(lab_dum.columns),
    index=[c_r['per_col'], c_r['date_col_out']],
    aggfunc='sum',
)
ref['ref'] = 1

drop_duplicates = True
if drop_duplicates:
    # Several referrals with the same label in one period count once: cap at 1.
    cols = [name for name in ref.columns if name.startswith('lab_')]
    for c in cols:
        ref[c] = (ref[c] >= 1).astype(int)

# Reset the index so person and period become ordinary columns again.
ref = ref.reset_index()
ref

Unnamed: 0,person_id,yyyymm,lab_1,lab_2,lab_3,ref
0,1005,201701,1,0,0,1
1,1006,201702,0,1,0,1
2,1007,201702,0,1,0,1
3,1007,201706,0,1,0,1
4,2000,201710,0,0,1,1
5,2001,201710,0,0,1,1
6,3000,201701,1,0,0,1
7,3001,201701,0,1,0,1
8,3002,201702,0,1,0,1
9,3003,201703,0,1,0,1


In [4]:
"""
fill_na(df, patterns, value, c_type)
"""
def fill_na(df, patterns, value, c_type):
    """Fill missing values and cast every column whose name matches a pattern.

    Args:
        df: DataFrame to update; its columns are reassigned in place.
        patterns: iterable of patterns passed to ``df.columns.str.contains``,
            so a column is selected when the pattern occurs anywhere in its
            name (e.g. ``'ref'`` also selects ``'pref'``).
        value: replacement used for missing entries.
        c_type: dtype handed to ``astype`` after filling.

    Returns:
        The same ``df`` object that was passed in.
    """
    for pattern in patterns:
        is_match = df.columns.str.contains(pattern)
        for name in df.columns[is_match]:
            filled = df[name].fillna(value)
            df[name] = filled.astype(c_type)
    return df

In [13]:
"""
generate_test_prediction_files(c_p, c_r, patients, startdate, enddate)
"""
#import
import itertools

#params to pass
patients=100
startdate='2016-01-01'
enddate='2018-01-01'


# NOTE: these lists hard-code three label columns (lab_1..lab_3); the referral
# frame `ref` must contain exactly these columns for the copy below to work.
cols=['ref','lab_1','lab_2','lab_3'] #cols for referral - ground truth
pcols=['pref','plab_1','plab_2','plab_3'] #cols for predictions

# Build one row per (patient, month): patient ids 0..patients-1 crossed with
# every month start between startdate and enddate, encoded as integer YYYYMM.
s1 = pd.Series(range(0,patients))
s2 = pd.date_range(startdate,enddate, freq='MS').strftime("%Y%m").astype(int)

pred = pd.DataFrame(list(itertools.product(s1,s2)),columns=[c_p['per_col'],c_p['date_col']])

# Every prediction column starts at 0.
for col in pcols:
    pred[col]=0

# Referral periods as datetimes, so they can be shifted by whole months.
df_dates_ref=pd.DataFrame()
df_dates_ref['datetime']=pd.to_datetime(ref[c_r['date_col_out']], format= c_r['date_for_out'])

# For each shift (-12, -6, 0, 6, 12 months) move the referral period and copy
# the shifted ground truth into the prediction columns.
for x in range(-12,13,6):

    ref_temp=ref.copy()
    sh="shift"+str(x)
    #print(sh)
    shifted=df_dates_ref['datetime']+ pd.DateOffset(months=x)
    df_dates_ref[sh]=shifted.dt.strftime(c_r['date_for_out']).astype(int)
    ref_temp[c_r['date_col_out']]=df_dates_ref[sh]
    # NOTE: `pred` is built with the c_p column names but merged on the c_r
    # names, so this only works while both configs name these columns alike.
    df=pd.merge(pred, ref_temp, how='left',  on=[c_r['per_col'], c_r['date_col_out']])
    df=fill_na(df,['lab_','ref'],0, int)
    df[pcols]=df[cols]
    # Drop the ground-truth columns; `columns=` alone selects the axis, so the
    # previous (ignored) axis=0 argument has been removed.
    df=df.drop(columns=cols)
#     print(df.head())

In [14]:
"""
score_times(c_p, c_r, c_e, ref=pd.DataFrame(), pred=pd.DataFrame())
"""
# Work on copies (ref_dt / pred_dt) instead of overwriting `ref` and `pred`.
# The earlier version replaced `pred` with a filtered frame that no longer had
# the c_p['date_col'] column, so running this cell a second time failed.
ref_dt=ref.copy()
ref_dt['datetime']=pd.to_datetime(ref_dt[c_r['date_col_out']], format= c_r['date_for_out'])

# Wide referral table: one row per person, one column per period.
ref_w=ref_dt.pivot_table(index=c_r['per_col'], columns='datetime', values=c_e['ref_target'], aggfunc='sum')
ref_w=ref_w.fillna(0) #fill in NA so sums correctly.

# Keep only the predictions made at the evaluation date.
pred_dt=pred.copy()
pred_dt['datetime']=pd.to_datetime(pred_dt[c_p['date_col']], format= c_p['date_for'])
pred_eval=pred_dt.loc[pred_dt['datetime']==c_e['eval_date'],[c_p['per_col'],'datetime',c_e['pred_target']]]

# results=pd.DataFrame()
# row=0

# For each landmark [start offset, end offset] in months, show the slice of
# referral columns that falls inside that window (both ends inclusive).
for w in c_e['landmarks']:
    start=c_e['eval_date']+pd.DateOffset(months=w[0])
    end=c_e['eval_date']+pd.DateOffset(months=w[1])
    label=start.strftime(c_r['date_for_out'])+'-'+ end.strftime(c_r['date_for_out'])
    #print("Splitting dataset for evaluation at", c_e['eval_date'], "Evaluating from:", label)
    sl=slice(start,end)
    y= ref_w.loc[:,sl]
    display(y)
#     #print("Examining Columns Slice:", str(y.columns))
#     y= y.sum(axis=1)
#     y[y>1]=1
#     y=y[y.index.isin(pred_eval[c_p['per_col']])]
pred_eval


datetime,2017-01-01,2017-02-01,2017-03-01,2017-04-01
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1005,1.0,0.0,0.0,0.0
1006,0.0,1.0,0.0,0.0
1007,0.0,1.0,0.0,0.0
2000,0.0,0.0,0.0,0.0
2001,0.0,0.0,0.0,0.0
3000,1.0,0.0,0.0,0.0
3001,1.0,0.0,0.0,0.0
3002,0.0,1.0,0.0,0.0
3003,0.0,0.0,1.0,0.0
3004,0.0,0.0,0.0,1.0


datetime,2017-01-01,2017-02-01,2017-03-01,2017-04-01,2017-05-01,2017-06-01,2017-07-01
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1005,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1006,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3000,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3001,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3002,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3003,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3004,0.0,0.0,0.0,1.0,0.0,0.0,0.0


datetime,2017-01-01,2017-02-01,2017-03-01,2017-04-01,2017-05-01,2017-06-01,2017-07-01,2017-08-01,2017-09-01,2017-10-01,2017-11-01,2017-12-01,2018-01-01
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1005,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1006,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3001,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3002,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3003,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3004,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,person_id,datetime,pref
12,0,2017-01-01,0
37,1,2017-01-01,0
62,2,2017-01-01,0
87,3,2017-01-01,0
112,4,2017-01-01,0
...,...,...,...
2387,95,2017-01-01,0
2412,96,2017-01-01,0
2437,97,2017-01-01,0
2462,98,2017-01-01,0


<H3> CONTINUE Evaluation.ipynb </H3>

In [27]:
# Generate fake datasets. This will take a while. It writes a number of fake
# prediction datasets with the truth "shifted" to various degrees.

importlib.reload(ev)
numberofpatients=5000 #5000 simulated patients (the playground cell above uses 100)
startedate='2016-01-01'
enddate='2018-01-01'
ev.generate_test_prediction_files(c_p, c_r, numberofpatients, startedate, enddate)

shift-12
Saving dataframe.  Records: 125000 Patients 5000
shift-6
Saving dataframe.  Records: 125000 Patients 5000
shift0
Saving dataframe.  Records: 125000 Patients 5000
shift6
Saving dataframe.  Records: 125000 Patients 5000
shift12
Saving dataframe.  Records: 125000 Patients 5000


In [85]:
# This is the truth: the raw referral records, read from the file named in c_r.
ref=pd.read_csv(c_r['dir']+c_r['file'])
ref

Unnamed: 0,person_id,date,class
0,3000,1/1/2017,diabetes
1,3001,1/2/2017,liver
2,3002,2/2/2017,liver
3,3003,3/2/2017,liver
4,3004,4/2/2017,liver
5,3005,5/3/2017,liver
6,3006,6/3/2017,liver
7,3007,7/4/2017,liver
8,3008,8/4/2017,liver
9,3009,9/4/2017,liver


In [86]:
# Preprocess the referrals shown above. The call returns the processed frame
# together with the reason -> integer label encoding, which is printed below.
ref, trans = ev.preprocess_referrals(c_r)
print("the label encoding is:", trans)
ref

the label encoding is: {'diabetes': 1, 'liver': 2, 'pnemonia': 3}


Unnamed: 0,person_id,yyyymm,lab_1,lab_2,lab_3,ref
0,1005,201701,1,0,0,1
1,1006,201702,0,1,0,1
2,1007,201702,0,1,0,1
3,1007,201706,0,1,0,1
4,2000,201710,0,0,1,1
5,2001,201710,0,0,1,1
6,3000,201701,1,0,0,1
7,3001,201701,0,1,0,1
8,3002,201702,0,1,0,1
9,3003,201703,0,1,0,1


In [87]:
# These are the predictions, read from the file named in c_p.
pred=pd.read_csv(c_p['dir']+c_p['file'])
pred

Unnamed: 0,person_id,yyyymm,pref,plab_1,plab_2,plab_3
0,0.0,201601,0.0,0.0,0.0,0.0
1,0.0,201602,0.0,0.0,0.0,0.0
2,0.0,201603,0.0,0.0,0.0,0.0
3,0.0,201604,0.0,0.0,0.0,0.0
4,0.0,201605,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
124995,4999.0,201709,0.0,0.0,0.0,0.0
124996,4999.0,201710,0.0,0.0,0.0,0.0
124997,4999.0,201711,0.0,0.0,0.0,0.0
124998,4999.0,201712,0.0,0.0,0.0,0.0


## Run One Evaluation
This runs a single evaluation at the evaluation date set in the config (`c_e['eval_date']`).

In [88]:
# Run the evaluation at the time set in the config (c_e['eval_date']).
# In the output below, the result has one row per landmark range (`range` column).
importlib.reload(ev)
results = ev.score_times(c_p, c_r, c_e)
results

Loading reference dataframe.. test.csv
Loading predictions dataframe.. tests_75_shift0.csv
Shape of referrals dataframe: (125000, 6)
Shape of referrals dataframe: (5000, 3)
Splitting dataset for evaluation at 2017-01-01 00:00:00 Evaluating from: 201701-201704
Examining Columns Slice: DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01'], dtype='datetime64[ns]', name='datetime', freq=None)
Splitting dataset for evaluation at 2017-01-01 00:00:00 Evaluating from: 201701-201707
Examining Columns Slice: DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',
               '2017-05-01', '2017-06-01', '2017-07-01'],
              dtype='datetime64[ns]', name='datetime', freq=None)
Splitting dataset for evaluation at 2017-01-01 00:00:00 Evaluating from: 201701-201801
Examining Columns Slice: DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',
               '2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01',
               '2017-09-01', '

Unnamed: 0,experiment,start_time,pred_dir,pred_file,n,range,log_loss,roc_auc_score,precision_ref_@k=5,recall_ref_@k=5,accuracy_ref_@k=5,balanced_accuracy_ref_@k=5,f1_ref_@k=5,precision_ref_@k=10,recall_ref_@k=10,accuracy_ref_@k=10,balanced_accuracy_ref_@k=10,f1_ref_@k=10,precision_ref_p>0.5,recall_ref_p>0.5,accuracy_ref_p>0.5,balanced_accuracy_ref_p>0.5,f1_ref_p>0.5,precision_ref_p>0.6,recall_ref_p>0.6,accuracy_ref_p>0.6,balanced_accuracy_ref_p>0.6,f1_ref_p>0.6,end_time,elapsed_time
0,Initial Tests,2020-05-13 01:42:32.363244032,../data/predictions/tests/,tests_75_shift0.csv,5000.0,201701-201704,0.034769,0.722222,0.8,0.444444,0.9988,0.722122,0.571429,0.4,0.444444,0.9978,0.721621,0.421053,1.0,0.444444,0.999,0.722222,0.615385,1.0,0.444444,0.999,0.722222,0.615385,2020-05-13 01:42:32.445480960,00:00:00.082236
1,Initial Tests,2020-05-13 01:42:32.456516096,../data/predictions/tests/,tests_75_shift0.csv,5000.0,201701-201707,0.055492,0.666667,0.8,0.333333,0.9982,0.666566,0.470588,0.4,0.333333,0.9972,0.666065,0.363636,1.0,0.333333,0.9984,0.666667,0.5,1.0,0.333333,0.9984,0.666667,0.5,2020-05-13 01:42:32.523312128,00:00:00.066796
2,Initial Tests,2020-05-13 01:42:32.532505088,../data/predictions/tests/,tests_75_shift0.csv,5000.0,201701-201801,0.110754,0.6,0.8,0.2,0.9966,0.5999,0.32,0.4,0.2,0.9956,0.599398,0.266667,1.0,0.2,0.9968,0.6,0.333333,1.0,0.2,0.9968,0.6,0.333333,2020-05-13 01:42:32.587367936,00:00:00.054862


# Multiple different evaluations
This is an example of running multiple evaluations: every file in the predictions directory is scored, each holding predictions that have been shifted by a different amount.


In [None]:
importlib.reload(ev)
# Switch the experiment config over to the "all files" run. The next cell
# reads c_e['file'], so these keys are updated on the shared dict.
c_e['experiment']='All Tests'
c_e['append']= True
c_e['file']= 'all_results.csv'

# Every regular file in the predictions directory, in sorted order.
# os.listdir returns entries in arbitrary order, so sorting keeps the order in
# which files are scored the same from run to run.
pred_files = sorted(f for f in os.listdir(c_p['dir']) if os.path.isfile(os.path.join(c_p['dir'], f)))

# Score each prediction file in turn; `results` holds the last file's output.
# NOTE(review): with append=True, re-running this cell presumably appends the
# same rows to the results file again - confirm against evaluation.score_times.
for file in pred_files:
    c_p['file']=file
    results = ev.score_times(c_p, c_r, c_e)

In [None]:
# Read in the full results from the file named in the experiment config (c_e).
results=pd.read_csv(c_e['dir']+c_e['file'])
results

In [3]:
from sksurv.metrics import cumulative_dynamic_auc

ModuleNotFoundError: No module named 'sksurv'