In [1]:
import sys
sys.path.append('/Users/pschulam/Git/mypy')

In [23]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nips15
import adjustment

np.set_printoptions(precision=4)
%matplotlib inline

In [12]:
folds_dir = 'models/jmlr/folds'

def load_model(marker, fold, folds_dir=folds_dir):
    param_dir = os.path.join(folds_dir, marker, '{:02d}'.format(fold), 'param')
    return nips15.NipsModel.from_directory(param_dir)

def get_posteriors(model, data):
    P = [model.posterior(*d.unpack()) for d in data]
    P = np.array(P)
    return P

In [55]:
pfvc_spec = {'t' : 'years_seen_full',
             'y' : 'pfvc',
             'x1': ['female', 'afram'],
             'x2': ['female', 'afram', 'aca', 'scl']}

pfvc    = pd.read_csv('data/benchmark_pfvc.csv')
pfvc_pd = [nips15.PatientData.from_tbl(tbl, **pfvc_spec) for _, tbl in pfvc.groupby('ptid')]

tss_spec = {'t' : 'years_seen',
            'y' : 'tss',
            'x1': ['female', 'afram'],
            'x2': ['female', 'afram']}

tss = pd.read_csv('data/benchmark_tss.csv')
tss_match = ['ptid'] + tss_spec['x1']
tss = pd.merge(pfvc[tss_match], tss, 'left', tss_match)
tss_pd = [nips15.PatientData.from_tbl(tbl, **tss_spec) for _, tbl in tss.groupby('ptid')]

pdlco_spec = {'t' : 'years_seen',
              'y' : 'pdlco',
              'x1': ['female', 'afram'],
              'x2': ['female', 'afram']}

pdlco = pd.read_csv('data/benchmark_pdc.csv')
pdlco_match = ['ptid'] + pdlco_spec['x1']
pdlco = pd.merge(pfvc[pdlco_match], pdlco, 'left', pdlco_match)
pdlco_pd = [nips15.PatientData.from_tbl(tbl, **pdlco_spec) for _, tbl in pdlco.groupby('ptid')]

combined_pd = list(zip(pfvc_pd, tss_pd, pdlco_pd))

In [56]:
folds = pfvc.loc[:, ['ptid', 'fold']].drop_duplicates()
folds = dict(zip(folds['ptid'], folds['fold']))

In [96]:
k = 1
train = [pd for pd in combined_pd if not folds[pd[0].ptid] == k]
test  = [pd for pd in combined_pd if     folds[pd[0].ptid] == k]

In [97]:
pfvc_model = load_model('pfvc', k)
P = get_posteriors(pfvc_model, [pd[0] for pd in train])
Q = get_posteriors(pfvc_model, [pd[0].truncate(1.0) for pd in train])

In [108]:
tss_model = load_model('tss', k)
#R1 = get_posteriors(tss_model, [pd[1] for pd in train])
R1 = get_posteriors(tss_model, [pd[1].truncate(1.0) for pd in train])
M1 = adjustment.choose_features(P, R1)

In [109]:
pdlco_model = load_model('pdc', k)
#R2 = get_posteriors(pdlco_model, [pd[2] for pd in train])
R2 = get_posteriors(pdlco_model, [pd[2].truncate(1.0) for pd in train])
M2 = adjustment.choose_features(P, R1)

In [100]:
X1 = get_posteriors(tss_model, [pd[1].truncate(1.0) for pd in train])
X2 = get_posteriors(pdlco_model, [pd[2].truncate(1.0) for pd in train])

#X1 = get_posteriors(tss_model, [pd[1] for pd in train])
#X2 = get_posteriors(pdlco_model, [pd[2] for pd in train])

In [110]:
X = np.c_[X1, X2]
M = np.c_[M1, M2]
W = adjustment.fit_adjustment(P, Q, X, M)

In [111]:
Ptest = get_posteriors(pfvc_model, [pd[0] for pd in test])
Qtest = get_posteriors(pfvc_model, [pd[0].truncate(1.0) for pd in test])

In [112]:
X1test = get_posteriors(tss_model, [pd[1].truncate(1.0) for pd in test])
X2test = get_posteriors(pdlco_model, [pd[2].truncate(1.0) for pd in test])

#X1test = get_posteriors(tss_model, [pd[1] for pd in test])
#X2test = get_posteriors(pdlco_model, [pd[2] for pd in test])

Xtest  = np.c_[X1test, X2test]

In [113]:
Qhat   = adjustment.make_adjustment(W, Qtest, Xtest)

In [114]:
adjustment.xentropy(Ptest)

60.375734267340718

In [115]:
adjustment.xentropy(Ptest, Qtest)

110.64726918971706

In [116]:
adjustment.xentropy(Ptest, Qhat)

110.85604703587147