# GAN modified from perfect-match github repository 

Works for a single outcome variable (the whole GANITE structure).

Multi-variable outputs are not yet supported.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
import sys
!pip install ../perfect_match-master/perfect_match-master/

Processing /home/suraj/perfect_match-master/perfect_match-master
Collecting Keras>=1.2.2 (from perfect-match==1.0.0)
  Using cached https://files.pythonhosted.org/packages/5e/10/aa32dad071ce52b5502266b5c659451cfd6ffcbf14e6c8c4f16c0ff5aaab/Keras-2.2.4-py2.py3-none-any.whl
Collecting tensorflow==1.4.0 (from perfect-match==1.0.0)
  Using cached https://files.pythonhosted.org/packages/99/72/a420e22dc93416d30981e87a2318823ec09a9b18631369df0e7d9d164073/tensorflow-1.4.0-cp27-cp27mu-manylinux1_x86_64.whl
Collecting matplotlib>=1.3.1 (from perfect-match==1.0.0)
  Using cached https://files.pythonhosted.org/packages/32/6b/0368cfa5e1d1ae169ab7dc78addda3fd5e6262e48d7373a9114bac7caff7/matplotlib-2.2.4-cp27-cp27mu-manylinux1_x86_64.whl
Collecting pandas>=0.18.0 (from perfect-match==1.0.0)
  Using cached https://files.pythonhosted.org/packages/db/83/7d4008ffc2988066ff37f6a0bb6d7b60822367dcb36ba5e39aa7801fda54/pandas-0.24.2-cp27-cp27mu-manylinux1_x86_64.whl
Collecting h5py>=2.6.0 (from perfect-match==

Successfully installed Keras-2.2.4 backports.functools-lru-cache-1.5 backports.weakref-1.0.post1 bleach-1.5.0 cycler-0.10.0 enum34-1.1.6 funcsigs-1.0.2 futures-3.3.0 h5py-2.9.0 html5lib-0.9999999 keras-applications-1.0.8 keras-preprocessing-1.1.0 kiwisolver-1.1.0 markdown-3.1.1 matplotlib-2.2.4 mock-3.0.5 numpy-1.16.4 pandas-0.24.2 perfect-match-1.0.0 protobuf-3.9.1 pyparsing-2.4.2 python-dateutil-2.8.0 pytz-2019.2 pyyaml-5.1.2 rpy2-2.8.6 scikit-learn-0.20.4 scipy-1.2.2 setuptools-41.1.0 singledispatch-3.4.0.3 six-1.12.0 subprocess32-3.5.4 tensorflow-1.4.0 tensorflow-tensorboard-0.4.0 werkzeug-0.15.5 wheel-0.33.6


In [3]:
# SOURCE: https://github.com/clinicalml/cfrnet, MIT-License
import tensorflow as tf
import numpy as np

SQRT_CONST = 1e-10


def get_nonlinearity_by_name(name):
    """Map an activation name to the matching TensorFlow function.

    The comparison is case-insensitive. ``'elu'`` selects ``tf.nn.elu``;
    every other name falls back to ``tf.nn.relu``.
    """
    return tf.nn.elu if name.lower() == 'elu' else tf.nn.relu


def build_mlp(x, num_layers=1, num_units=16, dropout=0.0,
              nonlinearity=tf.nn.elu, weight_initialisation_std=0.1):
    """Stack ``num_layers`` fully connected layers on top of ``x``.

    Each layer computes ``nonlinearity(h @ W + b)`` followed by
    ``tf.nn.dropout`` with keep probability ``1.0 - dropout``. Weights are
    drawn from a normal distribution whose standard deviation is
    ``weight_initialisation_std / sqrt(fan_in)``; biases start at zero.

    Returns:
        A tuple ``(h_rep, weights_in, biases_in)``: the output of the last
        layer (``x`` itself when ``num_layers`` is 0) and the lists of weight
        and bias variables, one entry per layer.
    """
    fan_in = int(x.shape[-1])
    hidden = x
    weights_in, biases_in = [], []

    for _ in range(num_layers):
        # Variables are created weight-first, bias-second, exactly once per layer.
        layer_weights = tf.Variable(
            tf.random_normal([fan_in, num_units],
                             stddev=weight_initialisation_std / np.sqrt(fan_in)))
        weights_in.append(layer_weights)

        layer_bias = tf.Variable(tf.zeros([1, num_units]))
        biases_in.append(layer_bias)

        pre_activation = tf.matmul(hidden, layer_weights) + layer_bias
        activated = nonlinearity(pre_activation)
        hidden = tf.nn.dropout(activated, 1.0 - dropout)

        # Every layer after the first maps num_units -> num_units.
        fan_in = num_units

    return hidden, weights_in, biases_in



In [4]:
import tensorflow as tf
#from ..cfr.util import get_nonlinearity_by_name, build_mlp


class GANITEBuilder(object):
    """Static factory for the TensorFlow (1.x graph mode) GANITE model.

    The graph has two blocks, each made of a generator and a discriminator
    living in its own variable scope:

    * counterfactual block: generator ``g_cf`` and discriminator ``d_cf``
    * ITE block: generator ``g_ite`` and discriminator ``d_ite``

    All sub-builders accept ``input_dim``, ``l2_weight``, ``learning_rate``
    and ``with_bn`` for signature compatibility but do not use them.
    """

    @staticmethod
    def build(input_dim, output_dim, num_units=128, dropout=0.0, l2_weight=0.0, learning_rate=0.0001, num_layers=2,
              num_treatments=2, with_bn=False, nonlinearity="elu", initializer=tf.variance_scaling_initializer(),
              alpha=1.0, beta=1.0):
        """Create placeholders, both GANITE blocks and their four losses.

        Args:
            input_dim: Width of the covariate placeholder ``x``.
            output_dim: Width of the factual-outcome placeholder ``y_f``.
            num_units, dropout, num_layers, nonlinearity, initializer:
                Forwarded to every generator/discriminator MLP.
            num_treatments: Number of treatment arms.
            alpha: Weight of the supervised loss in the counterfactual
                generator loss.
            beta: Weight of the supervised loss in the ITE generator loss.

        Returns:
            A 12-tuple ``(cf_generator_loss, cf_discriminator_loss,
            ite_generator_loss, ite_discriminator_loss, x, t, y_f, y_full,
            y_pred_cf, y_pred_ite, z_g, z_i)``.
        """
        x = tf.placeholder("float", shape=[None, input_dim], name='x')
        t = tf.placeholder("float", shape=[None, 1], name='t')
        y_f = tf.placeholder("float", shape=[None, output_dim], name='y_f')
        y_full = tf.placeholder("float", shape=[None, num_treatments], name='y_full')

        y_pred_cf, propensity_scores, z_g = GANITEBuilder.build_counterfactual_block(input_dim, x, t, y_f,
                                                                                     num_units, dropout, l2_weight,
                                                                                     learning_rate, num_layers,
                                                                                     num_treatments, with_bn,
                                                                                     nonlinearity, initializer)

        y_pred_ite, d_ite_pred, d_ite_true, z_i = GANITEBuilder.build_ite_block(input_dim, x, t, y_f, y_full,
                                                                                num_units, dropout, l2_weight,
                                                                                learning_rate, num_layers,
                                                                                num_treatments, with_bn,
                                                                                nonlinearity, initializer)

        # Build losses and optimizers.
        t_one_hot = tf.one_hot(tf.cast(t, "int32"), num_treatments)

        # The counterfactual discriminator tries to recover the assigned
        # treatment from (x, generated outcomes).
        propensity_loss_cf = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=propensity_scores,
                                                                                    labels=t_one_hot))

        # Pick, for every row, the generated outcome of the treatment that was
        # actually assigned.
        batch_size = tf.shape(y_pred_cf)[0]
        indices = tf.stack([tf.range(batch_size), tf.cast(t, "int32")[:, 0]], axis=-1)
        y_f_pred = tf.gather_nd(y_pred_cf, indices)

        # RMSE between the observed outcome (first column of y_f only) and the
        # generated factual outcome.
        supervised_loss_cf = tf.sqrt(tf.reduce_mean(tf.squared_difference(y_f[:, 0], y_f_pred)))

        cf_discriminator_loss = propensity_loss_cf
        cf_generator_loss = -propensity_loss_cf + alpha * supervised_loss_cf

        # D_ITE is trained by minimising -ite_loss, i.e. it is pushed towards
        # 1 on y_full ("true") and towards 0 on the ITE generator's prediction.
        ite_loss = tf.reduce_mean(tf.log(d_ite_true)) + tf.reduce_mean(tf.log(1 - d_ite_pred))

        supervised_loss_ite = tf.sqrt(tf.reduce_mean(tf.squared_difference(y_full, y_pred_ite)))

        ite_discriminator_loss = -ite_loss
        ite_generator_loss = ite_loss + beta * supervised_loss_ite
        return cf_generator_loss, cf_discriminator_loss, ite_generator_loss, ite_discriminator_loss, \
               x, t, y_f, y_full, y_pred_cf, y_pred_ite, z_g, z_i

    @staticmethod
    def build_tarnet(mlp_input, t, input_dim, num_layers, num_units, dropout, num_treatments, nonlinearity):
        """Build a shared MLP followed by one MLP head per treatment arm.

        Returns:
            ``(outputs, all_indices)`` where ``outputs`` is the per-arm head
            outputs concatenated on the last axis and ``all_indices`` holds,
            for each arm, the row indices of ``t`` equal to that arm.

        NOTE(review): this method is not called anywhere in the visible
        source. Each head emits ``num_treatments`` units for a different
        subset of rows, so the final concat on the last axis looks like it
        requires equally sized groups. Confirm the intent before relying on it.
        """
        initializer = tf.variance_scaling_initializer()
        # build_mlp returns (h_rep, weights, biases); only the representation
        # tensor can be fed to tf.gather.
        x, _, _ = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)

        all_indices, outputs = [], []
        for i in range(num_treatments):
            # Rows whose treatment equals arm i.
            indices = tf.reshape(tf.to_int32(tf.where(tf.equal(tf.reshape(t, (-1,)), i))), (-1,))
            current_last_layer_h = tf.gather(x, indices)

            last_layer, _, _ = build_mlp(current_last_layer_h, num_layers, num_units, dropout, nonlinearity)

            output = tf.layers.dense(last_layer, units=num_treatments, use_bias=True,
                                     bias_initializer=initializer)

            all_indices.append(indices)
            outputs.append(output)
        return tf.concat(outputs, axis=-1), all_indices

    @staticmethod
    def build_counterfactual_block(input_dim, x, t, y_f, num_units=128, dropout=0.0, l2_weight=0.0,
                                   learning_rate=0.0001, num_layers=2,
                                   num_treatments=2, with_bn=False, nonlinearity="elu",
                                   initializer=tf.variance_scaling_initializer()):
        """Build the counterfactual generator and the discriminator on its output.

        Returns:
            ``(y_pred, propensity_scores, z_g)``: generated outcomes for all
            arms, the discriminator's treatment logits, and the generator's
            noise placeholder.
        """
        y_pred, z_g = GANITEBuilder.build_counterfactual_generator(input_dim, x, t, y_f, num_units,
                                                                   dropout, l2_weight, learning_rate,
                                                                   num_layers, num_treatments, with_bn,
                                                                   nonlinearity,
                                                                   initializer)

        propensity_scores = GANITEBuilder.build_counterfactual_discriminator(input_dim, x, t, y_pred, num_units,
                                                                             dropout, l2_weight, learning_rate,
                                                                             num_layers, num_treatments, with_bn,
                                                                             nonlinearity,
                                                                             initializer)
        return y_pred, propensity_scores, z_g

    @staticmethod
    def build_counterfactual_generator(input_dim, x, t, y_f, num_units=128, dropout=0.0, l2_weight=0.0,
                                       learning_rate=0.0001, num_layers=2,
                                       num_treatments=2, with_bn=False, nonlinearity="elu",
                                       initializer=tf.variance_scaling_initializer()):
        """Build generator ``g_cf``: (x, y_f, t, noise) -> one outcome per arm.

        Returns:
            ``(y, z_g)`` where ``y`` has ``num_treatments`` columns and
            ``z_g`` is the noise placeholder with ``num_treatments - 1``
            columns.
        """
        nonlinearity = get_nonlinearity_by_name(nonlinearity)
        with tf.variable_scope("g_cf",
                               initializer=initializer):
            z_g = tf.placeholder("float", shape=[None, num_treatments-1], name='z_g')

            mlp_input = tf.concat([x, y_f, t, z_g], axis=-1)
            x, _, _ = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
            # One output column per treatment arm: the caller selects the
            # column of the assigned treatment as the factual prediction.
            y = tf.layers.dense(x, units=num_treatments, use_bias=True,
                                bias_initializer=initializer)
            return y, z_g

    @staticmethod
    def build_counterfactual_discriminator(input_dim, x, t, y_pred, num_units=128, dropout=0.0, l2_weight=0.0,
                                           learning_rate=0.0001, num_layers=2,
                                           num_treatments=2, with_bn=False, nonlinearity="elu",
                                           initializer=tf.variance_scaling_initializer(),
                                           reuse=False):
        """Build discriminator ``d_cf``: (x, y_pred) -> treatment logits.

        Returns:
            Unnormalised scores with ``num_treatments`` columns.
        """
        nonlinearity = get_nonlinearity_by_name(nonlinearity)
        with tf.variable_scope("d_cf",
                               reuse=reuse,
                               initializer=initializer):
            mlp_input = tf.concat([x, y_pred], axis=-1)
            x, _, _ = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
            propensity_scores = tf.layers.dense(x, units=num_treatments, use_bias=True,
                                                bias_initializer=initializer)
            return propensity_scores

    @staticmethod
    def build_ite_block(input_dim, x, t, y_f, y_full, num_units=128, dropout=0.0, l2_weight=0.0,
                        learning_rate=0.0001, num_layers=2,
                        num_treatments=2, with_bn=False, nonlinearity="elu",
                        initializer=tf.variance_scaling_initializer()):
        """Build the ITE generator and apply the ITE discriminator twice.

        The discriminator is first built on the generator output and then
        re-applied (``reuse=True``) to ``y_full``.

        Returns:
            ``(y_pred_ite, d_ite_pred, d_ite_true, z_i)``.
        """
        y_pred_ite, z_i = GANITEBuilder.build_ite_generator(input_dim, x, t, y_f, num_units,
                                                            dropout, l2_weight, learning_rate,
                                                            num_layers, num_treatments, with_bn,
                                                            nonlinearity, initializer)

        d_ite_pred = GANITEBuilder.build_ite_discriminator(input_dim, x, t, y_pred_ite, num_units,
                                                           dropout, l2_weight, learning_rate,
                                                           num_layers, num_treatments, with_bn,
                                                           nonlinearity, initializer, reuse=False)

        d_ite_true = GANITEBuilder.build_ite_discriminator(input_dim, x, t, y_full, num_units,
                                                           dropout, l2_weight, learning_rate,
                                                           num_layers, num_treatments, with_bn,
                                                           nonlinearity, initializer, reuse=True)

        return y_pred_ite, d_ite_pred, d_ite_true, z_i

    @staticmethod
    def build_ite_generator(input_dim, x, t, y_f, num_units=128, dropout=0.0, l2_weight=0.0,
                            learning_rate=0.0001, num_layers=2,
                            num_treatments=2, with_bn=False, nonlinearity="elu",
                            initializer=tf.variance_scaling_initializer()):
        """Build generator ``g_ite``: (x, noise) -> one outcome per arm.

        ``t`` and ``y_f`` are accepted but not used as inputs.

        Returns:
            ``(y_pred, z_i)`` where both have ``num_treatments`` columns.
        """
        nonlinearity = get_nonlinearity_by_name(nonlinearity)
        with tf.variable_scope("g_ite",
                               initializer=initializer):
            z_i = tf.placeholder("float", shape=[None, num_treatments], name='z_i')
            mlp_input = tf.concat([x, z_i], axis=-1)
            x, _, _ = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
            y_pred = tf.layers.dense(x, units=num_treatments, use_bias=True,
                                     bias_initializer=initializer)
            return y_pred, z_i

    @staticmethod
    def build_ite_discriminator(input_dim, x, t, y_pred, num_units=128, dropout=0.0, l2_weight=0.0,
                                learning_rate=0.0001, num_layers=2,
                                num_treatments=2, with_bn=False, nonlinearity="elu",
                                initializer=tf.variance_scaling_initializer(),
                                reuse=False):
        """Build discriminator ``d_ite``: (x, outcomes) -> sigmoid score.

        Returns:
            A single-column tensor with values in (0, 1).
        """
        nonlinearity = get_nonlinearity_by_name(nonlinearity)
        with tf.variable_scope("d_ite",
                               reuse=reuse,
                               initializer=initializer):
            mlp_input = tf.concat([x, y_pred], axis=-1)
            x, _, _ = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
            y = tf.layers.dense(x, units=1, use_bias=True,
                                bias_initializer=initializer, activation=tf.nn.sigmoid)
            return y


In [5]:
from __future__ import print_function

import sys
import numpy as np
import tensorflow as tf
#from perfect_match.models.baselines.ganite_package.ganite_builder import GANITEBuilder


class GANITEModel(object):
    """Owns the TensorFlow session and training loop for a GANITE graph.

    The graph itself is created by ``GANITEBuilder.build``; this class keeps
    the returned loss and placeholder tensors as attributes and drives
    training and prediction through ``self.sess``.
    """

    def __init__(self, input_dim, output_dim, num_units=128, dropout=0.0, l2_weight=0.0, learning_rate=0.0001, num_layers=2,
                 num_treatments=2, with_bn=False, nonlinearity="elu", initializer=tf.variance_scaling_initializer(),
                 alpha=1.0, beta=1.0):
        """Open a session (GPU memory growth enabled) and build the graph."""
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.num_treatments = num_treatments

        self.cf_generator_loss, self.cf_discriminator_loss, \
        self.ite_generator_loss, self.ite_discriminator_loss, \
        self.x, self.t, self.y_f, self.y_full, self.y_pred_cf, self.y_pred_ite, self.z_g, self.z_i = \
            GANITEBuilder.build(input_dim, output_dim,
                                num_units=num_units,
                                dropout=dropout,
                                l2_weight=l2_weight,
                                learning_rate=learning_rate,
                                num_layers=num_layers,
                                num_treatments=num_treatments,
                                with_bn=with_bn,
                                nonlinearity=nonlinearity,
                                initializer=initializer,
                                alpha=alpha,
                                beta=beta)

    @staticmethod
    def get_scoped_variables(scope_name):
        """Return the trainable variables whose name contains ``scope_name``."""
        return [var for var in tf.trainable_variables() if scope_name in var.name]

    @staticmethod
    def get_cf_generator_vairables():
        """Trainable variables of the counterfactual generator (``g_cf``)."""
        return GANITEModel.get_scoped_variables("g_cf")

    @staticmethod
    def get_cf_discriminator_vairables():
        """Trainable variables of the counterfactual discriminator (``d_cf``)."""
        return GANITEModel.get_scoped_variables("d_cf")

    @staticmethod
    def get_ite_generator_vairables():
        """Trainable variables of the ITE generator (``g_ite``)."""
        return GANITEModel.get_scoped_variables("g_ite")

    @staticmethod
    def get_ite_discriminator_vairables():
        """Trainable variables of the ITE discriminator (``d_ite``)."""
        return GANITEModel.get_scoped_variables("d_ite")

    def load(self, path):
        """Create a saver for the current graph.

        NOTE(review): the restore call is disabled, so this method does not
        actually load weights from ``path``. Confirm whether that is intended.
        """
        saver = tf.train.Saver()
        # saver.restore(self.sess, path)

    def train(self, train_generator, train_steps, val_generator, val_steps, num_epochs,
              learning_rate, learning_rate_decay=0.97, iterations_per_decay=100,
              dropout=0.0, imbalance_loss_weight=0.0, l2_weight=0.0, checkpoint_path="",
              early_stopping_patience=12, early_stopping_on_pehe=False):
        """Train the counterfactual block, then the ITE block.

        Each phase alternates one generator step and one discriminator step
        ``train_steps`` times per epoch, evaluates both losses on
        ``val_steps`` validation batches, checkpoints to ``checkpoint_path``
        whenever the validation generator loss improves, and stops after
        ``early_stopping_patience`` epochs without improvement.

        ``learning_rate_decay``, ``iterations_per_decay``, ``dropout``,
        ``imbalance_loss_weight``, ``l2_weight`` and
        ``early_stopping_on_pehe`` are accepted but not used here.
        """
        saver = tf.train.Saver(max_to_keep=3)

        global_step_1 = tf.Variable(0, trainable=False, dtype="int64")
        global_step_2 = tf.Variable(0, trainable=False, dtype="int64")
        global_step_3 = tf.Variable(0, trainable=False, dtype="int64")
        global_step_4 = tf.Variable(0, trainable=False, dtype="int64")

        # One optimizer, four update ops; each op only touches the variables
        # of its own scope.
        opt = tf.train.AdamOptimizer(learning_rate)
        train_step_g_cf = opt.minimize(self.cf_generator_loss, global_step=global_step_1,
                                       var_list=GANITEModel.get_cf_generator_vairables())
        train_step_d_cf = opt.minimize(self.cf_discriminator_loss, global_step=global_step_2,
                                       var_list=GANITEModel.get_cf_discriminator_vairables())
        train_step_g_ite = opt.minimize(self.ite_generator_loss, global_step=global_step_3,
                                        var_list=GANITEModel.get_ite_generator_vairables())
        train_step_d_ite = opt.minimize(self.ite_discriminator_loss, global_step=global_step_4,
                                        var_list=GANITEModel.get_ite_discriminator_vairables())

        self.sess.run(tf.global_variables_initializer())

        print("COUNTERFACTUAL TRAINING")
        self._train_phase(train_generator, train_steps, val_generator, val_steps, num_epochs,
                          self.cf_generator_loss, self.cf_discriminator_loss,
                          train_step_g_cf, train_step_d_cf,
                          saver, checkpoint_path, early_stopping_patience,
                          include_y_full=False)

        print("ITE TRAINING")
        self._train_phase(train_generator, train_steps, val_generator, val_steps, num_epochs,
                          self.ite_generator_loss, self.ite_discriminator_loss,
                          train_step_g_ite, train_step_d_ite,
                          saver, checkpoint_path, early_stopping_patience,
                          include_y_full=True)

    def _train_phase(self, train_generator, train_steps, val_generator, val_steps, num_epochs,
                     generator_loss, discriminator_loss, generator_step, discriminator_step,
                     saver, checkpoint_path, early_stopping_patience, include_y_full):
        """Run one adversarial training phase with checkpointing and early stopping."""
        best_val_loss, num_epochs_without_improvement = np.finfo(float).max, 0
        for epoch_idx in range(num_epochs):
            for _ in range(train_steps):
                train_losses_g = self.run_generator(train_generator, 1, generator_loss, generator_step,
                                                    include_y_full=include_y_full)
                train_losses_d = self.run_generator(train_generator, 1, discriminator_loss, discriminator_step,
                                                    include_y_full=include_y_full)

            val_losses_g = self.run_generator(val_generator, val_steps, generator_loss,
                                              include_y_full=include_y_full)
            val_losses_d = self.run_generator(val_generator, val_steps, discriminator_loss,
                                              include_y_full=include_y_full)

            current_val_loss = val_losses_g[0]
            do_save = current_val_loss < best_val_loss
            if do_save:
                num_epochs_without_improvement = 0
                best_val_loss = current_val_loss
                saver.save(self.sess, checkpoint_path)
            else:
                num_epochs_without_improvement += 1

            self.print_losses(epoch_idx, num_epochs,
                              [train_losses_g[0], train_losses_d[0]],
                              [val_losses_g[0], val_losses_d[0]],
                              do_save)

            if num_epochs_without_improvement >= early_stopping_patience:
                break

    def print_losses(self, epoch_idx, num_epochs, train_losses, val_losses, did_save=False):
        """Write one epoch summary line to stderr; ``xx`` marks a checkpointed epoch."""
        print("Epoch [{:04d}/{:04d}] {:} TRAIN: G={:.3f} D={:.3f} VAL: G={:.3f} D={:.3f}"
              .format(
                  epoch_idx, num_epochs,
                  "xx" if did_save else "::",
                  train_losses[0], train_losses[1],
                  val_losses[0], val_losses[1]
              ),
              file=sys.stderr)

    def run_generator(self, generator, steps, loss, train_step=None, include_y_full=False):
        """Evaluate (and optionally optimise) ``loss`` on ``steps`` batches.

        Args:
            generator: Zero-argument callable returning
                ``((x_batch, t_batch), y_batch)``.
            steps: Number of batches to draw.
            loss: Loss tensor to evaluate.
            train_step: Optional update op run before the loss is evaluated.
            include_y_full: When true, also feed ``y_full``: the
                counterfactual generator's outcomes with the factual column
                replaced by the observed outcome.

        Returns:
            The mean of the per-batch losses as a length-1 array.
        """
        losses = []
        for iter_idx in range(steps):
            (x_batch, t_batch), y_batch = generator()
            t_batch = np.expand_dims(t_batch, axis=-1)
            y_batch = np.expand_dims(y_batch, axis=-1)

            batch_size = len(x_batch)
            feed_dict = {
                self.x: x_batch,
                self.t: t_batch,
                self.y_f: y_batch,
                self.z_g: np.random.uniform(size=(batch_size, self.num_treatments-1)),
                self.z_i: np.random.uniform(size=(batch_size, self.num_treatments))
            }
            if include_y_full:
                y_pred = self._predict_g_cf([x_batch, t_batch], y_batch)

                # Index with two 1-D arrays so that row i gets its own
                # observed outcome in column t[i]. Indexing with the
                # (batch, 1)-shaped t_batch would broadcast to a
                # (batch, batch) index and overwrite every row with other
                # samples' outcomes.
                rows = np.arange(len(y_pred))
                factual_columns = t_batch[:, 0].astype(int)
                y_pred[rows, factual_columns] = y_batch[:, 0]
                feed_dict[self.y_full] = y_pred

            if train_step is not None:
                self.sess.run(train_step, feed_dict=feed_dict)

            losses.append(self.sess.run([loss],
                                        feed_dict=feed_dict))
        return np.mean(losses, axis=0)

    def _predict_g_cf(self, x, y_f):
        """Run the counterfactual generator on ``x = [covariates, t]`` and ``y_f``."""
        batch_size = len(x[0])
        y_pred = self.sess.run(self.y_pred_cf, feed_dict={
            self.x: x[0],
            self.t: x[1],
            self.y_f: y_f,
            self.z_g: np.random.uniform(size=(batch_size, self.num_treatments-1))
        })
        return y_pred

    def predict(self, x):
        """Predict the outcome of the treatment given in ``x[1]`` for each row of ``x[0]``.

        The ITE generator produces one outcome per treatment arm; the entry
        selected by each row's treatment index is returned.
        """
        batch_size = len(x[0])
        y_pred = self.sess.run(self.y_pred_ite, feed_dict={
             self.x: x[0],
             self.z_i: np.random.uniform(size=(batch_size, self.num_treatments))
        })
        # A list comprehension instead of np.array(map(...)): on Python 3
        # map() is lazy and np.array would wrap the iterator object itself.
        y_pred = np.array([inner[idx] for inner, idx in zip(y_pred, x[1])])
        return y_pred


In [6]:
import numpy as np
import pandas as pd
from functools import partial
#from perfect_match.models.model_factory import ModelFactory


class Baseline(object):
    """Minimal wrapper around an estimator stored in ``self.model``.

    Subclasses override ``_build`` to create the estimator and may override
    ``preprocess`` / ``postprocess`` / ``load`` / ``save``.
    """

    def __init__(self):
        self.model = None

    @staticmethod
    def to_data_frame(x):
        """Wrap a 2-D array in a DataFrame with integer row and column labels."""
        return pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=np.arange(x.shape[1]))

    def _build(self, **kwargs):
        """Create the underlying estimator; the base implementation returns None."""
        return None

    def build(self, **kwargs):
        """Store the result of ``_build(**kwargs)`` in ``self.model``."""
        self.model = self._build(**kwargs)

    def preprocess(self, x):
        """Hook applied to inputs before prediction/fitting; identity here."""
        return x

    def postprocess(self, y):
        """Hook applied to predictions; identity here."""
        return y

    def load(self, path):
        """No-op in the base class."""
        pass

    def save(self, path):
        """No-op in the base class."""
        pass

    def predict_for_model(self, model, x):
        """Predict with ``model``, preferring ``predict_proba`` when it exists.

        The capability check is made on the ``model`` argument (the object
        that is actually called), not on ``self.model``.
        """
        if hasattr(model, "predict_proba"):
            return self.postprocess(model.predict_proba(self.preprocess(x)))
        else:
            return self.postprocess(model.predict(self.preprocess(x)))

    def predict(self, x):
        """Predict with ``self.model``."""
        return self.predict_for_model(self.model, x)

    def fit_generator_for_model(self, model, train_generator, train_steps, val_generator, val_steps, num_epochs):
        """Materialise ``train_steps`` batches and call ``model.fit`` on them.

        The validation arguments and ``num_epochs`` are not used here.
        """
        x, y = self.collect_generator(train_generator, train_steps)
        model.fit(x, y)

    def fit_generator(self, train_generator, train_steps, val_generator, val_steps, num_epochs, batch_size):
        """Fit ``self.model`` from a batch iterator; ``batch_size`` is not used here."""
        self.fit_generator_for_model(self.model, train_generator, train_steps, val_generator, val_steps, num_epochs)

    def collect_generator(self, generator, generator_steps):
        """Draw ``generator_steps`` batches and concatenate them along axis 0.

        Args:
            generator: Iterator whose items start with ``(x, y)``.
            generator_steps: Number of items to consume with ``next``.

        Returns:
            A list ``[all_x, all_y]`` of concatenated arrays (a real list on
            both Python 2 and 3, rather than a lazy ``map`` object).
        """
        all_outputs = []
        for _ in range(generator_steps):
            generator_output = next(generator)
            x, y = generator_output[0], generator_output[1]
            all_outputs.append((self.preprocess(x), y))
        return [np.concatenate(parts, axis=0) for parts in zip(*all_outputs)]


class PickleableMixin(object):
    """Mixin that persists ``self.model`` through ``ModelFactory``."""

    def load(self, path):
        """Set ``self.model`` to what ``ModelFactory.load_object(path)`` returns."""
        restored = ModelFactory.load_object(path)
        self.model = restored

    def save(self, path):
        """Pass ``self.model`` and ``path`` to ``ModelFactory.save_object``."""
        persist = ModelFactory.save_object
        persist(self.model, path)


In [7]:
from __future__ import print_function

import sys
#from perfect_match.models.baselines.baseline import Baseline
#from perfect_match.models.baselines.ganite_package.ganite_model import GANITEModel


class GANITE(Baseline):
    """``Baseline`` adapter around ``GANITEModel``."""

    def __init__(self):
        super(GANITE, self).__init__()
        self.callbacks = []

    def load(self, path):
        """Delegate loading to the wrapped model."""
        self.model.load(path)

    def _build(self, **kwargs):
        """Copy the hyper-parameters from ``kwargs`` onto ``self`` and build the model.

        Every key listed below is required; a missing key raises ``KeyError``.
        Returns the new ``GANITEModel`` (ELU activations, no batch norm).
        """
        required_keys = (
            "best_model_path",
            "learning_rate",
            "dropout",
            "l2_weight",
            "num_units",
            "num_layers",
            "num_treatments",
            "imbalance_loss_weight",
            "early_stopping_patience",
            "early_stopping_on_pehe",
            "input_dim",
            "output_dim",
            "ganite_weight_alpha",
            "ganite_weight_beta",
        )
        # Each setting is stored under an attribute named after its key.
        for key in required_keys:
            setattr(self, key, kwargs[key])

        model_options = {
            "num_units": self.num_units,
            "dropout": self.dropout,
            "l2_weight": self.l2_weight,
            "learning_rate": self.learning_rate,
            "num_layers": self.num_layers,
            "num_treatments": self.num_treatments,
            "with_bn": False,
            "nonlinearity": "elu",
            "alpha": self.ganite_weight_alpha,
            "beta": self.ganite_weight_beta,
        }
        return GANITEModel(self.input_dim, self.output_dim, **model_options)

    def fit_generator(self, train_generator, train_steps, val_generator, val_steps, num_epochs, batch_size):
        """Train the wrapped model; ``batch_size`` is accepted but not forwarded."""
        train_options = {
            "num_epochs": num_epochs,
            "learning_rate": self.learning_rate,
            "val_generator": val_generator,
            "val_steps": val_steps,
            "dropout": self.dropout,
            "l2_weight": self.l2_weight,
            "imbalance_loss_weight": self.imbalance_loss_weight,
            "checkpoint_path": self.best_model_path,
            "early_stopping_patience": self.early_stopping_patience,
            "early_stopping_on_pehe": self.early_stopping_on_pehe,
        }
        self.model.train(train_generator, train_steps, **train_options)


In [8]:
model = GANITE()

In [9]:
# Hyper-parameters handed to GANITE._build via model.build(**kwargs).
kwargs = {
    "best_model_path": ".",
    "learning_rate": 0.0001,
    "dropout": 0.05,
    "l2_weight": 0.001,
    "num_units": 90,
    "num_layers": 3,
    "num_treatments": 2,
    "imbalance_loss_weight": 0.0,
    "early_stopping_patience": 70,
    "early_stopping_on_pehe": 0,
    "input_dim": 90,
    "output_dim": 1,
    "ganite_weight_alpha": 1,
    "ganite_weight_beta": 1,
}

In [10]:
kwargs

{'best_model_path': '.',
 'dropout': 0.05,
 'early_stopping_on_pehe': 0,
 'early_stopping_patience': 70,
 'ganite_weight_alpha': 1,
 'ganite_weight_beta': 1,
 'imbalance_loss_weight': 0.0,
 'input_dim': 90,
 'l2_weight': 0.001,
 'learning_rate': 0.0001,
 'num_layers': 3,
 'num_treatments': 2,
 'num_units': 90,
 'output_dim': 1}

In [13]:
# Load the tab-separated patient table and drop the two columns that are not
# used as model inputs.
diabetes = pd.read_csv('../autoencoder/DIABETES_DATA_100k.tsv', delimiter='\t').drop(columns=['DX_EMB', 'Unnamed: 0'])

# Impute missing values with the per-column median. Assigning the filled
# column back avoids an in-place fillna on a chained selection, which is not
# guaranteed to update the frame (it does not under pandas copy-on-write).
for column in diabetes.columns:
    diabetes[column] = diabetes[column].fillna(diabetes[column].median())


In [14]:
#diabetes = diabetes.drop(['P_ID', 'COMMENT 01', 'RACE'], axis=1)

In [15]:
diabetes = diabetes.drop(['P_ID'], axis=1)

In [16]:
diabetes.head()

Unnamed: 0,AGE,FEMALE,BP_SYSTOLIC,BP_DIASTOLIC,TEMPERATURE,PULSE,WEIGHT,HEIGHT,BMI,ALT,...,27,28,29,30,31,32,33,34,35,DIABETES
0,84.0,0.0,197.0,73.5,97.16,65.0,2576.136,67.514625,24.939445,22.0,...,0.356847,1.847694,0.361279,-4.5189,0.127954,0.369101,-0.244062,-0.386845,-0.151202,1
1,78.0,0.0,121.461538,68.0,98.275,83.0,2570.85,75.545455,19.955741,22.5,...,-0.01588,3.052436,-0.607785,-10.509056,0.301617,1.73127,-7.489346,-0.624842,-0.290411,0
2,75.0,0.0,108.75,60.25,98.175,52.375,3275.5925,72.0,27.762605,60.0,...,0.021642,1.009353,-1.400726,-11.73357,1.206445,0.186851,-9.053557,-0.502555,0.059039,1
3,71.0,1.0,137.0,80.0,98.0,71.0,2416.24,64.0,25.918834,22.0,...,-0.23211,0.114754,0.037359,-1.318564,0.337534,0.43332,-0.90099,0.147427,-0.895883,0
4,71.0,1.0,109.5,61.0,97.7,62.5,3287.5,66.0,33.159902,22.0,...,0.472733,0.673878,-1.013962,-1.412579,0.287313,0.025211,-2.841061,1.085241,-0.88706,0


In [17]:
treatments = diabetes['DIABETES']

dataset = tf.data.Dataset.from_tensor_slices((diabetes.values, treatments.values, diabetes.values))

In [18]:
dataset

<DatasetV1Adapter shapes: ((91,), (), (91,)), types: (tf.float64, tf.int64, tf.float64)>

In [19]:
diabetes.shape

(74282, 91)

In [20]:
import random
def patient_generator(batch_size=512, outcome='WEIGHT', treatment_col='DIABETES', frame=None):
    """Draw one random training batch from the patient table.

    Args:
        batch_size: Number of rows to sample (without replacement).
        outcome: Column used as the factual outcome ``y``.
        treatment_col: Column used as the treatment indicator; it is removed
            from the covariates.
        frame: DataFrame to sample from. Defaults to the module-level
            ``diabetes`` frame.

    Returns:
        ``((x, t), y)`` as NumPy arrays: covariates, treatments, outcomes.

    NOTE(review): the outcome column is read but not removed, so it is still
    present among the covariates ``x``. Confirm that this is intended.
    """
    source = diabetes if frame is None else frame
    sample = source.sample(batch_size)
    treatment = sample.pop(treatment_col)
    y = sample[outcome]

    return (sample.values, treatment.values), y.values
#### p_id, vitals, labs, sum of embeddings, age, female, whether or not diabetic

In [606]:
(x, t), y = patient_generator()

In [607]:
y.shape

(512, 90)

In [29]:
# Clear the default graph before building (presumably so that re-running this
# cell does not add a second copy of the model to the same graph).
tf.reset_default_graph()


# Create the GANITEModel from the hyper-parameters in `kwargs`.
model.build(**kwargs)
# Positional arguments: train generator, 10 train steps per epoch, validation
# generator, 10 validation steps, 500 epochs, batch_size=512. GANITE.fit_generator
# does not forward batch_size; the batch size is set inside patient_generator.
model.fit_generator(patient_generator, 10, patient_generator, 10, 500, 512)


InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [32]:
# Draw one batch and inspect the counterfactual generator's output for it.
(x_batch, t_batch), y_batch = patient_generator()

# Add a trailing axis to t and y, as GANITEModel.run_generator does before
# feeding them to the graph.
t_batch = np.expand_dims(t_batch, axis=-1)
y_batch = np.expand_dims(y_batch, axis=-1)


# model.model is None until model.build(**kwargs) has completed, so this line
# raises AttributeError if the build cell failed.
y_pred = model.model._predict_g_cf([x_batch, t_batch], y_batch)

print(y_pred[0:100])


AttributeError: 'NoneType' object has no attribute '_predict_g_cf'

In [30]:
((y_pred[:, 1] - y_pred[:, 0]) > 0).sum()

NameError: name 'y_pred' is not defined

In [31]:
y_batch[0:100]

array([[2349.22      ],
       [2483.26      ],
       [2370.616     ],
       [2915.365     ],
       [1681.96333333],
       [2737.23      ],
       [2585.55      ],
       [2505.92      ],
       [2752.22      ],
       [3435.06      ],
       [2880.        ],
       [2567.92      ],
       [2987.675     ],
       [3030.        ],
       [3115.674     ],
       [4866.8725    ],
       [2331.832     ],
       [2596.13666667],
       [1803.11517241],
       [2437.41      ],
       [1944.984     ],
       [2225.76      ],
       [2926.53333333],
       [2874.8       ],
       [3143.955     ],
       [2173.156     ],
       [2931.23666667],
       [2881.855     ],
       [2804.762     ],
       [2280.44      ],
       [2522.06      ],
       [3169.92      ],
       [3200.        ],
       [3664.42285714],
       [3562.635     ],
       [2807.77666667],
       [1637.27428571],
       [2231.055     ],
       [2065.3925    ],
       [4041.97784615],
       [1600.        ],
       [2720.   

In [None]:
model.save(".")