In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from Utils import Utils
from Constants import Constants
from Propensity_socre_network import Propensity_socre_network
from Utils import Utils
from PS_Manager import PS_Manager
from PS_Treated_Generator import PS_Treated_Generator

from GAN import Generator, Discriminator
from sklearn.neighbors import NearestNeighbors

from GAN_Manager import GAN_Manager
from Utils import Utils

from matplotlib import pyplot
from torch.autograd.variable import Variable
from collections import OrderedDict
from scipy.special import expit

import os
from os.path import join
import sys
from dataloader import DataLoader
from DCN_Experiments import DCN_Experiments
from Metrics import Metrics

In [2]:
def __get_ps_model(ps_model_type, iter_id,
                   input_nodes, device,
                   np_covariates_X_train, 
                   np_covariates_X_test, 
                   np_covariates_Y_train,
                   np_covariates_Y_test, dL):
    ps_train_set = dL.convert_to_tensor(np_covariates_X_train, np_covariates_Y_train)
    ps_test_set = dL.convert_to_tensor(np_covariates_X_test,
                                            np_covariates_Y_test)
    ps_manager = PS_Manager()
    if ps_model_type == Constants.PS_MODEL_NN:
        return ps_manager.get_propensity_scores(ps_train_set,
                                                    ps_test_set, iter_id,
                                                    input_nodes, device)

In [3]:
iter_id = 1

In [4]:
input_nodes = 17
device = Utils.get_device()
ps_model_type=Constants.PS_MODEL_NN
train_path = "Dataset/jobs_DW_bin.new.10.train.npz"
test_path = "Dataset/jobs_DW_bin.new.10.test.npz"
split_size = 0.8
dL = DataLoader()

Constants.PROP_SCORE_NN_EPOCHS

150

In [5]:
np_covariates_X_train, np_covariates_X_test, np_covariates_T_train, \
        np_covariates_T_test \
            = dL.load_train_test_jobs(train_path, test_path, iter_id)

Numpy Train Statistics:
(2570, 19)
(2570, 1)
 Numpy Test Statistics:
(642, 19)
(642, 1)


In [6]:
ps_score_list_train, ps_score_list_test, ps_model = __get_ps_model(ps_model_type, 
                                                                   1, input_nodes,
                                                                device, 
                                                                np_covariates_X_train, 
                                                                np_covariates_X_test, 
                                                                np_covariates_T_train,
                                                                np_covariates_T_test,
                                                                dL)


############### Propensity Score neural net Training ###############
.. PS Training started ..
Epoch: 50, loss: 13.957078225910664, correct: 2347/2570, accuracy: 0.9132295719844358
Epoch: 100, loss: 12.979373145848513, correct: 2371/2570, accuracy: 0.9225680933852141
Epoch: 150, loss: 12.010436907410622, correct: 2381/2570, accuracy: 0.9264591439688716
Training Completed..


In [7]:
print(len(ps_score_list_train))
print(len(ps_score_list_test))

2570
642


In [8]:
print("--->>Train size: ")
data_loader_dict_train = dL.prepare_tensor_for_DCN(np_covariates_X_train,
                                                        np_covariates_T_train,
                                                        ps_score_list_train,
                                                        False)
print("--->>Test size: ")
data_loader_dict_test = dL.prepare_tensor_for_DCN(np_covariates_X_test,
                                                      np_covariates_T_test,
                                                      ps_score_list_test,
                                                      False)
print(len(data_loader_dict_train["treated_data"]))

tensor_treated_train_original = \
                Utils.create_tensors_from_tuple(data_loader_dict_train["treated_data"])
tensor_control_train_original = \
                Utils.create_tensors_from_tuple(data_loader_dict_test["control_data"])

--->>Train size: 
Big X: (2570, 21)
 Treated Statistics ==>
(237, 17)
 Control Statistics ==>
(2333, 17)
--->>Test size: 
Big X: (642, 21)
 Treated Statistics ==>
(60, 17)
 Control Statistics ==>
(582, 17)
4


In [9]:
ps_t = PS_Treated_Generator(data_loader_dict_train, ps_model)
balanced_dataset_dict = ps_t.simulate_treated_semi_supervised(input_nodes, iter_id, device)

-> Matched Control: (237, 17)
-> UnMatched Control: (2205, 17)
-> GAN training started
Epoch: 1000, D_loss: 32.7128946185112, D_score_real: 23.75793957710266, D_score_Fake: 11.37055292725563, G_loss: 41.03593552112579, Prop_loss: 110.76152908802032
Epoch: 2000, D_loss: 30.848858952522278, D_score_real: 24.5241539478302, D_score_Fake: 10.49499523639679, G_loss: 44.557498812675476, Prop_loss: 61.76901459693909
Epoch: 3000, D_loss: 32.90187919139862, D_score_real: 23.768618285655975, D_score_Fake: 11.267085611820221, G_loss: 42.22055399417877, Prop_loss: 81.71781003475189
Epoch: 4000, D_loss: 31.761606335639954, D_score_real: 24.31711834669113, D_score_Fake: 10.861313104629517, G_loss: 42.7986261844635, Prop_loss: 51.57673639059067
Epoch: 5000, D_loss: 31.677600860595703, D_score_real: 24.180280685424805, D_score_Fake: 10.598519057035446, G_loss: 44.0903936624527, Prop_loss: 46.65553414821625
Epoch: 6000, D_loss: 30.43159329891205, D_score_real: 24.641621947288513, D_score_Fake: 10.398718

<Figure size 432x288 with 0 Axes>

In [None]:
tensor_treated_balanced_dcn = balanced_dataset_dict["tensor_treated_balanced_dcn"]
tensor_control_balanced_dcn = balanced_dataset_dict["tensor_control_balanced_dcn"]
dcn_experiments = DCN_Experiments(input_nodes, device)
dcn_pd_models_eval_dict = dcn_experiments.evaluate_DCN_Model(tensor_treated_train_original,
                                                                         tensor_control_train_original,
                                                                         tensor_treated_balanced_dcn,
                                                                         tensor_control_balanced_dcn,
                                                                         data_loader_dict_test)

----------------------------------------
###### Model 1: DCN - PD Supervised Training started ######
Train_mode: train_PD
epoch: 100, Treated + Control loss: 100.9043382124537
epoch: 200, Treated + Control loss: 79.30706841783076
epoch: 300, Treated + Control loss: 69.57941165697125
epoch: 400, Treated + Control loss: 58.21852614075187
----------------------------------------
###### Model 2: DCN PM GAN - No dropout - Supervised Training started ######
Train_mode: train_with_no_dropout
epoch: 100, Treated + Control loss: 378.2583334859115
epoch: 200, Treated + Control loss: 276.443437486341
epoch: 300, Treated + Control loss: 212.0829366477054
epoch: 400, Treated + Control loss: 211.10518839375973
###### Model 3: DCN PM GAN - Probability 0.2 - Supervised Training started ######
Train_mode: train_constant_dropout_2
epoch: 100, Treated + Control loss: 527.9360118243728
epoch: 200, Treated + Control loss: 485.9896758321023
epoch: 300, Treated + Control loss: 437.57827957144605
epoch: 400, 