<a href="https://colab.research.google.com/github/luciedeghellinck/Deep-Learning-Project/blob/master/Deep_Learning_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Imports**

In [None]:
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets, transforms

from tqdm import tqdm
import numpy as np

**Propensity calculations**

In [None]:
def propensityScore(dataset, feature_x): 
  """
  Calculates the empirical propensity score e(x) = P(T=1|X=x) given a dataset.

  The score is the fraction of treated persons among the persons of the
  dataset whose feature vector equals feature_x.

  Args: 
    dataset: object whose `data` attribute is a torch tensor where each row 
      represents a person, the first column represents the float feature 
      vector, the second is the 0/1 treatment type and the third is the 
      float outcome.
    feature_x: n dimensional float feature vector whose propensity must be 
      calculated.
  Returns: 
    The float propensity relative to the given feature vector feature_x.
  Raises:
    ValueError: if no person of the dataset has the feature vector feature_x,
      in which case the conditional probability is undefined.
  """
  matching = 0  # persons whose features equal feature_x
  treated = 0   # ... among which those that received the treatment
  for person in dataset.data: 
    # `.all()` reduces an element-wise comparison to a single truth value, so
    # the test also works when the features are vectors rather than scalars.
    if not bool(th.as_tensor(person[0] == feature_x).all()):
      continue
    matching += 1
    if person[1] == 1: # this person has been treated
      treated += 1

  if matching == 0:
    raise ValueError("no person in the dataset has the requested feature vector")

  # Conditional probability: normalise by the persons sharing feature_x, not
  # by the size of the whole dataset (that would give P(T=1, X=x) instead).
  return treated / matching

def propensityRegression(dataset): 
  """
  Placeholder for fitting a regression function of the propensity from the
  known propensity scores. Not implemented yet: it currently returns None.

  Args: 
    dataset: torch tensor where each row represents a person, the first 
      column represents the float feature vector, the second is the 0/1 
      treatment type and the third is the float outcome.
  
  Returns: 
    Intended: a function that takes an m dimensional feature vector as input 
    and estimates its propensity. Currently: None.
  """
  # TODO: call propensityScore to obtain the propensity of each feature vector
  # of the dataset and fit the regression on those scores.
  return None

def propensityEstimate(regression, new_feature_x): 
  """
  Placeholder for estimating the propensity of a new feature vector with the
  function obtained from propensityRegression. Not implemented yet: it
  currently returns None.

  Args: 
    regression: regression function that allows the propensity to be estimated.
    new_feature_x: n dimensional float feature vector whose propensity 
      must be approximated.
  Returns: 
    Intended: the estimated propensity score of the new input feature vector.
    Currently: None.
  """
  # TODO: evaluate the function found with propensityRegression on a feature
  # vector that does not belong to the dataset.
  return None

**IPM calculation**

In [None]:
def IPM(dataset, representation_false, representation_true): 
  """
  Placeholder for the IPM distance between two probability functions. Not
  implemented yet: it currently returns None.

  Open questions from the author: which set of functions G should be used,
  and where this term (part of equation 12) should appear in the code.

  Args: 
    dataset: torch tensor where each row represents a person, the first 
      column represents the float feature vector, the second is the 0/1 
      treatment type and the third is the float outcome.
    representation_false: undocumented in the original; presumably the
      representation of the untreated group - TODO confirm.
    representation_true: undocumented in the original; presumably the
      representation of the treated group - TODO confirm.
  Returns: 
    Intended: the IPM distance (the original description was left unfinished).
    Currently: None.
  """
  return None

**Weight calculation**

In [None]:
def weight(dataset, x, t):
  """
  Computes the weight of a feature vector for a given treatment type.

  With e the propensity estimated for x, the weight is
  (t * (1 - 2e) + e^2) / (e * (1 - e)).

  Args: 
    dataset: torch tensor where each row represents a person, the first 
      column represents the float feature vector, the second is the 0/1 
      treatment type and the third is the float outcome.
    x: m dimensional float feature vector
    t: 0/1 treatment type.
  Returns: 
    The float weight for the feature vector and the treatment type.
  """
  # The propensity of x is read off a regression fitted on the dataset.
  e = propensityEstimate(propensityRegression(dataset), x)

  numerator = t * (1 - 2 * e) + e**2
  denominator = e * (1 - e)
  return numerator / denominator

def pi(dataset, t):
  """
  Calculates the fraction of persons of the dataset that received a given 
  treatment type.

  Args: 
    dataset: object whose `data` attribute is a torch tensor where each row 
      represents a person, the first column represents the float feature 
      vector, the second is the 0/1 treatment type and the third is the 
      float outcome; `size` is the number of persons.
    t: 0/1 treatment type.

  Returns: 
    The float fraction (between 0 and 1) of persons whose treatment equals t.
  """
  # The treatments are the second *column* of the data; `dataset.data[1]`
  # would instead be the second row, i.e. a single person.
  treatments = dataset.data[:, 1]
  count = sum(1 for treatment in treatments if treatment == t)

  return count / dataset.size

def adaptedWeight(dataset, x, t): 
  """
  Computes the adapted weight of a feature vector for a given treatment type.

  The plain weight of (x, t) is halved and multiplied by
  t / pi_1 + (1 - t) / pi_0, where pi_0 and pi_1 are the fractions of the
  dataset with treatment 0 and 1 respectively.

  Args: 
    dataset: torch tensor where each row represents a person, the first 
      column represents the float feature vector, the second is the 0/1 
      treatment type and the third is the float outcome.
    x: m dimensional float feature vector
    t: 0/1 treatment type.

  Returns: 
    The float adapted weight for the feature vector and the treatment type.
  """
  half_weight = weight(dataset, x, t) / 2

  share_untreated = pi(dataset, 0)
  share_treated = pi(dataset, 1)
  rescaling = t / share_treated + (1-t) / share_untreated

  return half_weight * rescaling

**Tau and performance estimator**

In [None]:
def plugInTau(x, t, y, f0, f1, regression):
  """
  Calculates the plug-in tau for the datapoint [x, t, y].

  With e the propensity estimated for x and f_t the hypothesis matching the 
  observed treatment t, the value returned is
  (t - e) / (e * (1 - e)) * (y - f_t(x)) + f1(x) - f0(x).

  Args: 
    x: m dimensional float feature vector.
    t: 0/1 treatment type.
    y: float outcome.
    f0: hypothesis function, called on the feature vector, for the case where there is no treatment.
    f1: hypothesis function, called on the feature vector, for the case where there is a treatment.
    regression: regression function for the propensity.

  Returns: 
    A float representing the plug-in predictor for the datapoint [X,T,Y] given the hypotheses functions f0 and f1 as well as the propensity regression function.
  """
  # Select the hypothesis that corresponds to the treatment actually observed.
  fT = f0 if t == 0 else f1

  propensity = propensityEstimate(regression, x)
    
  return (t - propensity) / (propensity * (1 - propensity)) * (y - fT(x)) + f1(x) - f0(x)

def candidatePredictorTau(x, MLAlgorithm, metalearner): 
  """
  Placeholder for the candidate tau of a feature vector, given a machine
  learning algorithm and a meta-learner. Not implemented yet: it currently
  returns None.

  Args: 
    x: m dimensional float feature vector.
    MLAlgorithm: machine learning algorithm.
    metalearner: meta-learner.
  Returns: 
    Intended: a float representing the CATE predictor of the algorithm and
    the meta-learner for the feature vector. Currently: None.
  """
  # TODO: dispatch on the machine learning algorithm and the meta-learner type.
  return None

def performanceEstimator(plugIn, candidate):
  """
  Placeholder for the performance estimator of a pair of plug-in and
  candidate tau. Not implemented yet: it currently returns None.

  Args: 
    plugIn: float plug-in CATE predictor.
    candidate: float candidate CATE predictor from the machine learning 
      algorithms and the meta-learners.
  Returns: 
    Intended: a float representing the performance estimator between the two
    CATE predictors. Currently: None.
  """
  # TODO: implement equation 5.
  return None

**Data set**

In [None]:
 class dataset(object): 
   """
   Model selection procedure for causal inference models

   Args: 
    dataset: input file containing the features, treatments and outcomes 
    (add file type)
   """
   def __init__(self, csv_file):
    ##Should I use a super init? I don't understand it's use in the assignements...
     """
        Takes a dataset as input and returns a parsed Torch tensor where each 
        row {X_i, T_i, Y_i} represents the feature vector, the treatment type 
        and the outcome for a particular person.
     """
    #  Add Chang's parsing of the dataset

    # Each n row represents a person; the first column is the m dimensional float
    # feature vector, the second column is the 0/1 treatment type, and the 
    # third column is the float outcome.
    self.data = th.Tensor(self.size, 3)

    # Number of persons in the dataset
    self.size = None


**Feed Forward Network**

In [None]:
class CATEModel:
  """
  Placeholder for the feed forward neural network that takes the feature 
  vectors as inputs and estimates the outcome given the treatment type. 

  The class has no constructor, layers or forward pass yet; its arguments and
  return values are still to be defined.
  """
  # TODO: extract the feature vectors from self.dataset for the input of the 
  # network, and the outcomes and treatment types for the weight optimisation
  # in the backward process. 

  # TODO: add Lucas' forward and backward passes (presumably with a structure
  # similar to the net defined in Assignment A2.3).


**Test and train**

In [None]:
# Copied and pasted from the course assignments --> check that it fits this project.
# Is this where equation 5 has to be plugged in?
def train(train_loader, net, optimizer, criterion):
    """
    Trains network for one epoch in batches.

    Args:
        train_loader: Data loader for training set; yields (inputs, labels)
            batches.
        net: Neural network model.
        optimizer: Optimizer (e.g. SGD).
        criterion: Loss function (e.g. cross-entropy loss).

    Returns:
        A tuple (average loss per batch, accuracy in percent). The loss is
        detached from the autograd graph.

    Raises:
        ZeroDivisionError: if train_loader yields no batch.
    """
    # Make sure mode-dependent layers (e.g. dropout) run in training mode,
    # whatever mode a previous evaluation left the network in.
    net.train()

    avg_loss = 0
    correct = 0
    total = 0

    # iterate through batches
    for inputs, labels in train_loader:
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # keep track of loss and accuracy; the loss is detached so that the
        # running sum does not accumulate autograd history over the epoch
        avg_loss += loss.detach()
        _, predicted = th.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    return avg_loss / len(train_loader), 100 * correct / total
        
def test(test_loader, net, criterion):
    """
    Evaluates network in batches.

    The network is switched to evaluation mode for the duration of the call
    and put back in its previous mode afterwards.

    Args:
        test_loader: Data loader for test set; yields (inputs, labels)
            batches.
        net: Neural network model.
        criterion: Loss function (e.g. cross-entropy loss).

    Returns:
        A tuple (average loss per batch, accuracy in percent).

    Raises:
        ZeroDivisionError: if test_loader yields no batch.
    """

    avg_loss = 0
    correct = 0
    total = 0

    # Evaluate with mode-dependent layers (e.g. dropout) in evaluation mode,
    # and remember the current mode so the caller gets the network back as
    # it was.
    was_training = net.training
    net.eval()
    try:
        # Use torch.no_grad to skip gradient calculation, not needed for evaluation
        with th.no_grad():
            # iterate through batches
            for inputs, labels in test_loader:
                # forward pass
                outputs = net(inputs)
                loss = criterion(outputs, labels)

                # keep track of loss and accuracy
                avg_loss += loss
                _, predicted = th.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        net.train(was_training)

    return avg_loss / len(test_loader), 100 * correct / total

**Parameters**

In [None]:
# Network shape
dim_hidden_layers = 100
n_hidden_layers = 3

# Optimisation settings
batch_size = 256
learning_rate = 4.292 * 10**(-4)
dropout_rate = 0.2

# NOTE(review): the role of alpha is not visible in this notebook; it is
# passed to the CATEModel constructor - confirm what it weights.
alpha = 0.356

**Counter-factual cross-validation**

In [None]:
# Require: a set of candidate CATE predictors M
# (both lists are still empty placeholders at this point)
MLAlgorithms = []
metaLearners = []

# Require: an observational validation dataset V
# NOTE(review): `csv_file` is not defined in the visible part of this
# notebook - confirm it is set before this cell runs.
# NOTE(review): this assignment rebinds the name `dataset` from the class to
# the instance, so the class cannot be instantiated again afterwards.
dataset = dataset(csv_file)
# NOTE(review): `dataset.size` is described as the number of persons, not the
# dimension of a feature vector - confirm this is the intended input size.
input_size = dataset.size

# Step 1: Train f(X,T) by minimising Eq.12 using V
# NOTE(review): the CATEModel class visible in this notebook defines no
# constructor taking these four arguments - to be added with the network.
model = CATEModel(input_size, n_hidden_layers, dim_hidden_layers, alpha)

In [None]:
# TODO: confirm the loss type. CrossEntropyLoss is a classification loss,
# while the outcomes are described as floats.
criterion = nn.CrossEntropyLoss()
# TODO: model.parameters() requires the model to derive from nn.Module.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Pair each feature vector (column 0) with its outcome (column 2) so that the
# loaders yield (inputs, labels) batches, as train() and test() expect.
# TODO: confirm that X and Y are the right columns to feed the loaders.
features_and_outcomes = TensorDataset(dataset.data[:, 0], dataset.data[:, 2])
train_loader = DataLoader(features_and_outcomes, batch_size=batch_size)
# NOTE(review): the test loader currently reads the same samples as the
# training loader - confirm whether a held-out split is intended.
test_loader = DataLoader(features_and_outcomes, batch_size=batch_size)

epochs = 100 # The number of epochs is not given; this value is arbitrary.

# For the moment, the dropout hasn't been integrated yet.
for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train(train_loader, model, optimizer, criterion)
    test_loss, test_acc = test(test_loader, model, criterion)

In [None]:
# Step 2: Estimate the propensity score (done in step 3 since the propensity is used in the definition of tau)
# Step 3: Calculate the plug-in tau of samples in V
features = dataset.data[:, 0]
treatments = dataset.data[:, 1]
outcomes = dataset.data[:, 2]

# The propensity regression only depends on the dataset, so it is fitted once
# instead of once per datapoint.
regression = propensityRegression(dataset)

tau = [] # will be an n dimensional vector with the plug-in tau for each feature vector.
for feature, treatment, outcome in zip(features, treatments, outcomes): # Iterate over each datapoint of the dataset
  # NOTE(review): `get_hypothesis` is not defined by the dataset class visible
  # in this notebook - confirm where the hypotheses come from.
  f0 = dataset.get_hypothesis(feature, 0) # Hypothesis for the case where there is no treatment for this feature vector
  f1 = dataset.get_hypothesis(feature, 1) # Hypothesis for the case where there is a treatment for this feature vector
  plugIn = plugInTau(feature, treatment, outcome, f0, f1, regression)
  tau.append(plugIn)

# Step 4: Estimate the performance of candidate predictors in M based on the performance estimator R and tau.
if not MLAlgorithms or not metaLearners:
  raise ValueError("at least one ML algorithm and one meta-learner are required")

# Tensor containing at position [i, j] the performance estimator R relative to
# the i-th MLAlgorithm and the j-th metaLearner. It is sized from the
# candidate lists so that every entry is written before it is read.
performance = th.empty((len(MLAlgorithms), len(metaLearners)))
for i, algo in enumerate(MLAlgorithms): 
  for j, learner in enumerate(metaLearners): 
    candidate = candidatePredictorTau(features, algo, learner) # n dimensional vector with the candidate tau for each feature vector.
    performance[i, j] = performanceEstimator(tau, candidate) # Presumably equation 5 - TODO confirm

# argmax returns an index into the flattened (row-major) tensor; convert it
# back to a (row, column) pair.
# NOTE(review): argmax keeps the largest R - confirm against equation 5
# whether the best predictor is the one with the largest or the smallest R.
best = th.argmax(performance).item()
bestAlgo = MLAlgorithms[best // len(metaLearners)]
bestLearner = metaLearners[best % len(metaLearners)]