# Counterfactual Benchmark Notebook
- This notebook compacts what this repository does by showing an example for a GAN for Celeba Simple dataset
- This notebook must be execute in-place inside the repo
- Clone the repo and get a conda env with the correct packages

# Part 1: Training
- Start by 

In [1]:
#imports from root to use local packages
import os, sys
os.chdir('..')
from models.gans.celeba_gan import CelebaCondGAN
from datasets.celeba.dataset import Celeba
from models.classifiers.celeba_classifier import CelebaClassifier
import torch
import joblib
from pytorch_lightning import Trainer
from torchvision.transforms import RandomHorizontalFlip
from datasets.transforms import SelectParentAttributesTransform
from models.utils import generate_checkpoint_callback, generate_early_stopping_callback, generate_ema_callback

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using GPU: NVIDIA RTX 3500 Ada Generation Laptop GPU


In [None]:
# from torch.utils.data import Dataset
# from torchvision.datasets import CelebA
# from torchvision.transforms import Resize, ToTensor, CenterCrop, Compose, ConvertImageDtype
# import torch

# transforms = Compose([CenterCrop(150), Resize((64, 64)), ToTensor(), ConvertImageDtype(dtype=torch.float32),])

# data = CelebA(root="datasets\celeba\data", split="train", target_type="attr", transform=transforms, download=False)

In [3]:
def get_dataloaders(data_class, attribute_size, config, transform=None, **kwargs):
    data = data_class(data_dir="datasets\celeba\data", attribute_size=attribute_size, transform=transform, split='train', **kwargs)

    if data.has_valid_set:
        train_set = data
        val_set = data_class(data_dir="datasets\celeba\data", attribute_size=attribute_size, transform=transform, split='valid', **kwargs)
    else:
        train_set, val_set = torch.utils.data.random_split(data, [config["train_val_split"], 1 - config["train_val_split"]])

    train_data_loader = torch.utils.data.DataLoader(train_set, batch_size=config["batch_size_train"], shuffle=True, num_workers=7)
    val_data_loader = torch.utils.data.DataLoader(val_set, batch_size=config["batch_size_val"], shuffle=False, num_workers=7)
    return train_data_loader, val_data_loader

In [4]:
def train_gan(gan, config, data_class, graph_structure, attribute_size, checkpoint_dir, **kwargs):
    transform = SelectParentAttributesTransform("image", attribute_size, graph_structure)

    train_data_loader, val_data_loader = get_dataloaders(data_class, attribute_size, config, transform, **kwargs)

    monitor= "fid" if config['finetune'] == 0 else "lpips"
    callbacks = [
        generate_checkpoint_callback(gan.name, checkpoint_dir, monitor=monitor),
        generate_early_stopping_callback(patience=config["patience"], monitor=monitor)
    ]


    trainer = Trainer(accelerator="auto", devices="auto", strategy="auto",
                      callbacks=callbacks,
                      default_root_dir=checkpoint_dir, max_epochs=config["max_epochs"])

    trainer.fit(gan, train_data_loader, val_data_loader)

In [5]:
config_cls = {
    "attribute_size": {
        "Smiling": 1,
        "Eyeglasses": 1
    },

    "dataset": "celeba",
    "ckpt_path" : "../methods/deepscm/checkpoints/celeba/simple/trained_classifiers", #modified this line for the notebook
    "in_shape" : [3, 64, 64] ,
    "patience" : 10,
    "batch_size_train" : 128,
    "batch_size_val" : 128,
    "lr" : 1e-3,
    "max_epochs" : 1000,
    "ema": "True"
}

In [6]:
# define causal graph (canibalised from config/../gan.json)
causal_graph = {
        "Smiling": [],
        "Eyeglasses": [],
        "image": ["Smiling", "Eyeglasses"],
    }

#define the models for each mechanism (only one for image here as the rest in the graph are roots)
mechanism_models =  {
        "image": {
            "model_type": "gan",
            "model_class": "CelebaCondGAN",
            "module": "models.gans",
            "params": {
                "n_chan_enc": [3, 64, 128, 256, 256, 512, 512],
                "n_chan_gen": [512, 512, 256, 256, 128, 64, 3],
                "latent_dim": 512,
                "num_continuous": 2,
                "d_updates_per_g_update": 1,
                "gradient_clip_val": 0.5,
                "finetune": 1,
                "pretrained_path": "",
                "lr": 1e-4,
                "batch_size_train": 128,
                "batch_size_val": 128,
                "patience": 10,
                "max_epochs": 1000
            }
        }
    }

attribute_size = {
        "Smiling": 1,
        "Eyeglasses": 1
         }

In [41]:
CelebaCondGAN.name

AttributeError: type object 'CelebaCondGAN' has no attribute 'name'

### Train model:
- Train the GAN model for Celeb data set image generation:

In [None]:
for variable in causal_graph:
    if variable not in mechanism_models: continue #only want to train variables with models, root variables don't have a causal mechanism
    print("training...")
    train_gan(
        gan=CelebaCondGAN(params=mechanism_models[variable]["params"], attr_size=config_cls["attribute_size"]),
        config=mechanism_models[variable]["params"],
        data_class=Celeba,
        graph_structure=causal_graph,
        attribute_size=attribute_size,
        checkpoint_dir='methods\deepscm\checkpoints\celeba\simple/trained_scm' #adjusted default path because the notebook is down one
        )

training...


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA RTX 3500 Ada Generation Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_fl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0:  31%|███       | 392/1272 [26:11<58:48,  0.25it/s, v_num=2]  

### Train Classifier:
- Train the classifiers for each attribute which will be used later for evaluation

In [18]:
def train_classifier(classifier, attr, train_set, val_set, config, default_root_dir, weights=None):
    mode = 'min' if attr in ["age", "brain_vol", "vent_vol", "thickness", "intensity"] else 'max'

    callbacks = [
        generate_checkpoint_callback(attr + "_classifier", config["ckpt_path"], monitor="val_metric", mode=mode),
        generate_early_stopping_callback(patience=config["patience"], monitor="val_metric", mode=mode, min_delta=1e-5)
    ]

    if config["ema"] == "True":
        callbacks.append(generate_ema_callback(decay=0.999))

    trainer = Trainer(accelerator="auto", devices="auto", strategy="auto",
                      callbacks=callbacks,
                      default_root_dir=default_root_dir, max_epochs=config["max_epochs"])

    if weights != None:
        sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(train_set), replacement=True)
        print("Using sampler!")
        train_data_loader = torch.utils.data.DataLoader(train_set, sampler=sampler, batch_size=config["batch_size_train"],  drop_last=False, num_workers=7)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_set, batch_size=config["batch_size_train"], shuffle=True, drop_last=False, num_workers=7)


    val_data_loader = torch.utils.data.DataLoader(val_set, batch_size=config["batch_size_val"], shuffle=False, num_workers=7)
    trainer.fit(classifier, train_data_loader, val_data_loader)

In [31]:
for attribute in attribute_size[0].keys():

    classifier = CelebaClassifier(attr=attribute, num_outputs=attribute_size[0][attribute], lr=config_cls["lr"])

    # train_set = Celeba(attribute_size=attribute_size, split="train", transform_cls=RandomHorizontalFlip(0.5), data_dir="../datasets/celeba/data/img_align_celeba")
    train_set = Celeba(data_dir="datasets\celeba\data", split="train", transform=transforms, attribute_size=attribute_size[0], transform_cls=RandomHorizontalFlip(0.5))

    val_set = Celeba(data_dir="datasets\celeba\data", attribute_size=attribute_size[0], split="valid")

    #weights:
    if attribute == "Smiling":
        weights = torch.tensor(joblib.load("datasets\celeba\weights\weights_smiling.pkl")).double() #this path may need updating

    elif attribute == "Eyeglasses":
        weights = torch.tensor(joblib.load("datasets\celeba\weights\weights_eyes.pkl")).double()

    elif attribute in {"No_Beard", "Bald"}:
        labels = train_set.attrs[: , classifier.variables[attribute]].long()
        print((labels == 1).sum(), (labels==0).sum())
        class_count = torch.tensor([(labels == t).sum() for t in torch.unique(labels, sorted=True)])
        print(class_count)
        class_weights = 1. / class_count.float()

        weights = class_weights[labels]
        print(weights)

    else:
        weights = None

    train_classifier(
        classifier=classifier,
        attr=attribute,
        train_set=train_set,
        val_set=val_set,
        config=config_cls,
        default_root_dir=config_cls["ckpt_path"],
        weights=weights
    )

    

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:751: Checkpoint directory C:\Users\maq25jh\Documents\Code\counterfactual-benchmarks\counterfactual-benchmark\methods\deepscm\checkpoints\celeba\simple\trained_classifiers exists and is not empty.

  | Name     | Type           | Params | M

Using sampler!
Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\maq25jh\miniconda3\envs\counterfactual-benchmarks\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

AttributeError: 'CelebaClassifier' object has no attribute 'attr'

# Part 2: Evaluation

We will start by running the model (abduction, action, and prediction), then comparing its output to the classifier

In [None]:
def produce_counterfactuals(factual_batch: torch.Tensor, scm: nn.Module, do_parent:str, intervention_source: Dataset,
                            force_change: bool = False, possible_values = None, device: str = 'cuda', bins = None):
    factual_batch = {k: v.to(device) for k, v in factual_batch.items()}

    #update with the counterfactual parent
    if force_change:
        possible_values = possible_values[do_parent]
        values = factual_batch[do_parent].cpu()
        if do_parent not in ["digit", "apoE", "slice"]:
            interventions = {do_parent: torch.cat([torch.tensor(np.random.choice(possible_values[different_value(possible_values, value, bins, do_parent)])).unsqueeze(0)
                                                for value in values]).view(-1).unsqueeze(1).to(device)}
        else:
            interventions = {do_parent: torch.cat([torch.tensor(rng.choice(possible_values[torch.where((different_value(possible_values, value, bins, do_parent)).any(dim=1))], axis=0)).unsqueeze(0)
                                                for value in values]).to(device)}
    else:
        batch_size, _ , _ , _ = factual_batch["image"].shape
        idxs = torch.randperm(len(intervention_source))[:batch_size] # select random indices from train set to perform interventions

        interventions = {do_parent: torch.cat([intervention_source[id][do_parent] for id in idxs]).view(-1).unsqueeze(1).to(device)
                        if do_parent not in ["digit", "apoE", "slice"] else torch.cat([intervention_source[id][do_parent].unsqueeze(0).to(device) for id in idxs])}

    abducted_noise = scm.encode(**factual_batch)
    counterfactual_batch = scm.decode(interventions, **abducted_noise)

    return counterfactual_batch

In [None]:

def evaluate_effectiveness(test_set: Dataset, unnormalize_fn, batch_size:int , scm: nn.Module, attributes: List[str], do_parent:str,
                           intervention_source: Dataset, predictors: Dict[str, Classifier], dataset: str):

    test_data_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=7)

    effectiveness_scores = {attr_key: [] for attr_key in attributes}
    for factual_batch in tqdm(test_data_loader):
        counterfactuals = produce_counterfactuals(factual_batch, scm, do_parent, intervention_source,
                                                  force_change=True, possible_values=test_set.possible_values, bins=test_set.bins)
        e_score = effectiveness(counterfactuals, unnormalize_fn, predictors, dataset)

        for attr in attributes:
            effectiveness_scores[attr].append(e_score[attr])

    effectiveness_score = {key  : (round(np.mean(score), 3), round(np.std(score), 3)) for key, score in effectiveness_scores.items()}

    print(f"Effectiveness score do({do_parent}): {effectiveness_score}")

    return effectiveness_score