In [1]:
from pathlib import Path
import sys
import os

ROOT_DIR = Path(os.path.abspath("../"))

sys.path.append(str((ROOT_DIR / "src").resolve()))
sys.path

['/home/fact21/fact_refactor/experiments',
 '/home/fact21/miniconda3/envs/fairAC/lib/python38.zip',
 '/home/fact21/miniconda3/envs/fairAC/lib/python3.8',
 '/home/fact21/miniconda3/envs/fairAC/lib/python3.8/lib-dynload',
 '',
 '/home/fact21/miniconda3/envs/fairAC/lib/python3.8/site-packages',
 '/home/fact21/fact_refactor/src']

In [2]:
from models.ac import FairAC, Trainer
from dataset import NBA


import torch
import numpy as np

In [3]:
# Experiment configuration
DEVICE_INDEX = 2 # cuda device index, should be 0 if your machine only one GPU
SEED = 20 # The seed to use for the experiment

DATA_PATH = ROOT_DIR / "dataset/NBA"
LOG_DIR = ROOT_DIR / "experiments/logs/pokec_z"

LOG_DIR.mkdir(parents=True, exist_ok=True)

In [4]:
# Run required setup
device = torch.device(f"cuda:{DEVICE_INDEX}" if torch.cuda.is_available() else "cpu")

print("Using device", device)

np.random.seed(SEED)
torch.manual_seed(SEED)

print("Using seed:", SEED)

Using device cuda:2
Using seed: 20


In [5]:
# Load in the dataset
dataset = NBA(
    nodes_path=DATA_PATH / "nba.csv",
    edges_path=DATA_PATH / "nba_relationship.txt",
    embedding_path=DATA_PATH / "nba_embedding10.npy",
    feat_drop_rate=0.3,
    device=device
)

print(f"Loaded dataset with {dataset.graph.num_nodes()} nodes and {dataset.graph.num_edges()} edges")
print(f"Using feat_drop_rate: {dataset.feat_drop_rate}")

Loaded dataset with 403 nodes and 21645 edges
Using feat_drop_rate: 0.3


In [6]:
# Create FairAC model
fair_ac = FairAC(
    feature_dim=dataset.features.shape[1],
    transformed_feature_dim=128,
    emb_dim=dataset.embeddings.shape[1],
    attn_vec_dim=128,
    attn_num_heads=1,
    dropout=0.5,
    num_sensitive_classes=1,
)

print(f"Created FairAC model with {1} sensitive class")

Created FairAC model with 1 sensitive class


In [7]:
# Create FairAC trainer
trainer = Trainer(
    ac_model=fair_ac,
    lambda1=1.0,
    lambda2=1.0,
    dataset=dataset,
    device=device,
    gnn_kind="GCN",
    gnn_hidden_dim=128,
    gnn_lr=1e-3,
    gnn_weight_decay=1e-5,
    gnn_args={"dropout": 0.5},
    log_dir=LOG_DIR,
    min_acc=0.65,
    min_roc=0.69,
)

print(f"Created trainer with {'GCN'} model, using LOG_DIR: {LOG_DIR}")

Created trainer with GCN model, using LOG_DIR: /home/fact21/fact_refactor/experiments/logs/pokec_z


In [8]:
# Run pre-training
trainer.pretrain(epochs=200)
print("Finished pretraining")

  0%|          | 0/200 [00:00<?, ?it/s]

Finished pretraining


In [9]:
# Main training loop, with GNN validation
trainer.train(val_start_epoch=800, val_epoch_interval=200, epochs=2800)

  0%|          | 0/2800 [00:00<?, ?it/s]

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:2 and cpu! (when checking argument for argument mat1 in method wrapper_addmm)