# CellCycleLifting Tutorial

In [1]:
import rootutils
import json

rootutils.setup_root("./", indicator=".project-root", pythonpath=True)
root_folder = rootutils.find_root()
import omegaconf

from modules.transforms.liftings.graph2cell import CellCycleLifting
from modules.io.load.loaders import GraphLoader

# Load Dataset Config

Either we keep the YAML config files and give a brief overview of them, or we build the required configs by hand inside these tutorials. (I prefer the former option.)

In [2]:
# Name of the dataset; it selects which YAML file under configs/dataset is read.
dataset_name = "manual_dataset"

# Only the `parameters` node of the loaded YAML file is kept as the dataset config.
dataset_config_path = f"{root_folder}/configs/dataset/{dataset_name}.yaml"
dataset_config = omegaconf.OmegaConf.load(dataset_config_path).parameters

#### Print dataset config

In [3]:
# Print the dataset config as indented JSON for nice visualization.
# OmegaConf.to_container converts the whole config (nested nodes included) into
# plain Python containers and resolves interpolations. A bare dict() converts
# only the top level, so a nested config node would not be JSON-serializable.
dict2print = omegaconf.OmegaConf.to_container(dataset_config, resolve=True)
print(json.dumps(dict2print, indent=4))

{
    "data_domain": "graph",
    "data_type": "manual_dataset",
    "data_name": "manual",
    "data_dir": "/Users/gbg141/Documents/TopoProjectX/challenge-icml-2024/datasets/graph/manual_dataset",
    "num_features": 1,
    "num_classes": 2,
    "task": "classification",
    "loss_type": "cross_entropy",
    "monitor_metric": "accuracy",
    "task_level": "node",
    "split_type": "k-fold",
    "k": 10,
    "data_seed": 0
}


# Load Transform Config file

Same dilemma as before: either load the YAML files, or define the config dicts directly within the tutorials.

In [4]:
# The lifting type and the lifting id (name) together select the YAML file to read.
lifting_type = "graph2cell"
id_lifting = "cell_cycle"

# Read yaml file
lifting_config_path = f"{root_folder}/configs/transforms/topological_liftings/{lifting_type}/{id_lifting}.yaml"
transform_config = {
    "lifting": omegaconf.OmegaConf.load(lifting_config_path),
    # other transforms (e.g. data manipulations, feature liftings) can be added here
}

### Print transform config

In [5]:
# Print the transform config as indented, key-sorted JSON for nice visualization.
# Every entry of transform_config is converted to plain Python containers (not
# only "lifting"), so this cell keeps working when other transforms are added.
# Entries that are already plain Python objects are passed through unchanged.
dict2print = {
    name: (
        omegaconf.OmegaConf.to_container(cfg, resolve=True)
        if omegaconf.OmegaConf.is_config(cfg)
        else cfg
    )
    for name, cfg in transform_config.items()
}

print(json.dumps(dict2print, sort_keys=True, indent=4))

{
    "lifting": {
        "_target_": "modules.transforms.data_transform.DataTransform",
        "complex_dim": 3,
        "feature_lifting": "SumLifting",
        "k_value": 1,
        "max_cell_length": null,
        "preserve_edge_attr": false,
        "transform_name": "CellCycleLifting",
        "transform_type": "lifting"
    }
}


### Load Dataset

In [6]:
# Build the loader from the dataset and transform configs, then load the dataset.
graph_loader = GraphLoader(dataset_config, transform_config)
dataset = graph_loader.load()

Transform parameters are the same, using existing data_dir: /Users/gbg141/Documents/TopoProjectX/challenge-icml-2024/datasets/graph/manual_dataset/manual/lifting/3481276899




### Visualize graph

In [8]:
# from modules.io.load.utils import plot_manual_graph
# plot_manual_graph(dataset.data)
# TODO: this visualization was done for simplicial complexes; should we do something similar for cell complexes and hypergraphs?

### Create a Neural Network Model

In [12]:
from topomodelx.nn.cell.cwn import CWN
import torch


class Network(torch.nn.Module):
    """CWN backbone followed by one linear readout per returned feature tensor.

    Parameters
    ----------
    in_channels_0, in_channels_1, in_channels_2
        Input sizes, forwarded to ``CWN`` in this order.
    hidden_channels
        Hidden size forwarded to ``CWN``; also the input size of every readout.
    out_channels
        Output size of every readout layer.
    n_layers
        Layer count forwarded to ``CWN`` (defaults to 1).
    """

    def __init__(
        self,
        in_channels_0,
        in_channels_1,
        in_channels_2,
        hidden_channels,
        out_channels,
        n_layers=1,
    ):
        super().__init__()
        # The backbone is registered first, then the three readouts.
        backbone_args = (
            in_channels_0,
            in_channels_1,
            in_channels_2,
            hidden_channels,
            n_layers,
        )
        self.base_model = CWN(*backbone_args)
        self.linear_0 = torch.nn.Linear(hidden_channels, out_channels)
        self.linear_1 = torch.nn.Linear(hidden_channels, out_channels)
        self.linear_2 = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, data):
        """Run the backbone on ``data`` and project each of its three outputs.

        ``data`` must expose the attributes ``x_0``, ``x_1``, ``x_2``,
        ``adjacency_1``, ``incidence_2`` and ``incidence_1``.

        Returns
        -------
        tuple
            ``(x_0, x_1, x_2)``: the backbone outputs after ``linear_0``,
            ``linear_1`` and ``linear_2`` respectively.
        """
        backbone_inputs = (
            data.x_0,
            data.x_1,
            data.x_2,
            data.adjacency_1,
            data.incidence_2,
            data.incidence_1.T,  # the backbone receives the transpose of incidence_1
        )
        hidden_0, hidden_1, hidden_2 = self.base_model(*backbone_inputs)
        return (
            self.linear_0(hidden_0),
            self.linear_1(hidden_1),
            self.linear_2(hidden_2),
        )

### Run the Model

In [13]:
# Hyperparameters chosen in this tutorial.
n_layers = 2
hidden_channels = 32

# All three input sizes are read from the dataset's `num_features`;
# the output size is read from `num_classes`.
in_channels_0 = in_channels_1 = in_channels_2 = dataset_config["num_features"]
out_channels = dataset_config["num_classes"]

model = Network(
    in_channels_0,
    in_channels_1,
    in_channels_2,
    hidden_channels,
    out_channels,
    n_layers=n_layers,
)

In [14]:
# Forward pass of the freshly constructed model on the loaded dataset.
# Network.forward returns a tuple (x_0, x_1, x_2), so y_hat holds three outputs.
y_hat = model(dataset)

Note that, for simplicity and ease of visualization, we used a simple graph; however, there is a set of available datasets that you can play with.

In [11]:
# Other dataset configs that can be loaded; uncomment entries to try them.
# NOTE: each iteration rebinds `dataset_config` and `dataset`, so after the loop
# they refer to the last dataset in the list (unchanged when the list is empty).
dataset_names = [
    # Working datasets:
    # "cocitation_cora",
    # "cocitation_citeseer",
    # "cocitation_pubmed",
    # "MUTAG",
    # "NCI1",
    # "NCI109",
    # Something is wrong with the following datasets:
    # "IMDB-BINARY",
    # "IMDB-MULTI",
    # "REDDIT-BINARY",
]
for dataset_name in dataset_names:
    config_path = f"{root_folder}/configs/dataset/{dataset_name}.yaml"
    dataset_config = omegaconf.OmegaConf.load(config_path).parameters

    loader = GraphLoader(dataset_config, transform_config)
    dataset = loader.load()

# Define lifting type
# lifting_type = "graph2simplicial"

# # Define lifting id (name)
# id_lifting = "simplicial_clique"

# # Read yaml file
# transform_config = {
#     "lifting": omegaconf.OmegaConf.load(
#         f"{root_folder}/configs/transforms/topological_liftings/{lifting_type}/{id_lifting}.yaml"
#     )
#     # other transforms (e.g. data manipulations, feature liftings) can be added here
# }