In [None]:
# PyG companion packages, resolved from the wheel index built for torch 2.0.1 + CUDA 11.8
# (see the -f URL); -q keeps pip output quiet.
%pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -q -f https://data.pyg.org/whl/torch-2.0.1+cu118.html
# wandb and optuna are installed unpinned (-U upgrades to the latest release).
# NOTE(review): pin exact versions if results must be reproducible across environments.
%pip install wandb optuna -qU

# Semi-Supervised Transductive Learning Pipeline

- **Image embeddings are optional: the pipeline runs with or without them**
- **To make results reproducible, it is recommended to run on CPU**

## Imports

In [1]:
import os
import sys

import numpy as np
import optuna
import pandas as pd
import torch
import torch_geometric.transforms as T

sys.path.append("../../src")

#here you can find utils that are used in each notebook
from utils import set_seed, EarlyStoppingR2, train_CFG
#here are some functions like train_one_epoch or cross_val
from training_utils import train_one_epoch_transductive, val_one_epoch_transductive, train_transductive, cross_val_transductive
#here optuna objective for all* pipelines
from optuna_objectives import objective_transductive
#here are models
from models import TransductiveGCN, TransductiveGAT


#* objectives are not implemented for every pipeline yet

## Setting up all session parameters

In [17]:
# ---- global params ----
# Every tunable value of the session lives in this cell; later cells only read these names.

# IMPORTANT: keep this seed to reproduce the reported results
SEED = 111
# use the GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# whether to run the feature-propagation algorithm on the node features
use_features_propagation = False

# region currently being processed (must be a key of regions_mapper)
region = 812

regions_mapper = {
    777: "Moscow",
    812: "Saint-Petersburg",
    287: "Kazan",
    473: "Sochi",
}
# fail early with a readable message instead of a bare KeyError in the path f-string below
assert region in regions_mapper, f"unknown region {region}; expected one of {sorted(regions_mapper)}"


# ---- path params ----
# NOTE(review): the graph path climbs three directory levels while the image paths
# below climb two -- confirm this is intentional.
path_for_graph = f"../../../data/graph_preprocessing/{regions_mapper[region]}/graph_with_cv_full.pickle"
# relative to the notebook directory, must start with ../../chkps/
# NOTE(review): the folder is named "inductive_gcn_pipeline" although this notebook is
# the transductive pipeline -- confirm the checkpoints should land here.
checkpoints_path = "../../chkps/inductive_gcn_pipeline"
# requires `import os` in the imports cell
assert os.path.exists(checkpoints_path), "path for checkoints must exists"


# ---- model params ----
hidden_dim = 64
n_layers = 4
n_head = 2

# ---- training params ----
optimizer_name = "AdamW"  # one of ("Adam", "AdamW", "RMSProp"); resolved via getattr(torch.optim, ...)
use_scheduler = True

# ---- early stopper params ----
use_stopper = True
stopper_patience = 100
stopper_delta = 0.001

verbose = 10

num_epochs = 1000

# number of epochs to wait before the scheduler and the early stopper start being used
started_patience = 300

# ---- image embeddings ----
# embedding type (set to None if you are not using image embeddings)
image_embeddings_pipeline = "basic"  # ("basic", "efficient_all_channels", "efficient_delete_channels")
image_embedding_size = 1280

image_embeddings_path = f"../../data/image_embeddings/image_embeddings_{image_embeddings_pipeline}_{region}.pickle"

# ---- image features ----
# when True, the image features are concatenated to the node features
add_image_features = True

image_features_path = f"../../data/image_embeddings/image_features_{region}.pickle"

## Loading preprocessed Graph

In [18]:
# Load the preprocessed graph and move its `x` and `edge_index` attributes to the target device.
# NOTE: torch.load unpickles the file -- only load graph/embedding files from a trusted source.
graph = torch.load(path_for_graph).to(device, "x", "edge_index")

if use_features_propagation:
    # Entries equal to -1 are treated as missing: mark them as NaN and let
    # FeaturePropagation fill them in.
    graph.x[graph.x == -1] = torch.nan
    graph = T.FeaturePropagation(missing_mask=torch.isnan(graph.x), num_iterations=400)(graph)

if image_embeddings_pipeline:
    # The embeddings are attached as a separate attribute, not merged into `x`.
    images = torch.load(image_embeddings_path).to(device)
    graph.image_embeds = images

if add_image_features:
    # Move the features to the same device as graph.x first; without this torch.cat
    # raises a device-mismatch error when running on CUDA.
    image_features = torch.load(image_features_path).to(device)
    graph.x = torch.cat([graph.x, image_features], dim=-1)

## Initializing model, optimizer

In [4]:
set_seed(SEED)

# Image embeddings are used exactly when a pipeline name is configured.
# (The original referenced an undefined name, `image_embeddings_type`.)
use_image_embeddings = image_embeddings_pipeline is not None

# model
# Can be TransductiveGCN or TransductiveGAT; the GAT variant additionally takes
# `head` (number of attention heads in each conv).
model = TransductiveGAT(
    n_in=graph.num_features,
    n_out=1,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
    head=n_head,
    use_image=use_image_embeddings,
    image_size=(image_embedding_size if use_image_embeddings else False),
).to(device)

# optimizer: the class is looked up by name in torch.optim
optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=0.001771619056705244)

# scheduler (stays None when disabled so the name always exists)
# NOTE(review): ReduceLROnPlateau defaults to mode="min"; confirm the training loop
# steps it with a loss rather than with R2.
scheduler = None
if use_scheduler:
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, factor=0.7, patience=30, threshold=0.01, min_lr=1e-5 / 5
    )

# loss function
loss_fn = torch.nn.MSELoss()

# early stopper (stays None when disabled so the name always exists)
earlystopper = None
if use_stopper:
    earlystopper = EarlyStoppingR2(
        patience=stopper_patience,
        verbose=False,
        delta=stopper_delta,
        path=checkpoints_path,
        trace_func=print,
        model_name="best_model_train.pt",
    )

## Default basic training

In [1]:
# Set to False to skip the plain train/validation run and the test evaluation below.
base_train = True

if base_train:

    # training

    # train_CFG is used in place of a plain dictionary: calling it stores a (key, value) pair
    train_cfg = train_CFG()

    train_cfg("num_epochs", num_epochs)
    train_cfg("verbose", verbose)
    train_cfg("train_mask", "train_mask")  # name of the train mask attribute on the graph object
    train_cfg("val_mask", "val_mask")  # name of the validation mask attribute on the graph object

    train_transductive(
        dataset=graph,
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        train_cfg=train_cfg,
        scheduler=(scheduler if use_scheduler else None),
        started_patience=started_patience,
        earlystopper=(earlystopper if use_stopper else None),
        # follow the session config instead of a hard-coded True, so the flag agrees
        # with how the model was built
        use_image=(image_embeddings_pipeline is not None),
    )

    # evaluation on the held-out test mask
    _, result_r2 = val_one_epoch_transductive(graph, model, loss_fn, "test_mask")
    print(result_r2)

NameError: name 'train_CFG' is not defined

## Stratified KFold Validation

In [None]:
# Stratified K-fold cross-validation (the target is split into bins for stratification)

cv_cfg = train_CFG()
cv_cfg("num_epochs", num_epochs)
cv_cfg("verbose", verbose)
cv_cfg("scheduler", (True if use_scheduler else None))
cv_cfg("stopper_patience", stopper_patience)
cv_cfg("stopper_delta", stopper_delta)
cv_cfg("started_patience", started_patience)


val_score = cross_val_transductive(
    num_folds=5,
    dataset=graph,
    model_name="GAT",  # model architecture name
    # NOTE(review): unlike the single-model cell, no use_image / image_size is passed
    # here -- presumably cross_val_transductive derives them from `use_image`; confirm.
    model_params=dict(
        n_in=graph.num_features,
        n_out=1,
        hidden_dim=hidden_dim,
        n_layers=n_layers,
        head=n_head,  # only meaningful for architectures that use attention heads
    ),
    optimizer_params={"lr": 0.001771619056705244},
    optimizer_name=optimizer_name,
    cv_cfg=cv_cfg,
    checkpoints_path=None,  # checkpoints path
    eval_test=False,  # for now, if set to True all models are evaluated using "test_mask" from the graph
    device=device,
    # follow the session config instead of a hard-coded True
    use_image=(image_embeddings_pipeline is not None),
)

## Getting best params using Optuna

In [None]:
study = optuna.create_study(
    direction="maximize",
)

# `graph` is passed below as the feature-propagation dataset, but it is only propagated
# when the loading cell ran with use_features_propagation=True.
if not use_features_propagation:
    print(
        "WARNING: use_features_propagation is False, so `graph` was loaded without "
        "Feature Propagation; trials that request it will not actually use a propagated dataset."
    )

# Image embeddings are used exactly when a pipeline name is configured.
# (The original referenced an undefined name, `image_embeddings_type`.)
use_image_embeddings = image_embeddings_pipeline is not None

# Second copy of the graph that never goes through Feature Propagation.
# NOTE: torch.load unpickles the file -- only load files from a trusted source.
graph_no_fp = torch.load(path_for_graph).to(device, "x", "edge_index")

if use_image_embeddings:
    images = torch.load(image_embeddings_path).to(device)
    graph_no_fp.image_embeds = images

if add_image_features:
    # Mirror the loading cell so both candidate datasets carry the same feature columns
    # and differ only by Feature Propagation.
    graph_no_fp.x = torch.cat(
        [graph_no_fp.x, torch.load(image_features_path).to(device)], dim=-1
    )

# Extra keyword arguments forwarded to the objective on every trial
optimizing_params = dict(
    fp_ds=graph,  # dataset used when a trial suggests Feature Propagation
    no_fp_ds=graph_no_fp,  # dataset used when a trial suggests no Feature Propagation
    model_name="GAT",
    device=device,
    use_image=use_image_embeddings,
)

# 100 trials are run here (about 50 is usually sufficient for this task)
study.optimize(
    lambda trial: objective_transductive(trial, **optimizing_params),
    n_trials=100,
    show_progress_bar=True,
)

In [13]:
# Report the best trial; retrain the model with these params to reproduce the result.

print("Best params: ")
print(study.best_params)

print("Best score: ")
# optuna.Study exposes the best objective value as `best_value`; it has no `best_score`
print(study.best_value)

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[-0.1137], datetime_start=datetime.datetime(2024, 7, 19, 12, 56, 25, 581867), datetime_complete=datetime.datetime(2024, 7, 19, 12, 57, 46, 397926), params={'n_layers': 3, 'hidden_dim': 256, 'lr_init': 0.004455890110995671, 'use_feature_propagation': False}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_layers': IntDistribution(high=7, log=False, low=1, step=1), 'hidden_dim': CategoricalDistribution(choices=(32, 64, 128, 256)), 'lr_init': FloatDistribution(high=0.005, log=False, low=0.0001, step=None), 'use_feature_propagation': CategoricalDistribution(choices=(True, False))}, trial_id=0, value=None)