# GNN node feature dynamics analysis on number of layers
- Note: before using this code, you need checkpoints saved in the organized folder layout defined in train.py
- i.e. you need to run train.py first (instructions can be found in train.py)

In [None]:
# Root directory holding the trained checkpoints. get_ckpt_files() looks for files under
# '<PATH>/<dataset>/<model>/<linear|act_tanh>/ss_<step_size>/nl_<nlayers>'.
# This value is machine-specific: point it at your own checkpoint folder before running.
PATH = '/Users/jungyoonlee/Desktop/MAT6215-Project/experiments/ckpt_real_graph_clean'         # your checkpoint path

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch 
from torch_geometric.utils import to_edge_index
from sklearn.preprocessing import normalize

from utils import load_full_data, rayleigh_quotient
from models import GRAFFNet

import os

In [None]:
def adj_to_edge_index(adj):
    """Turn an adjacency matrix into an edge index, with self-loops included.

    The identity matrix is added to ``adj``, the sum is passed through
    sklearn's ``normalize`` (default arguments), and the result is converted
    to a sparse tensor from which ``to_edge_index`` extracts the indices.

    Args:
        adj: square adjacency matrix; assumed to be a dense torch tensor,
            since it is summed with ``torch.eye(adj.size(0))`` — TODO confirm.

    Returns:
        The edge index returned by ``to_edge_index`` (edge values are dropped).
    """
    num_nodes = adj.size(0)
    with_self_loops = adj + torch.eye(num_nodes)
    normalized = normalize(with_self_loops)
    sparse_adj = torch.tensor(normalized).to_sparse()
    edge_index, _edge_values = to_edge_index(sparse_adj)
    return edge_index

def list_files_in_folder(folder_path):
    """Recursively collect the paths of all files below ``folder_path``.

    Args:
        folder_path: directory to walk; sub-directories are descended into.

    Returns:
        A list of file paths (each joined with the directory it was found in),
        in ``os.walk`` order. Empty when the folder has no files or does not
        exist, because ``os.walk`` yields nothing in that case.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(folder_path)
        for filename in filenames
    ]

In [None]:
def get_ckpt_files(dataset:str, model:str, nlayers:str, step_size:float, linear:bool):
    """List the checkpoint files stored for one training configuration.

    The folder searched is
    ``{PATH}/{dataset}/{model}/{linear|act_tanh}/ss_{step_size}/nl_{nlayers}``,
    where the fourth component is ``linear`` when ``linear`` is truthy and
    ``act_tanh`` otherwise.

    Args:
        dataset: dataset folder name.
        model: model-type folder name.
        nlayers: value used in the ``nl_`` folder name; it is only formatted
            into the path (callers in this file pass an int).
        step_size: value used in the ``ss_`` folder name.
        linear: selects the ``linear`` or ``act_tanh`` sub-folder.

    Returns:
        Sorted list of every file path found under that folder (recursively).
        The list is also printed.
    """
    variant = 'linear' if linear else 'act_tanh'
    ckpt_dir = f'{PATH}/{dataset}/{model}/{variant}/ss_{step_size}/nl_{nlayers}'
    target_files = sorted(list_files_in_folder(ckpt_dir))
    print(target_files)
    return target_files

def get_model_bytype(features_dict, labels_dict, model:str, dataset:str, step_size:float, linear:bool):
    """Build an untrained ``GRAFFNet`` sized for the given dataset.

    Args:
        features_dict: mapping from dataset name to its feature tensor; the
            size of dimension 1 is passed as the first constructor argument.
        labels_dict: mapping from dataset name to its label tensor; its
            maximum plus one is passed as the third constructor argument.
        model: forwarded to ``GRAFFNet`` as ``model_type``.
        dataset: key used to look up features and labels.
        step_size: forwarded to ``GRAFFNet`` as ``step_size``.
        linear: forwarded to ``GRAFFNet`` as ``linear``.

    Returns:
        A freshly constructed ``GRAFFNet`` instance.
    """
    input_dim = features_dict[dataset].size(1)
    num_classes = labels_dict[dataset].max()+1
    # 32 is the fixed second positional argument (presumably the hidden width — TODO confirm).
    return GRAFFNet(input_dim, 32, num_classes,
                    step_size=step_size, model_type=model, linear=linear)

def get_trained_model_bytype(features_dict, labels_dict, model_type:str, dataset:str, nlayers:int, step_size: float, linear:bool, ckpt_files:list = None, single_model:bool = True):
    """Build model(s) for a configuration and load their trained weights.

    Args:
        features_dict: mapping from dataset name to feature tensor.
        labels_dict: mapping from dataset name to label tensor.
        model_type: model type, used both to build the model and to locate
            checkpoints.
        dataset: dataset name.
        nlayers: number of layers used in the checkpoint folder name.
        step_size: step size used to build the model and locate checkpoints.
        linear: whether the linear variant is built / looked up.
        ckpt_files: explicit list of checkpoint paths; when None the list is
            obtained from ``get_ckpt_files``.
        single_model: when True (default) only the first checkpoint is loaded.

    Returns:
        A single model loaded from ``ckpt_files[0]``, or — when more than one
        checkpoint is available and ``single_model`` is False — a list with
        one loaded model per checkpoint, in ``ckpt_files`` order.

    Raises:
        IndexError: if no checkpoint file is available.
    """
    if ckpt_files is None:
        ckpt_files = get_ckpt_files(dataset, model_type, nlayers, step_size, linear)

    def _load(ckpt):
        # Weights are always mapped to CPU; the checkpoint stores them under 'model_state_dict'.
        net = get_model_bytype(features_dict, labels_dict, model_type, dataset, step_size, linear)
        net.load_state_dict(torch.load(ckpt, map_location='cpu')['model_state_dict'])
        return net

    if len(ckpt_files) > 1 and not single_model:
        # Previously this branch loaded the models but never returned them.
        return [_load(ckpt) for ckpt in ckpt_files]
    return _load(ckpt_files[0])

In [None]:
def layer_sweep(model, edge_index, feat, 
                num_steps=60, hidden_states=32, linear_model=True, step_size_override=None):
    """Unroll the model's update rule and record the Rayleigh quotient per step.

    Starting from ``model.enc(feat)``, each step computes
    ``state + model.step_size * conv(state, edge_index, initial_state)``
    (with ``tanh`` applied to the conv output when ``linear_model`` is False)
    and evaluates ``rayleigh_quotient`` on the new state.

    Args:
        model: object exposing ``enc``, ``conv`` and ``step_size``.
        edge_index: graph connectivity passed to ``conv`` and ``rayleigh_quotient``.
        feat: node feature tensor; ``feat.size(0)`` is used as the node count.
        num_steps: number of states kept, i.e. ``num_steps - 1`` update steps.
        hidden_states: last dimension of the state buffer; must match the
            output width of ``model.enc``.
        linear_model: when False, ``tanh`` is applied to the conv output.
        step_size_override: if not None, ``model.step_size`` is overwritten
            with this value (the model is modified in place).

    Returns:
        ``(s0_rq_dict, s1_rq_dict)``. ``s0_rq_dict`` is always empty.
        ``s1_rq_dict`` maps step index to the Rayleigh quotient as a Python
        float; index 0 is the quotient of the raw input features ``feat``
        (not of the encoded state), indices 1..num_steps-1 are the quotients
        of the successive states.
    """
    # `is not None` so that an explicit 0 / 0.0 override is honoured too.
    if step_size_override is not None:
        model.step_size = step_size_override

    s0_rq_dict = {}
    s1_rq_dict = {}
    num_nodes = feat.size(0)

    # Only scalar values are extracted, so no autograd graph is needed across steps.
    with torch.no_grad():
        init_rq = rayleigh_quotient(edge_index, num_nodes, feat)
        # Stored as a float like every other entry (it used to be kept as a tensor).
        s1_rq_dict[0] = init_rq.detach().cpu().item()

        S = torch.zeros(size=(num_steps, num_nodes, hidden_states))  # states
        S[0,:,:] = model.enc(feat)
        for num_step in range(num_steps-1):
            update = model.conv(S[num_step,:,:], edge_index, S[0,:,:])
            if not linear_model:
                update = torch.tanh(update)
            S[num_step+1,:,:] = S[num_step,:,:] + model.step_size * update
            s1_rq = rayleigh_quotient(edge_index, num_nodes, S[num_step+1,:,:])
            s1_rq_dict[num_step+1] = s1_rq.detach().cpu().item()

    print("Done with sweep")
    return s0_rq_dict, s1_rq_dict

### Load Edge/Feature/Labels for each Dataset for easier usage

In [None]:
# Per-dataset lookup tables, keyed by dataset name.
edge_dict, features_dict, labels_dict = {}, {}, {}

dataset_names = [
    'cora', 'citeseer', 'pubmed',
    'texas', 'wisconsin', 'cornell',
    'film', 'chameleon', 'squirrel',
]

for dataset in dataset_names:
    # load_full_data returns four values; the second one is not used here.
    adj, _, feat, label = load_full_data(dataset)
    # Keep only the index tensor of the coalesced sparse adjacency.
    edge_dict[dataset] = adj.coalesce().indices()
    features_dict[dataset] = feat
    labels_dict[dataset] = label

### Analysis by Model Types

In [None]:
# (dataset key, legend label) pairs, in the order the curves are drawn.
_RQ_PLOT_DATASETS = (
    ('cora', 'Cora'),
    ('citeseer', 'Citeseer'),
    ('squirrel', 'Squirrel'),
    ('chameleon', 'Chameleon'),
    ('texas', 'Texas'),
    ('wisconsin', 'Wisconsin'),
    ('cornell', 'Cornell'),
    ('film', 'Film'),
)
# Subset drawn when precomputed results are used (dict_exists=True).
_RQ_PRECOMPUTED_DATASETS = _RQ_PLOT_DATASETS[:4]
# The checkpoints loaded for plotting are always the ones stored under 'nl_60'.
_RQ_CKPT_NUM_LAYERS = 60


def _precomputed_rq_curves(model):
    """Read per-dataset RQ dicts from module-level ``s1_rq_<dataset>[model]`` variables."""
    namespace = globals()
    curves = {}
    for key, label in _RQ_PRECOMPUTED_DATASETS:
        var_name = f's1_rq_{key}'
        if var_name not in namespace:
            raise NameError(
                f"dict_exists=True requires a module-level '{var_name}' dict keyed by model type"
            )
        curves[label] = namespace[var_name][model]
    return curves


def _swept_rq_curves(model, num_steps, step_size_override):
    """Load the trained linear model of every dataset and sweep it with ``layer_sweep``."""
    # Load every model before sweeping so a missing checkpoint fails early.
    trained = {
        key: get_trained_model_bytype(features_dict, labels_dict, model, key,
                                      _RQ_CKPT_NUM_LAYERS, step_size_override, True)
        for key, _label in _RQ_PLOT_DATASETS
    }
    curves = {}
    for key, label in _RQ_PLOT_DATASETS:
        net = trained[key]
        _, s1_rq = layer_sweep(net, edge_dict[key], features_dict[key],
                               num_steps=num_steps, linear_model=net.linear,
                               step_size_override=step_size_override)
        curves[label] = s1_rq
    return curves


def plot_per_model(model='graffgcn', num_steps = 60, step_size_override = 0.1, dict_exists:bool=False, ax=None):
    """Plot the Rayleigh quotient against the number of layers, one curve per dataset.

    Args:
        model: model type whose checkpoints (or precomputed results) are plotted.
        num_steps: number of points per curve; the x-axis is ``range(num_steps)``.
        step_size_override: step size used both to locate the checkpoints and
            to run the sweep.
        dict_exists: when True, nothing is computed; curves are read from
            module-level dicts named ``s1_rq_cora``, ``s1_rq_citeseer``,
            ``s1_rq_squirrel`` and ``s1_rq_chameleon``, each indexed by ``model``.
            (This path used to fail with UnboundLocalError because those names
            were shadowed by local variables.)
        ax: matplotlib axes to draw on; when None the curves go to ``plt``.

    Raises:
        NameError: if ``dict_exists`` is True and a required module-level
            dict is missing.
    """
    layer_interval = list(range(num_steps))

    if dict_exists:
        curves = _precomputed_rq_curves(model)
    else:
        curves = _swept_rq_curves(model, num_steps, step_size_override)

    target = plt if ax is None else ax
    for label, rq_by_layer in curves.items():
        target.plot(layer_interval, list(rq_by_layer.values()), label=label)

    # Axis labels and legend are only added for a freshly computed, stand-alone
    # plot; callers passing `ax` decorate the axes themselves.
    if ax is None and not dict_exists:
        plt.xlabel('Num Layers')
        plt.ylabel('RQ')
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1),
                   fancybox=True, shadow=True, ncol=2, fontsize=9)

### Plotting with different step sizes

In [None]:
# Number of sweep steps passed to plot_per_model (length of the x-axis of each plot).
num_steps=60
# Step sizes to compare; each one gets its own subplot in the cells below.
step_sizes = [0.01, 0.1, 0.5]

#### Linear GCN

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(12, 4))

# One panel per step size: draw the curves, then title and label that panel.
for idx, ax in enumerate(axs):
    plot_per_model(model='graffgcn', num_steps=num_steps, step_size_override=step_sizes[idx], ax=ax)
    ax.set_title(f'step size {step_sizes[idx]}')
    ax.set_xlabel('Num Layers')
    ax.set_ylabel('RQ')

# A single legend, attached below the first panel, serves all three.
axs[0].legend(loc='upper center', bbox_to_anchor=(0.5, -0.18), fancybox=True, shadow=True, ncol=2, fontsize=9)

# Adjust spacing between subplots
plt.tight_layout()
plt.show()