# Colab

In [None]:
# Optional Google Colab bootstrap, kept commented out so that local runs skip it.
# When uncommented, these lines mount Google Drive, change into the project
# folder, delete any existing ./tf2-gnn checkout, clone the `sudoku` branch of
# the tf2-gnn fork and pip-install it.
# from google.colab import drive
# drive.mount('/content/gdrive')

# %cd gdrive/My\ Drive/Colab\ Notebooks/gnn/sudoku-test

# !rm -r ./tf2-gnn
# !git clone --branch sudoku https://github.com/oliverdutton/tf2-gnn.git 

# !pip install ./tf2-gnn

# Imports

In [1]:
import tensorflow as tf
import tf2_gnn as gnn
import numpy as np
from dpu_utils.utils import RichPath

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Dataset

### Create dataset structure to load data into

In [3]:
# Start from the Sudoku dataset's default hyperparameters; the bare `params`
# on the last line displays the dict as the cell output.
params = gnn.data.SudokuDataset.get_default_hyperparameters()
params

{'max_nodes_per_batch': 2700,
 'add_self_loop_edges': True,
 'tie_fwd_bkwd_edges': True}

In [4]:
# Lower the per-batch node budget (the recorded default is 2700).
params['max_nodes_per_batch'] = 2600

In [5]:
# Create the Sudoku dataset object from the adjusted hyperparameters.
dataset = gnn.data.SudokuDataset(params)

### Load the data

In [6]:
# Show the current working directory; the relative paths used later in this
# notebook (e.g. ./best_models/log.txt) resolve against it.
!pwd

/Users/personal/Documents/Sudoku/tf2-gnn


In [7]:
# Location of the Sudoku data, wrapped in a RichPath and later handed to
# `dataset.load_data`.
# NOTE(review): the active line is an absolute, machine-specific path; the
# commented-out relative alternative is kept for other environments.
# path = RichPath.create('./data/')
path = RichPath.create('/Users/personal/Documents/Sudoku/data/rrn-mini-data')

In [8]:
# Load the data found at `path` into the dataset object.
dataset.load_data(path)

In [9]:
# Get the TensorFlow dataset for the training fold, with worker threads
# disabled; it is only used by the scratch cells in the Miscellaneous section.
tf_dataset = dataset.get_tensorflow_dataset(gnn.DataFold.TRAIN, use_worker_threads=False)

# Build model

In [10]:
# Hyperparameters for the edge-MLP GNN: take the task defaults, then apply all
# overrides in one update (same keys, values and order as assigning one by one).
params = gnn.models.NodeMulticlassTask.get_default_hyperparameters('gnn_edge_mlp')
params.update({
    # Message-passing architecture.
    'gnn_hidden_dim': 96,
    'gnn_num_edge_MLP_hidden_layers': 3,
    'gnn_num_layers': 16,
    'gnn_share_weights_between_mlps': True,
    "gnn_message_activation_function": "gelu",
    # -1 / 1e5 intervals: presumably chosen so these features never trigger
    # within the configured depth -- TODO confirm against tf2_gnn.
    'gnn_dense_every_num_layers': -1,
    'gnn_residual_every_num_layers': 1e5,
    'gnn_global_exchange_every_num_layers': 1e5,
    # Task-level options.
    'use_intermediate_gnn_results': True,
    'loss_at_every_layer': True,
})
params

{'gnn_aggregation_function': 'sum',
 'gnn_message_activation_function': 'gelu',
 'gnn_hidden_dim': 96,
 'gnn_use_target_state_as_input': True,
 'gnn_normalize_by_num_incoming': False,
 'gnn_num_edge_MLP_hidden_layers': 3,
 'gnn_message_calculation_class': 'gnn_edge_mlp',
 'gnn_initial_node_representation_activation': 'tanh',
 'gnn_dense_intermediate_layer_activation': 'tanh',
 'gnn_num_layers': 16,
 'gnn_dense_every_num_layers': -1,
 'gnn_residual_every_num_layers': 100000.0,
 'gnn_use_inter_layer_layernorm': False,
 'gnn_layer_input_dropout_rate': 0.0,
 'gnn_global_exchange_mode': 'gru',
 'gnn_global_exchange_every_num_layers': 100000.0,
 'gnn_global_exchange_weighting_fun': 'softmax',
 'gnn_global_exchange_num_heads': 4,
 'gnn_global_exchange_dropout_rate': 0.2,
 'gnn_share_weights_between_mlps': True,
 'optimizer': 'Adam',
 'learning_rate': 0.001,
 'learning_rate_decay': 0.98,
 'momentum': 0.85,
 'gradient_clip_value': None,
 'gradient_clip_global_norm': None,
 'use_intermediate_gnn

In [11]:
# Instantiate the node-level multiclass task model from the current `params`
# and the loaded dataset.
model = gnn.NodeMulticlassTask(params,dataset)

In [12]:
# Build model
input_shapes = dataset.get_batch_tf_data_description().batch_features_shapes
model.build(input_shapes)

# Inspect model
# ly = model.layers[1]
for ly in model.layers:
    for v,w in zip(ly.variables, ly.get_weights()):
        print(w.shape, v.name)

(96, 9) NodeMulticlassTask/projection_to_classes/kernel:0
(9,) NodeMulticlassTask/projection_to_classes/bias:0
(192, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_0/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_1/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_2/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_0/MLP_final_layer/kernel:0
(192, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_0/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_1/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_2/kernel:0
(96, 96) GNN_Edge_MLP_GNN/Layer_0/MessagePassing/edge_type_1/MLP_final_layer/kernel:0
(10, 96) GNN_Edge_MLP_GNN/gnn_initial_node_projection/kernel:0


In [13]:
# Print the per-layer parameter-count summary of the built model.
model.summary()

Model: "node_multiclass_task"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  873       
_________________________________________________________________
gnn (GNN)                    multiple                  93120     
Total params: 93,993
Trainable params: 93,993
Non-trainable params: 0
_________________________________________________________________


# Load Model

In [15]:
# Restore previously saved weights from the HDF5 file into the built model.
# NOTE(review): absolute, machine-specific checkpoint path -- adjust per environment.
gnn.load_weights_verbosely(
    save_file='/Users/personal/Documents/Sudoku/tf2-gnn/best_models/colab_best.hdf5',
    model=model
)

# Train Model

In [16]:
def log_info(x):
  """Print a log message and append it, newline-terminated, to ./best_models/log.txt.

  Args:
    x: Message text. It is concatenated with a newline character before being
      written, so it must be a str.

  Raises:
    TypeError: if x is not a str (raised by the concatenation, after printing).
    OSError: if the log file cannot be opened for appending, e.g. because the
      ./best_models directory does not exist.
  """
  print(x)
  # Use a context manager so the handle is flushed and closed on every call;
  # the previous open(...).write(...) form never closed the file explicitly.
  with open("./best_models/log.txt","a") as log_file:
    log_file.write(x+'\n')

In [17]:
# Train the current model for a single epoch. The call stays the last
# expression so its return value is shown as the cell output (the recorded run
# returned './4_best.pkl').
train_options = dict(
    log_fun=log_info,
    run_id=4,
    max_epochs=1,
    patience=48,
    save_dir='./',
    quiet=False,
    aml_run=None,
    use_worker_threads=False,
)
gnn.train(model, dataset, **train_options)



[KInitial valid metric: Avg DigitAccuracy: 0.969, AvgSudokuAccuracy: 0.815, Avg MicroF1: 0.970.per sec = 0.47818
   (Stored model metadata to ./4_best.pkl and weights to ./4_best.hdf5)
== Epoch 1
[K Train:  0.0213 loss | Avg DigitAccuracy: 0.950, AvgSudokuAccuracy: 0.685, Avg MicroF1: 0.951 | 4.08 graphs/s1
[K Valid:  0.0201 loss | Avg DigitAccuracy: 0.958, AvgSudokuAccuracy: 0.755, Avg MicroF1: 0.959 | 23.30 graphs/s


'./4_best.pkl'

In [20]:
# Evaluate the current model on the test data, reporting through log_info.
gnn.test(model, dataset, log_fun=log_info, use_worker_threads=False)

== Running on test dataset
[KAvg DigitAccuracy: 0.953, AvgSudokuAccuracy: 0.748, Avg MicroF1: 0.955ss = 0.04893  |  Steps per sec = 0.69353


# Miscellaneous

In [None]:
# Scratch: pull the first element of the training dataset for ad-hoc inspection.
datum = list(tf_dataset.take(1))[0]

In [None]:
# Scratch: forward pass with training disabled on the element's first
# component (presumably the batch features -- TODO confirm).
model.call(datum[0], training=False)

In [None]:
# Scratch: snapshot the model's current weight values.
w = model.get_weights()

In [None]:
# Scratch: write the weight snapshot `w` back into the model.
model.set_weights(w)

In [None]:
# Scratch: display the element's second component (presumably the labels --
# TODO confirm).
datum[1]

In [None]:
# Scratch: grab the model's second layer for interactive inspection (the
# recorded summaries list the GNN layer second).
ly = model.layers[1]

In [22]:
# Evaluate the saved weights at several message-passing depths on the test data.
# Every depth gets its own copy of the hyperparameters and its own model object,
# so the notebook-level `params` and `model` stay exactly as earlier cells left
# them and re-running other cells afterwards is not affected by this sweep.
# NOTE(review): the recorded run was interrupted at 128 layers after the epoch
# loss blew up; consider trimming the list of depths.
for num_layers in [4,8,16,32,64,128,256]:
    print(f"\n{num_layers}")
    # Shallow copy with only the depth overridden; nothing else is modified.
    depth_params = dict(params, gnn_num_layers=num_layers)
    depth_model = gnn.NodeMulticlassTask(depth_params, dataset)
    # Build model
    input_shapes = dataset.get_batch_tf_data_description().batch_features_shapes
    depth_model.build(input_shapes)
    # Load
    # NOTE(review): absolute, machine-specific checkpoint path.
    gnn.load_weights_verbosely(
        save_file='/Users/personal/Documents/Sudoku/tf2-gnn/best_models/colab_best.hdf5',
        model=depth_model
    )
    gnn.test(
        depth_model,
        dataset,
        log_fun=log_info,
        use_worker_threads=False,
    )


4
== Running on test dataset




[KAvgDigitAccuracy: 0.816, AvgSudokuAccuracy: 0.017, Avg MicroF1: 0.820oss = 0.08775  |  Steps per sec = 1.87278

8
== Running on test dataset
[KAvgDigitAccuracy: 0.918, AvgSudokuAccuracy: 0.438, Avg MicroF1: 0.920oss = 0.06689  |  Steps per sec = 0.99738

16
== Running on test dataset
[KAvgDigitAccuracy: 0.966, AvgSudokuAccuracy: 0.800, Avg MicroF1: 0.967oss = 0.05156  |  Steps per sec = 0.50819

32
== Running on test dataset
[KStep:   12  |  Epoch graph avg. loss = 0.01520  |  Batch graph avg. loss = 0.04987  |  Steps per sec = 0.25319AvgDigitAccuracy: 0.971, AvgSudokuAccuracy: 0.877, Avg MicroF1: 0.972

64
== Running on test dataset
[KStep:   12  |  Epoch graph avg. loss = 0.02970  |  Batch graph avg. loss = 0.16383  |  Steps per sec = 0.09314AvgDigitAccuracy: 0.965, AvgSudokuAccuracy: 0.865, Avg MicroF1: 0.966

128
== Running on test dataset
[KStep:   12  |  Epoch graph avg. loss = 548414100464467968.00000  |  Batch graph avg. loss = 1.71314  |  Steps per sec = 0.037261 per s

KeyboardInterrupt: 

# FiLM Model

In [21]:
# Hyperparameters for the GNN-FiLM variant: take the task defaults, then apply
# all overrides in one update (same keys, values and order as assigning one by one).
params = gnn.models.NodeMulticlassTask.get_default_hyperparameters('GNN_FiLM')
params.update({
    # Message-passing architecture.
    'gnn_hidden_dim': 96,
    'gnn_num_edge_MLP_hidden_layers': 3,
    'gnn_num_layers': 16,
    'gnn_share_weights_between_mlps': True,
    "gnn_message_activation_function": "gelu",
    # -1 / 1e5 intervals: presumably chosen so these features never trigger
    # within the configured depth -- TODO confirm against tf2_gnn.
    'gnn_dense_every_num_layers': -1,
    'gnn_residual_every_num_layers': 1e5,
    'gnn_global_exchange_every_num_layers': 1e5,
    # Task-level options.
    'use_intermediate_gnn_results': True,
    'loss_at_every_layer': True,
})
params

{'gnn_aggregation_function': 'sum',
 'gnn_message_activation_function': 'gelu',
 'gnn_hidden_dim': 96,
 'gnn_use_target_state_as_input': False,
 'gnn_normalize_by_num_incoming': False,
 'gnn_num_edge_MLP_hidden_layers': 3,
 'gnn_film_parameter_MLP_hidden_layers': [],
 'gnn_message_calculation_class': 'GNN_FiLM',
 'gnn_initial_node_representation_activation': 'tanh',
 'gnn_dense_intermediate_layer_activation': 'tanh',
 'gnn_num_layers': 16,
 'gnn_dense_every_num_layers': -1,
 'gnn_residual_every_num_layers': 100000.0,
 'gnn_use_inter_layer_layernorm': False,
 'gnn_layer_input_dropout_rate': 0.0,
 'gnn_global_exchange_mode': 'gru',
 'gnn_global_exchange_every_num_layers': 100000.0,
 'gnn_global_exchange_weighting_fun': 'softmax',
 'gnn_global_exchange_num_heads': 4,
 'gnn_global_exchange_dropout_rate': 0.2,
 'gnn_share_weights_between_mlps': True,
 'optimizer': 'Adam',
 'learning_rate': 0.001,
 'learning_rate_decay': 0.98,
 'momentum': 0.85,
 'gradient_clip_value': None,
 'gradient_clip_

In [22]:
# Instantiate the task model from the FiLM hyperparameters; this rebinds
# `model`, replacing the model object created earlier in the notebook.
model = gnn.NodeMulticlassTask(params,dataset)

In [23]:
# Build model
input_shapes = dataset.get_batch_tf_data_description().batch_features_shapes
model.build(input_shapes)

# Inspect model
# ly = model.layers[1]
for ly in model.layers:
    for v,w in zip(ly.variables, ly.get_weights()):
        print(w.shape, v.name)

(96, 9) NodeMulticlassTask/projection_to_classes/kernel:0
(9,) NodeMulticlassTask/projection_to_classes/bias:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_0/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_1/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_0/MLP_dense_layer_2/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_0/MLP_final_layer/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_0/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_1/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_1/MLP_dense_layer_2/kernel:0
(96, 96) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_1/MLP_final_layer/kernel:0
(96, 192) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_0-FiLM/MLP_final_layer/kernel:0
(96, 192) GNN_FiLM_GNN/Layer_0/MessagePassing/edge_type_1-FiLM/MLP_final_layer/kernel:0
(10, 96) GNN_FiLM_GNN/gnn_initial_node_projec

In [24]:
# Print the per-layer parameter-count summary of the FiLM model.
model.summary()

Model: "node_multiclass_task_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              multiple                  873       
_________________________________________________________________
gnn_1 (GNN)                  multiple                  111552    
Total params: 112,425
Trainable params: 112,425
Non-trainable params: 0
_________________________________________________________________
