In [684]:
%load_ext autoreload
%autoreload 2

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import matplotlib.pyplot as plt

from models import *
from utils import *

import warnings
from scipy.sparse import SparseEfficiencyWarning
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=SparseEfficiencyWarning) 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [578]:
from os import listdir

class ModelEval():
    def __init__(self, print_freq=10, lr=0.001):
        super(ModelEval, self).__init__()

        self.print_freq = print_freq
        self.tasks = load_tasks()
        self.prior_weights = dict()
        self.fisher_matrix = dict()
        self.optim = Adam(learning_rate=lr)
        self.loss_fn = CategoricalCrossentropy()
    
    def train(self, task_num, epochs=20, continual_learning=False, lambda_=0.1, verbose=True):
        
        (x, adj, y), (mask_tr, _, _) = self.tasks[task_num]

        # initialize model if not already trained
        if not continual_learning:
            self.model = GNNNodeClassifier()
        
        accuracy = CategoricalAccuracy()
        
        for i in range(epochs):
            with tf.GradientTape() as tape:
                pred = self.model([x, adj])
                loss = self.loss_fn(y[mask_tr], pred[mask_tr])

                # if continual learning and EWC is enabled add l2 loss wrt previous weights
                if continual_learning:
                    loss += penalty_loss(self.fisher_matrix[task_num-1], self.model.get_weights(),
                                            self.prior_weights[task_num-1], lambda_=lambda_)
                
            accuracy.update_state(y_pred=pred, y_true=y)
            gradients = tape.gradient(loss, self.model.trainable_variables)
            self.optim.apply_gradients(zip(gradients, self.model.trainable_variables))

            acc = accuracy.result().numpy()
            
            if i % self.print_freq == 0 and verbose:
                print(f"epoch={i}, train_loss={loss:.3f}, train_acc={acc:.3f}")

        self.prior_weights[task_num] = self.model.trainable_variables
        self.fisher_matrix[task_num] = apx_fisher(self.model, [x, adj], self.model.trainable_variables)
    
    def test(self, task_num, verbose=False):
        (x, adj, y), (_, _, mask_te) = self.tasks[task_num]

        pred = self.model([x, adj])
        accuracy = CategoricalAccuracy()
        accuracy.update_state(y_pred=pred[mask_te], y_true=y[mask_te])
        
        return accuracy.result().numpy()

In [689]:
x.shape

(854, 3703)

In [690]:
compute_fisher(model, [x,adj], y, mask_tr)



ResourceExhaustedError:  OOM when allocating tensor with shape[512,3703,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node gradient_tape/MatMul_11/pfor/transpose_1
 (defined at C:\Users\Uni\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\parallel_for\control_flow_ops.py:191)
]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_f_6989466]

Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/MatMul_11/pfor/transpose_1:
In[0] gradient_tape/MatMul_11/pfor/Reshape_5:	
In[1] gradient_tape/MatMul_11/pfor/transpose_1/perm:

Operation defined at: (most recent call last)
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
>>>     app.start()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
>>>     self.io_loop.start()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\asyncio\windows_events.py", line 316, in run_forever
>>>     super().run_forever()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\asyncio\events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\ioloop.py", line 688, in <lambda>
>>>     lambda f: self._run_callback(functools.partial(callback, future))
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\ioloop.py", line 741, in _run_callback
>>>     ret = callback()
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\gen.py", line 814, in inner
>>>     self.ctx_run(self.run)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\gen.py", line 775, in run
>>>     yielded = self.gen.send(value)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\kernelbase.py", line 358, in process_one
>>>     yield gen.maybe_future(dispatch(*args))
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\gen.py", line 234, in wrapper
>>>     yielded = ctx_run(next, result)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\kernelbase.py", line 261, in dispatch_shell
>>>     yield gen.maybe_future(handler(stream, idents, msg))
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\gen.py", line 234, in wrapper
>>>     yielded = ctx_run(next, result)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\kernelbase.py", line 536, in execute_request
>>>     self.do_execute(
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\tornado\gen.py", line 234, in wrapper
>>>     yielded = ctx_run(next, result)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\ipkernel.py", line 302, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\ipykernel\zmqshell.py", line 539, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\interactiveshell.py", line 2894, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\interactiveshell.py", line 3165, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\interactiveshell.py", line 3357, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "C:\Users\Marco\AppData\Local\Programs\Python\Python39\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "<ipython-input-690-4a91b0285af6>", line 1, in <module>
>>>     compute_fisher(model, [x,adj], y, mask_tr)
>>> 
>>>   File "c:\Users\Uni\Desktop\TwitchGNN\utils.py", line 19, in compute_fisher
>>>     gradients = tape.jacobian(loss, model.trainable_variables)
>>> 
>>>   File "C:\Users\Uni\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\parallel_for\control_flow_ops.py", line 191, in f
>>>     parallel_iterations=parallel_iterations)
>>> 

In [682]:
fisher_diag = tf.reduce_mean(flatted_gradients**2, axis=0)

In [645]:
for i in np.where(mask_tr[:10])[0]:
    with tf.GradientTape() as tape:
        model([x[i].reshape(-1,1), a])
        loss = loss_fn(y[i], pred[i])

    print(loss)
    gradients = tape.gradient(loss, model.trainable_variables)
    print(gradients)

InvalidArgumentError: Exception encountered when calling layer "gcn_conv_654" (type GCNConv).

Matrix size-incompatible: In[0]: [3703,1], In[1]: [3703,64] [Op:MatMul]

Call arguments received:
  • inputs=['tf.Tensor(shape=(3703, 1), dtype=float32)', '<tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x000002ACA662FDF0>']
  • mask=None

array([0, 1, 2, 3, 4, 5, 6, 7, 9], dtype=int64)

In [579]:
model_eval = ModelEval()
n_tasks = 3
epochs = 80

for i in range(n_tasks):
    model_eval.train(i, epochs=epochs, continual_learning=False, verbose=False)
    acc = model_eval.test(i, verbose=False)
    print(f"task={i}, test_acc={acc:.3f}")

Pre-processing node features
task=0, test_acc=0.770
task=1, test_acc=0.866
task=2, test_acc=0.868


In [511]:
model_eval = ModelEval()
epochs = epochs
model_eval.train(0, epochs=epochs, verbose=False)
model_eval.train(1, epochs=epochs, verbose=False)
acc0 = model_eval.test(0, verbose=False)
acc1 = model_eval.test(1, verbose=False)
print("Accuracy after training on two tasks without CL")
print(f"task=0, test_acc={acc0:.3f}")
print(f"task=1, test_acc={acc1:.3f}")

Pre-processing node features
Accuracy after training on two tasks without CL
task=0, test_acc=0.318
task=1, test_acc=0.861


In [535]:
model_eval = ModelEval(print_freq=25)
epochs = 200
model_eval.train(0, epochs=epochs, verbose=False)
acc0 = model_eval.test(0, verbose=False)
print(f"task=0, test_acc={acc0:.3f}")

model_eval.train(1, epochs=epochs, continual_learning=True, verbose=True, lambda_=1)
acc0 = model_eval.test(0, verbose=False)
acc1 = model_eval.test(1, verbose=False)
print("Accuracy after training on two tasks with CL and EWC")
print(f"task=0, test_acc={acc0:.3f}")
print(f"task=1, test_acc={acc1:.3f}")

Pre-processing node features
task=0, test_acc=0.796
epoch=0, train_loss=6.628, train_acc=0.489
epoch=25, train_loss=0.441, train_acc=0.608
epoch=50, train_loss=0.339, train_acc=0.717
epoch=75, train_loss=0.287, train_acc=0.764
epoch=100, train_loss=0.253, train_acc=0.792
epoch=125, train_loss=0.226, train_acc=0.812
epoch=150, train_loss=0.203, train_acc=0.826
epoch=175, train_loss=0.178, train_acc=0.837
Accuracy after training on two tasks with CL and EWC
task=0, test_acc=0.318
task=1, test_acc=0.844


In [550]:
model_eval.fisher_matrix[0][0].shape

TensorShape([3703, 64])

x.shape

In [561]:
x, adj = model_eval.tasks[0][0][:2]
