In [6]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from random import sample
import matplotlib.pyplot as plt
import os
import cv2
from keras import layers
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from proglearn.deciders import SimpleArgmaxAverage
from proglearn.progressive_learner import ProgressiveLearner
from proglearn.transformers import (
    NeuralClassificationTransformer,
    TreeClassificationTransformer,
)
from proglearn.voters import TreeClassificationVoter, KNNClassificationVoter
from sklearn.model_selection import train_test_split
from keras.backend import clear_session 
import pandas as pd
import pickle

In [7]:
TRAIN_DATADIR = '/Users/jayantadey/Downloads/LargeFineFoodAI/train'
VAL_DATADIR = '/Users/jayantadey/Downloads/LargeFineFoodAI/Val'

CATEGORIES = list(range(20))
SAMPLE_PER_CLASS = 60
NUM_CLASS_PER_TASK = 20
IMG_SIZE = 50

In [8]:
train_X = []
train_y = []
test_X = []
test_y = []
for category in CATEGORIES:
    path = os.path.join(TRAIN_DATADIR, str(category))
    
    images = os.listdir(path)
    total_images = len(images)
    train_indx = sample(range(total_images), SAMPLE_PER_CLASS)
    test_indx = np.delete(range(total_images), train_indx)
    for ii in train_indx:
        image_data = cv2.imread(
                os.path.join(path, images[ii])
            )
        resized_image = cv2.resize(
            image_data, 
            (IMG_SIZE, IMG_SIZE)
        )
        train_X.append(
            resized_image
        )
        train_y.append(
            category
        )
    for ii in test_indx:
        image_data = cv2.imread(
                os.path.join(path, images[ii])
            )
        resized_image = cv2.resize(
            image_data, 
            (IMG_SIZE, IMG_SIZE)
        )
        test_X.append(
            resized_image
        )
        test_y.append(
            category
        )

train_X = np.array(train_X).reshape(-1,IMG_SIZE,IMG_SIZE,3)
train_y = np.array(train_y)
test_X = np.array(test_X).reshape(-1,IMG_SIZE,IMG_SIZE,3)
test_y = np.array(test_y)

In [4]:
model='synn'
default_transformer_class = NeuralClassificationTransformer

network = keras.Sequential()
network.add(
    layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        activation="relu",
        input_shape=np.shape(train_X[0]),
    )
)
network.add(layers.BatchNormalization())
network.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        strides=2,
        padding="same",
        activation="relu",
    )
)
network.add(layers.BatchNormalization())
network.add(
    layers.Conv2D(
        filters=64,
        kernel_size=(3, 3),
        strides=2,
        padding="same",
        activation="relu",
    )
)
network.add(layers.BatchNormalization())
network.add(
    layers.Conv2D(
        filters=128,
        kernel_size=(3, 3),
        strides=2,
        padding="same",
        activation="relu",
    )
)
network.add(layers.BatchNormalization())
network.add(
    layers.Conv2D(
        filters=254,
        kernel_size=(3, 3),
        strides=2,
        padding="same",
        activation="relu",
    )
)

network.add(layers.Flatten())
network.add(layers.BatchNormalization())
network.add(layers.Dense(2000, activation="relu"))
network.add(layers.BatchNormalization())
network.add(layers.Dense(2000, activation="relu"))
network.add(layers.BatchNormalization())
network.add(layers.Dense(units=NUM_CLASS_PER_TASK, activation="softmax")) 

default_transformer_kwargs = {
    "network": network,
    "euclidean_layer_idx": -2,
    "loss": "categorical_crossentropy",
    "optimizer": Adam(3e-4),
    "fit_kwargs": {
        "epochs": 200,
        "callbacks": [EarlyStopping(patience=5, monitor="val_loss")],
        "verbose": False,
        "validation_split": 0.33,
        "batch_size": 32,
    },
}
default_voter_class = KNNClassificationVoter
default_voter_kwargs = {"k": int(np.log2(500))}
default_decider_class = SimpleArgmaxAverage

progressive_learner = ProgressiveLearner(
        default_transformer_class=default_transformer_class,
        default_transformer_kwargs=default_transformer_kwargs,
        default_voter_class=default_voter_class,
        default_voter_kwargs=default_voter_kwargs,
        default_decider_class=default_decider_class,
    )

Metal device set to: Apple M1 Max


2023-03-09 21:00:26.321130: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-09 21:00:26.321246: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
progressive_learner.add_task(
            X=train_X,
            y=train_y,
            task_id=0,
            num_transformers=1 if model == "synn" else ntrees,
            transformer_voter_decider_split=[0.67, 0.33, 0],
            decider_kwargs={"classes": np.unique(train_y)},
        )

2023-03-09 21:00:26.612457: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


ValueError: in user code:

    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 890, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 948, in compute_loss
        return self.compiled_loss(
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/losses.py", line 139, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/losses.py", line 243, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/losses.py", line 1787, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/backend.py", line 5119, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 20) and (None, 10) are incompatible


In [88]:
np.mean(progressive_learner.predict(
            X=test_X, transformer_ids=[0], task_id=0
        ) == test_y)



0.2562929061784897

In [9]:
def get_data(task=0):
    train_X = []
    train_y = []
    test_X = []
    test_y = []
    
    categories_to_consider = range(task*NUM_CLASS_PER_TASK,(task+1)*NUM_CLASS_PER_TASK)
    for category in categories_to_consider:
        path = os.path.join(TRAIN_DATADIR, str(category))

        images = os.listdir(path)
        total_images = len(images)
        train_indx = sample(range(total_images), SAMPLE_PER_CLASS)
        test_indx = np.delete(range(total_images), train_indx)
        for ii in train_indx:
            image_data = cv2.imread(
                    os.path.join(path, images[ii])
                )
            resized_image = cv2.resize(
                image_data, 
                (IMG_SIZE, IMG_SIZE)
            )
            train_X.append(
                resized_image
            )
            train_y.append(
                category
            )
        for ii in test_indx:
            image_data = cv2.imread(
                    os.path.join(path, images[ii])
                )
            resized_image = cv2.resize(
                image_data, 
                (IMG_SIZE, IMG_SIZE)
            )
            test_X.append(
                resized_image
            )
            test_y.append(
                category
            )

    train_X = np.array(train_X).reshape(-1,IMG_SIZE,IMG_SIZE,3)
    train_y = np.array(train_y)
    test_X = np.array(test_X).reshape(-1,IMG_SIZE,IMG_SIZE,3)
    test_y = np.array(test_y)
    
    return train_X, train_y, test_X, test_y

In [10]:
def experiment(model='synf', ntrees=10, rep=1):
    num_tasks = 50
    tasks = []
    base_tasks = []
    accuracies_across_tasks = []
    singletask_accuracy = []
    df_multitask = pd.DataFrame()
    df_singletask = pd.DataFrame()

    if model == "synn":

        clear_session()  # clear GPU memory before each run, to avoid OOM error

        default_transformer_class = NeuralClassificationTransformer

        network = keras.Sequential()
        network.add(
            layers.Conv2D(
                filters=16,
                kernel_size=(3, 3),
                activation="relu",
                input_shape=(IMG_SIZE,IMG_SIZE,3),
            )
        )
        network.add(layers.BatchNormalization())
        network.add(
            layers.Conv2D(
                filters=32,
                kernel_size=(3, 3),
                strides=2,
                padding="same",
                activation="relu",
            )
        )
        network.add(layers.BatchNormalization())
        network.add(
            layers.Conv2D(
                filters=64,
                kernel_size=(3, 3),
                strides=2,
                padding="same",
                activation="relu",
            )
        )
        network.add(layers.BatchNormalization())
        network.add(
            layers.Conv2D(
                filters=128,
                kernel_size=(3, 3),
                strides=2,
                padding="same",
                activation="relu",
            )
        )
        network.add(layers.BatchNormalization())
        network.add(
            layers.Conv2D(
                filters=254,
                kernel_size=(3, 3),
                strides=2,
                padding="same",
                activation="relu",
            )
        )

        network.add(layers.Flatten())
        network.add(layers.BatchNormalization())
        network.add(layers.Dense(2000, activation="relu"))
        network.add(layers.BatchNormalization())
        network.add(layers.Dense(2000, activation="relu"))
        network.add(layers.BatchNormalization())
        network.add(layers.Dense(units=NUM_CLASS_PER_TASK, activation="softmax"))  # units=10

        default_transformer_kwargs = {
            "network": network,
            "euclidean_layer_idx": -2,
            "loss": "categorical_crossentropy",
            "optimizer": Adam(3e-4),
            "fit_kwargs": {
                "epochs": 100,
                "callbacks": [EarlyStopping(patience=5, monitor="val_loss")],
                "verbose": False,
                "validation_split": 0.33,
                "batch_size": 32,
            },
        }
        default_voter_class = KNNClassificationVoter
        default_voter_kwargs = {"k": int(np.log2(500))}
        default_decider_class = SimpleArgmaxAverage

    elif model == "synf":

        default_transformer_class = TreeClassificationTransformer
        default_transformer_kwargs = {"kwargs": {"max_depth": 30}}
        default_voter_class = TreeClassificationVoter
        default_voter_kwargs = {}
        default_decider_class = SimpleArgmaxAverage

    progressive_learner = ProgressiveLearner(
        default_transformer_class=default_transformer_class,
        default_transformer_kwargs=default_transformer_kwargs,
        default_voter_class=default_voter_class,
        default_voter_kwargs=default_voter_kwargs,
        default_decider_class=default_decider_class,
    )
    
    test_x_task = []
    test_y_task = []
    for task in range(num_tasks):
        print("doing task ", task)
        
        train_x, train_y, test_x, test_y = get_data(task)
        train_x = train_x.reshape(-1, 3*IMG_SIZE*IMG_SIZE)
        test_x = test_x.reshape(-1, 3*IMG_SIZE*IMG_SIZE)
        
        test_x_task.append(
            test_x
        )
        test_y_task.append(
            test_y
        )
        progressive_learner.add_task(
            X=train_x,
            y=train_y,
            task_id=task,
            num_transformers=1 if model == "synn" else ntrees,
            transformer_voter_decider_split=[0.67, 0.33, 0],
            decider_kwargs={"classes": np.unique(train_y)},
        )

        singletask_prediction = progressive_learner.predict(
            X=test_x, transformer_ids=[task], task_id=task
        )
        singletask_accuracy.append(
            np.mean(singletask_prediction==test_y)
        )
        print('accuracy ',np.mean(singletask_prediction==test_y))
        for ii in range(task+1):
            multitask_prediction = progressive_learner.predict(
                X=test_x_task[ii], task_id=ii
            )
            acc = np.mean(multitask_prediction==test_y_task[ii])
            print('task ',ii,' accuracy ', acc)
            base_tasks.append(task+1)
            tasks.append(ii+1)
            accuracies_across_tasks.append(
                np.mean(multitask_prediction == test_y_task[ii])
            )

    df_multitask['task'] = tasks
    df_multitask['base_task'] = base_tasks
    df_multitask['accuracy'] = accuracies_across_tasks

    df_singletask['task'] = list(range(1,num_tasks+1))
    df_singletask['accuracy'] = singletask_accuracy

    summary = (df_multitask, df_singletask)

    with open('results/'+model+'_'+str(rep)+'.pickle', 'wb') as f:
        pickle.dump(summary, f)


In [8]:
reps = 1

for ii in range(reps):
    experiment(model='synn',rep=ii)

doing task  0


ValueError: in user code:

    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/jayantadey/miniforge3/envs/env/lib/python3.9/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 50, 50, 3), found shape=(None, 7500)


In [None]:
reps = 1

for ii in range(reps):
    experiment(model='synf',rep=ii)

doing task  0
accuracy  0.25224702812409394
task  0  accuracy  0.25224702812409394
doing task  1
accuracy  0.23744389826886086
task  0  accuracy  0.27747173093650335
task  1  accuracy  0.2624492412908741
doing task  2
accuracy  0.28204165884781385
task  0  accuracy  0.2922586256886054
task  1  accuracy  0.27035691387048516
task  2  accuracy  0.31581910302120475
doing task  3
accuracy  0.26615129762562123
task  0  accuracy  0.3000869817338359
task  1  accuracy  0.2814703996580466
task  2  accuracy  0.3336460874460499
task  3  accuracy  0.30922142462727775
doing task  4
accuracy  0.2567635691480423
task  0  accuracy  0.30820527689185273
task  1  accuracy  0.29194272280401795
task  2  accuracy  0.34396697316569713
task  3  accuracy  0.3210012884226026
task  4  accuracy  0.30532683582591164
doing task  5
accuracy  0.2075320512820513
task  0  accuracy  0.3105247897941432
task  1  accuracy  0.29814062833938876
task  2  accuracy  0.35747795083505346
task  3  accuracy  0.3335173937051353
task 

task  0  accuracy  0.32850101478689475
task  1  accuracy  0.3291301560162428
task  2  accuracy  0.38975417526740475
task  3  accuracy  0.3556046383213694
task  4  accuracy  0.32330700722567635
task  5  accuracy  0.2770432692307692
task  6  accuracy  0.30833333333333335
task  7  accuracy  0.3261620185922975
task  8  accuracy  0.32498577120091066
task  9  accuracy  0.4553873955274452
task  10  accuracy  0.3645110763034236
task  11  accuracy  0.37840154274694665
task  12  accuracy  0.4143615005683971
task  13  accuracy  0.31645569620253167
task  14  accuracy  0.2818565400843882
task  15  accuracy  0.3765952844473286
task  16  accuracy  0.4336442480772378
task  17  accuracy  0.4535416666666667
task  18  accuracy  0.37274774774774777
task  19  accuracy  0.3505261342073486
doing task  20
accuracy  0.2952747487705794
task  0  accuracy  0.334299797042621
task  1  accuracy  0.3338320153879034
task  2  accuracy  0.392568962281854
task  3  accuracy  0.3563408798085772
task  4  accuracy  0.3249873

task  15  accuracy  0.3845987454034177
task  16  accuracy  0.4329896907216495
task  17  accuracy  0.45645833333333335
task  18  accuracy  0.37875375375375375
task  19  accuracy  0.3563912368466448
task  20  accuracy  0.4060295060936498
task  21  accuracy  0.30132137800849457
task  22  accuracy  0.3459141274238227
task  23  accuracy  0.3301997649823737
task  24  accuracy  0.3067985166872682
task  25  accuracy  0.2908407382091593
task  26  accuracy  0.37820653365490026
task  27  accuracy  0.40559250712637435
doing task  28
accuracy  0.27055130468903277
task  0  accuracy  0.3314004059147579
task  1  accuracy  0.33340457362684334
task  2  accuracy  0.3910677425408144
task  3  accuracy  0.3664642002576845
task  4  accuracy  0.32733994286674506
task  5  accuracy  0.2802483974358974
task  6  accuracy  0.31893939393939397
task  7  accuracy  0.32775564409030544
task  8  accuracy  0.3284006829823563
task  9  accuracy  0.4583239213914615
task  10  accuracy  0.3714477511747595
task  11  accuracy  

task  1  accuracy  0.3361829450737337
task  2  accuracy  0.40026271345468195
task  3  accuracy  0.3679366832321001
task  4  accuracy  0.332381112418081
task  5  accuracy  0.28104967948717946
task  6  accuracy  0.3212121212121212
task  7  accuracy  0.3280212483399734
task  8  accuracy  0.3352305065452476
task  9  accuracy  0.4578721481816128
task  10  accuracy  0.3786081897516223
task  11  accuracy  0.38290122134133275
task  12  accuracy  0.422887457370216
task  13  accuracy  0.32004534290572456
task  14  accuracy  0.2795358649789029
task  15  accuracy  0.39043910880380706
task  16  accuracy  0.43380788741613485
task  17  accuracy  0.45625
task  18  accuracy  0.38044294294294295
task  19  accuracy  0.35604623080903913
task  20  accuracy  0.40838144109471886
task  21  accuracy  0.3067484662576687
task  22  accuracy  0.34383656509695293
task  23  accuracy  0.33137485311398357
task  24  accuracy  0.3096415327564895
task  25  accuracy  0.29254955570745045
task  26  accuracy  0.3860995395746

task  21  accuracy  0.30910806984426614
task  22  accuracy  0.344702216066482
task  23  accuracy  0.33294163728946335
task  24  accuracy  0.31409147095179235
task  25  accuracy  0.2966507177033493
task  26  accuracy  0.38566103924577944
task  27  accuracy  0.405456766662142
task  28  accuracy  0.3405925053953306
task  29  accuracy  0.3101408940685337
task  30  accuracy  0.48640167364016734
task  31  accuracy  0.3123916811091854
task  32  accuracy  0.3400942429829953
task  33  accuracy  0.4262166405023548
task  34  accuracy  0.4258847320525784
task  35  accuracy  0.3967030614429458
task  36  accuracy  0.31376919266963843
task  37  accuracy  0.34541658758777755
task  38  accuracy  0.344595702804658
task  39  accuracy  0.3612877583465819
doing task  40
accuracy  0.26522961574507964
task  0  accuracy  0.33371991881704843
task  1  accuracy  0.3376789912374439
task  2  accuracy  0.40101332332520173
task  3  accuracy  0.37345849438615863
task  4  accuracy  0.3335573853133927
task  5  accuracy

task  16  accuracy  0.4396989036164294
task  17  accuracy  0.46145833333333336
task  18  accuracy  0.38757507507507505
task  19  accuracy  0.35397619458340523
task  20  accuracy  0.4128714988240325
task  21  accuracy  0.31170363378952337
task  22  accuracy  0.34227839335180055
task  23  accuracy  0.3372502937720329
task  24  accuracy  0.3138442521631644
task  25  accuracy  0.29442925495557076
task  26  accuracy  0.3900460425345319
task  27  accuracy  0.4053210261979096
task  28  accuracy  0.3409848930743575
task  29  accuracy  0.31240215689685163
task  30  accuracy  0.48779637377963736
task  31  accuracy  0.3121750433275563
task  32  accuracy  0.3437820118828109
task  33  accuracy  0.42680533751962324
task  34  accuracy  0.4268958543983822
task  35  accuracy  0.3997002783129951
task  36  accuracy  0.3150074294205052
task  37  accuracy  0.34731448092617195
task  38  accuracy  0.3485320649499754
task  39  accuracy  0.36069157392686807
task  40  accuracy  0.3425492033739456
task  41  accu

task  35  accuracy  0.3984157567972597
task  36  accuracy  0.31104507181773156
task  37  accuracy  0.35092047826912126
task  38  accuracy  0.34804001968181075
task  39  accuracy  0.3622813990461049
task  40  accuracy  0.34418931583880036
task  41  accuracy  0.3654911838790932
task  42  accuracy  0.38789376158122296
task  43  accuracy  0.3980754124116261
task  44  accuracy  0.3703500091625435
task  45  accuracy  0.4351528384279476
task  46  accuracy  0.3443267776096823
task  47  accuracy  0.3600930773705643
task  48  accuracy  0.3066494512588767
doing task  49
accuracy  0.2617741935483871
task  0  accuracy  0.3331400405914758
task  1  accuracy  0.3430220132506946
task  2  accuracy  0.4021392381309814
task  3  accuracy  0.3732744340143567
task  4  accuracy  0.33994286674508484
task  5  accuracy  0.28405448717948717
task  6  accuracy  0.3231060606060606
task  7  accuracy  0.3354581673306773
task  8  accuracy  0.34889015367103016
task  9  accuracy  0.4583239213914615
task  10  accuracy  0.