# Requirements:

```
pip install tensorflow-gpu==2.0
pip install numpy
pip install scikit-learn
pip install Pillow
pip install scipy
pip install tqdm
```

In [2]:
import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm_notebook
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import confusion_matrix
from inspect import signature
from PIL import Image
from scipy.cluster.vq import kmeans
from multiprocessing import Pool


### Useful functions used for these experiments

In [4]:
# following the implementation of soft voting scheme
# see paper: https://arxiv.org/abs/1512.05227

def soft_distance(x, features, gamma):
    """Return the summed, exponentially decayed distance from ``x`` to ``features``.

    Args:
        x: array that broadcasts against ``features`` (one feature vector in
            the callers visible in this notebook).
        features: array of reference vectors; the distance is reduced over the
            last axis.
        gamma: decay rate applied to every distance before exponentiation.

    Returns:
        ``sum(exp(-gamma * distance))`` over all reference vectors, as a scalar.
    """
    offsets = x - features
    # sqrt(square(.)) is an element-wise absolute value, so reducing over the
    # last axis gives the L1 (Manhattan) distance per reference vector.
    # NOTE(review): if the Euclidean norm was intended, the sum has to move
    # inside the sqrt -- confirm against the soft-voting paper before changing.
    per_vector = np.sum(np.sqrt(np.square(offsets)), axis=-1)
    return np.sum(np.exp(-gamma * per_vector))

def calculate_distances(features, anchor, gamma, eps=1e-8):
    """Label every feature vector with the class that collects the largest soft vote.

    For each feature the un-normalised vote of class ``c`` is
    ``soft_distance(feature, anchor[c], gamma)``; the votes are normalised by
    their sum (plus ``eps``) and the position of the maximum is the label.

    The function only depends on its arguments (no notebook globals), so it
    can be shipped to worker processes as-is.

    Args:
        features: 2-D array, one feature vector per row.
        anchor: sequence indexed by class position; ``anchor[c]`` holds the
            anchor points of class ``c``.
        gamma: decay rate forwarded to ``soft_distance``.
        eps: small constant added to the normaliser to avoid division by zero
            when every vote underflows to 0.

    Returns:
        1-D float array of length ``features.shape[0]``. Each entry is an index
        into ``anchor``; it equals the class id only when class ids are the
        contiguous range ``0 .. len(anchor) - 1``.
    """
    num_classes = len(anchor)
    labels = np.zeros((features.shape[0],))
    for i, feature in enumerate(features):
        # One vote per class, computed exactly once per feature (the normaliser
        # is the same for every class, so there is no need to rebuild it).
        votes = np.array([soft_distance(feature, anchor[cid], gamma)
                          for cid in range(num_classes)])
        normalised = votes / (eps + votes.sum())
        labels[i] = normalised.argmax()
    return labels

def get_soft_voting(features, anchor, gamma=5.0):
    """Compute soft-voting labels for ``features`` in parallel worker processes.

    The rows of ``features`` are split into contiguous chunks, each chunk is
    labelled by ``calculate_distances`` in a process pool, and the per-chunk
    results are concatenated in the original row order.

    Args:
        features: 2-D array, one feature vector per row.
        anchor: per-class anchor points, forwarded to ``calculate_distances``.
        gamma: decay rate, forwarded to ``calculate_distances``.

    Returns:
        A list with one label per row of ``features``.
    """
    # Keep two cpus free for the rest of the machine, but never go below one
    # worker; os.cpu_count() may return None when the count is unknown.
    cpus = max(1, (os.cpu_count() or 1) - 2)

    # array_split keeps row order and copes with sizes that do not divide
    # evenly; empty chunks (fewer rows than workers) are not worth a task.
    chunks = [chunk for chunk in np.array_split(features, cpus) if len(chunk)]

    labels = []
    # NOTE(review): with the "spawn" start method (e.g. on Windows) workers
    # must be able to import calculate_distances; functions defined only in a
    # notebook cell may not be reachable there -- confirm on the target setup.
    with Pool(processes=cpus) as pool:
        results = [
            pool.apply_async(calculate_distances, args=(chunk, anchor, gamma))
            for chunk in chunks
        ]

        with tqdm_notebook(total=features.shape[0]) as pbar:
            # Collect in submission order so labels line up with the input rows.
            for result in results:
                res = result.get()
                labels.extend(res)
                pbar.update(len(res))

    return labels


def get_anchor_points(num_points):
    """Cluster the reference features of every class into k-means anchor points.

    Reads the notebook globals ``unique_classes``, ``reference_classes`` and
    ``reference_features``.

    Args:
        num_points: number of centroids requested from k-means for each class.

    Returns:
        ``np.array`` built from the per-class codebooks, in the order of
        ``unique_classes``.
    """
    codebooks = []
    progress = tqdm_notebook(unique_classes, desc='calculate anchor points')
    for cid in progress:
        # rows of the reference set that belong to the current class
        member_rows = np.nonzero(reference_classes == cid)[0]
        # kmeans returns (codebook, distortion); only the codebook is kept
        codebook = kmeans(reference_features[member_rows], num_points, iter=20)[0]
        codebooks.append(codebook)

    return np.array(codebooks)

### variable definition

- `*_features` contains the feature vectors (2048 dimensions each)
- `*_classes` contains the class ids corresponding to the features
- `*_probs` contains the softmax probabilities
- `class_mapping` contains the class name as key and the class id as value (currently only used for visualization purposes)

In [3]:
# folder where all *.npy files are located; override it without editing the
# notebook by setting the PERSONALIZED_CNN_DATA environment variable
path_to_data = os.environ.get(
    'PERSONALIZED_CNN_DATA', '/home/tschec/notebooks/Paper/Personalized_CNN')

# create log files in `current dir/`
logs = 'logs/'


def _load_array(file_name, **load_kwargs):
    """Load one ``.npy`` file from ``path_to_data``; kwargs go to ``np.load``."""
    return np.load(os.path.join(path_to_data, file_name), **load_kwargs)


# reference set (training split)
reference_features = _load_array('training_features_62_classes.npy')
reference_classes = _load_array('training_classids_62_classes.npy')
reference_probs = _load_array('training_probabilities_62_classes.npy')

# incremental set (validation split)
incremental_features = _load_array('validation_features_62_classes.npy')
incremental_classes = _load_array('validation_classids_62_classes.npy')
incremental_probs = _load_array('validation_probabilities_62_classes.npy')

# test set
test_features = _load_array('test_features_62_classes.npy')
test_classes = _load_array('test_classids_62_classes.npy')
test_probs = _load_array('test_probabilities_62_classes.npy')

# class_mapping is stored as a pickled object inside the .npy file, hence
# allow_pickle=True and .item() to unwrap it.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
class_mapping = _load_array('class_mapping_62_classes.npy', allow_pickle=True).item()
unique_classes = np.unique(reference_classes)

# create tf.data datasets for evaluation based on our features and labels
test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_classes))
test_dataset = test_dataset.batch(32)

FileNotFoundError: [Errno 2] No such file or directory: '/home/tschec/notebooks/Paper/Personalized_CNN\\training_features_62_classes.npy'

In [None]:
# run k-means on the reference features and request 10 anchor points per class
# intended result: an array of shape (62, 10, 2048) = 62 classes x 10 anchor points x 2048 dimensions
# NOTE(review): the shape relies on k-means returning all 10 centroids for every class -- confirm
anchors = get_anchor_points(10) 

In [1]:
def calculate_weights(features, name):
    """Return one sample weight per row of ``features`` for the given scheme.

    Args:
        features: array whose first dimension is the number of samples.
        name: name of the weighting scheme. Only ``'baseline'`` (every sample
            weighted 1.0) is implemented.

    Returns:
        1-D float array of length ``features.shape[0]``.

    Raises:
        ValueError: if ``name`` is not an implemented weighting scheme. This
            replaces the previous implicit ``None`` return, which only failed
            later and with an unrelated error message.
    """
    if name == 'baseline':
        return np.ones(features.shape[0])

    raise ValueError('unknown weighting scheme: {!r}'.format(name))

In [6]:
# placeholder class weighting: every class id in unique_classes gets the same
# weight for now (the value has to be the float 1.0)
class_weights = {class_id: 1.0 for class_id in unique_classes}

# this dictionary contains an experiment name and the corresponding weights;
# based on these values the experiments are executed and logged into path/to/log/dir/<experiment name>


### run experiment
- run the experiment for every weighting scheme listed in `experiment_weights_types`
- in every experiment iteration, load the pre-trained model and train it with the new weights

In [1]:
# weighting schemes to evaluate; each name is passed to calculate_weights and
# is also used for the checkpoint file name and the TensorBoard sub-directory
experiment_weights_types = ['baseline', '.9 Threshold']

# ModelCheckpoint writes directly into `logs`, so the directory must exist
os.makedirs(logs, exist_ok=True)

# ground truth for the confusion matrix; test_dataset is batched but not
# shuffled, so predictions come back in the same order as test_classes
y_true = test_classes

# confusion matrix of every finished experiment, keyed by experiment name
confusion_matrices = {}

for name in experiment_weights_types:
    # one weight per incremental sample
    weights = calculate_weights(incremental_features, name)
    if weights is None:
        raise ValueError(
            'calculate_weights returned no weights for experiment {!r}'.format(name))

    # a dataset yielding (features, labels, sample_weights) makes Keras apply
    # the weights per sample during training
    incremental_dataset = tf.data.Dataset.from_tensor_slices(
        (incremental_features, incremental_classes, weights))
    incremental_dataset = incremental_dataset.shuffle(2048).batch(32)

    # load pre-trained keras model (fresh copy for every experiment)
    model = tf.keras.models.load_model(os.path.join(path_to_data, 'submodel_62_classes.hdf5'))

    # use SGD optimizer
    optimizer = tf.keras.optimizers.SGD(learning_rate=.0001, momentum=0.9)
    # the labels are integer class ids, which requires the sparse loss variant
    # NOTE(review): switch back to 'categorical_crossentropy' if the *_classes
    # arrays turn out to be one-hot encoded
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # define callbacks
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            os.path.join(logs, '{}_model_62_classes.hdf5'.format(name)), monitor='val_accuracy',
            verbose=0, save_best_only=True, save_weights_only=False, mode='auto'),
        # you can use tensorboard --logdir path/to/log/dir to visualize the training parameters like loss and accuracy
        tf.keras.callbacks.TensorBoard(os.path.join(logs, name))
    ]

    # start training for 250 epochs; the sample weights travel inside the
    # dataset, so no class_weight / sample_weight argument is passed here
    model.fit(
        incremental_dataset,
        epochs=250,
        callbacks=callbacks,
        validation_data=test_dataset)

    # predict returns one score per class; the arg-max is the predicted class
    y_prob = model.predict(test_dataset)
    y_pred = y_prob.argmax(axis=-1)

    cm = confusion_matrix(y_true, y_pred)
    confusion_matrices[name] = cm

NameError: name 'experiment_weights' is not defined