In [1]:
import pickle
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization,Input, GlobalAveragePooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical
from keras.datasets import cifar100
from tensorflow.random import set_seed
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
# from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
import cv2
import tensorflow as tf
import tensorflow.keras.backend as K 
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
# Build a lookup from CIFAR-100 fine label -> coarse label.
# The dataset is loaded twice (fine labels, then coarse labels); pairing the
# two label arrays by position assumes both loads return samples in the same order.
(fx, fy), (fxx, fyy) = cifar100.load_data()
(cx, cy), (cxx, cyy) = cifar100.load_data(label_mode='coarse')

# Each label is a length-1 row, hence the [0] to get the scalar id.
fine_to_coarse = {fine[0]: coarse[0] for fine, coarse in zip(fy, cy)}

# The rest of the notebook works with the fine-label version of the data.
x_train, y_train = fx, fy
x_test, y_test = fxx, fyy

print(len(fine_to_coarse))
print(len(set(fine_to_coarse.values())))

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

del fx, fy, fxx, fyy

100
20
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)


In [3]:
# Resize every image to the 224x224 input size used for the ResNet50 backbone
# and apply the ResNet50 `preprocess_input` transform.
# NOTE: x_train / x_test are overwritten in place, so re-running this cell
# without re-running the loading cell would resize already-processed arrays.
target_size = (224, 224)
num_fine_classes = 100   # width of the one-hot labels (CIFAR-100 fine classes)
resnet_split = 25000     # rows [0, resnet_split) fit the ResNet; the rest are held in reserve

x_train = np.array([preprocess_input(cv2.resize(img, target_size)) for img in x_train])
x_test = np.array([preprocess_input(cv2.resize(img, target_size)) for img in x_test])

print(x_train.shape, x_test.shape)

# num_classes is passed explicitly: without it to_categorical infers the width
# from the largest label present in each slice, so the two halves are not
# guaranteed to get the same number of columns.
x_train_resnet = x_train[:resnet_split]
y_train_resnet = to_categorical(y_train[:resnet_split], num_classes=num_fine_classes)
x_train_reserve = x_train[resnet_split:]
y_train_reserve = to_categorical(y_train[resnet_split:], num_classes=num_fine_classes)

(50000, 224, 224, 3) (10000, 224, 224, 3)


In [4]:
# Get an ImageNet-pretrained ResNet50 (without its classification top) and put
# a small dense head on it; the resulting model is used for embedding.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# No backbone layers are frozen here. To freeze them instead:
#     for layer in base_model.layers:
#         layer.trainable = False

# Head: global average pooling -> (Dense + Dropout) x 2 -> 100-way softmax.
head = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 512):
    head = Dense(units, activation="relu")(head)
    head = Dropout(0.25)(head)
class_probs = Dense(100, activation="softmax")(head)

embd_model = Model(inputs=base_model.inputs, outputs=class_probs)
embd_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The reserve half of the training data doubles as the validation set.
embd_model.fit(
    x_train_resnet, y_train_resnet,
    validation_data=(x_train_reserve, y_train_reserve),
    epochs=10, batch_size=48, shuffle=True, verbose=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ffa82739580>

In [5]:

# Truncate the fitted classifier at its third-from-last layer and use the
# truncated model to turn images into feature vectors.
embedding_layer = embd_model.layers[-3]
trained_embed_model = Model(inputs=embd_model.input, outputs=embedding_layer.output)

# NOTE: x_train_reserve / x_test are replaced by the model outputs and
# y_train_reserve goes from one-hot rows to integer class ids, so this cell
# must not be re-run on its own.
x_train_reserve = trained_embed_model.predict(x_train_reserve)
x_test = trained_embed_model.predict(x_test)
y_train_reserve = np.argmax(y_train_reserve, axis=1)

print(x_train_reserve.shape, y_train_reserve.shape, x_test.shape)

(25000, 512) (25000,) (10000, 512)


In [14]:
def get_data_positive_class(tx, ty, txx, tyy, wanted_class, fine_to_coarse):
    """Select the samples of one coarse class and label them all as positive.

    Args:
        tx, ty: training samples and their fine labels (iterated in parallel).
        txx, tyy: test samples and their fine labels (iterated in parallel).
        wanted_class: coarse class whose fine labels are kept.
        fine_to_coarse: mapping from fine label to coarse label.

    Returns:
        A 4-tuple ``(train_data, train_labels, test_data, test_labels)``.
        The data arrays hold the kept samples in their original order; each
        label array has one ``[0, 1]`` row per kept sample.
    """
    # Kept as a list: membership is tested with `in`, which relies on ==
    # comparison against each entry rather than hashing.
    kept_labels = [fine for fine, coarse in fine_to_coarse.items() if coarse == wanted_class]

    train_data = [sample for sample, label in zip(tx, ty) if label in kept_labels]
    test_data = [sample for sample, label in zip(txx, tyy) if label in kept_labels]

    positive_row = np.array([0, 1]).reshape(1, 2)
    train_labels = np.repeat(positive_row, len(train_data), axis=0)
    test_labels = np.repeat(positive_row, len(test_data), axis=0)

    return np.array(train_data), train_labels, np.array(test_data), test_labels



def get_data_negative_class (all_data, all_labels, all_test_data, all_test_lables, exclude_classes, fine_to_coarse):
    """Select every sample outside the given coarse classes and label it negative.

    Args:
        all_data, all_labels: training samples and their fine labels.
        all_test_data, all_test_lables: test samples and their fine labels.
        exclude_classes: collection of coarse classes whose samples are dropped.
        fine_to_coarse: mapping from fine label to coarse label.

    Returns:
        A 4-tuple ``(train_data, train_labels, test_data, test_labels)``.
        The data arrays hold the surviving samples in their original order;
        each label array has one ``[1, 0]`` row per surviving sample.
    """
    # Fine labels that belong to any excluded coarse class (list, so that
    # membership below is decided by == comparison).
    exclude_labels = [fine for fine, coarse in fine_to_coarse.items() if coarse in exclude_classes]

    train_data = [sample for sample, label in zip(all_data, all_labels)
                  if label not in exclude_labels]
    test_data = [sample for sample, label in zip(all_test_data, all_test_lables)
                 if label not in exclude_labels]

    negative_row = np.array([1, 0]).reshape(1, 2)
    train_labels = np.repeat(negative_row, len(train_data), axis=0)
    test_labels = np.repeat(negative_row, len(test_data), axis=0)

    return np.array(train_data), train_labels, np.array(test_data), test_labels
  

In [7]:
def get_accuracy(data, label, model):
    """Return the accuracy of ``model`` on ``data``.

    The predicted class of each row is the arg-max (along axis 1) of
    ``model.predict(data)``; it is compared against ``label`` with
    ``accuracy_score``.
    """
    scores = model.predict(data)
    predicted = np.argmax(scores, axis=1)
    return accuracy_score(label, predicted)


def model_baseline(x_train, y_train, batch_size=32, epochs=20, verbose=False):
    """Build and fit a single-hidden-layer classifier on the given data.

    Architecture: Input -> Dense(256, relu, named 'shared') ->
    BatchNormalization -> Dense(softmax) with one unit per label column.

    Args:
        x_train: 2-D feature array; its second dimension sets the input width.
        y_train: label array; its last dimension sets the number of outputs.
        batch_size, epochs, verbose: forwarded to ``model.fit``.

    Returns:
        The fitted Keras ``Model``.
    """
    n_features = x_train.shape[1]
    n_outputs = y_train.shape[-1]

    inputs = Input(shape=(n_features,))
    hidden = Dense(256, activation='relu', name='shared')(inputs)
    hidden = BatchNormalization()(hidden)
    outputs = Dense(n_outputs, activation='softmax')(hidden)

    model = Model(inputs=inputs, outputs=outputs)
    # NOTE(review): a softmax output is paired with binary_crossentropy here;
    # confirm this pairing is intended.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.fit(
        x_train, y_train,
        batch_size=batch_size, epochs=epochs,
        shuffle=True, verbose=verbose,
    )
    return model


def model_transfer(x_train, y_train, x_transfer, y_transfer, batch_size=48, epochs=20, verbose=False):
    """Fit two classifiers that share a trunk, alternating one epoch each.

    Both models share the same Input, the Dense(256) layer named 'shared' and
    the BatchNormalization layer; each has its own softmax output layer sized
    from ``y_train.shape[1]``. In every round the first model is fitted for
    one epoch on ``(x_train, y_train)`` and then the second model is fitted
    for one epoch on ``(x_transfer, y_transfer)``.

    Args:
        x_train, y_train: data for the first (source) model.
        x_transfer, y_transfer: data for the second (transfer) model.
        batch_size, verbose: forwarded to every ``fit`` call.
        epochs: number of alternating rounds.

    Returns:
        The second (transfer) Keras ``Model``.
    """
    n_features = x_train.shape[1]
    n_outputs = y_train.shape[1]
    compile_args = dict(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    fit_args = dict(batch_size=batch_size, epochs=1, shuffle=True, verbose=verbose)

    # Shared trunk.
    inputs = Input(shape=(n_features,))
    trunk = Dense(256, activation='relu', name='shared')(inputs)
    trunk = BatchNormalization()(trunk)

    # Source head + model.
    source_out = Dense(n_outputs, activation='softmax')(trunk)
    source_model = Model(inputs=inputs, outputs=source_out)
    source_model.compile(**compile_args)

    # Transfer head + model (same trunk tensors, separate output layer).
    transfer_out = Dense(n_outputs, activation='softmax')(trunk)
    transfer_model = Model(inputs=inputs, outputs=transfer_out)
    transfer_model.compile(**compile_args)

    for _ in range(epochs):
        source_model.fit(x_train, y_train, **fit_args)          # training
        transfer_model.fit(x_transfer, y_transfer, **fit_args)  # transferring

    return transfer_model

In [8]:
def do_transfer(x_train, y_train, x_test, y_test, 
                       transfer_class_idx, used_idx, transfer_percent, fine_to_coarse,  
                       train_percent=1.0, batch_size= 48, epochs=20, n_iter=25):
    """Score the target task when trained jointly with each source task.

    For every coarse class in ``used_idx`` (a source task), ``n_iter`` models
    are trained with ``model_transfer`` on a random subset of the source data
    and a random subset of the target data, and each model is scored with F1
    on the full target test set.

    Relies on the module-level ``coarse_names`` list to turn a coarse class
    index into a display name.

    Args:
        x_train, y_train: training samples and fine labels.
        x_test, y_test: test samples and fine labels.
        transfer_class_idx: coarse class treated as the target positive class.
        used_idx: list of coarse classes used as source tasks; they (and the
            target class) are excluded from the negative pool.
        transfer_percent: fraction of the target training rows sampled per run.
        fine_to_coarse: mapping from fine label to coarse label.
        train_percent: fraction of the source training rows sampled per run.
        batch_size, epochs: forwarded to ``model_transfer``.
        n_iter: number of repeated runs per source task.

    Returns:
        A list with one ``(source_name, [f1, ...])`` tuple per entry of
        ``used_idx``, in the same order.
    """
    # Target task: positives come from the target class ...
    transfer_data, transfer_labels, transfer_test_data, transfer_test_labels = \
        get_data_positive_class(x_train, y_train, x_test, y_test, transfer_class_idx, fine_to_coarse)

    # ... negatives are all classes that are neither the target nor a source class.
    train_neg_data, train_neg_labels, test_neg_data, test_neg_label = \
        get_data_negative_class(x_train, y_train, x_test, y_test,
                                used_idx + [transfer_class_idx], fine_to_coarse)

    transfer_full_data = np.vstack([transfer_data, train_neg_data])
    transfer_full_labels = np.vstack([transfer_labels, train_neg_labels])

    transfer_full_test = np.vstack([transfer_test_data, test_neg_data])
    transfer_full_test_lables = np.vstack([transfer_test_labels, test_neg_label])

    # Loop-invariant quantities for the target task.
    true_labels = transfer_full_test_lables.argmax(axis=1)
    transfer_total = transfer_full_data.shape[0]
    transfer_size = int(transfer_total * transfer_percent)

    curr_accuracy = []
    for other in used_idx:
        # Only the training half of the source task is needed; evaluation is
        # always done on the target test set.
        train_pos_data, train_pos_labels, _, _ = \
            get_data_positive_class(x_train, y_train, x_test, y_test, other, fine_to_coarse)

        train_data = np.vstack([train_pos_data, train_neg_data])
        train_labels = np.vstack([train_pos_labels, train_neg_labels])

        train_total = train_data.shape[0]
        train_size = int(train_total * train_percent)

        transfer_accuracy = []
        for i in range(n_iter):
            if i % 5 == 0:
                # Avoid averaging an empty list (RuntimeWarning) on the first pass.
                running = np.average(transfer_accuracy) if transfer_accuracy else float('nan')
                print(i, transfer_percent, coarse_names[other], running)

            # Source-task subsample.
            idx = np.random.choice(train_total, train_size, replace=False)
            d0 = train_data[idx]
            l0 = train_labels[idx]

            # Target-task subsample.
            idx = np.random.choice(transfer_total, transfer_size, replace=False)
            d1 = transfer_full_data[idx]
            l1 = transfer_full_labels[idx]

            model = model_transfer(d0, l0, d1, l1, batch_size=batch_size, epochs=epochs)

            predicted = model.predict(transfer_full_test).argmax(axis=1)
            # f1_score takes (y_true, y_pred) in that order.
            f1 = f1_score(true_labels, predicted, labels=[1])
            transfer_accuracy.append(f1)

        curr_accuracy.append((coarse_names[other], transfer_accuracy))

    return curr_accuracy

In [9]:
def do_baseline(x_train, y_train, x_test, y_text, transfer_class, used_idx, use_percent, fine_to_coarse,  
                train_percent=1.0, batch_size= 48, epochs=20, n_iter=25):
    """Score the target task when trained on its own (no source task).

    ``n_iter`` models are trained with ``model_baseline`` on a random subset
    of the target training data, and each is scored with F1 on the full
    target test set.

    Args:
        x_train, y_train: training samples and fine labels.
        x_test, y_text: test samples and fine labels. ``y_text`` is a
            misspelling of ``y_test``; the name is kept so existing keyword
            callers keep working, and the argument is now actually used
            instead of a same-named global.
        transfer_class: coarse class treated as the target positive class.
        used_idx: list of coarse classes excluded from the negative pool
            (together with ``transfer_class``).
        use_percent: fraction of the target training rows sampled per run.
        fine_to_coarse: mapping from fine label to coarse label.
        train_percent: not used by this function.
        batch_size, epochs: forwarded to ``model_baseline``.
        n_iter: number of repeated runs.

    Returns:
        A list of ``n_iter`` F1 scores.
    """
    y_test_labels = y_text  # use the argument, not whatever `y_test` is in globals

    transfer_data, transfer_labels, transfer_test_data, transfer_test_labels = \
        get_data_positive_class(x_train, y_train, x_test, y_test_labels, transfer_class, fine_to_coarse)

    # Negatives are all classes that are neither the target nor a used class.
    train_neg_data, train_neg_labels, test_neg_data, test_neg_label = \
        get_data_negative_class(x_train, y_train, x_test, y_test_labels,
                                used_idx + [transfer_class], fine_to_coarse)

    transfer_full_data = np.vstack([transfer_data, train_neg_data])
    transfer_full_labels = np.vstack([transfer_labels, train_neg_labels])

    transfer_full_test = np.vstack([transfer_test_data, test_neg_data])
    transfer_full_test_lables = np.vstack([transfer_test_labels, test_neg_label])

    # Loop-invariant quantities.
    true_labels = transfer_full_test_lables.argmax(axis=1)
    total = transfer_full_data.shape[0]
    transfer_size = int(total * use_percent)

    accuracies = []
    for i in range(n_iter):
        if i % 5 == 0:
            # Avoid averaging an empty list (RuntimeWarning) on the first pass.
            running = np.average(accuracies) if accuracies else float('nan')
            print(i, use_percent, running)

        idx = np.random.choice(total, transfer_size, replace=False)
        d = transfer_full_data[idx]
        l = transfer_full_labels[idx]
        model = model_baseline(d, l, batch_size=batch_size, epochs=epochs)

        predicted = model.predict(transfer_full_test).argmax(axis=1)
        # f1_score takes (y_true, y_pred) in that order.
        f1 = f1_score(true_labels, predicted, labels=[1])
        accuracies.append(f1)
    return accuracies
                        

    

In [10]:
# Names of the 20 CIFAR-100 coarse classes; the list position is used as the
# coarse class index elsewhere in this notebook.
coarse_names = [
    'aquatic_mammals',
    'fish',
    'flowers',
    'food_containers',
    'fruit_and_vegetables',
    'household_electrical_devices',
    'household_furniture',
    'insects',
    'large_carnivores',
    'large_man-made_outdoor_things',
    'large_natural_outdoor_scenes',
    'large_omnivores_and_herbivores',
    'medium-sized_mammals',
    'non-insect_invertebrates',
    'people',
    'reptiles',
    'small mammals',
    'trees',
    'vehicles_1',
    'vehicles_2',
]

# Target task: index 8 is 'large_carnivores' in the list above.
wanted = 8

# Source tasks, given by name and resolved to their positions in coarse_names.
used_set = ['small mammals', 'vehicles_1']
used_idx = [position for position, name in enumerate(coarse_names) if name in used_set]


In [15]:
# Seed the NumPy and TensorFlow RNGs before the sweep.
seed = 9876
np.random.seed(seed)
set_seed(seed)

# Measure the baseline (non-transfer) score at increasing training fractions.
# The values returned by do_baseline are F1 scores, although the message
# below labels them "accuracy".
ranges = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
n_iter = 25
baseline_acc = {}
for percentage in ranges:
    baseline_acc[percentage] = do_baseline(
        x_train_reserve, y_train_reserve, x_test, y_test,
        wanted, used_idx, percentage, fine_to_coarse,
        n_iter=n_iter,
    )
    mean_score = np.average(baseline_acc[percentage])
    print('Percentage %s : accuracy %s' % (percentage, mean_score))

# Persist the raw per-run scores.
with open('./baseline_acc.p', 'wb') as handle:
    pickle.dump(baseline_acc, handle)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


0 0.1 nan
5 0.1 0.5833200540864529
10 0.1 0.5882104622570906
15 0.1 0.5817722779460561
20 0.1 0.5805927811392501
Percentage 0.1 : accuracy 0.5779231092139058
0 0.2 nan
5 0.2 0.5831765777340314
10 0.2 0.5852203386175137
15 0.2 0.5873487849563528
20 0.2 0.5839643365929736
Percentage 0.2 : accuracy 0.587206958070933
0 0.3 nan
5 0.3 0.5884895342392727
10 0.3 0.5878288579168749
15 0.3 0.5897116817477481
20 0.3 0.5925029576096912
Percentage 0.3 : accuracy 0.5904663160197445
0 0.4 nan
5 0.4 0.5913997905180631
10 0.4 0.5996197312195328
15 0.4 0.6058512323087767
20 0.4 0.6056806403579251
Percentage 0.4 : accuracy 0.6045195984386522
0 0.5 nan
5 0.5 0.5967296617536554
10 0.5 0.6047542533649725
15 0.5 0.6118040493801435
20 0.5 0.6091509205990582
Percentage 0.5 : accuracy 0.6106531744412788
0 0.6 nan
5 0.6 0.6105998061654446


KeyboardInterrupt: 

In [16]:
# Seed the NumPy and TensorFlow RNGs before the sweep (same seed as the baseline run).
seed = 9876
np.random.seed(seed)
set_seed(seed)

# Measure the transfer score at increasing target-data fractions.
ranges = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
n_iter = 25
transfer_acc = {}
for percentage in ranges:
    res = do_transfer(
        x_train_reserve, y_train_reserve, x_test, y_test,
        wanted, used_idx, percentage, fine_to_coarse,
        n_iter=n_iter,
    )
    transfer_acc[percentage] = res

    # Report the source tasks from lowest to highest mean score.
    print(percentage)
    for task_name, task_scores in sorted(res, key=lambda entry: np.average(entry[1])):
        print (task_name, np.average(task_scores))

# Persist the raw per-run scores.
with open('./tranfer_acc.p', 'wb') as handle:
    pickle.dump(transfer_acc, handle)

0 0.1 small mammals nan
5 0.1 small mammals 0.5758650731912508
10 0.1 small mammals 0.5778252131808317
15 0.1 small mammals 0.5840642027516203
20 0.1 small mammals 0.5841911147022418
0 0.1 vehicles_1 nan
5 0.1 vehicles_1 0.5800150697837716
10 0.1 vehicles_1 0.5790797187006838
15 0.1 vehicles_1 0.5755517244898108
20 0.1 vehicles_1 0.5761736278795349
0.1
vehicles_1 0.577932763925673
small mammals 0.5837421075041361
0 0.2 small mammals nan
5 0.2 small mammals 0.5882837194498094
10 0.2 small mammals 0.5816505158301527
15 0.2 small mammals 0.5827185613198453
20 0.2 small mammals 0.5886333060778492
0 0.2 vehicles_1 nan
5 0.2 vehicles_1 0.5868050443682566
10 0.2 vehicles_1 0.5930740222933435
15 0.2 vehicles_1 0.5875062272447421
20 0.2 vehicles_1 0.589593681404151
0.2
small mammals 0.5888094056563864
vehicles_1 0.5897750160082237
0 0.3 small mammals nan
5 0.3 small mammals 0.5767301532862078
10 0.3 small mammals 0.5865988347074802
15 0.3 small mammals 0.5903290306047431
20 0.3 small mammals 0.

In [22]:
# For each transfer fraction, list every source task from lowest to highest
# mean score, with its mean and median. Iterating over all entries (instead of
# indexing [0] and [1]) keeps this working for any number of source tasks.
for k, v in transfer_acc.items():
    print (k)
    for task_name, task_scores in sorted(v, key=lambda entry: np.mean(entry[1])):
        print(task_name, np.mean(task_scores), np.median(task_scores))

0.1
vehicles_1 0.577932763925673 0.5861344537815126
small mammals 0.5837421075041361 0.585308056872038
0.2
small mammals 0.5888094056563864 0.59392575928009
vehicles_1 0.5897750160082237 0.5906499429874572
0.3
small mammals 0.5946596974268828 0.5972093023255814
vehicles_1 0.5956633862945104 0.6027713625866051
0.4
vehicles_1 0.5951434148887206 0.5979843225083987
small mammals 0.6028935285158389 0.6078665077473182
0.5
small mammals 0.5951913507217002 0.5988950276243095
vehicles_1 0.6043649934374138 0.6096256684491977
0.6
small mammals 0.60295253919035 0.6068660022148394
vehicles_1 0.6063959660276137 0.6098439375750301
0.7
small mammals 0.608711225495581 0.6180257510729613
vehicles_1 0.6091583178032818 0.6105263157894737
0.8
small mammals 0.6077234979620286 0.6086956521739131
vehicles_1 0.6112514009404832 0.6121546961325967
0.9
small mammals 0.6071801731284924 0.6155580608793686
vehicles_1 0.6106719159188366 0.6106696935300795
1.0
small mammals 0.6078050150278093 0.6124497991967871
vehicl

In [28]:
# Scratch check: ten evenly spaced values from 0 to 0.5 (both ends included).
np.linspace(0, 0.5, num=10)

array([0.        , 0.05555556, 0.11111111, 0.16666667, 0.22222222,
       0.27777778, 0.33333333, 0.38888889, 0.44444444, 0.5       ])