<a href="https://colab.research.google.com/github/rvkgovind/watermarking_effects_on_classification/blob/master/RESNET50_finetuned_tinyimagenet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab VM at /content/drive.
# NOTE(review): the cells visible in this notebook only use paths relative to
# the working directory, so it is unclear whether the mount is still needed -- confirm.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Dataset configuration: download source and on-disk locations.
# The download step is skipped when TRAINING_IMAGES_DIR already exists.
import os
import zipfile  # standard-library ZIP archive reading and extraction
import requests  # HTTP client library used to fetch the archive
import io 

# Directories created by extracting the archive into the working directory.
TRAINING_IMAGES_DIR = './tiny-imagenet-200/train/'
VAL_IMAGES_DIR = './tiny-imagenet-200/val/'
# Source archive of the Tiny ImageNet dataset.
IMAGES_URL = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
def download_images(url):
    """Download the ZIP archive at `url` and extract it into the working directory.

    Nothing is downloaded when TRAINING_IMAGES_DIR already exists.

    Args:
        url: HTTP(S) address of the ZIP archive.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipFile: if the downloaded payload is not a ZIP archive.
    """
    if os.path.isdir(TRAINING_IMAGES_DIR):
        print ('Images already downloaded...')
        return
    # Announce the download before it starts, not after it has finished.
    print ('Downloading ' + url )
    # The whole body is read through `.content` below, so streaming mode
    # brings nothing here and is not requested.
    r = requests.get(url)
    # Fail loudly on 4xx/5xx instead of handing an error page to ZipFile.
    r.raise_for_status()
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(io.BytesIO(r.content)) as zip_ref:
        zip_ref.extractall('./')
    print('YAY...! downloaded')
# Fetch and extract the dataset; returns immediately when it is already on disk.
download_images(IMAGES_URL)


Images already downloaded...


In [0]:
import numpy as np
from PIL import Image
import os

def _read_image_chw(image_path):
    """Read one image file and return its pixels as a channel-first (3, H, W) array."""
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(image_path) as img:
        # convert('RGB') replicates grayscale images into three identical
        # channels and normalises any other mode to exactly three channels.
        pixels = np.array(img.convert('RGB'))
    return np.transpose(pixels, (2, 0, 1))


def load_images(path,num_classes):
    """Load the first `num_classes` Tiny ImageNet classes into uint8 arrays.

    Class folders and image files are visited in sorted order, so the
    class-name -> label-index mapping and the sample order are the same on
    every run and every filesystem (os.listdir alone returns entries in
    arbitrary order).

    Args:
        path: dataset root containing `train/<class>/images/` and `val/images/`.
        num_classes: number of class folders to load.

    Returns:
        (X_train, y_train, X_test, y_test). Images are uint8 arrays of shape
        (N, 3, 64, 64); labels are uint8 class indices of shape (N,).
        The buffers are sized for 500 training and 50 validation images per
        class; rows that were never filled are trimmed from the result.

    NOTE(review): the validation annotation file is located by
    get_annotations_map(), not derived from `path`.
    """
    print('Loading ' + str(num_classes) + ' classes')

    X_train = np.zeros([num_classes*500, 3, 64, 64], dtype='uint8')
    y_train = np.zeros([num_classes*500], dtype='uint8')

    trainPath = os.path.join(path, 'train')

    print('loading training images...')

    # Maps class folder name -> integer label.
    annotations = {}
    n_train = 0
    # Slicing (rather than breaking out of the loop) also handles num_classes == 0.
    class_names = sorted(os.listdir(trainPath))[:num_classes]
    for label, class_name in enumerate(class_names):
        images_dir = os.path.join(trainPath, class_name, 'images')
        annotations[class_name] = label
        for file_name in sorted(os.listdir(images_dir)):
            X_train[n_train] = _read_image_chw(os.path.join(images_dir, file_name))
            y_train[n_train] = label
            n_train += 1

    print('finished loading training images')

    # Maps validation image file name -> class folder name.
    val_annotations_map = get_annotations_map()

    X_test = np.zeros([num_classes*50, 3, 64, 64], dtype='uint8')
    y_test = np.zeros([num_classes*50], dtype='uint8')

    print('loading test images...')

    n_test = 0
    testPath = os.path.join(path, 'val', 'images')
    for file_name in sorted(os.listdir(testPath)):
        # .get() skips files that have no annotation entry instead of raising KeyError.
        class_name = val_annotations_map.get(file_name)
        if class_name not in annotations:
            # Image belongs to a class that was not selected above.
            continue
        X_test[n_test] = _read_image_chw(os.path.join(testPath, file_name))
        y_test[n_test] = annotations[class_name]
        n_test += 1

    print('finished loading test images')

    # Drop unfilled rows so that all-zero images labelled as class 0 never
    # reach the model when a class holds fewer images than the buffers expect.
    return X_train[:n_train], y_train[:n_train], X_test[:n_test], y_test[:n_test]


def get_annotations_map(valAnnotationsPath='./tiny-imagenet-200/val/val_annotations.txt'):
    """Parse the validation annotation file into a file-name -> class-name dict.

    Each non-empty line is split on whitespace; the first field is used as
    the key and the second as the value. Any further fields are ignored.

    Args:
        valAnnotationsPath: location of the annotation file. Defaults to the
            path inside the extracted Tiny ImageNet archive.

    Returns:
        dict mapping the first field of every line to its second field.
    """
    valAnnotations = {}

    # The context manager closes the file even if parsing fails.
    with open(valAnnotationsPath, 'r') as valAnnotationsFile:
        for line in valAnnotationsFile:
            pieces = line.split()
            # Skip blank or truncated lines instead of raising IndexError.
            if len(pieces) < 2:
                continue
            valAnnotations[pieces[0]] = pieces[1]

    return valAnnotations

In [4]:
# Load 20 classes of the dataset and report the shape of every returned array.
X_train, y_train, X_test, y_test = load_images('./tiny-imagenet-200', 20)
print('X_train:{}'.format(X_train.shape))
print('y_train:{}'.format(y_train.shape))
print('X_test:{}'.format(X_test.shape))
print('y_test:{}'.format(y_test.shape))

Loading 20 classes
loading training images...
finished loading training images
loading test images...
finished loading test images
X_train:(10000, 3, 64, 64)
y_train:(10000,)
X_test:(1000, 3, 64, 64)
y_test:(1000,)


In [5]:
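# Imports for extracting bottleneck features with a pre-trained ResNet50.
# NOTE(review): preprocess_input comes from the vgg16 module although the network is ResNet50 -- confirm both apply the same preprocessing.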
import keras
import numpy as np
from keras.applications import ResNet50
from keras.applications.vgg16 import preprocess_input
import scipy
from scipy import misc
import os
from keras.callbacks import ModelCheckpoint   
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, GlobalAveragePooling2D

Using TensorFlow backend.


In [6]:

# Feature extractor: ResNet50 with ImageNet weights and without its fully
# connected top, wrapped in a Sequential model. No pooling layer is added here.
model = Sequential([ResNet50(include_top=False, weights='imagenet')])

# Remove any singleton dimensions from the label array.
y_train = np.squeeze(y_train)

Instructions for updating:
Colocations handled automatically by placer.




In [7]:
# Print the layer table of the ResNet50 feature extractor built above.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, None, None, 2048)  23587712  
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________________________________________


In [8]:
# obtain bottleneck features (train)
if os.path.exists('vgg16_features_train.npz'):
    print('bottleneck features detected (train)')
    features = np.load('vgg16_features_train.npz')['features']
else:
    print('bottleneck features file not detected (train)')
    print('calculating now ...')
    # pre-process the train data
    big_x_train = np.array([scipy.misc.imresize(X_train[i], (224, 224, 3)) 
                            for i in range(0, len(X_train))]).astype('float32')
    vgg_input_train = preprocess_input(big_x_train)
    print('train data preprocessed')
    # extract, process, and save bottleneck features
    features = model.predict(vgg_input_train)
    features = np.squeeze(features)
    np.savez('vgg16_features_train', features=features)
print('bottleneck features saved (train)')

bottleneck features detected (train)
bottleneck features saved (train)


In [9]:
# obtain bottleneck features (test)
if os.path.exists('vgg16_features_test.npz'):
    print('bottleneck features detected (test)')
    features_test = np.load('vgg16_features_test.npz')['features_test']
else:
    print('bottleneck features file not detected (test)')
    print('calculating now ...')
    # pre-process the test data
    big_x_test = np.array([scipy.misc.imresize(X_test[i], (224, 224, 3)) 
                       for i in range(0, len(X_test))]).astype('float32')
    vgg16_input_test = preprocess_input(big_x_test)
    # extract, process, and save bottleneck features (test)
    features_test = model.predict(vgg16_input_test)
    features_test = np.squeeze(features_test)
    np.savez('vgg16_features_test', features_test=features_test)
print('bottleneck features saved (test)')

bottleneck features file not detected (test)
calculating now ...


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.
  if __name__ == '__main__':


bottleneck features saved (test)


In [0]:
# Remove any singleton dimensions from the training labels.
# The same call is already made in the cell that builds the feature extractor.
y_train = np.squeeze(y_train)

In [0]:
from keras.utils import np_utils

# One-hot encode the labels into 20 classes.
# The ndim guards make this cell safe to re-run: labels that are already
# one-hot encoded (2-D) are left untouched instead of being encoded again.
if y_train.ndim == 1:
    y_train = np_utils.to_categorical(y_train, 20)
if y_test.ndim == 1:
    y_test = np_utils.to_categorical(y_test, 20)

In [12]:
# At this point `model` is still the ResNet50 feature extractor;
# the next cell rebinds the name to the classifier.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, None, None, 2048)  23587712  
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________________________________________


In [13]:
# Classifier trained on the pre-computed ResNet50 features:
# global average pooling -> 400-unit ReLU layer -> dropout -> 20-way softmax.
# NOTE: this rebinds `model`, so the ResNet50 feature extractor is no longer
# reachable under that name once this cell has run.
model = Sequential([
    GlobalAveragePooling2D(),
    Dense(400, activation='relu'),
    Dropout(0.5),
    Dense(20, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Keep only the weights of the best epoch; the training log shows the
# checkpoint tracking val_loss.
checkpointer = ModelCheckpoint(
    filepath='model.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): the test split doubles as validation data, so the checkpoint
# is selected on the same samples that are later used to report accuracy.
model.fit(
    features,
    y_train,
    batch_size=64,
    epochs=50,
    verbose=1,
    validation_data=(features_test, y_test),
    callbacks=[checkpointer],
)


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 10000 samples, validate on 1000 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.47730, saving model to model.best.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 0.47730 to 0.45114, saving model to model.best.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 0.45114 to 0.40388, saving model to model.best.hdf5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.40388
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.40388
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.40388
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.40388
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.40388
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.40388
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.40388
Epoch 11/50

Epoch 00011: val_loss did not improve

<keras.callbacks.History at 0x7efebc0d3b38>

In [14]:
# Restore the checkpointed weights, i.e. those of the epoch with the lowest
# validation loss seen during training.
model.load_weights('model.best.hdf5')

# Evaluate on the test features; index 1 of the result is the accuracy metric
# requested at compile time.
score = model.evaluate(features_test, y_test, verbose=0)
accuracy = score[1] * 100

# Report the accuracy as a percentage.
print('Test accuracy: {:.4f}%'.format(accuracy))

Test accuracy: 89.0000%
