In [1]:
# Show where the Colab runtime starts out.
%pwd

'/content'

In [2]:
# List the contents of the starting directory.
%ls

[0m[01;34mdatalab[0m/


In [3]:
# Fetch the project repository into the current directory.
!git clone https://github.com/sheikhomar/cvml.git

Cloning into 'cvml'...
remote: Counting objects: 78328, done.[K
remote: Total 78328 (delta 0), reused 0 (delta 0), pack-reused 78328[K
Receiving objects: 100% (78328/78328), 476.18 MiB | 30.99 MiB/s, done.
Resolving deltas: 100% (736/736), done.
Checking out files: 100% (77993/77993), done.


In [4]:
# Work from the project folder inside the cloned repository.
%cd cvml/project/

/content/cvml/project


In [6]:
# Enter the weights folder so the upload below is saved there.
%cd saved_weights/

/content/cvml/project/saved_weights


In [7]:
from google.colab import files

# Opens the Colab upload widget; the mapping returned is file name -> content.
uploaded = files.upload()

# Report every uploaded file together with its size.
for fn, content in uploaded.items():
    message = 'User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(content))
    print(message)


Saving model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5 to model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5
User uploaded file "model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5" with length 90047632 bytes


In [8]:
# Confirm the upload landed here, then step back to the project root: every
# later cell uses paths relative to it ('saved_weights/...', 'Validation/...').
%ls
%cd ..

model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5
myfile.bin
sentinel


In [0]:
import os
import re
import sys
import urllib.request
import h5py

from keras import applications
from keras.models import Model
from keras.layers import Dropout, Dense, GlobalAveragePooling2D


from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential
from keras import optimizers

import numpy as np


def preprocess_input(x):
    """Rescale pixel values from [0, 255] to [-1, 1].

    Floating-point arrays are modified in place and the same object is
    returned. Integer and boolean arrays cannot hold the result (in-place true
    division raises for them), so they are converted to a float32 copy first
    and the caller's array is left untouched.

    Args:
        x: Image data; in this file it is the float array produced by
            ``image.img_to_array`` or handed over by ``ImageDataGenerator``.

    Returns:
        The rescaled data.
    """
    if isinstance(x, np.ndarray) and x.dtype.kind in 'biu':
        x = x.astype(np.float32)
    x /= 255.   # [0, 255] -> [0, 1]
    x -= 0.5    # [0, 1]   -> [-0.5, 0.5]
    x *= 2.     # [-0.5, 0.5] -> [-1, 1]
    return x


class ModelBase:
    """Shared training and inference scaffolding for the image classifiers.

    Subclasses implement :meth:`_create`, which must assign a Keras model to
    ``self.model``. Compiling, training from directory generators, per-image
    and per-instance (image pair) prediction, and weight discovery/loading
    are provided here.

    All data and weight paths are relative to the current working directory.
    """

    def __init__(self,
                 model_name=None,
                 batch_size=16,
                 verbose=0,
                 n_freeze_layers=0,
                 learning_rate=0.00001,
                 epochs=400,
                 optimizer='adam'
                 ):
        """Store hyper-parameters and the hard-coded dataset layout.

        Args:
            model_name: Prefix for checkpoint file names and for locating saved
                weights. Defaults to the base name of ``sys.argv[0]`` without
                its extension.
            batch_size: Batch size handed to the data generators.
            verbose: Verbosity passed to ``fit_generator`` and the callbacks;
                a truthy value also prints the first few image pairs during
                instance-based prediction.
            n_freeze_layers: Number of leading layers made non-trainable by
                :meth:`_freeze_top_layers`.
            learning_rate: Stored on the instance. The SGD optimizer built in
                :meth:`_compile` does not read it.
            epochs: Maximum number of training epochs.
            optimizer: Stored on the instance. Not consulted when compiling.
        """
        if model_name is None:
            script_name, _ = os.path.splitext(sys.argv[0])
            self.model_name = os.path.basename(script_name)
        else:
            self.model_name = model_name
        self.batch_size = batch_size
        self.verbose = verbose
        self.optimizer = optimizer

        self.train_data_dir = "Train/TrainImages"
        self.validation_data_dir = "Validation/ValidationImages"
        self.test_data_dir = "Test/TestImages"
        self.img_width = 256
        self.img_height = 256
        self.img_channels = 3
        # NOTE(review): these counts are hard-coded and only drive the number
        # of generator steps per epoch; confirm they match the data on disk.
        self.n_train_samples = 5830
        self.n_validation_samples = 2298
        self.n_test_samples = 3460
        self.n_labels = 29
        self.epochs = epochs
        self.n_freeze_layers = n_freeze_layers
        self.learning_rate = learning_rate
        self.imagenet_weights_url = None
        self.imagenet_use_id = False

    def load_model(self, model_weights=None):
        """Create the model, load ``model_weights`` into it and compile it.

        Args:
            model_weights: Path of the weights file, passed unchanged to the
                model's ``load_weights``.

        Returns:
            The compiled model (also available as ``self.model``).
        """
        print('Creating model...')
        self._create()
        print('Loading weights from {}...'.format(model_weights))
        self.model.load_weights(model_weights)
        print('Compiling...')
        self._compile()
        return self.model

    def train(self):
        """Create, initialise, compile and fit the model on the training data."""
        print('Creating model...')
        self._create()

        self._freeze_top_layers()

        print('Loading weights...')
        self._load_pretrained_weights()

        print('Compiling...')
        self._compile()

        # Data generators for the model
        train_gen = self._get_train_generator()
        validation_gen = self._get_validation_generator()

        print('Training model...')
        self.model.fit_generator(
            train_gen,
            steps_per_epoch=self.n_train_samples // self.batch_size,
            validation_data=validation_gen,
            validation_steps=self.n_validation_samples // self.batch_size,
            epochs=self.epochs,
            callbacks=self._get_callbacks(),
            verbose=self.verbose
        )

    def predict_validation(self, model_weights):
        """Predict a label for each validation image independently."""
        img_paths = self._get_validation_image_paths()
        return self._predict(model_weights, img_paths)

    def predict_test(self, model_weights):
        """Predict a label for each test image independently."""
        img_paths = self._get_test_image_paths()
        return self._predict(model_weights, img_paths)

    def predict_instance_validation(self, model_weights):
        """Predict validation labels with pair-wise (instance-based) voting."""
        img_paths = self._get_validation_image_paths()
        return self._predict_instance(model_weights, img_paths)

    def predict_instance_test(self, model_weights):
        """Predict test labels with pair-wise (instance-based) voting."""
        img_paths = self._get_test_image_paths()
        return self._predict_instance(model_weights, img_paths)

    @staticmethod
    def write_predictions(predictions, file_name='predictions.csv'):
        """Write predictions as CSV with an ``ID,Label`` header and 1-based IDs.

        Args:
            predictions: Iterable of labels; position ``k`` is written with
                ID ``k + 1``.
            file_name: Destination path; an existing file is overwritten.
        """
        with open(file_name, 'w') as file:
            file.write('ID,Label')
            for index, value in enumerate(predictions):
                file.write('\n{0},{1}'.format(index + 1, value))

    @staticmethod
    def show_progress_bar(iteration, total, bar_length=50):
        """Redraw a one-line text progress bar on stdout.

        Args:
            iteration: Number of items processed so far.
            total: Total number of items; a total of zero is shown as complete
                instead of dividing by zero.
            bar_length: Width of the bar in characters.
        """
        fraction = (iteration / total) if total else 1.0
        percent = int(round(fraction * 100))
        nb_bar_fill = int(round((bar_length * percent) / 100))
        bar_fill = '#' * nb_bar_fill
        bar_empty = ' ' * (bar_length - nb_bar_fill)
        # '\r' returns to the start of the line so the bar overwrites itself.
        sys.stdout.write("\r  [{0}] {1}%".format(str(bar_fill + bar_empty), percent))
        sys.stdout.flush()

    def _compile(self):
        """Compile ``self.model`` for multi-class classification.

        Uses SGD with momentum 0.9 and the optimizer's default learning rate;
        ``self.learning_rate`` is not applied here.
        """
        self.model.compile(
            optimizers.SGD(momentum=0.9),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

    def _predict_scores(self, img_path):
        """Return the model's raw prediction array for a single image file."""
        return self.model.predict(self._load_image(img_path))

    def _predict(self, model_weights, img_paths):
        """Predict one label per image.

        Args:
            model_weights: Weights file loaded before predicting.
            img_paths: List of ``(image_number, path)`` tuples; image numbers
                are 1-based and select the slot in the returned array.

        Returns:
            ``np.int8`` array with the label of image ``k`` at index ``k - 1``.
        """
        self.load_model(model_weights)

        label_map = self._get_label_map()
        img_count = len(img_paths)
        y_predictions = np.zeros(img_count, dtype=np.int8)

        for i, (img_num, img_path) in enumerate(img_paths):
            self.show_progress_bar(i + 1, img_count)
            img_data = self._load_image(img_path)
            if isinstance(self.model, Sequential):
                pred_index = self.model.predict_classes(img_data)[0]
            else:
                predictions = self.model.predict(img_data)
                pred_index = np.argmax(predictions, axis=1)[0]
            y_predictions[img_num - 1] = label_map[pred_index]

        return y_predictions

    def _predict_instance(self, model_weights, img_paths):
        """Predict labels where consecutive image numbers form one instance.

        Images are ordered by number and grouped in pairs (1 and 2, 3 and 4,
        ...). When the two images of a pair disagree, both receive the class
        with the higher confidence score. With an odd number of images the
        last one is predicted on its own.

        Args:
            model_weights: Weights file loaded before predicting.
            img_paths: List of ``(image_number, path)`` tuples with 1-based
                image numbers. The list itself is not modified.

        Returns:
            ``np.int8`` array with the label of image ``k`` at index ``k - 1``.
            Entries stay 0 if prediction stops early because a slot was
            already filled.
        """
        print('Instance-based predictions...')
        self.load_model(model_weights)

        # Order by image number without touching the caller's list.
        ordered = sorted(img_paths, key=lambda tup: tup[0])

        # Group images so they come in pairs
        instance_pairs = list(zip(ordered[0::2], ordered[1::2]))
        pair_count = len(instance_pairs)

        label_map = self._get_label_map()
        img_count = len(ordered)
        y_predictions = np.zeros(img_count, dtype=np.int8)

        print('Image count={}      instance_pairs count={}'.format(img_count, pair_count))

        completed = True
        for i, ((img1_num, img1_path), (img2_num, img2_path)) in enumerate(instance_pairs):
            # Progress is measured in pairs, which is what this loop iterates.
            self.show_progress_bar(i + 1, pair_count)

            if self.verbose and i < 10:
                print('Path1={}    Path2={}'.format(img1_path, img2_path))

            img1_pred = self._predict_scores(img1_path)
            img2_pred = self._predict_scores(img2_path)

            img1_pred_index = np.argmax(img1_pred, axis=1)[0]
            img2_pred_index = np.argmax(img2_pred, axis=1)[0]

            if img1_pred_index != img2_pred_index:
                img1_highest_score = np.max(img1_pred, axis=1)[0]
                img2_highest_score = np.max(img2_pred, axis=1)[0]

                # if class labels for different views differ,
                # we assign to the instance the class label
                # with the highest confidence score.
                if img1_highest_score > img2_highest_score:
                    img2_pred_index = img1_pred_index
                else:
                    img1_pred_index = img2_pred_index

            # A slot that is already non-zero means two images map to the
            # same index; report it and stop.
            if y_predictions[img1_num - 1] > 0:
                print('Something is wrong with index ', img1_num - 1)
                completed = False
                break
            if y_predictions[img2_num - 1] > 0:
                print('Something is wrong with index ', img2_num - 1)
                completed = False
                break

            y_predictions[img1_num - 1] = label_map[img1_pred_index]
            y_predictions[img2_num - 1] = label_map[img2_pred_index]

        # An odd image count leaves one image without a partner.
        if completed and img_count % 2:
            last_num, last_path = ordered[-1]
            if y_predictions[last_num - 1] > 0:
                print('Something is wrong with index ', last_num - 1)
            else:
                last_pred = self._predict_scores(last_path)
                y_predictions[last_num - 1] = label_map[np.argmax(last_pred, axis=1)[0]]

        print('\n ... predictions done')
        return y_predictions

    def _get_label_map(self):
        """Return a dict mapping the model's output index to the integer label.

        The mapping is the inverse of the training generator's
        ``class_indices``, with the class names converted to ``int``.
        """
        # We need the ImageDataGenerator used to train the model
        # because it contains a mapping between classes and indices
        train_gen = self._get_train_generator()

        # Reverse keys and values so values becomes keys
        return {v: int(k) for k, v in train_gen.class_indices.items()}

    def _load_image(self, image_path):
        """Load one image as a preprocessed batch of size one."""
        # Keras expects target_size as (height, width), matching the generators.
        img = image.load_img(image_path, target_size=(self.img_height, self.img_width))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        return preprocess_input(x)

    @staticmethod
    def _image_number(file_name):
        """Return the first run of digits in ``file_name`` as int, or None."""
        digits = re.search(r'\d+', file_name)
        return int(digits.group()) if digits else None

    def _get_test_image_paths(self):
        """List ``(image_number, path)`` for every numbered file in the test dir.

        Files whose name contains no digits are skipped.
        """
        final_list = []
        for img_name in os.listdir(self.test_data_dir):
            img_number = self._image_number(img_name)
            if img_number is None:
                continue
            final_list.append((img_number, os.path.join(self.test_data_dir, img_name)))
        return final_list

    def _get_validation_image_paths(self):
        """List ``(image_number, path)`` for every numbered validation image.

        Walks one level of sub-directories below the validation directory.
        Entries that are not directories and files whose name contains no
        digits are skipped.
        """
        final_list = []
        for sub_dir in os.listdir(self.validation_data_dir):
            sub_dir_path = os.path.join(self.validation_data_dir, sub_dir)
            if not os.path.isdir(sub_dir_path):
                continue
            for img_name in os.listdir(sub_dir_path):
                img_number = self._image_number(img_name)
                if img_number is None:
                    continue
                final_list.append((img_number, os.path.join(sub_dir_path, img_name)))
        return final_list

    def _flow_from(self, directory):
        """Build a categorical directory generator using ``preprocess_input``."""
        return ImageDataGenerator(
            preprocessing_function=preprocess_input
        ).flow_from_directory(
            directory,
            target_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            class_mode="categorical"
        )

    def _get_validation_generator(self):
        """Return the batch generator over the validation directory."""
        return self._flow_from(self.validation_data_dir)

    def _get_train_generator(self):
        """Return the batch generator over the training directory."""
        return self._flow_from(self.train_data_dir)

    def _load_pretrained_weights(self):
        """Initialise the model from the best available weights.

        Preference order: the best checkpoint found by
        :meth:`_find_saved_weights`; otherwise the file behind
        ``self.imagenet_weights_url`` (downloaded into ``saved_weights/`` when
        not already present); otherwise nothing is loaded.
        """
        saved_weights_path = self._find_saved_weights()
        if saved_weights_path is not None:
            print('Loading saved weights from: {}'.format(saved_weights_path))
            self.model.load_weights(saved_weights_path)

        elif self.imagenet_weights_url is not None and len(self.imagenet_weights_url) > 0:
            print('Loading imagenet weights...')
            model_weights_path = 'saved_weights/{}'.format(os.path.basename(self.imagenet_weights_url))
            if os.path.isfile(model_weights_path):
                print('Model file already downloaded')
            else:
                # Download pre-trained weights
                print('Downloading {}...'.format(model_weights_path))
                # urlretrieve does not create missing directories.
                os.makedirs(os.path.dirname(model_weights_path), exist_ok=True)
                urllib.request.urlretrieve(self.imagenet_weights_url, model_weights_path)
            self._load_weights_from_file(model_weights_path)
        else:
            print('No pre-trained weights loaded!')

    def _create(self):
        """Build the network and assign it to ``self.model`` (subclass hook)."""
        raise NotImplementedError('subclasses must override _create()')

    def _freeze_top_layers(self):
        """Make the first ``n_freeze_layers`` layers non-trainable.

        Despite the name, the slice ``model.layers[:n]`` is frozen; all
        remaining layers are explicitly marked trainable. Nothing is changed
        when ``n_freeze_layers`` is zero or negative.
        """
        if self.n_freeze_layers > 0:
            print("Freezing {} layers".format(self.n_freeze_layers))
            for layer in self.model.layers[:self.n_freeze_layers]:
                layer.trainable = False
            for layer in self.model.layers[self.n_freeze_layers:]:
                layer.trainable = True

    def _get_callbacks(self):
        """Return the training callbacks: checkpointing, early stopping, LR decay."""
        # Checkpoints are written below saved_weights/, so make sure it exists
        # before the first epoch ends.
        os.makedirs('saved_weights', exist_ok=True)

        # Define model checkpoint
        checkpoint = ModelCheckpoint(
            'saved_weights/%s-epoch{epoch:02d}-acc{acc:.2f}-loss{loss:.2f}'
            '-valacc{val_acc:.2f}-valloss{val_loss:.2f}.hdf5' % self.model_name,
            monitor='val_acc',
            save_best_only=False,
            save_weights_only=True,
            mode='auto',
            period=1,
            verbose=self.verbose
        )

        # Define early stopping
        early_stop = EarlyStopping(
            monitor='val_acc',
            min_delta=0,
            patience=10,
            mode='auto',
            verbose=self.verbose
        )

        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-5)

        return [checkpoint, early_stop, reduce_lr]

    def _find_saved_weights(self, models_dir='./saved_weights/'):
        """Return the checkpoint of this model with the highest ``val`` score.

        Only files whose name starts with ``self.model_name`` are considered.
        The score is the first decimal number that follows ``val`` in the file
        name, which is the validation accuracy for the names written by
        :meth:`_get_callbacks`. Files without such a number are ignored.

        Args:
            models_dir: Directory to search.

        Returns:
            Path of the best checkpoint, or ``None`` when the directory does
            not exist or holds no usable file.
        """
        if not os.path.isdir(models_dir):
            return None

        score_pattern = re.compile(r'val[^\d]*(\d+\.\d*)')
        best_model = None
        best_acc = 0
        for f in sorted(os.listdir(models_dir)):
            if not f.startswith(self.model_name):
                continue
            match = score_pattern.search(f)
            if match is None:
                continue
            acc = float(match.group(1))
            if best_model is None or acc > best_acc:
                best_acc = acc
                best_model = os.path.join(models_dir, f)
        return best_model

    def _load_weights_from_file(self, file_path):
        """Copy layer weights from an HDF5 weights file into ``self.model``.

        Layers are matched by position when ``self.imagenet_use_id`` is true,
        otherwise by name; layers named in the file but absent from the model
        are skipped.

        Args:
            file_path: Path of the HDF5 file; opened read-only.
        """
        print('Loading weights from {}...'.format(file_path))

        layer_indices = {l.name: i for (i, l) in enumerate(self.model.layers)}

        # Load weights from the downloaded file
        with h5py.File(file_path, 'r') as model_weights_file:
            layer_names = model_weights_file.attrs['layer_names']
            for i, layer_name in enumerate(layer_names):
                level_0 = model_weights_file[layer_name]
                transferred_weights = []
                for k0 in level_0.keys():
                    level_1 = level_0[k0]
                    if hasattr(level_1, 'keys'):
                        # Nested group: collect every dataset inside it.
                        for k1 in level_1.keys():
                            transferred_weights.append(level_1[k1][()])
                    else:
                        transferred_weights.append(level_0[k0][()])
                if self.imagenet_use_id:
                    layer_index = i
                else:
                    # The attribute may hold bytes or str depending on how it
                    # was stored and read.
                    if isinstance(layer_name, bytes):
                        layer_key = layer_name.decode('UTF-8')
                    else:
                        layer_key = str(layer_name)
                    if layer_key not in layer_indices:
                        continue
                    layer_index = layer_indices[layer_key]
                self.model.layers[layer_index].set_weights(transferred_weights)
        print('Done loading weights')


In [0]:
class ModelInceptionV3Take2(ModelBase):
    """InceptionV3 backbone with a small dense classification head.

    The head is global average pooling, a 256-unit ReLU layer, dropout of 0.5
    and a softmax layer with ``n_labels`` outputs.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``ModelBase``."""
        super().__init__(*args, **kwargs)

    def _create(self):
        """Build the network and store it in ``self.model``."""
        base_model = applications.InceptionV3(
            include_top=False,
            # Channels-last input shape is (height, width, channels).
            input_shape=(self.img_height, self.img_width, self.img_channels)
        )
        output_layer = base_model.output
        output_layer = GlobalAveragePooling2D(name='avg_pool')(output_layer)
        output_layer = Dense(256, activation='relu')(output_layer)
        output_layer = Dropout(0.5)(output_layer)
        output_layer = Dense(self.n_labels, activation='softmax', name='predictions')(output_layer)
        self.model = Model(inputs=base_model.input, outputs=output_layer)


In [0]:
# The model name must match the prefix of the uploaded weights file.
m = ModelInceptionV3Take2(
    model_name='model-15-inception-v3-take3',
    batch_size=64, epochs=300,
    n_freeze_layers=0, verbose=1,
)

In [105]:
# Per-image predictions on the validation set.
y_val_pred = m.predict_validation(
    model_weights=(
        'saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02'
        '-valacc0.94-valloss0.24.hdf5'
    )
)

Creating model...
Loading weights from saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5...
Compiling...
Found 70208 images belonging to 29 classes.
  [########                                          ] 17%

  [##################################################] 100%

In [0]:
from sklearn.metrics import classification_report
import pandas as pd
from google.colab import files

# Ground-truth validation labels: a header-less, single-column CSV.
y_validation = pd.read_csv(
    'Validation/valLbls.csv',
    header=None,
    names=['label'],
).loc[:, 'label']

In [108]:
# Per-class precision/recall for the per-image predictions.
print(classification_report(y_true=y_validation, y_pred=y_val_pred))

             precision    recall  f1-score   support

          1       1.00      0.99      1.00       116
          2       0.86      0.84      0.85        96
          3       0.90      0.96      0.93        94
          4       0.99      0.99      0.99        92
          5       0.97      0.99      0.98        88
          6       0.96      0.83      0.89        92
          7       0.89      0.84      0.86        92
          8       0.83      0.89      0.86        88
          9       0.97      0.99      0.98        88
         10       0.96      0.91      0.94        82
         11       0.99      0.98      0.98        86
         12       0.93      0.96      0.94        80
         13       0.98      1.00      0.99        80
         14       0.89      0.96      0.92        82
         15       0.94      0.90      0.92        82
         16       0.94      0.99      0.96        82
         17       1.00      0.99      0.99        80
         18       0.97      0.97      0.97   

In [109]:
# Instance-based (paired-image) predictions on the validation set, followed by
# the same per-class report for comparison. This overwrites y_val_pred.
y_val_pred = m.predict_instance_validation(
    model_weights=(
        'saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02'
        '-valacc0.94-valloss0.24.hdf5'
    )
)

print(classification_report(y_true=y_validation, y_pred=y_val_pred))


Instance-based predictions...
Creating model...
Loading weights from saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5...
Compiling...
Found 70208 images belonging to 29 classes.
  [########                                          ] 15%

  [#########################                         ] 50% ... predictions done
             precision    recall  f1-score   support

          1       1.00      1.00      1.00       116
          2       0.91      0.88      0.89        96
          3       0.90      0.98      0.94        94
          4       1.00      1.00      1.00        92
          5       0.98      1.00      0.99        88
          6       1.00      0.93      0.97        92
          7       0.91      0.91      0.91        92
          8       0.81      0.86      0.84        88
          9       0.98      1.00      0.99        88
         10       0.98      0.98      0.98        82
         11       1.00      1.00      1.00        86
         12       0.93      0.97      0.95        80
         13       0.98      1.00      0.99        80
         14       0.89      0.95      0.92        82
         15       0.97      0.95      0.96        82
         16       1.00      1.00      1.00        82
         17       

In [0]:
# Instance-based predictions on the test set.
y_test_pred = m.predict_instance_test(
    model_weights=(
        'saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02'
        '-valacc0.94-valloss0.24.hdf5'
    )
)

Instance-based predictions...
Creating model...
Loading weights from saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5...
Compiling...
Found 70208 images belonging to 29 classes.
Image count=3460      instance_pairs count=1730
  [                                                  ] 0%Path1=Test/TestImages/Image1.jpg    Path2=Test/TestImages/Image2.jpg
  [                                                  ] 0%Path1=Test/TestImages/Image3.jpg    Path2=Test/TestImages/Image4.jpg
  [                                                  ] 0%Path1=Test/TestImages/Image5.jpg    Path2=Test/TestImages/Image6.jpg
  [                                                  ] 0%Path1=Test/TestImages/Image7.jpg    Path2=Test/TestImages/Image8.jpg
  [                                                  ] 0%Path1=Test/TestImages/Image9.jpg    Path2=Test/TestImages/Image10.jpg
  [                                                  ] 0%Path1=Test/TestImages/Image11.jpg    Pat

  [#######                                           ] 14%

In [0]:
# Name the submission file after the checkpoint that produced it.
test_pred_file_path = (
    'test-pred-model-15-inception-v3-take3-epoch55-acc1.00-loss0.02'
    '-valacc0.94-valloss0.24-instance-based.csv'
)
ModelBase.write_predictions(predictions=y_test_pred, file_name=test_pred_file_path)

In [0]:

# Hand the predictions CSV to the browser for download.
files.download(test_pred_file_path)

In [117]:
# Inspect the test predictions.
# NOTE(review): the recorded output ends in zeros, and 0 is the initial value
# of unfilled slots — confirm the prediction run completed before submitting.
y_test_pred

array([1, 1, 1, ..., 0, 0, 0], dtype=int8)

In [0]:
# Collect (image number, path) tuples for all validation images.
image_paths = m._get_validation_image_paths()

In [50]:
# Sort it in-place
image_paths.sort(key=lambda tup: tup[0])

instance_pairs = zip(*[iter(image_paths)]*2)

for i, ((img1_num, img1_path),(img2_num, img2_path)) in enumerate(instance_pairs):
  print('i={}  {} {}'.format(i, img1_num, img2_num))
  if i > 10:
    break

i=0  1 2
i=1  3 4
i=2  5 6
i=3  7 8
i=4  9 10
i=5  11 12
i=6  13 14
i=7  15 16
i=8  17 18
i=9  19 20
i=10  21 22
i=11  23 24


In [44]:
# Peek at the first ten (image number, path) entries.
image_paths[:10]

[(1, 'Validation/ValidationImages/1/Image1.jpg'),
 (2, 'Validation/ValidationImages/1/Image2.jpg'),
 (3, 'Validation/ValidationImages/1/Image3.jpg'),
 (4, 'Validation/ValidationImages/1/Image4.jpg'),
 (5, 'Validation/ValidationImages/1/Image5.jpg'),
 (6, 'Validation/ValidationImages/1/Image6.jpg'),
 (7, 'Validation/ValidationImages/1/Image7.jpg'),
 (8, 'Validation/ValidationImages/1/Image8.jpg'),
 (9, 'Validation/ValidationImages/1/Image9.jpg'),
 (10, 'Validation/ValidationImages/1/Image10.jpg')]

In [0]:
# Both zip arguments are the same iterator, so each step yields two consecutive
# entries of image_paths.
paths_iter = iter(image_paths)
instance_pairs = zip(paths_iter, paths_iter)

In [0]:
# Materialise the pair iterator so it can be indexed and reused.
t2 = [pair for pair in instance_pairs]

In [46]:
# Inspect one pair to confirm consecutive image numbers are grouped.
t2[10]

((21, 'Validation/ValidationImages/1/Image21.jpg'),
 (22, 'Validation/ValidationImages/1/Image22.jpg'))

In [54]:
# Check which weight files are available.
%ls saved_weights/

model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5
myfile.bin
sentinel


In [55]:
# Build the network, load the uploaded checkpoint and compile it.
m.load_model(
    'saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02'
    '-valacc0.94-valloss0.24.hdf5'
)

Creating model...
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Loading weights from saved_weights/model-15-inception-v3-take3-epoch55-acc1.00-loss0.02-valacc0.94-valloss0.24.hdf5...
Compiling...


<keras.engine.training.Model at 0x7f84cdb27710>

In [0]:
# Scratch check of the pair-voting rule on the first ten validation pairs.
# The agreed class indices are computed but not stored anywhere: mapping them
# to labels and writing them into a predictions array is left out here.
for i, ((img1_num, img1_path), (img2_num, img2_path)) in enumerate(t2[:10]):
    img1_data, img2_data = m._load_image(img1_path), m._load_image(img2_path)
    img1_pred, img2_pred = m.model.predict(img1_data), m.model.predict(img2_data)

    img1_pred_index = img1_pred.argmax(axis=1)[0]
    img2_pred_index = img2_pred.argmax(axis=1)[0]

    # Nothing to reconcile when both views agree.
    if img1_pred_index == img2_pred_index:
        continue

    # The views disagree: the more confident prediction wins for both.
    img1_highest_score = img1_pred.max(axis=1)[0]
    img2_highest_score = img2_pred.max(axis=1)[0]
    if img1_highest_score > img2_highest_score:
        img2_pred_index = img1_pred_index
    else:
        img1_pred_index = img2_pred_index