In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from PIL import Image
from scipy.misc import imread

import tensorflow as tf

# sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# from keras import backend as K
# K.set_session(sess)

import os
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
# warnings.filterwarnings("ignore", category=UserWarning)
# warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
import sys
# Put ./utils on the import path; presumably this is where the project-local
# modules imported further down (load_labels, partition_data, model_parameter,
# image_preprocessor, generators) live -- TODO confirm.
sys.path.append('./utils')

In [3]:
# Training image directory; handed to ModelParameter and PredictGenerator below.
train_path = "./dataset/train/"

In [4]:
from load_labels import LoadLabels

# Load the label table and the label names from train.csv. Later cells treat
# train_labels as a DataFrame with "Id" and "Target" columns plus the per-class
# target columns (everything left after dropping those two).
train_labels, label_names = LoadLabels('./dataset/train.csv').load_labels()

In [5]:
from partition_data import PartitionData
# Helper that builds the train/validation splits from the label table.
partitionData = PartitionData(train_labels)

In [6]:
# Build the cross-validation partitions. Each entry is indexed below as
# partition['train'] / partition['validation'] (arrays of image IDs).
# NOTE(review): the positional arguments are presumably
# (n_splits=3, n_repeats=1, random_state=420) -- confirm against PartitionData.
partitions = partitionData.RepeatedMultilabelStratifiedKFold(3,1,420)

In [None]:
from model_parameter import ModelParameter
# Shared configuration object read by the preprocessor, the batch generator
# (batch_size, num_classes) and the model (n_epochs, batch_size, ...).
# NOTE(review): this cell carries no execution count in the saved notebook;
# re-run it after a kernel restart before any cell that uses `parameter`.
parameter = ModelParameter(train_path, shuffle=True, batch_size=3, n_channels=3, n_epochs=5, row_scale_factor=2, col_scale_factor=2)

In [8]:
import mahotas as mt
from image_preprocessor import ImagePreprocessor
# from scipy.misc import imread
# from skimage.transform import resize
# from skimage.color import rgb2grey

# class ExtractImageFeatures:
#     def __init__(self, modelparameter):
#         self.parameter = modelparameter
#         self.basepath = self.parameter.basepath
#         self.scaled_row_dim = self.parameter.scaled_row_dim
#         self.scaled_col_dim = self.parameter.scaled_col_dim
#         self.n_channels = self.parameter.n_channels
        
#     def resize(self, image):
#         image = resize(image, (self.scaled_row_dim, self.scaled_col_dim))
#         return image
    
#     def normalize(self, image):
#         image /= 255 
#         return image
        
#     def preprocess(self, image_id):
#         image = self.load_image(image_id)
#         image = self.resize(image)
#         textures = mt.features.haralick(image.astype(int))
#         ht_mean = textures.mean(axis=0)
#         return ht_mean
    
#     def load_image(self, image_id):
#         image = np.zeros(shape=(512,512,3))
#         image[:,:,2] = imread(self.basepath + image_id + "_blue" + ".png")
#         image[:,:,1] = imread(self.basepath + image_id + "_green" + ".png")
#         image[:,:,0] = imread(self.basepath + image_id + "_red" + ".png")
#         return image

class ExtractImageFeatures(ImagePreprocessor):
    """Preprocessor that turns an image ID into an averaged Haralick feature vector.

    Image loading and resizing are inherited from ``ImagePreprocessor``; only
    ``preprocess`` is specialised here.
    """

    def __init__(self, modelparameter):
        """Pass ``modelparameter`` straight through to ``ImagePreprocessor``."""
        super().__init__(modelparameter)

    def preprocess(self, image_id):
        """Load and resize the image for ``image_id`` and return its texture features.

        The resized image is cast to ``int`` and fed to mahotas'
        ``features.haralick``; the returned rows are then averaged along
        axis 0, so a single 1-D feature vector comes back.
        """
        resized_image = self.resize(self.load_image(image_id))
        # The inherited normalize() step is deliberately left out here; the
        # pixel values are only truncated to integers before feature extraction.
        haralick_rows = mt.features.haralick(resized_image.astype(int))
        return haralick_rows.mean(axis=0)
    
# Notebook-wide feature extractor instance, configured from `parameter`.
preprocessor = ExtractImageFeatures(parameter)

In [60]:
# Sanity check: feature vector of the first training image in partition 0.
# NOTE(review): this cell's execution count (60) is out of sequence with its
# neighbours; verify the notebook still runs top to bottom on a fresh kernel.
print(preprocessor.preprocess(partitions[0]['train'][0]))

[ 2.70222072e-02  1.79288236e+02  1.21474270e-01  1.03004597e+02
  2.23841041e-01  1.45070397e+01  2.32730151e+02  5.25336929e+00
  7.94173351e+00  2.58551633e-04  4.55324115e+00 -5.12316001e-02
  5.03556360e-01]


In [9]:
def get_targets_per_image(identifier, labels):
    """Return the target columns of the row(s) in ``labels`` whose ``Id`` matches.

    Parameters
    ----------
    identifier : str
        Image ID to look up in the ``Id`` column.
    labels : pandas.DataFrame
        Label table containing ``Id`` and ``Target`` columns plus the
        per-class target columns.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per matching ``Id`` (zero rows if none match)
        and one column per remaining target column.
    """
    id_matches = labels["Id"] == identifier
    matching_rows = labels.loc[id_matches]
    # Strip the bookkeeping columns so only the class indicators remain.
    class_columns_only = matching_rows.drop(columns=["Id", "Target"])
    return class_columns_only.values

In [10]:
# generator function to return images batchwise
def generator(listIDs, labels, modelparameter, imagepreprocessor):
    """Yield ``(features, targets)`` batches forever, reshuffling the IDs on every pass.

    Parameters
    ----------
    listIDs : numpy.ndarray
        1-D array of image identifiers (``.shape[0]`` and integer indexing are used).
    labels : pandas.DataFrame
        Label table forwarded to ``get_targets_per_image``.
    modelparameter : object
        Must expose ``batch_size`` and ``num_classes``.
    imagepreprocessor : object
        Must expose ``preprocess(image_id)`` returning a 1-D feature vector
        of the same length for every image.

    Yields
    ------
    tuple of numpy.ndarray
        ``x_train`` with dtype float32 and shape ``(n, n_features)`` and
        ``y_train`` with dtype int and shape ``(n, num_classes)``. ``n`` equals
        ``batch_size`` except for the last batch of a pass, which holds only
        the remaining samples.

    Raises
    ------
    ValueError
        If ``listIDs`` is empty (otherwise the loop would spin forever
        without ever yielding).
    """
    if listIDs.shape[0] == 0:
        raise ValueError("generator() needs at least one image ID")

    while True:
        # Fresh random order for every pass over the data.
        indices_arr = np.random.permutation(listIDs.shape[0])
        for batch in range(0, len(indices_arr), modelparameter.batch_size):
            # Slice out the current batch; the last slice of a pass may be short.
            current_batch = indices_arr[batch:(batch + modelparameter.batch_size)]

            # Use the preprocessor that was passed in (not a notebook global) and
            # let the feature width follow whatever it returns.
            x_train = np.asarray(
                [imagepreprocessor.preprocess(listIDs[ind]) for ind in current_batch],
                dtype=np.float32,
            )

            # Size the target array by the real batch length so a short final
            # batch never carries uninitialised rows from np.empty.
            y_train = np.empty((len(current_batch), modelparameter.num_classes), dtype=int)
            for i, ind in enumerate(current_batch):
                y_train[i] = get_targets_per_image(listIDs[ind], labels)

            yield (x_train, y_train)

In [11]:
# Batch generator over the training IDs of the first partition (smoke test).
genrtor = generator(partitions[0]['train'], train_labels, parameter, preprocessor)

In [13]:
# Pull a single (features, targets) batch to inspect its contents.
next(genrtor)

(array([[ 2.33969793e-01,  1.62238708e+03,  4.69740510e-01,
          1.51661755e+03,  5.19838333e-01,  3.45970993e+01,
          4.44408301e+03,  4.75413465e+00,  6.39355183e+00,
          9.50294023e-04,  4.23160696e+00, -1.41580105e-01,
          6.95594549e-01],
        [ 3.64103377e-01,  1.93865979e+03,  3.02991211e-01,
          1.34007764e+03,  6.29073918e-01,  3.10234032e+01,
          3.42165063e+03,  3.95687437e+00,  5.59849024e+00,
          1.44431018e-03,  3.62597919e+00, -1.79732561e-01,
          7.78283536e-01],
        [ 1.00476943e-01,  1.11005591e+03,  2.88831562e-01,
          7.48847168e+02,  3.77856255e-01,  2.56556911e+01,
          1.88533276e+03,  5.33714867e+00,  7.43940067e+00,
          4.80882067e-04,  4.77626705e+00, -1.17032036e-01,
          6.87863052e-01],
        [ 3.94452721e-01,  1.04785559e+03,  3.72241497e-01,
          8.17848511e+02,  6.78906560e-01,  2.02883625e+01,
          2.22353833e+03,  3.41546512e+00,  4.46876144e+00,
          1.5824999

In [20]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.losses import binary_crossentropy
from keras.optimizers import Adadelta
from keras.initializers import VarianceScaling

class FeatureBaseLineModel:
    """Small fully connected Keras baseline trained on per-image feature vectors.

    The network takes a 13-element input vector (the length of the feature
    vector produced by the extractor in this notebook) and ends in a sigmoid
    layer with ``num_classes`` units.
    """

    def __init__(self, modelparameter, metrics):
        """Store the configuration used to build and train the network.

        Parameters
        ----------
        modelparameter : object
            Configuration object. This class reads ``use_multi_gpu``,
            ``num_classes``, ``scaled_row_dim``, ``scaled_col_dim``,
            ``n_channels``, ``n_epochs`` and ``batch_size`` from it.
        metrics : list
            Forwarded to ``model.compile(metrics=...)``.
        """
        self.params = modelparameter
        self.use_multi_gpu = self.params.use_multi_gpu
        self.num_classes = self.params.num_classes
        self.img_rows = self.params.scaled_row_dim
        self.img_cols = self.params.scaled_col_dim
        self.n_channels = self.params.n_channels
        # One input per extracted feature, not per pixel.
        self.input_shape = (13,)
        self.my_metrics = metrics

    def build_model(self):
        """Create the 64 -> 13 -> 5 -> ``num_classes`` dense network in ``self.model``."""
        self.model = Sequential()
        self.model.add(Dense(64, kernel_initializer=VarianceScaling(seed=0), activation='relu', input_shape=self.input_shape))
        self.model.add(Dense(13, kernel_initializer=VarianceScaling(seed=0), activation='relu'))
        self.model.add(Dense(5, kernel_initializer=VarianceScaling(seed=0), activation='relu'))
        self.model.add(Dense(self.num_classes, activation='sigmoid'))

    # NOTE(review): the output layer is a per-class sigmoid, which normally pairs
    # with binary_crossentropy (imported in this cell but unused); confirm that
    # categorical_crossentropy is really the intended default loss.
    # NOTE(review): the default optimizer instance is created once, when the class
    # is defined, and is therefore shared by every call that relies on the default.
    def compile_model(self, loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta()):
        """Compile ``self.model`` with the given loss, optimizer and the stored metrics."""
        self.model.compile(loss=loss,
              optimizer=optimizer,
              metrics=self.my_metrics)

    def set_generators(self, train_generator, validation_generator, tr_sz, vl_sz):
        """Register the data generators and the sample counts of both splits.

        ``tr_sz`` / ``vl_sz`` are the number of training / validation samples;
        they are used to derive how many batches make up one epoch.
        """
        self.training_generator = train_generator
        self.validation_generator = validation_generator
        self.tr_sz = tr_sz
        self.vl_sz = vl_sz

    def _steps(self, n_samples):
        """Return the number of full batches in ``n_samples``, but never fewer than one."""
        return max(1, n_samples // self.params.batch_size)

    def learn(self, use_multiprocessing=False, workers= 24):
        """Fit the model on the registered generators and return the Keras ``History``.

        NOTE(review): plain Python generators cannot be advanced from several
        threads at once; confirm the default ``workers=24`` is safe for the
        generators that are passed in.
        """
        return self.model.fit_generator(generator=self.training_generator,
                    validation_data=self.validation_generator,
                    epochs=self.params.n_epochs,
                    use_multiprocessing=use_multiprocessing,
                    workers=workers, verbose=1,
                    steps_per_epoch=self._steps(self.tr_sz),
                    validation_steps=self._steps(self.vl_sz))

    def score(self, use_multiprocessing=False, workers=24):
        """Evaluate the model on the registered validation generator.

        Keras can only infer the number of batches from a
        ``keras.utils.Sequence``; for any other generator (such as an endless
        Python generator) an explicit ``steps`` value is supplied, derived the
        same way as ``validation_steps`` in ``learn``.
        """
        if isinstance(self.validation_generator, keras.utils.Sequence):
            steps = None
        else:
            steps = self._steps(self.vl_sz)
        return self.model.evaluate_generator(generator=self.validation_generator,
                                      steps=steps,
                                      use_multiprocessing=use_multiprocessing,
                                      workers=workers)

    def predict(self, predict_generator):
        """Delegate prediction to ``predict_generator.predict`` with the current model."""
        y = predict_generator.predict(self.model)
        return y

    def save(self, modeloutputpath):
        """Persist the current Keras model to ``modeloutputpath``."""
        self.model.save(modeloutputpath)

    def load(self, modelinputpath):
        """Replace ``self.model`` with the model stored at ``modelinputpath``."""
        # Referenced through the keras package: `load_model` itself is never
        # imported in this notebook, so the bare name raised NameError.
        self.model = keras.models.load_model(modelinputpath)

In [21]:
# Build and compile the feature baseline with the default loss/optimizer,
# tracking accuracy, then print the layer summary.
model_feature = FeatureBaseLineModel(parameter, ['accuracy'])
model_feature.build_model()
model_feature.compile_model()
model_feature.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 64)                896       
_________________________________________________________________
dense_6 (Dense)              (None, 13)                845       
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 70        
_________________________________________________________________
dense_8 (Dense)              (None, 28)                168       
Total params: 1,979
Trainable params: 1,979
Non-trainable params: 0
_________________________________________________________________


In [22]:
# from ipdb import set_trace
from generators import PredictGenerator
def train_feature_model(model, partitions, train_labels, train_path, parameter, preprocessor):
    """Train ``model`` on every partition in turn and collect validation predictions.

    Parameters
    ----------
    model : object
        Must expose ``set_generators``, ``learn`` and ``predict``.
    partitions : iterable
        Each item is indexed with ``'train'`` and ``'validation'`` to obtain
        arrays of image IDs.
    train_labels : pandas.DataFrame
        Label table with ``Id`` and ``Target`` columns plus the class columns.
    train_path : str
        Forwarded to ``PredictGenerator``.
    parameter : object
        Model parameters forwarded to the batch generators.
    preprocessor : object
        Image preprocessor forwarded to the generators.

    Returns
    -------
    tuple of (list, list)
        One predictions DataFrame (indexed by validation ID, one column per
        class) and one ``learn()`` result per partition.

    Notes
    -----
    The same ``model`` object is trained on each partition; it is not rebuilt
    or re-initialised between partitions inside this function.
    """
    # Everything except the bookkeeping columns is a class column.
    target_names = train_labels.drop(["Target", "Id"], axis=1).columns

    predictions, histories = [], []

    for fold_number, partition in enumerate(partitions, start=1):
        print("training in partition ", fold_number)

        train_ids = partition['train']
        validation_ids = partition['validation']

        training_generator = generator(train_ids, train_labels, parameter, preprocessor)
        validation_generator = generator(validation_ids, train_labels, parameter, preprocessor)
        predict_generator = PredictGenerator(validation_ids, preprocessor, train_path)

        model.set_generators(
            training_generator,
            validation_generator,
            train_ids.shape[0],
            validation_ids.shape[0],
        )
        histories.append(model.learn())

        # Label the raw prediction matrix with image IDs and class names.
        fold_predictions = pd.DataFrame(
            model.predict(predict_generator),
            index=validation_ids,
            columns=target_names,
        )
        predictions.append(fold_predictions)

    return predictions, histories

In [None]:
# Train the feature baseline across all partitions; returns one predictions
# DataFrame and one training history per partition.
pred_feature_model, hist_feature_model = train_feature_model(model_feature, partitions, train_labels, train_path, parameter, preprocessor)

training in partition  1
Epoch 1/5


In [72]:
# Record the Keras version this notebook was run with.
keras.__version__

'2.2.4'