In [18]:
import os
import numpy as np
import pandas as pd 
import random
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import tensorflow as tf
import seaborn as sns

seed = 232
np.random.seed(seed)
tf.random.set_seed(seed)

In [19]:
img_size = 150
labels = ["NORMAL","PNEUMONIA"]

def data_prep(data_dir):
    data =[]
    counter = 0
    data1=[]
    data2=[]
    for label in labels:
        path = os.path.join(data_dir, label)
        class_num = labels.index(label)
        for img in os.listdir(path):
#             print(os.path.join(path,img))
#             clear_output(wait=True)
            if 'test' in path:
                if(img.endswith('.jpeg')):
                    
                    img_arr = cv2.imread(os.path.join(path,img),cv2.IMREAD_COLOR)
                    resize_arr = cv2.resize(img_arr,(img_size,img_size))
                    data.append([resize_arr,class_num])
                    if counter%2==0:
                        data1.append([resize_arr,class_num])
                    else:
                        data2.append(([resize_arr,class_num]))
                    counter+=1
            if(img.endswith('.jpeg')):
                img_arr = cv2.imread(os.path.join(path,img),cv2.IMREAD_COLOR)
                resize_arr = cv2.resize(img_arr,(img_size,img_size))
                data.append([resize_arr,class_num])
    if 'test' in path:
        return np.array(data1), np.array(data2)
    else: 
        return np.array(data)

In [20]:
train = data_prep('../input/chest-xray-pneumonia//chest_xray/chest_xray/train')
test1, test2 = data_prep('../input/chest-xray-pneumonia//chest_xray/chest_xray/test')
val = data_prep('../input/chest-xray-pneumonia//chest_xray/chest_xray/val')



In [21]:
x_train = []
y_train = []

x_test = []
y_test = []

x_test2 = []
y_test2 = []

x_val = []
y_val = []

for features, label in train:
    x_train.append(features)
    y_train.append(label)
    
for features, label in test1:
    x_test.append(features)
    y_test.append(label)

for features,label in test2:
    x_test2.append(features)
    y_test2.append(label)
    
for features, label in val:
    x_val.append(features)
    y_val.append(label)

In [22]:
# Normalizing the data
x_train = np.array(x_train) / 255
x_test = np.array(x_test) / 255
x_test2 = np.array(x_test2)/255
x_val = np.array(x_val) / 255

In [23]:
# Reshaping the data
# Note: using greyscale image that is why used 1 if it was RGB image then it would be 3

x_train = x_train.reshape(-1,img_size,img_size,3)
y_train = np.array(y_train)
y_train = y_train.reshape(-1,1)

x_test = x_test.reshape(-1,img_size,img_size,3)
y_test = np.array(y_test)
y_test = y_test.reshape(-1,1)

x_test2 = x_test2.reshape(-1,img_size,img_size,3)
y_test2 = np.array(y_test2)
y_test2 = y_test2.reshape(-1,1)

x_val = x_val.reshape(-1,img_size,img_size,3)
y_val = np.array(y_val)
y_val = y_val.reshape(-1,1)

In [24]:
datagen = ImageDataGenerator()
        # featurewise_center=False,  # set input mean to 0 over the dataset
        # samplewise_center=False,  # set each sample mean to 0
        # featurewise_std_normalization=False,  # divide inputs by std of the dataset
        # samplewise_std_normalization=False,  # divide each input by its std
        # zca_whitening=False,  # apply ZCA whitening
        # rotation_range = 30,  # randomly rotate images in the range (degrees, 0 to 180)
        # zoom_range = 0.2, # Randomly zoom image 
        # width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        # height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        # horizontal_flip = True,  # randomly flip images
        # vertical_flip=False)  # randomly flip images


datagen.fit(x_train)

In [25]:
img_dims = 150
inputs = Input(shape=(img_dims, img_dims, 3))

# 1 blok konwolucyjny
x = Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
x = Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = MaxPool2D(pool_size=(2, 2))(x)

# 2 blok konwolucyjny
x = SeparableConv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)
# 3 blok konwolucyjny
x = SeparableConv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)


# 4 blok konwolucyjny
x = SeparableConv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Dropout(rate=0.2)(x)

# 5 blok konwolucyjny
x = SeparableConv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Dropout(rate=0.2)(x)

# Warstwa spłaszczająca
x = Flatten()(x)
x = Dense(units=512, activation='relu')(x)
x = Dropout(rate=0.7)(x)
x = Dense(units=128, activation='relu')(x)
x = Dropout(rate=0.5)(x)
x = Dense(units=64, activation='relu')(x)
x = Dropout(rate=0.3)(x)

# Output layer
output = Dense(units=1, activation='sigmoid')(x)

# Creating model and compiling
model_1 = Model(inputs=inputs, outputs=output)
model_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
checkpoint = ModelCheckpoint(filepath='best_weights.hdf5', save_best_only=True, save_weights_only=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=2, mode='max')
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=1, mode='min')

In [26]:
STEPS = len(x_train) / 32
hist = model_1.fit(
           datagen.flow(x_train,y_train,batch_size = 32), steps_per_epoch=STEPS, 
           epochs=10, validation_data=datagen.flow(x_test2, y_test2),
              callbacks=[checkpoint, lr_reduce])
#datagen.flow(x_train,y_train,batch_size = 32), steps_per_epoch=STEPS, batch_size = 32, epochs=10, validation_data = datagen.flow(x_val, y_val),

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 6/10
Epoch 7/10

Epoch 00007: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 8/10
Epoch 9/10

Epoch 00009: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 10/10


In [27]:
model_1.evaluate(x_test, y_test)



[1.5140011310577393, 0.7756410241127014]

In [28]:
model_1.save('model_1.tf')

In [29]:
"""
ResNet-18
Reference:
[1] K. He et al. Deep Residual Learning for Image Recognition. CVPR, 2016
[2] K. He, X. Zhang, S. Ren, and J. Sun. Delving deep into rectifiers:
Surpassing human-level performance on imagenet classification. In
ICCV, 2015.
"""


from keras.callbacks import EarlyStopping
from keras.layers import Dense, Conv2D,  MaxPool2D, Flatten, GlobalAveragePooling2D,  BatchNormalization, Layer, Add
from keras.models import Sequential
from keras.models import Model
import tensorflow as tf


class ResnetBlock(Model):
    """
    A standard resnet block.
    """

    def __init__(self, channels: int, down_sample=False):
        """
        channels: same as number of convolution kernels
        """
        super().__init__()

        self.__channels = channels
        self.__down_sample = down_sample
        self.__strides = [2, 1] if down_sample else [1, 1]

        KERNEL_SIZE = (3, 3)
        # use He initialization, instead of Xavier (a.k.a 'glorot_uniform' in Keras), as suggested in [2]
        INIT_SCHEME = "he_normal"

        self.conv_1 = Conv2D(self.__channels, strides=self.__strides[0],
                             kernel_size=KERNEL_SIZE, padding="same", kernel_initializer=INIT_SCHEME)
        self.bn_1 = BatchNormalization()
        self.conv_2 = Conv2D(self.__channels, strides=self.__strides[1],
                             kernel_size=KERNEL_SIZE, padding="same", kernel_initializer=INIT_SCHEME)
        self.bn_2 = BatchNormalization()
        self.merge = Add()

        if self.__down_sample:
            # perform down sampling using stride of 2, according to [1].
            self.res_conv = Conv2D(
                self.__channels, strides=2, kernel_size=(1, 1), kernel_initializer=INIT_SCHEME, padding="same")
            self.res_bn = BatchNormalization()

    def call(self, inputs):
        res = inputs

        x = self.conv_1(inputs)
        x = self.bn_1(x)
        x = tf.nn.relu(x)
        x = self.conv_2(x)
        x = self.bn_2(x)

        if self.__down_sample:
            res = self.res_conv(res)
            res = self.res_bn(res)

        # if not perform down sample, then add a shortcut directly
        x = self.merge([x, res])
        out = tf.nn.relu(x)
        return out


class ResNet18(Model):

    def __init__(self, num_classes, **kwargs):
        """
            num_classes: number of classes in specific classification task.
        """
        super().__init__(**kwargs)
        self.conv_1 = Conv2D(64, (7, 7), strides=2,
                             padding="same", kernel_initializer="he_normal")
        self.init_bn = BatchNormalization()
        self.pool_2 = MaxPool2D(pool_size=(2, 2), strides=2, padding="same")
        self.res_1_1 = ResnetBlock(64)
        self.res_1_2 = ResnetBlock(64)
        self.res_2_1 = ResnetBlock(128, down_sample=True)
        self.res_2_2 = ResnetBlock(128)
        self.res_3_1 = ResnetBlock(256, down_sample=True)
        self.res_3_2 = ResnetBlock(256)
        self.res_4_1 = ResnetBlock(512, down_sample=True)
        self.res_4_2 = ResnetBlock(512)
        self.avg_pool = GlobalAveragePooling2D()
        self.flat = Flatten()
        self.fc = Dense(num_classes, activation="sigmoid")

    def call(self, inputs):
        out = self.conv_1(inputs)
        out = self.init_bn(out)
        out = tf.nn.relu(out)
        out = self.pool_2(out)
        for res_block in [self.res_1_1, self.res_1_2, self.res_2_1, self.res_2_2, self.res_3_1, self.res_3_2, self.res_4_1, self.res_4_2]:
            out = res_block(out)
        out = self.avg_pool(out)
        out = self.flat(out)
        out = self.fc(out)
        return out

In [30]:
model_2 = ResNet18(1)
model_2.build(input_shape = (None,150,150,3))
model_2.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])
model_2.summary()

Cause: mangled names are not yet supported
Model: "res_net18_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           multiple                  9472      
_________________________________________________________________
batch_normalization_24 (Batc multiple                  256       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 multiple                  0         
_________________________________________________________________
resnet_block_8 (ResnetBlock) multiple                  74368     
_________________________________________________________________
resnet_block_9 (ResnetBlock) multiple                  74368     
_________________________________________________________________
resnet_block_10 (ResnetBlock multiple                  231296    
_________________________________________________________________
resnet_block

In [47]:
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience = 2, verbose=1,factor=0.3, min_lr=0.000001)
STEPS = len(x_train) / 32
# hist = model_2.fit(
#            datagen.flow(x_train,y_train,batch_size = 32), steps_per_epoch=STEPS, 
#            epochs=10, validation_data=datagen.flow(x_test2, y_test2),
#               callbacks=[learning_rate_reduction])
hist = model_2.fit(
           train_gen, steps_per_epoch=train_gen.samples // batch_size, 
           epochs=epochs, validation_data=test_gen,
           validation_steps=test_gen.samples // batch_size, callbacks=[checkpoint, lr_reduce])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Epoch 00010: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.


In [48]:
model_2.evaluate(x_test, y_test)




[0.3083782494068146, 0.8974359035491943]

In [49]:
model_2.save('model_2.tf')



In [None]:
model_1.save('model_1.tf')
model_2.save('model_2.tf')

In [51]:
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from keras.models import load_model
from tensorflow.keras.utils import to_categorical
from numpy import dstack
 
# load models from file
def load_all_models(n_models):
	all_models = list()
	for i in range(n_models):
		# define filename for this ensemble
		filename = 'model_' + str(i + 1) + '.tf'
		# load model from file
		model = load_model(filename)
		# add to list of members
		all_models.append(model)
		print('>loaded %s' % filename)
	return all_models
 
# create stacked model input dataset as outputs from the ensemble
def stacked_dataset(members, inputX):
	stackX = None
	for model in members:
		# make prediction
		yhat = model.predict(inputX, verbose=0)
		# stack predictions into [rows, members, probabilities]
		if stackX is None:
			stackX = yhat
		else:
			stackX = dstack((stackX, yhat))
	# flatten predictions to [rows, members x probabilities]
	stackX = stackX.reshape((stackX.shape[0], stackX.shape[1]*stackX.shape[2]))
	return stackX
 
# fit a model based on the outputs from the ensemble members
def fit_stacked_model(members, inputX, inputy):
	# create dataset using ensemble
	stackedX = stacked_dataset(members, inputX)
	# fit standalone model
	model = LogisticRegression()
	model.fit(stackedX, inputy)
	return model
 
# make a prediction with the stacked model
def stacked_prediction(members, model, inputX):
	# create dataset using ensemble
	stackedX = stacked_dataset(members, inputX)
	# make a prediction
	yhat = model.predict(stackedX)
	return yhat
 
# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)
# split into train and test
# n_train = 100
# trainX, testX = X[:n_train, :], X[n_train:, :]
# trainy, testy = y[:n_train], y[n_train:]
# print(trainX.shape, testX.shape)
trainX,testX = x_train, x_test
trainy, testy = y_train, y_test
# load all models
n_members = 3
members = load_all_models(n_members)
print('Loaded %d models' % len(members))
# evaluate standalone models on test dataset


>loaded model_1.tf
>loaded model_2.tf
>loaded model_3.tf
Loaded 3 models


In [52]:
# y_train = np.asarray(y_train).astype('float32').reshape((-1,1))
# y_test = np.asarray(y_test).astype('float32').reshape((-1,1))
for model in members:
	testy_enc = to_categorical(testy, num_classes=2)
	_, acc = model.evaluate(testX, testy)
	print('Model Accuracy: %.3f' % acc)
# fit stacked model using the ensemble
model = fit_stacked_model(members, testX, testy)
# evaluate model on test set
yhat = stacked_prediction(members, model, testX)
acc = accuracy_score(testy, yhat)
print('Stacked Test Accuracy: %.3f' % acc)

Model Accuracy: 0.776
Model Accuracy: 0.897
Model Accuracy: 0.942


  y = column_or_1d(y, warn=True)


Stacked Test Accuracy: 0.952


In [36]:

def data_aug(img_dimensions, batch_size, class_mode):
    #manipulacja danymi
    #vertical_flip=True
    train_datagen = ImageDataGenerator(rescale=1./255, zoom_range=0.3, rotation_range=10,width_shift_range=0.2, height_shift_range=0.2,brightness_range=[0.4,1.5])
    test_val_datagen = ImageDataGenerator(rescale=1./255)
    
    # generator danych jest wejściem do sieci
    #generator danych treningowych 
    train_gen = train_datagen.flow_from_directory(
    directory=input_path+'train', 
    target_size=(img_dimensions, img_dimensions), 
    batch_size=batch_size, 
    class_mode=class_mode, 
    shuffle=True)

    #generator danych testowych
    test_gen = test_val_datagen.flow_from_directory(
    directory=input_path+'test', 
    target_size=(img_dimensions, img_dimensions), 
    batch_size=batch_size, 
    class_mode=class_mode, 
    shuffle=True)
    
    # zbiór testowy do obliczania metryk
    test_data = []
    test_labels = []
    

    for cond in ['/NORMAL/', '/PNEUMONIA/']:
        for img in (os.listdir(input_path + 'test' + cond)):
            img = plt.imread(input_path+'test'+cond+img)
            img = cv2.resize(img, (img_dimensions, img_dimensions))
            img = np.dstack([img, img, img])
            img = img.astype('float32') / 255
            if cond=='/NORMAL/':
                label = 0
            elif cond=='/PNEUMONIA/':
                label = 1
            test_data.append(img)
            test_labels.append(label)
        
    test_data = np.array(test_data)
    test_labels = np.array(test_labels)
    return train_gen, test_gen, test_data, test_labels

In [42]:
input_path = '../input/chest-xray-pneumonia//chest_xray/chest_xray/'
img_dims = 150
epochs = 10
batch_size = 32
train_gen, test_gen, test_data, test_labels = data_aug(img_dims, batch_size, "binary")

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [40]:
model_3 = Model(inputs=inputs, outputs=output)
model_3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
checkpoint = ModelCheckpoint(filepath='best_weights.hdf5', save_best_only=True, save_weights_only=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=2, mode='max')
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=1, mode='min')

In [43]:
hist = model_3.fit(
           train_gen, steps_per_epoch=train_gen.samples // batch_size, 
           epochs=epochs, validation_data=test_gen,
           validation_steps=test_gen.samples // batch_size, callbacks=[checkpoint, lr_reduce])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 5/10
Epoch 6/10

Epoch 00006: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 7/10
Epoch 8/10

Epoch 00008: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 9/10
Epoch 10/10

Epoch 00010: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.


In [44]:
model_3.save('model_3.tf')