**CHEST X-RAY IMAGE CLASSIFICATION — ADVANCED DATA SCIENCE CAPSTONE PROJECT**

Paolo Cavadini, February 2021.

Dataset
https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia

Find on GitHub: https://github.com/pcavad/capstone_x_rays

In [None]:
import os

import keras
from keras.preprocessing import image
from keras import backend as K
from keras.models import Sequential, load_model
from keras import layers
from keras.layers import Input, Dense, Dropout, Flatten, MaxPool2D 
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, SeparableConv2D 
from keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint,EarlyStopping 
import itertools
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score 
import seaborn as sns
import tensorflow as tf

**DATA ETL**

**Read scans from the file system and encode the labels.**

In [None]:
def get_path(PATH):
    '''
    Collect the JPEG file paths and class labels for normal and pneumonia scans.

    Files whose name ends in '.jpeg' are looked up in the 'norm' and 'pneu'
    sub-directories of PATH.

    Parameters
    ----------
    PATH : str
        Directory containing the 'norm' and 'pneu' sub-directories. A trailing
        path separator is optional.

    Returns
    -------
    tuple of four lists
        (paths_norm, paths_pneu, labels_norm, labels_pneu): the file paths in
        sorted order, with label 0 for every normal scan and label 1 for every
        pneumonia scan.

    Raises
    ------
    OSError
        If either sub-directory is missing or cannot be listed.
    '''
    def list_jpegs(folder):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # resulting dataset reproducible between runs and machines.
        return [os.path.join(folder, name)
                for name in sorted(os.listdir(folder))
                if name.endswith('.jpeg')]

    # Listing errors are deliberately not caught: printing the error and
    # carrying on would leave the result lists undefined, so the caller would
    # only see an UnboundLocalError that hides the real cause.
    paths_norm = list_jpegs(os.path.join(PATH, 'norm'))
    paths_pneu = list_jpegs(os.path.join(PATH, 'pneu'))
    # Persisting the corresponding class labels.
    labels_norm = [0] * len(paths_norm)
    labels_pneu = [1] * len(paths_pneu)
    return paths_norm, paths_pneu, labels_norm, labels_pneu

In [None]:
# Root folder of the dataset; get_path reads its 'norm/' and 'pneu/' sub-folders.
PATH = "./data/"
# n, p: file paths of the normal / pneumonia scans; ln, lp: their labels (0 / 1).
n, p, ln, lp = get_path(PATH)

**Loading the images and transforming them into arrays.**

In [None]:
# Setting the image size.
# This tuple is passed as `target_size` to image.load_img when the scans are loaded.
# NOTE(review): Keras documents target_size as (height, width) - confirm if a
# non-square size is ever used.
IMAGE_SIZE = (150,150)

In [None]:
# Loading images using Keras preprocessing.
#
# Each scan is loaded with target_size=IMAGE_SIZE in RGB colour mode and then
# converted to a channels-last array; the arrays of each class are stacked
# into xn (normal) and xp (pneumonia).
#
# Loading errors are deliberately not caught here: printing the error and
# carrying on would leave xn/xp undefined, so the print below would fail with
# a NameError that hides the real cause.

imgs_n = [image.load_img(img_path, target_size=IMAGE_SIZE, color_mode='rgb') for img_path in n]  # rgb scale
imgs_p = [image.load_img(img_path, target_size=IMAGE_SIZE, color_mode='rgb') for img_path in p]
xn = np.array([image.img_to_array(img, data_format='channels_last') for img in imgs_n])  # channels last
xp = np.array([image.img_to_array(img, data_format='channels_last') for img in imgs_p])

print(xn.shape, xp.shape)

In [None]:
# Stack the normal and pneumonia scans into one feature array (X) and their
# class labels into one column vector (Y), normal scans first.

X = np.vstack([xn, xp])
Y = np.vstack([np.array(ln).reshape(-1, 1), np.array(lp).reshape(-1, 1)])
print(X.shape, Y.shape)

In [None]:
# Save the merged feature array X and label array Y to disk in NumPy .npy format.
np.save('./data/X.npy', X)
np.save('./data/Y.npy', Y)