**CHEST X-RAY IMAGE CLASSIFICATION: ADVANCED DATA SCIENCE CAPSTONE PROJECT**

Paolo Cavadini, February 2021.

Dataset: https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia

Find on GitHub: https://github.com/pcavad/capstone_x_rays

In [None]:
import os

import keras
from keras.preprocessing import image
from keras import backend as K
from keras.models import Sequential, load_model
from keras import layers
from keras.layers import Input, Dense, Dropout, Flatten, MaxPool2D 
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, SeparableConv2D 
from keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint,EarlyStopping 
import itertools
import matplotlib.pyplot as plt
# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in matplotlib 3.6 and
# the old name was removed afterwards; use whichever one this install ships.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score 
import seaborn as sns
import tensorflow as tf

In [None]:
# Load the image array (X) and the label array (Y) from their .npy files.
X, Y = np.load('./data/X.npy'), np.load('./data/Y.npy')

**DATA EXPLORATION**

In [None]:
# Show three randomly picked images from X side by side, each titled with
# the class read from the matching entry of Y.

plt.figure(figsize=(10, 10))
for panel in (1, 2, 3):
    # Draw the sample index first; it selects both the image and its label.
    random_num = np.random.randint(0, len(X))
    scaled_image = X[random_num][:, :, :] / 255  # normalizing (divide by 255)
    class_name = 'pneumonia' if Y[random_num] == 1 else 'normal'

    plt.subplot(1, 3, panel)
    plt.imshow(scaled_image)
    plt.grid(False)
    plt.axis('off')
    plt.title(class_name)
plt.show()

In [None]:
# Histogram of the label values in Y, with the two x ticks renamed to the
# class they stand for.

fg = sns.displot(data=Y, binwidth=0.4)
label_axis = fg.ax
label_axis.set_xticks([0.2, 1])
label_axis.set_xticklabels(['0 = normal', '1 = pneumonia'])
label_axis.set_xlabel('class labels')
plt.show()

In [None]:
# Value at position [0, 0, 0] of every sample (i.e. varying only the first
# axis of X): print its range, then plot its histogram.

first_axis_values = X[:, 0, 0, 0]
print(first_axis_values.min(), first_axis_values.max())
plt.hist(first_axis_values)
plt.show()

In [None]:
# Pick one sample index uniformly at random; the following cells inspect
# that same scan.

random_num = np.random.randint(low=0, high=len(X))

In [None]:
# Selected scan, varying only the second axis of X (other indices fixed at
# 0): print the range of those values, then draw them as a line.

second_axis_values = X[random_num, :, 0, 0]
print(second_axis_values.min(), second_axis_values.max())
plt.plot(second_axis_values)
plt.show()

In [None]:
# Selected scan, varying only the third axis of X (other indices fixed at
# 0): print the range of those values, then draw them as a line.

third_axis_values = X[random_num, 0, :, 0]
print(third_axis_values.min(), third_axis_values.max())
plt.plot(third_axis_values)
plt.show()

In [None]:
# Selected scan, varying only the last axis of X at position [0, 0]:
# print the smallest and largest value.

last_axis_values = X[random_num, 0, 0, :]
print(last_axis_values.min(), last_axis_values.max())

In [None]:
# 3D representation of the selected scan.
#
# The axes are created with fig.add_subplot(..., projection='3d') instead of
# Axes3D(fig): recent matplotlib releases no longer attach an Axes3D built
# directly from its constructor to the figure, which leaves the plot blank.

fig = plt.figure(figsize=(16,8))
ax = fig.add_subplot(111, projection='3d')

# Only xs and ys are passed, so zs keeps scatter's default value.
ax.scatter(xs=X[random_num][:,0,0], ys=X[random_num][0,:,0], marker='o')
ax.set_title('Scan: ' + str(random_num))
plt.show()