Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
tonykqt committed Apr 1, 2018
0 parents commit 59ebaac
Show file tree
Hide file tree
Showing 5 changed files with 112,730 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
source/*
test/*
train/*
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Sample Convolutional NN to classify chest X-Rays

## Requirements
- Python 3.5+
- TensorFlow and Keras
- pandas, NumPy, Matplotlib, Pillow (PIL)



# Data set and article reference:
https://www.nih.gov/news-events/news-releases/nih-clinical-center-provides-one-largest-publicly-available-chest-x-ray-datasets-scientific-community
462 changes: 462 additions & 0 deletions chestrays-keras.ipynb

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions chestrays-keras.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pathlib
import shutil
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator

# Training parameters
learning_rate = 0.01  # learning rate for the SGD optimizer
num_steps = 100  # passed to fit_generator as steps_per_epoch
# alternative value tried for num_steps: 2000
batch_size = 10  # images per batch produced by the directory generators
display_step = 100  # not referenced anywhere else in this script

# Network parameters
# Dropout rate. NOTE: Keras' Dropout layer takes the fraction of units to
# *drop*, not the probability of keeping them.
dropout = 0.5

# Images
IMG_HEIGHT = 150
IMG_WIDTH = 150
CH = 3  # colour channels per image
image_dir = "M:\\DataSets\\chestrays\\source\\"  # XPS
# Upper bound of the CSV slice df.iloc[1:rows]; CSV row 0 is skipped by that
# slice, so rows - 1 records are actually used.
rows = 4001
# 90/10 split, derived from `rows` so the two sizes always add up to the
# number of records that were loaded (3600 / 400 for rows = 4001).
test_rows = (rows - 1) // 10
train_rows = rows - 1 - test_rows


# Load the label sheet. Because header=None is used, the first CSV line
# (presumably the column header - confirm against the file) is read as data
# row 0; the slice drops it and keeps the records that follow, up to row
# `rows` (exclusive). Cells containing "?" are read as NaN.
df = pd.read_csv("chestrays.csv", header=None, na_values="?").iloc[1:rows]
df.head()  # has no visible effect unless run interactively


# Prepare train and test sets

# Column 1 holds the classification text: `labels` is the integer code of
# each row and `names` the distinct classification strings.
labels, names = pd.factorize(df[1])
# Column 0 holds the image file name; prefix each one with the source folder.
image_names = image_dir + df[0].values
# classification -> count pairs, filled in while the images are copied
d = {}

# data mover function, also populates the dictionary so we can see the distribution of data
def copyImages(dataframe, idx, directory="train", source_dir=None, counts=None):
    """Copy one image into ``<directory>/<classification>/`` and tally its class.

    The classification is the text in column 1 of the row with spaces removed
    and ``|`` replaced by ``_``; the image file name is taken from column 0.

    Args:
        dataframe: table whose column 0 is the image file name and column 1
            the classification text.
        idx: positional index of the row to process.
        directory: root folder that receives the per-class sub-folders.
        source_dir: folder holding the source images. Defaults to the
            module-level ``image_dir``.
        counts: dict of classification -> number of images copied, updated
            in place. Defaults to the module-level ``d``.

    Raises:
        OSError: if the source image cannot be read or the destination
            cannot be written (e.g. ``FileNotFoundError``).
    """
    if source_dir is None:
        source_dir = image_dir
    if counts is None:
        counts = d

    row = dataframe.iloc[idx]  # fetch the row once instead of once per column
    classification = row[1].replace(" ", "").replace("|", "_")

    # Create the class folder if needed: when the target is not an existing
    # directory, shutil.copy would write a *file* with the class name instead.
    destination = pathlib.Path(directory) / classification
    destination.mkdir(parents=True, exist_ok=True)

    # str() keeps this working on Python 3.5, where shutil does not yet
    # accept path objects.
    shutil.copy(str(pathlib.Path(source_dir) / row[0]), str(destination))

    # Tally only after the copy succeeded so the counts match the files on disk.
    counts[classification] = counts.get(classification, 0) + 1

# Create one folder per classification under both train/ and test/.
# Folder names are the classification with spaces removed and | turned into _.
for class_name in names:
    folder = class_name.replace(" ", "").replace("|", "_")
    for split in ("train", "test"):
        pathlib.Path(split + "/" + folder).mkdir(parents=True, exist_ok=True)


# The first train_rows rows of the sheet become the training set ...
for position in range(train_rows):
    copyImages(df, position, "train")

# ... and the test_rows rows that follow them become the test set.
for position in range(train_rows, train_rows + test_rows):
    copyImages(df, position, "test")


# Number of distinct label codes returned by pd.factorize
num_classes = len(np.unique(labels))

# Summary of what was loaded
for template, value in (
    ('Number of classes: {}', num_classes),
    ('Number of rows: {}', len(labels)),
):
    print(template.format(value))
print(names[:10])
print(image_names)


# Build the Keras model: two conv/conv/pool/dropout stages, then a dense classifier
model = Sequential()
# input: IMG_WIDTH x IMG_HEIGHT images with CH channels (150x150x3 with the
# constants defined at the top of the script).
# NOTE(review): the (IMG_WIDTH, IMG_HEIGHT) order matches the target_size
# given to the image generators; Keras presumably reads it as (rows, cols),
# which only matters if the two sizes ever differ - confirm before changing.
# First stage: two layers of 32 convolution filters of size 3x3 each.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, CH)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Second stage: same layout with 64 filters per convolution.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head: one output per classification, as softmax probabilities.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(dropout))  # rate comes from the network parameters (0.5)
model.add(Dense(num_classes, activation='softmax'))

# Use the configured learning_rate (0.01) rather than a duplicated literal.
sgd = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True)
# Report accuracy as well as the loss during training and validation.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])


# Training images are only rescaled (pixel values scaled by 1/255);
# no other augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)

# Test images get the same treatment: only rescaling.
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory readers: every image is resized to
# target_size, served in batches of batch_size, with categorical labels
# taken from the sub-folder it was found in.
flow_options = dict(
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=batch_size,
    class_mode='categorical',
)

# Generator that reads pictures found in the sub-folders of './train'
# and yields batches of them indefinitely.
train_generator = train_datagen.flow_from_directory('train', **flow_options)

# The same kind of generator over './test', used as validation data.
validation_generator = test_datagen.flow_from_directory('test', **flow_options)


# Train for 50 epochs of num_steps batches each, validating on the test
# generator after every epoch.
# validation_steps is one full pass over the test images. The previous fixed
# value of 800 steps drew 8000 samples per epoch at batch_size 10, i.e. it
# re-evaluated each of the 400 test images about 20 times.
model.fit_generator(
    train_generator,
    steps_per_epoch=num_steps,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

Loading

0 comments on commit 59ebaac

Please sign in to comment.