In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# We create the model
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
inputs = keras.Input(shape=(460, 700, 3))
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(inputs)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(8, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# pull train and test images into two np arrays
#import os
#from PIL import Image
#base = "/content/drive/MyDrive/cancer_400/"
#train_dir = base + "train/"
#test_dir = base + "test/"
# create train and test full path names
#dir_names = ["adenosis/", "ductal_ca/", "fibroadenoma/", "lobular_ca/", "mucinous_ca/",
#             "papillary_ca/", "phylloides/", "tubular_ad/"]
#train_image_paths = []
#test_image_paths = []
#for d in dir_names:
#  print('proc dir:', d)
#  path = train_dir + d
#  print('len', len(os.listdir(path)))

#  path = test_dir + d
#  print('len', len(os.listdir(path)))

In [3]:
# pull train and test images into two np arrays
import os
from PIL import Image
base = "/content/drive/MyDrive/cancer_400/"
train_dir = base + "train/"
test_dir = base + "test/"
# create train and test full path names
dir_names = ["adenosis/", "ductal_ca/", "fibroadenoma/", "lobular_ca/", "mucinous_ca/",
             "papillary_ca/", "phylloides/", "tubular_ad/"]
train_image_paths = []
test_image_paths = []
for d in dir_names:
  print('proc dir:', d)
  path = train_dir + d
  for fn in os.listdir(path):
    train_image_paths.append(path + fn)
  path = test_dir + d
  for fn in os.listdir(path):
    test_image_paths.append(path + fn)

# read file paths, flatten, add to list of flattened images
train_flattened = []
print('23 len train flat', len(train_flattened))
test_flattened = []
print('25 len test flat', len(test_flattened))

print('opening train images')
image_count = 0
for count, path in enumerate(train_image_paths):
  a = np.asarray(Image.open(path))
  if count == 0:
    print('a shape:', a.shape)
  try:
    assert a.shape == (460, 700, 3)
  except Exception:
    print(count, a.shape, path)
    continue  # skip this incorrect shaped image
  af = a.flatten()
  train_flattened.append(af)
  image_count += 1
  if count % 50 == 0:
    print(count, end=',')
print('train final count', image_count)
print()
print('opening test images')
for count, path in enumerate(test_image_paths):
  a = np.asarray(Image.open(path))
  assert a.shape == (460, 700, 3)
  af = a.flatten()
  test_flattened.append(af)
  if count % 50 == 0:
    print(count, end=',')
print('test final count', count)
print()

print('generate two np arrays')
# stack flattened arrays into single array
train_images = np.hstack(train_flattened)
test_images = np.hstack(test_flattened)


proc dir: adenosis/
proc dir: ductal_ca/
proc dir: fibroadenoma/
proc dir: lobular_ca/
proc dir: mucinous_ca/
proc dir: papillary_ca/
proc dir: phylloides/
proc dir: tubular_ad/
23 len train flat 0
25 len test flat 0
opening train images
a shape: (460, 700, 3)
0,50,100,150,200,250,300,350,375 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-001.png
376 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-003.png
377 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-002.png
378 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-007.png
379 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-008.png
380 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-004.png
381 (456, 700, 3) /content/drive/MyDrive/cancer_400/train/papillary_ca/SOB_M_PC-14-12465-400-009.png
382

In [None]:
len(train_flattened)
for n, im in enumerate(train_flattened):
  try:
    assert im.shape[0] == 460*700*3
  except Exception:
    print('n when stopped:', n)
    break

In [None]:
tf = np.hstack(train_flattened)
tf.shape

(567042000,)

In [None]:
567042000 / (460*700*3)

587.0

In [None]:
train_images.shape

(567042000,)

In [4]:
# create train and test label np arrays
train_labels = np.array([0]*75+[1]*75+[2]*75+[3]*75+[4]*75+[5]*62+[6]*75+[7]*75)
test_list= []
for im_class in range(8):
  test_list += [im_class]*25
test_labels = np.array(test_list)
print(train_labels.shape)
print(test_labels.shape)


(587,)
(200,)


In [None]:
# WE fit the model
train_images = train_images.reshape((600-13, 460, 700, 3))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((200, 460, 700, 3))
test_images = test_images.astype("float32") / 255
model.compile(optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])
model.fit(train_images, train_labels, epochs=2, batch_size=32)

Epoch 1/2
Epoch 2/2

In [None]:
47040000/(460*700*3)

48.69565217391305

In [None]:
400*400*3

480000

In [None]:
480000/78400

6.122448979591836

In [None]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc:.3f}")

In [15]:
# second model
inputs = keras.Input(shape=(460, 700, 3))
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(8, activation="softmax")(x)
model2 = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
train_images = train_images.reshape((600-13, 460, 700, 3))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((200, 460, 700, 3))
test_images = test_images.astype("float32") / 255
model2.compile(optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])
model2.fit(train_images, train_labels, epochs=30, batch_size=32)

Epoch 1/30


In [None]:
test_loss, test_acc = model2.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc:.3f}")

In [10]:
# third model
inputs = keras.Input(shape=(460, 700, 3))
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(filters=64, kernel_size=5, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=5, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(8, activation="softmax")(x)
model3 = keras.Model(inputs=inputs, outputs=outputs)

In [11]:
model3.compile(optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])
model3.fit(train_images, train_labels, epochs=4, batch_size=32)

ValueError: ignored

In [None]:
test_loss, test_acc = model3.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
base2 = "/content/drive/MyDrive/Ductal/"
#base3 = "/content/drive/MyDrive/Phylloides/"
#predict2_dir = base + "Ductal/"
#test_dir = base + "test/"
# create train and test full path names
#dir_names1 = ["Ductal/"]

predict_image_paths = []
#test_image_paths = []
#for d in dir_names1:
#  print('proc dir:', d)
path = base2
for fn in os.listdir(path):
  predict_image_paths.append(path + fn)
#  path = test_dir + d
#  for fn in os.listdir(path):
#   test_image_paths.append(path + fn)

# read file paths, flatten, add to list of flattened images
predict_flattened = []

print('opening train images')

image_count = 0
for count, path in enumerate(predict_image_paths):
  a = np.asarray(Image.open(path))
  if count == 0:
    print('a shape:', a.shape)
  try:
    assert a.shape == (460, 700, 3)
  except Exception:
    print(count, a.shape, path)
    continue  # skip this incorrect shaped image
  af = a.flatten()
  train_flattened.append(af)
  image_count += 1
#  if count % 50 == 0:
#    print(count, end=',')
print('train final count', image_count)

print('generate two np arrays')
# stack flattened arrays into single array
predict_images = np.hstack(train_flattened)

opening train images
a shape: (460, 700, 3)
train final count 8
generate two np arrays


In [None]:
# use this to make predictions, unknown_images is size N x 460 x 700 x 3
model.predict(predict_images)