<a href="https://colab.research.google.com/github/rydeveraumn/csci-5561-flying-dolphins/blob/main/DW_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Unzip the photos to a local directory

In [None]:
#Mount Drive
from google.colab import drive
drive.mount('/content/drive')
#Unzip photos to local directory
!unzip /content/drive/MyDrive/Breast\ Cancer\ Data/preprocessed_pec_removal_240x384.zip -d /content/data


## Get the training CSV


In [2]:
!unzip /content/drive/MyDrive/Breast\ Cancer\ Data/train.csv.zip -d /content/

Archive:  /content/drive/MyDrive/Breast Cancer Data/train.csv.zip
  inflating: /content/train.csv      


## Depthwise-separable CNN (DWS-CNN) model with Keras


In [1]:
from keras.models import Sequential
from keras.layers import SeparableConv2D, MaxPooling2D, Flatten, Dense, BatchNormalization
from keras.activations import relu

import keras.backend as K

def weighted_binary_crossentropy(weights):
    """Build a class-weighted binary cross-entropy loss for Keras.

    Args:
        weights: Indexable pair of scalars. ``weights[0]`` scales the
            positive-class term ``y_true * log(y_pred)`` and ``weights[1]``
            scales the negative-class term ``(1 - y_true) * log(1 - y_pred)``.

    Returns:
        A function ``loss(y_true, y_pred)`` that returns the weighted
        cross-entropy averaged over the last axis.
    """
    def loss(y_true, y_pred):
        # Labels may arrive as integers; cast them to the prediction dtype so the
        # products below do not mix integer and floating-point tensors.
        y_true = K.cast(y_true, y_pred.dtype)
        # Clip predictions to prevent log(0) error
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # Weighted binary cross entropy, one term per class
        positive_term = weights[0] * y_true * K.log(y_pred)
        negative_term = weights[1] * (1 - y_true) * K.log(1 - y_pred)
        return K.mean(-(positive_term + negative_term), axis=-1)
    return loss

# Small depthwise-separable CNN head. Its input is a 12x7x512 feature map and its
# output is a single sigmoid unit for binary classification.
#
# Each stage is SeparableConv2D -> BatchNormalization. The max-pooling layers and
# the extra 64/128-filter convolutions tried in earlier experiments are not part
# of this model.
model = Sequential([
    # Stage 1: 16 filters
    SeparableConv2D(16, kernel_size=(2, 2), activation=relu, padding='same', input_shape=(12, 7, 512)),
    BatchNormalization(),

    # Stage 2: 32 filters
    SeparableConv2D(32, kernel_size=(2, 2), activation=relu, padding='same'),
    BatchNormalization(),

    # Stage 3: 64 filters
    SeparableConv2D(64, kernel_size=(2, 2), activation=relu, padding='same'),
    BatchNormalization(),

    # Classifier: flatten, two ReLU dense layers, sigmoid output
    Flatten(),
    Dense(256, activation=relu),
    Dense(64, activation=relu),
    Dense(1, activation='sigmoid'),
])

# The model is deliberately left uncompiled here. An earlier experiment compiled
# it at this point with weighted_binary_crossentropy([0.02, 0.98]), the 'adam'
# optimizer and metrics ['accuracy', 'AUC']; that call is currently disabled.

## Outline of training the model

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.applications.vgg16 import VGG16, preprocess_input

# Read the training metadata table from the CSV extracted earlier
data = pd.read_csv('/content/train.csv')

# Target column (as a NumPy array) and the folder holding the extracted PNGs
labels = data['cancer'].values
img_dir = '/content/data/preprocessed_pec_removal_240x384'

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    """Read a PNG file and return it as a single-channel float32 image.

    Args:
        image_path: Path of the PNG file to read.

    Returns:
        The decoded image with one channel, converted to ``tf.float32`` via
        ``tf.image.convert_image_dtype``. No resizing is applied.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=1)
    return tf.image.convert_image_dtype(decoded, tf.float32)

# Create a dataset of image paths and labels
img_paths = [os.path.join(img_dir, '{}_{}.png'.format(pid, iid)) for pid, iid in zip(data['patient_id'], data['image_id'])]

# Only the first 15000 rows are used. Slice the labels first so the pixel buffer
# is allocated at its final size rather than for the whole table.
labels = data['cancer'].values[:15000]

# Pixels are stored as uint8 in [0, 255]. load_and_preprocess_image returns
# floats in [0, 1]; writing those into an integer buffer directly would truncate
# almost every pixel to 0, so they are rescaled before being stored. uint8 keeps
# the buffer at one byte per pixel.
images = np.zeros((len(labels), 384, 240), dtype=np.uint8)
for i, path in enumerate(img_paths[:len(labels)]):
    pixels = np.asarray(load_and_preprocess_image(path)[:, :, 0])
    images[i] = np.rint(pixels * 255.0)


# Split the data into train and test sets (first 80% / last 20%, in file order)
#X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.5, stratify=labels)
split_idx = int(len(labels) * 0.8)
y_train = labels[:split_idx]
y_test = labels[split_idx:]
# Replicate the grey channel three times because VGG16 is built with 3 channels.
X_train = np.repeat(np.expand_dims(images[:split_idx], axis=-1), 3, axis=-1)
X_test = np.repeat(np.expand_dims(images[split_idx:], axis=-1), 3, axis=-1)


def _extract_vgg_features(extractor, batch, chunk_size=256):
    """Run `extractor` over `batch` in chunks, applying VGG16 preprocessing.

    Processing in chunks means only `chunk_size` images are held as float32 at a
    time; converting the whole uint8 array at once would need four times its size.

    Args:
        extractor: Model whose ``predict`` produces the features.
        batch: uint8 array of images with pixel values in [0, 255].
        chunk_size: Number of images converted and predicted per step.

    Returns:
        The per-chunk predictions concatenated along axis 0.
    """
    outputs = []
    for start in range(0, len(batch), chunk_size):
        # astype makes a float copy, so preprocess_input never touches `batch`.
        chunk = batch[start:start + chunk_size].astype(np.float32)
        outputs.append(extractor.predict(preprocess_input(chunk), verbose=0))
    return np.concatenate(outputs, axis=0)


print("Extracting features")
# Extract features using a pre-trained CNN
cnn = VGG16(weights='imagenet', include_top=False, input_shape=(384, 240, 3))
train_features = _extract_vgg_features(cnn, X_train)

print("Train features Shape")
print(train_features.shape)


# Flatten the features (SMOTE works on 2-D sample-by-feature matrices)
train_features_flat = np.reshape(train_features, (train_features.shape[0], -1))


print("Train features flat Shape")
print(train_features_flat.shape)


#Encode the labels
le = LabelEncoder()
train_labels = le.fit_transform(y_train)
print("Train Labels Shape")
print(train_labels.shape)

# Apply SMOTE to the feature space; random_state makes the resampling repeatable
sm = SMOTE(sampling_strategy='minority', random_state=42)
train_features_resampled, train_labels_resampled = sm.fit_resample(train_features_flat, train_labels)

print("Train features resampled shape")
print(train_features_resampled.shape)

# Reshape the features to their original shape
train_features_resampled = np.reshape(train_features_resampled, (train_features_resampled.shape[0], 12, 7, 512))

print("Train features resampled shape reshaped")
print(train_features_resampled.shape)

#Get test set features
test_features = _extract_vgg_features(cnn, X_test)

print("Test features Shape")
print(test_features.shape)

# Flatten the features
test_features_flat = np.reshape(test_features, (test_features.shape[0], -1))

print("Test features flat Shape")
print(test_features_flat.shape)

#Encode the labels with the encoder fitted on the training labels, so train and
#test share one class-to-integer mapping
test_labels = le.transform(y_test)
print("Test lables shape")
print(test_labels.shape)







Extracting features
Train features Shape
Train features flat Shape
Train Labels Shape
(12000,)


In [6]:
# Define hyperparameters
batch_size = 8
num_epochs = 20

# Define the loss function and optimizer
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define the evaluation metrics (names are the keys that appear in the training history)
metrics = [
    tf.keras.metrics.BinaryAccuracy(name="binary_accuracy", threshold=0.5),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.Precision(name = "prec"),
    tf.keras.metrics.Recall(name = "recal"),
    tf.keras.metrics.TruePositives(name = "tp"),
    tf.keras.metrics.TrueNegatives(name = "tn"),
    tf.keras.metrics.FalsePositives(name = "fp"),
    tf.keras.metrics.FalseNegatives(name = "fn"),
]

model.compile(optimizer = optimizer, loss = loss_fn, metrics = metrics)

# NOTE(review): the held-out test split doubles as validation data here, so the
# per-epoch validation numbers and the final evaluation use the same samples.
history = model.fit(train_features_resampled, train_labels_resampled, batch_size=batch_size, epochs=num_epochs, validation_data=(test_features, test_labels))

# Evaluate on the unflattened feature maps: the model's first layer is declared
# with input_shape=(12, 7, 512), so the flattened matrix does not match its input.
model.evaluate(test_features, test_labels)


model.save('/content/drive/MyDrive/CancerModelsTrained/CNNprelim')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


