In [None]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
import os
import cv2
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Build the data path one level at a time so that each level can be checked individually
drive_dir = '/content/drive/MyDrive'
bootcamp_dir = os.path.join(drive_dir, 'BOOTCAMP')
colab_notebooks_dir = os.path.join(bootcamp_dir, 'ColabNotebooks')
project_dir = os.path.join(colab_notebooks_dir, 'ProjectWithGreg')
data_dir = os.path.join(project_dir, 'Data')

# Define base directory
# Derived from the chain above (instead of a second hard-coded string) so the two cannot drift apart
base_dir = data_dir

# Verify each directory level
# Stop at the first missing level so a wrong path is reported here rather than in a later cell
for level_dir in (drive_dir, bootcamp_dir, colab_notebooks_dir, project_dir, data_dir):
    if not os.path.isdir(level_dir):
        raise FileNotFoundError(f"Expected directory is missing: {level_dir}")

Mounted at /content/drive


In [None]:
# Each dataset split lives in its own sub-folder of base_dir:
#   train_dir -> <base_dir>/train
#   test_dir  -> <base_dir>/test
#   valid_dir -> <base_dir>/valid
# os.path.join() inserts the path separator between base_dir and the sub-folder name
train_dir, test_dir, valid_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'test', 'valid')
)

# List the entries (files and directories) found in base_dir;
# as the last expression of the cell, the list is shown as the cell output
os.listdir(base_dir)

['valid', 'test', 'train', 'best_model_resnet.h5']

In [None]:
# Data generators load and preprocess images in batches while the model trains or evaluates

# Target size every image is resized to, and the number of images per batch
img_size = (224, 224)
batch_size = 32

# One ImageDataGenerator per split. No augmentation is configured here:
# the only transform is rescale, which maps pixel values into the range [0,1]
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options shared by both generators.
# class_mode='categorical' yields one-hot labels, one class per sub-folder
_flow_options = dict(
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# flow_from_directory reads the images from each split's directory and yields (images, labels) batches
train_gen = train_datagen.flow_from_directory(train_dir, **_flow_options)
valid_gen = valid_datagen.flow_from_directory(valid_dir, **_flow_options)

Found 613 images belonging to 4 classes.
Found 72 images belonging to 4 classes.


In [None]:
from tensorflow.keras.applications import ResNet50

from tensorflow.keras.optimizers import Adam

# Requires train_gen and valid_gen (the image data generators) to be defined already

# Input geometry fed to the network: 224x224 images with 3 colour channels
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# One softmax output per class discovered by the training generator
class_count = len(train_gen.class_indices)

# Create pre-trained ResNet50 model (ImageNet weights, no classification head).
# pooling='max' reduces the final feature map to one feature vector per image.
# NOTE(review): the generators rescale pixels by 1/255, whereas Keras documents
# tensorflow.keras.applications.resnet50.preprocess_input as the preprocessing for these
# weights - consider switching the train/valid/test generators together.
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=img_shape, pooling='max')

# Freeze the backbone so only the new head defined below is trained
for layer in base_model.layers:
    layer.trainable = False

model = Sequential([
    base_model,
    BatchNormalization(axis=-1),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(class_count, activation='softmax')
])

optimizer = Adam()  # Using Adam optimizer
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Define the full file path including the base directory
filepath = os.path.join(base_dir, 'best_model_resnet.h5')

# Create ModelCheckpoint callback to save the best model based on validation accuracy
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# Train the model with the added callback
history = model.fit(
    x=train_gen,
    epochs=20,
    verbose=1,
    validation_data=valid_gen,
    callbacks=[checkpoint]
)

# Save the last-epoch model to its own file.
# Writing it to `filepath` would overwrite the best-val_accuracy checkpoint that
# ModelCheckpoint stored there, and best_model_resnet.h5 is the file reloaded for evaluation.
final_model_path = os.path.join(base_dir, 'final_model_resnet.h5')
model.save(final_model_path)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.19444, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5


  saving_api.save_model(


Epoch 2/20
Epoch 2: val_accuracy improved from 0.19444 to 0.47222, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.47222
Epoch 4/20
Epoch 4: val_accuracy improved from 0.47222 to 0.69444, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5
Epoch 5/20
Epoch 5: val_accuracy did not improve from 0.69444
Epoch 6/20
Epoch 6: val_accuracy did not improve from 0.69444
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.69444
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.69444
Epoch 9/20
Epoch 9: val_accuracy improved from 0.69444 to 0.73611, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5
Epoch 10/20
Epoch 10: val_accuracy improved from 0.73611 to 0.76389, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5


In [None]:
# Report whether the saved model file is present on the mounted drive
file_path = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5'
print("File exists" if os.path.exists(file_path) else "File does not exist")

File exists


In [None]:
# Show the directory the kernel resolves relative paths against
working_dir = os.getcwd()
print(working_dir)

/content


In [None]:
# Collect the names of the entries in the data folder and print them
data_folder = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'
files = os.listdir(data_folder)
print(files)

['valid', 'test', 'train', 'best_model_resnet.h5']


In [None]:
# Bare final expression: the notebook displays the returned list of entries as the cell output
data_root = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'
os.listdir(data_root)

['valid', 'test', 'train', 'best_model_resnet.h5']

In [None]:
# One directory per class inside the test split
class_directories = [
    '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/test/squamous.cell.carcinoma',
    '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/test/normal',
    '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/test/large.cell.carcinoma',
    '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/test/adenocarcinoma',
]

# Total size of the test set = sum of the entry counts of all class directories.
# os.listdir counts every entry in a directory, whatever its type.
ts_length = sum(len(os.listdir(class_dir)) for class_dir in class_directories)

# Print the total length of the test data
print("Total length of test data:", ts_length)

Total length of test data: 315


In [None]:
# Target size for the images and number of images per batch
img_size = (224, 224)
batch_size = 32

# Only transform applied to the test images: rescale pixel values into the range [0,1]
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# test_gen reads the images under test_dir (one sub-folder per class), resizes them to
# img_size and yields batches with one-hot ('categorical') labels.
# NOTE(review): shuffle is left at its default; pass shuffle=False if predictions ever
# need to be lined up with test_gen.classes / test_gen.filenames.
test_gen = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
)

Found 315 images belonging to 4 classes.


In [None]:
# This cell only evaluates: the model saved during training is reloaded from disk
# and scored on the train, validation and test generators without any further training.

import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Average

model = load_model('/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_model_resnet.h5')

# Largest value of ts_length // n (n an exact divisor of ts_length) that does not exceed 80,
# and the matching number of steps.
# NOTE(review): these are no longer passed to evaluate() - the generators batch at their own
# batch_size, so these step counts do not describe them. Kept only in case other cells
# reference the names - TODO confirm and remove.
test_batch_size = max(ts_length // n for n in range(1, ts_length + 1) if ts_length % n == 0 and ts_length / n <= 80)
test_steps = ts_length // test_batch_size

# Evaluate every split in full. Without `steps`, evaluate() runs through all batches of
# each generator, so every image of the split contributes to the reported loss/accuracy.
train_score = model.evaluate(train_gen, verbose=1)
valid_score = model.evaluate(valid_gen, verbose=1)
test_score = model.evaluate(test_gen, verbose=1)

# Each score is [loss, accuracy], following the metrics the model was compiled with
print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])





Train Loss:  0.12149331718683243
Train Accuracy:  0.96875
--------------------
Validation Loss:  0.7315235733985901
Validation Accuracy:  0.75
--------------------
Test Loss:  1.339189887046814
Test Accuracy:  0.5562499761581421
