<a href="https://colab.research.google.com/github/snwanjiru/Deep-Learning/blob/Biomedical-Image-Analysis/Colon_Cancer_Cells_Counting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Required Libraries

In [2]:
# Import libraries for direct counting
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# File paths
image_folder_path = "/content/drive/MyDrive/HT29_Colon_Cancer_Images"  # Path to the folder containing images
txt_file_path = "/content/drive/MyDrive/BBBC001_v1_counts_ground_truth.txt"  # Path to the ground truth file

# Ground Truth Processing


In [16]:
# Load ground truth data
def load_ground_truth(txt_file_path, image_folder_path):
    # Read the ground truth file
    data = pd.read_csv(txt_file_path, sep="\t")  # Adjust the separator if necessary (e.g., sep="\s+" or sep=",")

    # Print column names for debugging
    print("Column names:", data.columns.tolist())

    # Standardize column names to match expected names
    data.columns = data.columns.str.strip()  # Remove extra spaces

    # Check if the expected columns are present
    if 'manual count #1' not in data.columns or 'manual count #2' not in data.columns:
        raise ValueError("Expected columns 'manual count #1' and 'manual count #2' are missing. Found columns:", data.columns)

    # Compute the average count
    data['average_count'] = data[['manual count #1', 'manual count #2']].mean(axis=1)

    # Append the folder path to image names with correct separator
    data['Image'] = image_folder_path + '/' + data['Image'] # This line is changed to add '/'

    return data['Image'].tolist(), data['average_count'].tolist()

# File paths
txt_file_path = "/content/drive/MyDrive/BBBC001_v1_counts_ground_truth.txt"  # Replace with the correct path
image_folder_path = "/content/drive/MyDrive/HT29_Colon_Cancer_Images"  # Replace with the correct path

# Load the data
image_paths, labels = load_ground_truth(txt_file_path, image_folder_path)


Column names: ['              Image', '             manual count #1', 'manual count #2']


# Image Preprocessing

In [17]:
# Preprocess images
def preprocess_images(image_paths, target_size=(128, 128)):
    images = []
    for path in image_paths:
        img = tf.keras.preprocessing.image.load_img(path, color_mode='grayscale', target_size=target_size)
        img = tf.keras.preprocessing.image.img_to_array(img) / 255.0  # Normalize to [0, 1]
        images.append(img)
    return np.array(images)

# Prepare images
images = preprocess_images(image_paths)

# Split data into training, validation, and test sets
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.25, random_state=42
)


# Model Definition

In [18]:
# Define a simple CNN for regression
def build_cnn(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='linear')  # Output single value (cell count)
    ])
    return model

# Build and compile the model
input_shape = (128, 128, 1)  # Grayscale images
model = build_cnn(input_shape)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mean_squared_error', metrics=['mae'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Data Augmentation

In [19]:
# Data augmentation for training
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Training

In [21]:
# Define callbacks
checkpoint = ModelCheckpoint("cnn_model.keras", monitor='val_loss', save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(
    datagen.flow(train_images, np.array(train_labels), batch_size=8),
    validation_data=(val_images, np.array(val_labels)),
    epochs=100,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/100


  self._warn_if_super_not_called()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - loss: 129326.3125 - mae: 348.3600 - val_loss: 177564.1406 - val_mae: 421.3836
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434ms/step - loss: 129233.4375 - mae: 348.2350 - val_loss: 177455.4688 - val_mae: 421.2546
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step - loss: 129144.2891 - mae: 348.1075 - val_loss: 177342.6719 - val_mae: 421.1207
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419ms/step - loss: 129120.4141 - mae: 348.0730 - val_loss: 177210.7812 - val_mae: 420.9641
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 671ms/step - loss: 129022.5859 - mae: 347.9412 - val_loss: 177070.1719 - val_mae: 420.7971
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 519ms/step - loss: 128937.4141 - mae: 347.8200 - val_loss: 176914.0156 - val_mae: 420.6115
Epoch 7/100
[1m1/1[0m 

# Model Evaluation and Results

In [22]:
# Evaluate the model
test_loss, test_mae = model.evaluate(test_images, np.array(test_labels))
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Predict cell counts
predicted_counts = model.predict(test_images)

# Compare manual and predicted counts
for img, manual_count, pred_count in zip(image_paths, test_labels, predicted_counts):
    print(f"Image: {img}, Manual Count: {manual_count}, Predicted Count: {pred_count[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 1042.1182 - mae: 29.0908
Test Loss: 1042.1181640625, Test MAE: 29.090805053710938
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
Image: /content/drive/MyDrive/HT29_Colon_Cancer_Images/AS_09125_050118150001_A03f00d0.tif, Manual Count: 356.0, Predicted Count: 371.0964050292969
Image: /content/drive/MyDrive/HT29_Colon_Cancer_Images/AS_09125_050118150001_A03f01d0.tif, Manual Count: 339.0, Predicted Count: 382.085205078125
