In [None]:
import csv
import os

import cv2
import imageio
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image
from scipy.ndimage import gaussian_filter
from skimage import measure
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import confusion_matrix
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
pip install -U scikit-image

In [None]:
def process_mask(mask):
    """Build a cleaned-up lesion mask from a grayscale image.

    Pipeline: Otsu threshold -> dilation -> keep only the largest external
    contour (filled) -> erosion -> Gaussian smoothing -> morphological
    opening and closing.

    Args:
        mask: 2-D NumPy array of grey levels. It is cast to ``uint8`` before
            thresholding, so values are expected to fit in [0, 255].

    Returns:
        ``uint8`` array with the same shape as ``mask``. If thresholding
        leaves no foreground contour at all, an all-zero mask is returned
        instead of raising.
    """
    # Thresholding (Otsu's method) requires an 8-bit single-channel image
    mask = mask.astype(np.uint8)
    _, binary_mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Structuring element shared by all morphological operations below
    kernel = np.ones((2, 2), np.uint8)

    # Binary dilation
    dilated_mask = cv2.dilate(binary_mask, kernel, iterations=1)

    # Indexing with [-2] picks the contour list both for OpenCV versions that
    # return (contours, hierarchy) and for those returning (image, contours, hierarchy).
    contours = cv2.findContours(dilated_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing was segmented: return an empty mask rather than failing in max()
        return np.zeros_like(dilated_mask)

    # Keep only the largest connected outline and fill it
    largest_contour = max(contours, key=cv2.contourArea)
    middle_part_mask = np.zeros_like(dilated_mask)
    cv2.drawContours(middle_part_mask, [largest_contour], -1, 255, thickness=cv2.FILLED)

    # Undo the dilation and soften the border
    smoothed_mask = cv2.erode(middle_part_mask, kernel, iterations=1)
    smoothed_mask = gaussian_filter(smoothed_mask.astype(float), sigma=1)

    # Small object removal
    cleared_mask = cv2.morphologyEx(smoothed_mask, cv2.MORPH_OPEN, kernel)

    # Hole-filling within the lesion masks
    cleared_mask = cv2.morphologyEx(cleared_mask, cv2.MORPH_CLOSE, kernel)

    return cleared_mask.astype(np.uint8)

In [None]:
def circularity_f(processed_mask):
    """Return the circularity ``4 * pi * area / perimeter**2`` of the largest region.

    Regions are found with ``measure.label`` (8-connectivity in 2-D) and the
    one with the greatest area is measured.

    Args:
        processed_mask: 2-D integer mask, e.g. the output of ``process_mask``.

    Returns:
        The circularity as a float, or ``0.0`` when the mask contains no
        labelled region or the largest region has a zero perimeter.
    """
    # NOTE(review): measure.label groups neighbouring pixels that share the
    # same value, so a mask with soft (non-binary) edges is split by grey
    # level. Confirm whether the mask should be binarised before labelling.
    label_image = measure.label(processed_mask, connectivity=2)
    properties = measure.regionprops(label_image)
    if not properties:
        # Empty mask: no region to measure
        return 0.0

    largest_region = max(properties, key=lambda x: x.area)
    perimeter = largest_region.perimeter
    if perimeter == 0:
        # Degenerate region (e.g. a single pixel): avoid dividing by zero
        return 0.0

    circularity = 4 * np.pi * largest_region.area / (perimeter ** 2)
    return circularity

In [None]:
# Visual sanity check of the masking pipeline on a single image.
# NOTE(review): `image_1d` is not defined in the visible cells and
# `target_size` is only assigned in a later cell; both must already exist
# in the kernel for this cell to run.
original_image = np.reshape(image_1d[7], target_size)
processed_mask = process_mask(original_image)

# Keep only the pixels of the image that fall inside the mask
final_masked_image = cv2.bitwise_and(original_image, original_image, mask=processed_mask)

# Standard deviation of the intensities of the masked image
std_deviation = np.std(final_masked_image)

# Circularity of the largest region of the mask (same formula as before,
# now taken from the shared helper instead of being repeated inline)
circularity = circularity_f(processed_mask)

print(f"Lesion Circularity: {circularity}")


# Display the original image
plt.subplot(1, 3, 1)
plt.imshow(original_image, cmap='gray')
plt.title('Original Image')

# Display the image after applying the mask
plt.subplot(1, 3, 2)
plt.imshow(final_masked_image, cmap='gray')
plt.title('Masked Image')

# Display the processed mask
plt.subplot(1, 3, 3)
plt.imshow(processed_mask, cmap='gray')
plt.title('Processed Mask')

plt.show()

In [None]:
# 70/30 random split of the flattened image vectors, labels and circularities.
# NOTE(review): this cell reads `class_tensor_encoded`, `filtered_values` and
# `circularity_tensor`, which are only created by cells further down, and a
# later cell repeats the split with a new shuffle (overwriting `y_train` and
# `y_test`). Confirm which of the two splits is meant to be kept.
split_ratio = 0.7
# Get the total number of samples
num_samples = tf.shape(class_tensor_encoded)[0]

num_train_samples = tf.cast(tf.floor(split_ratio * tf.cast(num_samples, tf.float32)), tf.int32)
num_test_samples = num_samples - num_train_samples

# Generate random indices for shuffling; the fixed op seed makes the split
# repeatable after a kernel restart
indices = tf.range(num_samples)
shuffled_indices = tf.random.shuffle(indices, seed=42)

# Split the indices into training and testing sets
train_indices = shuffled_indices[:num_train_samples]
test_indices = shuffled_indices[num_train_samples:]

# Use tf.gather to get the training and testing sets
X_train = tf.gather(filtered_values, train_indices)
y_train = tf.gather(class_tensor_encoded, train_indices)
circularity_test = tf.gather(circularity_tensor, test_indices)
X_test = tf.gather(filtered_values, test_indices)
y_test = tf.gather(class_tensor_encoded, test_indices)

In [None]:
# Image folders. The defaults below can be overridden without editing the
# notebook by setting the APPDOSGURI_TEST_DIR / APPDOSGURI_TRAIN_DIR
# environment variables before starting the kernel.
working_directory_test = os.environ.get(
    "APPDOSGURI_TEST_DIR", "/Users/matheusss03/Developer/appDosGuri/test")
working_directory_train = os.environ.get(
    "APPDOSGURI_TRAIN_DIR", "/Users/matheusss03/Developer/appDosGuri/training")

In [None]:
# Set your parameters
target_size = (100, 100)
batch_size = 50
consistent_length = None
image_tensors = []
circularity_tensors = []

# os.listdir returns names in arbitrary order; sorting makes the image order
# deterministic between runs.
# NOTE(review): labels are matched to images purely by position further
# down, so confirm that the metadata CSV rows follow this same order.
image_files = sorted(f for f in os.listdir(working_directory_train) if f.endswith('.JPG'))
size = len(image_files)

for i in range(0, len(image_files), batch_size):
    batch = image_files[i:i + batch_size]
    print(i)
    batch_images = []
    circularity_vector = []

    for file in batch:
        # Context manager closes the underlying file once the pixels are copied out
        with Image.open(os.path.join(working_directory_train, file)) as image:
            resized_image_array = tf.keras.preprocessing.image.img_to_array(image)
        # Average the channels and invert the grey levels
        gray_image = 255 - resized_image_array.mean(axis=2).astype('int32')
        processed_mask = process_mask(gray_image)
        # Keep only the pixels inside the mask, then shrink to the network input size
        final_masked_image = cv2.bitwise_and(gray_image, gray_image, mask=processed_mask)
        final_masked_resized = cv2.resize(final_masked_image.astype('uint8'), target_size)
        batch_images.append(final_masked_resized)
        circularity_vector.append(circularity_f(processed_mask))

    # Convert the batch of images to a tensor
    batch_tensor = np.stack(batch_images, axis=0)
    image_tensors.append(batch_tensor)
    circularity_tensors.append(circularity_vector)

# Stack the list of tensors to create the final tensor
final_tensor = np.vstack(image_tensors)
# The per-batch circularity lists can differ in length (last batch), and one
# value per image is needed, so join them end to end into a 1-D array.
final_circularity = np.concatenate(circularity_tensors)
# NOTE(review): `filter_condition` is only created by the metadata cell
# further down; that cell has to run before these two lines.
filtered_circularity_tensor = final_circularity[filter_condition]
filtered_tensor1 = final_tensor[filter_condition]


In [None]:
# Same pre-processing as for the training images, applied to the test folder.
image_tensors_test = []
circularity_tensors_test = []

# Sorted so the image order is deterministic (os.listdir order is arbitrary).
# NOTE(review): confirm the test metadata rows follow this same order.
image_files_test = sorted(f for f in os.listdir(working_directory_test) if f.endswith('.JPG'))
# NOTE(review): this still counts the *training* files, as in the original
# cell; `size` is not used below, so it is left untouched.
size = len(image_files)

for i in range(0, len(image_files_test), batch_size):
    batch_test = image_files_test[i:i + batch_size]
    print(i)
    batch_images_test = []
    circularity_vector_test = []

    # Iterate over the test batch (the original looped over the stale
    # training `batch` and referenced an undefined `file_test`)
    for file_test in batch_test:
        # Context manager closes the underlying file once the pixels are copied out
        with Image.open(os.path.join(working_directory_test, file_test)) as image_test:
            resized_image_array_test = tf.keras.preprocessing.image.img_to_array(image_test)
        # Average the channels and invert the grey levels
        gray_image_test = 255 - resized_image_array_test.mean(axis=2).astype('int32')
        processed_mask_test = process_mask(gray_image_test)
        final_masked_image_test = cv2.bitwise_and(gray_image_test, gray_image_test, mask=processed_mask_test)
        final_masked_resized_test = cv2.resize(final_masked_image_test.astype('uint8'), target_size)
        batch_images_test.append(final_masked_resized_test)
        circularity_vector_test.append(circularity_f(processed_mask_test))

    # Convert the batch of images to a tensor
    batch_tensor_test = np.stack(batch_images_test, axis=0)

    image_tensors_test.append(batch_tensor_test)
    circularity_tensors_test.append(circularity_vector_test)

# Stack the list of tensors to create the final tensor
final_tensor_test = np.vstack(image_tensors_test)
# One circularity per image: join the per-batch lists end to end
final_circularity_test = np.concatenate(circularity_tensors_test)
# Filter the *test* arrays (the original indexed the training arrays here).
# NOTE(review): `filter_condition_test` is only created by a later cell.
filtered_circularity_tensor_test = final_circularity_test[filter_condition_test]
filtered_tensor1_test = final_tensor_test[filter_condition_test]

In [None]:
## Opens the metadata CSV file and creates a tensor of the classes

# newline='' is the form the csv module documents for files passed to csv.reader
with open("challenge-2016-training_metadata_2023-11-04.csv", mode='r', newline='') as file:
    csv_reader = csv.reader(file)
    header = next(csv_reader)
    # Despite the `age_` prefix, these hold the "benign_malignant" column
    age_column_index = header.index("benign_malignant")
    age_data = [row[age_column_index] for row in csv_reader]

# NOTE(review): `image_1d` is not defined in the visible cells.
image_vectors_tensor = tf.stack(image_1d)

label_encoder = LabelEncoder()
class_tensor_np = np.array(age_data)

# Keep only the rows whose label is one of the two classes of interest
desired_classes = ['benign', 'malignant']
filter_condition = np.isin(age_data, desired_classes)

filtered_values = image_vectors_tensor[filter_condition]
filtered_classes = class_tensor_np[filter_condition]

# Use the circularities of *all* images (`final_circularity`);
# `circularity_vector` only holds the last batch processed by the loader,
# so it cannot be indexed with a mask that covers every row.
circularity_vector1 = np.asarray(final_circularity).ravel()
circularity_vector_filtered = circularity_vector1[filter_condition]
circularity_tensor = tf.constant(circularity_vector_filtered)


# Encode the string labels as integers
class_tensor_encoded_np = label_encoder.fit_transform(filtered_classes)
class_tensor_encoded = tf.constant(class_tensor_encoded_np, dtype=tf.int32)

In [None]:
## Opens the metadata CSV file and creates a tensor of the classes for the test images

# NOTE(review): this reads the *training* metadata file, the same one used
# for the training labels. Confirm whether a separate test metadata file
# should be opened here.
# newline='' is the form the csv module documents for files passed to csv.reader
with open("challenge-2016-training_metadata_2023-11-04.csv", mode='r', newline='') as file:
    csv_reader_test = csv.reader(file)
    header_test = next(csv_reader_test)
    # Despite the `age_` prefix, these hold the "benign_malignant" column
    age_column_index_test = header_test.index("benign_malignant")
    age_data_test = [row[age_column_index_test] for row in csv_reader_test]

# NOTE(review): `image_1d_test` is not defined in the visible cells.
image_vectors_tensor_test = tf.stack(image_1d_test)

label_encoder_test = LabelEncoder()
class_tensor_np_test = np.array(age_data_test)

# Keep only the rows whose label is one of the two classes of interest
desired_classes = ['benign', 'malignant']
filter_condition_test = np.isin(age_data_test, desired_classes)

filtered_values_test = image_vectors_tensor_test[filter_condition_test]
filtered_classes_test = class_tensor_np_test[filter_condition_test]

# Use the circularities of *all* test images (`final_circularity_test`);
# `circularity_vector_test` only holds the last batch processed by the loader.
circularity_vector1_test = np.asarray(final_circularity_test).ravel()
circularity_vector_filtered_test = circularity_vector1_test[filter_condition_test]
circularity_tensor_test = tf.constant(circularity_vector_filtered_test)


# Encode the string labels as integers
class_tensor_encoded_np_test = label_encoder_test.fit_transform(filtered_classes_test)
class_tensor_encoded_test = tf.constant(class_tensor_encoded_np_test, dtype=tf.int32)

In [None]:
# 70/30 random split of the masked images and their labels.
dataset1 = tf.data.Dataset.from_tensor_slices((filtered_tensor1, class_tensor_encoded))

split_ratio = 0.7
# Get the total number of samples
num_samples = tf.shape(class_tensor_encoded)[0]

num_train_samples = tf.cast(tf.floor(split_ratio * tf.cast(num_samples, tf.float32)), tf.int32)
num_test_samples = num_samples - num_train_samples

# Generate random indices for shuffling; the fixed op seed makes the split
# repeatable after a kernel restart
indices = tf.range(num_samples)
shuffled_indices = tf.random.shuffle(indices, seed=42)

# Split the indices into training and testing sets
train_indices = shuffled_indices[:num_train_samples]
test_indices = shuffled_indices[num_train_samples:]

# Use tf.gather to get the training and testing sets
X_train1 = tf.gather(filtered_tensor1, train_indices)
y_train = tf.gather(class_tensor_encoded, train_indices)

X_test1 = tf.gather(filtered_tensor1, test_indices)

y_test = tf.gather(class_tensor_encoded, test_indices)

# Gather the circularities with the indices of THIS shuffle so that every
# row lines up with X_test1 / y_test when the features are combined later.
circularity_test = tf.gather(circularity_tensor, test_indices)


In [None]:
# Small CNN classifier trained on the masked 100x100 single-channel images.
# NOTE(review): `l2_reg`, `dropout` and `learning_rate` are not defined in
# the visible cells; they must already exist in the kernel.
input_shape = (100, 100, 1)

model1 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
    layers.Dropout(dropout),
    layers.Dense(2, activation='softmax')  # one probability per class
])

# Compile the model
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
model1.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Pass the callback to fit(); without it the EarlyStopping object above is
# never used and training always runs for the full 30 epochs.
model1.fit(X_train1, y_train, epochs=30, batch_size=batch_size, validation_split=0.2,
           callbacks=[early_stopping])

# Evaluate the model on the test data
loss, accuracy = model1.evaluate(X_test1, y_test)

In [None]:
# Class probabilities predicted by the CNN for the held-out images
y_pred_prob = model1.predict(X_test1)

# Predicted class = column holding the highest probability
y_pred_labels = y_pred_prob.argmax(axis=1)

# Ground-truth labels (already integer encoded) as a NumPy array
y_true_labels = np.array(y_test)

# Confusion matrix of true vs. predicted labels
conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)
print("Confusion Matrix:")
print(conf_matrix)

# Summary classification scores
precision = precision_score(y_true_labels, y_pred_labels)
recall = recall_score(y_true_labels, y_pred_labels)
f1 = f1_score(y_true_labels, y_pred_labels)
print(f1, recall, precision)

# Per-image standard deviation of the pixel values (reduced over axes 1 and 2)
float_tensor = tf.cast(X_test1, tf.float32)
std_deviation = tf.math.reduce_std(float_tensor, axis=[1, 2])

# One row per test image: CNN probabilities, circularity, intensity std
concatenated_vector = np.column_stack([y_pred_prob, circularity_test, std_deviation])

In [None]:
# Cluster the combined feature rows into two groups.
# A fixed random_state makes the centroid initialisation, and therefore the
# cluster assignments, repeatable between runs.
kmeans = KMeans(n_clusters=2, random_state=42)

# Fit the KMeans model to the data
kmeans.fit(concatenated_vector)

# Get cluster labels and centroids
labels = kmeans.labels_
centroids = kmeans.cluster_centers_
# Side-by-side view of the true label and the assigned cluster for each row
comparing = np.column_stack((y_true_labels, labels))

In [None]:
# Agreement between the ground-truth labels (`y_true_labels`) and the
# K-Means cluster assignments (`labels`); the metric is imported in the
# import cell at the top of the notebook.
nmi = normalized_mutual_info_score(y_true_labels, labels)
print("Normalized Mutual Information:", nmi)

In [None]:
# 3-D view of the K-Means result using the first three feature columns
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={'projection': '3d'})

# One scatter series per cluster
for i in range(kmeans.n_clusters):
    members = concatenated_vector[labels == i]
    ax.scatter(members[:, 0], members[:, 1], members[:, 2], label=f'Cluster {i + 1}')

# Mark the cluster centres on top of the points
centers = kmeans.cluster_centers_
ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2],
           s=300, c='red', marker='X', label='Cluster Centers')

ax.set(
    xlabel='Feature 1',
    ylabel='Feature 2',
    zlabel='Feature 3',
    title='3D Clustering Visualization',
)
ax.legend()

plt.show()

In [None]:
# First three feature columns, one coordinate each
x, y, z = (concatenated_vector[:, col] for col in range(3))

# 3-D scatter of the features, coloured by the ground-truth label
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
ax.scatter(x, y, z, marker='o', c=y_true_labels)

# Axis labels
ax.set(xlabel='X Label', ylabel='Y Label', zlabel='Z Label')

# Render the figure
plt.show()

In [None]:

# First two feature columns
x, z = concatenated_vector[:, 0], concatenated_vector[:, 1]

# 2-D scatter of those two columns, coloured by the ground-truth label
plt.scatter(x, z, marker='o', c=y_true_labels)
plt.show()

In [None]:
# Hand-tuned thresholds for the rule-based prediction below
tetha1 = 0.01
tetha2 = 30

# Predict 1 when column 1 lies strictly between tetha1 and 0.9999 and
# column 3 exceeds tetha2; otherwise predict 0.
pred_f = [
    1 if (tetha1 < s[1] < 0.9999 and s[3] > tetha2) else 0
    for s in concatenated_vector
]

conf_matrix_f = confusion_matrix(y_true_labels, pred_f)
# True label next to the rule-based prediction, row by row
conc = np.column_stack((y_true_labels, pred_f))

# Report the confusion matrix and the precision of the rule
print("Confusion Matrix:")
print(conf_matrix_f)
precision = precision_score(y_true_labels, pred_f)
print(precision)