In [12]:
import tensorflow as tf
import os
import imageio
import csv
import numpy as np
import cv2
import matplotlib.pyplot as plt

from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping
from scipy.ndimage import gaussian_filter
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from skimage import measure

In [13]:
def process_mask(mask):
    # Thresholding (Otsu's method)

    mask = mask.astype(np.uint8)
    _, binary_mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological operations
    kernel = np.ones((2, 2), np.uint8)

    # Binary dilation
    dilated_mask = cv2.dilate(binary_mask, kernel, iterations=1)

    # Border clearing
    cleared_mask = dilated_mask.copy()
    contours, _ = cv2.findContours(cleared_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(cleared_mask, contours, -1, 1, thickness=cv2.FILLED)

    largest_contour = max(contours, key=cv2.contourArea)

    middle_part_mask = np.zeros_like(cleared_mask)
    cv2.drawContours(middle_part_mask, [largest_contour], -1, 255, thickness=cv2.FILLED)

    smoothed_mask = cv2.erode(middle_part_mask, kernel, iterations=1)
    smoothed_mask = gaussian_filter(smoothed_mask.astype(float), sigma=1)
    cleared_mask = smoothed_mask

    # Small object removal
    cleared_mask = cv2.morphologyEx(cleared_mask, cv2.MORPH_OPEN, kernel)

    # Hole-filling within the lesion masks
    cleared_mask = cv2.morphologyEx(cleared_mask, cv2.MORPH_CLOSE, kernel)

    return cleared_mask.astype(np.uint8)

In [14]:
def circularity_f(processed_mask):
    label_image = measure.label(processed_mask, connectivity=2)
    properties = measure.regionprops(label_image)
    largest_region = max(properties, key=lambda x: x.area)
    circularity = 4 * np.pi * largest_region.area / (largest_region.perimeter ** 2)
    return circularity

In [15]:
working_directory_test = "C:\\Dev\\Soft\\test"
working_directory_train = "C:\\Dev\\Soft\\train"

In [22]:
# Set your parameters
target_size = (100, 100)
batch_size = 50
consistent_length = None
image_tensors = []
circularity_tensors = []

image_files = [f for f in os.listdir(working_directory_train) if f.endswith('.JPG')]
size = len(image_files)

for i in range(0, len(image_files), batch_size):
    batch = image_files[i:i + batch_size]
    print(i)
    batch_images = []
    circularity_vector = []

    for file in batch:
        image = Image.open(os.path.join(working_directory_train, file))
        resized_image_array = tf.keras.preprocessing.image.img_to_array(image)
        gray_image = 255 - resized_image_array.mean(axis=2).astype('int32')
        processed_mask = process_mask(gray_image)
        final_masked_image = cv2.bitwise_and(gray_image,gray_image,mask=processed_mask)
        final_masked_resized = cv2.resize(final_masked_image.astype('uint8'), target_size)
        batch_images.append(final_masked_resized)
      

    # Convert the batch of images to a tensor
    batch_tensor = np.stack(batch_images, axis=0)
    image_tensors.append(batch_tensor)
    
    
final_tensor = np.vstack(image_tensors)


0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400
2450
2500
2550
2600
2650
2700
2750
2800
2850
2900
2950
3000
3050
3100
3150
3200
3250
3300
3350
3400
3450
3500
3550
3600
3650
3700
3750
3800
3850
3900
3950
4000
4050
4100
4150
4200
4250
4300
4350
4400
4450
4500
4550
4600
4650
4700
4750
4800
4850
4900
4950
5000
5050
5100
5150
5200
5250
5300
5350
5400
5450
5500
5550
5600
5650
5700
5750
5800
5850
5900
5950
6000
6050
6100
6150
6200
6250
6300
6350
6400
6450
6500
6550
6600
6650
6700
6750
6800
6850
6900
6950
7000
7050
7100
7150
7200
7250
7300
7350
7400
7450
7500
7550
7600
7650
7700
7750
7800
7850
7900
7950
8000
8050
8100
8150
8200
8250
8300
8350
8400
8450
8500
8550
8600
8650
8700
8750
8800
8850
8900
8950
9000
9050
9100
9150
9200
9250
9300
9350
9400
9450
9500
9550
9600
9650
9700
9750
9800
9850
9900
9950
10000


In [21]:
image_tensors_test = []
circularity_tensors_test = []
image_files_test = [f for f in os.listdir(working_directory_test) if f.endswith('.JPG')]

for i in range(0, len(image_files_test), batch_size):
    batch_test = image_files_test[i:i + batch_size]
    print(i)
    batch_images_test = []
    circularity_vector_test = []

    for files in batch_test:
        image_test = Image.open(os.path.join(working_directory_test, files))
        resized_image_array_test = tf.keras.preprocessing.image.img_to_array(image_test)
        gray_image_test = 255 - resized_image_array_test.mean(axis=2).astype('int32')
        processed_mask_test = process_mask(gray_image_test)
        final_masked_image_test = cv2.bitwise_and(gray_image_test,gray_image_test,mask=processed_mask_test)
        final_masked_resized_test = cv2.resize(final_masked_image_test.astype('uint8'), target_size)
        batch_images_test.append(final_masked_resized_test)

    batch_tensor_test = np.stack(batch_images_test, axis=0)
    image_tensors_test.append(batch_tensor_test)

final_tensor_test = np.vstack(image_tensors_test)

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500


In [24]:
## Opens the excel file and creates a tensor of the classes

with open("train_class.csv", mode='r') as file:
    csv_reader = csv.reader(file)
    header = next(csv_reader)
    age_column_index = header.index("benign_malignant")
    age_data = [row[age_column_index] for row in csv_reader]


label_encoder = LabelEncoder()
class_tensor_np = np.array(age_data) 

desired_classes = ['benign', 'malignant']
filter_condition = np.isin(age_data, desired_classes)
filtered_classes = class_tensor_np[filter_condition]
class_tensor_encoded_np = label_encoder.fit_transform(filtered_classes)
class_tensor_encoded = tf.constant(class_tensor_encoded_np, dtype=tf.int32)

filtered_tensor1 = final_tensor[filter_condition]

#filtered_circularity_tensor = final_circularity[filter_condition]
#circularity_tensor = tf.constant(filtered_circularity_tensor)

In [23]:
## Opens the excel file and creates a tensor of the classes for the test images

with open("test_class.csv", mode='r') as file:
    csv_reader_test = csv.reader(file)
    header_test = next(csv_reader_test)
    age_column_index_test = header_test.index("benign_malignant")
    age_data_test = [row[age_column_index_test] for row in csv_reader_test]



label_encoder_test = LabelEncoder()
class_tensor_np_test = np.array(age_data_test)

desired_classes = ['benign', 'malignant']
filter_condition_test = np.isin(age_data_test, desired_classes)

filtered_classes_test = class_tensor_np_test[filter_condition_test]
class_tensor_encoded_np_test = label_encoder_test.fit_transform(filtered_classes_test)
class_tensor_encoded_test = tf.constant(class_tensor_encoded_np_test, dtype=tf.int32)

filtered_tensor1_test = final_tensor_test[filter_condition_test]

#filtered_circularity_tensor_test = final_circularity[filter_condition_test]
#circularity_tensor_test = tf.constant(filtered_circularity_tensor_test)

In [27]:
X_train1 = filtered_tensor1
y_train = class_tensor_encoded

X_test1 = filtered_tensor1_test
y_test = class_tensor_encoded_test

In [29]:
learning_rate = 2e-4
n_iterations = 501
batch_size = 128
dropout = 0.5
l2_reg = 1e-3 

input_shape = (100, 100, 1)

model1 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu',kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
    layers.Dropout(dropout),
    layers.Dense(2, activation='softmax')
])

optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
model1.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model1.fit(X_train1, y_train, epochs=10, batch_size=batch_size, validation_split=0.2)

loss, accuracy = model1.evaluate(X_test1, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
y_pred_prob = model1.predict(X_test1)

# Convert predicted probabilities to predicted labels
y_pred_labels = np.argmax(y_pred_prob, axis = 1)

# Convert one-hot encoded true labels to integer labels
y_true_labels = np.array(y_test)

# Compute confusion matrix
conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)

# Print the confusion matrix
print("Confusion Matrix:")
print(conf_matrix)

precision = precision_score(y_true_labels, y_pred_labels)
recall = recall_score(y_true_labels, y_pred_labels)
f1 = f1_score(y_true_labels, y_pred_labels)

print(f1,recall,precision)
float_tensor = tf.cast(X_test1, tf.float32)
std_deviation = tf.math.reduce_std(float_tensor, axis=[1, 2])
concatenated_vector = np.column_stack((y_pred_prob,std_deviation))

Confusion Matrix:
[[903   6]
 [166   5]]
0.054945054945054944 0.029239766081871343 0.45454545454545453


In [None]:
kmeans = KMeans(n_clusters=2)

# Fit the KMeans model to the data
kmeans.fit(concatenated_vector)

# Get cluster labels and centroids
labels = kmeans.labels_
centroids = kmeans.cluster_centers_
comparing = np.column_stack((y_true_labels,labels))

In [None]:
from sklearn.metrics import normalized_mutual_info_score

# Assuming 'true_labels' is your ground truth vector and 'predicted_labels' is the K-Means cluster assignments
nmi = normalized_mutual_info_score(y_true_labels, labels)
print("Normalized Mutual Information:", nmi)

In [None]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

# Scatter plot for each cluster
for i in range(kmeans.n_clusters):
    ax.scatter(concatenated_vector[labels == i, 0], concatenated_vector[labels == i, 1], concatenated_vector[labels == i, 2], label=f'Cluster {i + 1}')

# Plot cluster centers
ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], kmeans.cluster_centers_[:, 2],
           s=300, c='red', marker='X', label='Cluster Centers')

ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_zlabel('Feature 3')
ax.set_title('3D Clustering Visualization')
ax.legend()

plt.show()

In [None]:
x = concatenated_vector[:, 0]
y = concatenated_vector[:, 1]
z = concatenated_vector[:, 2]

# Create a 3D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z, c=y_true_labels, marker='o')

# Set axis labels
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

# Show the plot
plt.show()

In [None]:

x = concatenated_vector[:, 0]
z = concatenated_vector[:, 1]

# Create a 3D scatter plot

plt.scatter(x, z, c=y_true_labels, marker='o')
plt.show()

In [48]:
tetha1 = 0.06
tetha2 = 30

pred_f = []

for s in concatenated_vector:
    
    if s[1] > tetha1 and s[2] > tetha2:
        pred_f += [1]
 
    else:
        pred_f += [0]

conf_matrix_f = confusion_matrix(y_true_labels, pred_f)
conc = np.column_stack((y_true_labels,pred_f))

# Print the confusion matrix
print("Confusion Matrix:")
print(conf_matrix_f)
precision = precision_score(y_true_labels, pred_f)
print(precision)

Confusion Matrix:
[[558 351]
 [ 72  99]]
0.22
