In [9]:
!pip install tensorflow keras 



In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator


Only run the next cell if the data has not been split into training and validation

In [16]:
import os
import shutil
import random

# Define paths
data_dir = 'data/'
train_dir = 'train/'
validation_dir = 'validation/'

# Create directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)

# Get list of image filenames in each class folder
malignant_images = os.listdir(os.path.join(data_dir, 'malignant'))
benign_images = os.listdir(os.path.join(data_dir, 'normal'))

# Shuffle the filenames
random.shuffle(malignant_images)
random.shuffle(benign_images)

# Split ratio (adjust as needed)
split_ratio = 0.8  # 80% for training, 20% for validation

# Calculate split index
malignant_split_idx = int(len(malignant_images) * split_ratio)
benign_split_idx = int(len(benign_images) * split_ratio)

# Move images to train and validation folders
for img in malignant_images[:malignant_split_idx]:
    src = os.path.join(data_dir, 'malignant', img)
    dst = os.path.join(train_dir, 'malignant', img)
    shutil.move(src, dst)

for img in malignant_images[malignant_split_idx:]:
    src = os.path.join(data_dir, 'malignant', img)
    dst = os.path.join(validation_dir, 'malignant', img)
    shutil.move(src, dst)

for img in benign_images[:benign_split_idx]:
    src = os.path.join(data_dir, 'normal', img)
    dst = os.path.join(train_dir, 'normal', img)
    shutil.move(src, dst)

for img in benign_images[benign_split_idx:]:
    src = os.path.join(data_dir, 'normal', img)
    dst = os.path.join(validation_dir, 'normal', img)
    shutil.move(src, dst)


Setting up target size 

In [22]:
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
        'train/',
        target_size=(150, 150),
        batch_size=32,
        class_mode='binary')  # or categorical if more than two classes

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
        'validation/',
        target_size=(150, 150),
        batch_size=32,
        class_mode='binary')  # or categorical if more than two classes

datagen = ImageDataGenerator(rescale=1./255)
test_generator = datagen.flow_from_directory(
    'data/',
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='validation')


Found 548 images belonging to 2 classes.
Found 139 images belonging to 2 classes.
Found 0 images belonging to 2 classes.


In [27]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid')  # or softmax if more than two classes
])

model.summary()


In [28]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # or categorical_crossentropy if more than two classes
              metrics=['accuracy'])


In [29]:
history = model.fit(
      train_generator,
      steps_per_epoch=100,  # total_train // batch_size
      epochs=15,
      validation_data=validation_generator,
      validation_steps=50,  # total_validation // batch_size
      verbose=2)


Epoch 1/15


  self._warn_if_super_not_called()
  self.gen.throw(typ, value, traceback)


100/100 - 11s - 110ms/step - accuracy: 0.6825 - loss: 0.5154 - val_accuracy: 0.7842 - val_loss: 0.4027
Epoch 2/15
100/100 - 8s - 83ms/step - accuracy: 0.7956 - loss: 0.4405 - val_accuracy: 0.8058 - val_loss: 0.4128
Epoch 3/15
100/100 - 8s - 79ms/step - accuracy: 0.7993 - loss: 0.4497 - val_accuracy: 0.8058 - val_loss: 0.3969
Epoch 4/15
100/100 - 22s - 221ms/step - accuracy: 0.8066 - loss: 0.4437 - val_accuracy: 0.8058 - val_loss: 0.3687
Epoch 5/15
100/100 - 7s - 70ms/step - accuracy: 0.8248 - loss: 0.4181 - val_accuracy: 0.8058 - val_loss: 0.3826
Epoch 6/15
100/100 - 7s - 72ms/step - accuracy: 0.8047 - loss: 0.4050 - val_accuracy: 0.7986 - val_loss: 0.3361
Epoch 7/15
100/100 - 7s - 73ms/step - accuracy: 0.8449 - loss: 0.3465 - val_accuracy: 0.8345 - val_loss: 0.3146
Epoch 8/15
100/100 - 20s - 204ms/step - accuracy: 0.8631 - loss: 0.2748 - val_accuracy: 0.7914 - val_loss: 0.5347
Epoch 9/15
100/100 - 13s - 129ms/step - accuracy: 0.8540 - loss: 0.3196 - val_accuracy: 0.8273 - val_loss: 0.

In [30]:
loss, accuracy = model.evaluate(validation_generator)
print("Validation Loss:", loss)
print("Validation Accuracy:", accuracy)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 203ms/step - accuracy: 0.8672 - loss: 0.3628
Validation Loss: 0.33988603949546814
Validation Accuracy: 0.8633093237876892


In [31]:
import numpy as np
from sklearn.metrics import classification_report

# Assuming you have trained your model and named it 'model'

# Predict classes for validation set
y_pred = model.predict(validation_generator)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class labels

# Get true classes for validation set
y_true = validation_generator.classes

# Get class labels
class_labels = list(validation_generator.class_indices.keys())

# Generate classification report
print(classification_report(y_true, y_pred_classes, target_names=class_labels))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 197ms/step
              precision    recall  f1-score   support

   malignant       0.61      1.00      0.76        85
      normal       0.00      0.00      0.00        54

    accuracy                           0.61       139
   macro avg       0.31      0.50      0.38       139
weighted avg       0.37      0.61      0.46       139



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import confusion_matrix
%matplotlib inline
y_pred = model.predict(x_test)
y_pred_c = np.argmax(y_pred, axis=1)
y_test_c = np.argmax(y_test, axis=1)
matrix_confusion = confusion_matrix(y_test_c, y_pred_c)

sns.heatmap(matrix_confusion, square=True, annot=True, cmap='Blues', fmt='d', cbar=False )
plt.show()


### this doesnt work yet 