<a href="https://colab.research.google.com/github/suke-bhargav/BREAST-CANCER-CBISDDSM/blob/main/1modelclassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import tensorflow as tf
import shutil
import pandas as pd
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2

In [None]:
# Directory the kernel is running in; relative paths used by later cells
# (for example "./kaggle") are resolved against it.
current_working_directory = os.getcwd()

# print output to the console
print(current_working_directory)


In [None]:
from google.colab import files
from os import environ

In [None]:
!pip install -q kaggle

In [None]:
# upload kaggle API key
# `files` is google.colab.files; `uploaded` holds whatever files.upload() returns.
# NOTE(review): the following cell expects the key to be available as ./kaggle.json,
# so the uploaded file presumably has to be named kaggle.json - confirm.
uploaded = files.upload()

In [None]:
# define kaggle config folder
kaggle_config_dir = './kaggle'
kaggle_key_upload = './kaggle.json'
kaggle_key_path = os.path.join(kaggle_config_dir, 'kaggle.json')

# exist_ok=True keeps this cell re-runnable: a plain `mkdir` fails once the
# folder exists, which previously also skipped the move of a fresh key.
os.makedirs(kaggle_config_dir, exist_ok=True)

if os.path.exists(kaggle_key_upload):
    # A freshly uploaded key replaces any key stored by an earlier run.
    shutil.move(kaggle_key_upload, kaggle_key_path)
elif not os.path.exists(kaggle_key_path):
    raise FileNotFoundError(
        "kaggle.json was not found in the working directory or in ./kaggle; "
        "upload it before running this cell."
    )

# Tell the kaggle CLI where to look for the key.
environ['KAGGLE_CONFIG_DIR'] = './kaggle'

# hide kaggle API key for other users (owner read/write only)
os.chmod(kaggle_key_path, 0o600)


In [None]:
# fetch kaggle dataset
!kaggle datasets download -d awsaf49/cbis-ddsm-breast-cancer-image-dataset
!unzip "./*.zip" && rm *.zip-

In [None]:
# Dataset-level metadata table.
csv_path = '/content/csv/meta.csv'
df_meta = pd.read_csv(csv_path)

# Per-image DICOM information; later cells read its SeriesDescription and
# image_path columns.
dicom_info_path = '/content/csv/dicom_info.csv'
dicom_data = pd.read_csv(dicom_info_path)

In [None]:
# Folder the extracted JPEG images live in.
image_dir = '/content/jpeg'

# Rewrite the 'CBIS-DDSM/jpeg' part of every recorded path to the local folder
# once, for all rows. regex=False makes this a literal substring replacement,
# and missing values are passed through instead of raising.
local_image_paths = dicom_data.image_path.str.replace('CBIS-DDSM/jpeg', image_dir, regex=False)

# One path Series per image kind. All three now use the rewritten paths;
# previously the cropped images kept their original, unrewritten paths, so they
# did not have the '/content/jpeg/<folder>/<file>' shape the lookup tables built
# from these Series rely on.
full_mammogram_images = local_image_paths[dicom_data.SeriesDescription == 'full mammogram images']
cropped_images = local_image_paths[dicom_data.SeriesDescription == 'cropped images']
roi_mask_images = local_image_paths[dicom_data.SeriesDescription == 'ROI mask images']

# Show one example of each as the cell output (a tuple, so both are displayed).
full_mammogram_images.iloc[0], cropped_images.iloc[0]

In [None]:
# Lookup tables mapping one path component to the full path it came from.
# The key is the element at index 3 after splitting the path on "/"; for a path
# shaped like '/content/jpeg/<folder>/<file>' that element is <folder>.
# When several paths share a key, the last one iterated wins.
full_mammogram_dict = {path.split("/")[3]: path for path in full_mammogram_images}
cropped_dict = {path.split("/")[3]: path for path in cropped_images}
roi_mask_dict = {path.split("/")[3]: path for path in roi_mask_images}

In [None]:
# Case-description tables: mass and calcification cases, train and test splits.
# The four files are read in the same order as the names on the left.
mass_train_data, mass_test_data, calc_train_data, calc_test_data = (
    pd.read_csv(case_csv)
    for case_csv in (
        '/content/csv/mass_case_description_train_set.csv',
        '/content/csv/mass_case_description_test_set.csv',
        '/content/csv/calc_case_description_train_set.csv',
        '/content/csv/calc_case_description_test_set.csv',
    )
)

In [None]:
def filter_dataframe_by_base_directory(df, base_directory='/content/jpeg'):
    """Keep only the rows whose three image-path columns start with ``base_directory``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'image file path', 'cropped image file path'
        and 'ROI mask file path'.
    base_directory : str, default '/content/jpeg'
        Prefix every one of the three paths has to start with.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` selected by the boolean mask; ``df`` is not modified.

    Raises
    ------
    KeyError
        If one of the three path columns is missing from ``df``.
    """
    # na=False: a missing path counts as "does not match" instead of putting
    # NaN into the mask, which would break the boolean selection below.
    mask = (
        df['image file path'].str.startswith(base_directory, na=False) &
        df['cropped image file path'].str.startswith(base_directory, na=False) &
        df['ROI mask file path'].str.startswith(base_directory, na=False)
    )

    return df[mask]

In [None]:
def fix_image_path_mass(dataset, full_lookup=None, cropped_lookup=None, roi_lookup=None):
    """Replace the paths in column positions 11, 12 and 13 of ``dataset`` in place.

    For every row, the element at index 2 of the "/"-split path is looked up in
    the table belonging to that column; when the key is present, the cell is
    overwritten with the table's value. Cells whose key is unknown are left
    unchanged, and so are cells that are not strings or have fewer than three
    "/"-separated components (these used to raise).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Modified in place. NOTE(review): positions 11, 12 and 13 are presumably
        the 'image file path', 'cropped image file path' and
        'ROI mask file path' columns - confirm against the CSV header.
    full_lookup, cropped_lookup, roi_lookup : dict, optional
        Key -> replacement path tables for positions 11, 12 and 13. When
        omitted they default to the module-level ``full_mammogram_dict``,
        ``cropped_dict`` and ``roi_mask_dict``.

    Returns
    -------
    None
    """
    if full_lookup is None:
        full_lookup = full_mammogram_dict
    if cropped_lookup is None:
        cropped_lookup = cropped_dict
    if roi_lookup is None:
        roi_lookup = roi_mask_dict

    column_lookups = ((11, full_lookup), (12, cropped_lookup), (13, roi_lookup))
    for col_pos, lookup in column_lookups:
        # Snapshot the column first so the loop never reads cells it has written.
        column_values = dataset.iloc[:, col_pos].tolist()
        for row_pos, raw_path in enumerate(column_values):
            if not isinstance(raw_path, str):
                continue  # missing value: nothing to look up
            parts = raw_path.split("/")
            if len(parts) < 3:
                continue  # path too short to contain the key component
            img_name = parts[2]
            if img_name in lookup:
                dataset.iloc[row_pos, col_pos] = lookup[img_name]

In [None]:
# Rewrite the image paths of both mass tables (train first, then test);
# fix_image_path_mass updates each frame in place.
for mass_frame in (mass_train_data, mass_test_data):
    fix_image_path_mass(mass_frame)

In [None]:
# Space-free column names, defined once so the train and test tables cannot
# drift apart (the same mapping used to be written out twice).
MASS_COLUMN_RENAMES = {
    'left or right breast': 'left_or_right_breast',
    'image view': 'image_view',
    'abnormality id': 'abnormality_id',
    'abnormality type': 'abnormality_type',
    'mass shape': 'mass_shape',
    'mass margins': 'mass_margins',
    'image file path': 'image_file_path',
    'cropped image file path': 'cropped_image_file_path',
    'ROI mask file path': 'ROI_mask_file_path',
}

# rename() returns new frames; mass_train_data / mass_test_data keep their
# original column names.
mass_train = mass_train_data.rename(columns=MASS_COLUMN_RENAMES)
mass_test = mass_test_data.rename(columns=MASS_COLUMN_RENAMES)


In [None]:
# Preview the first rows only instead of dumping the whole table.
mass_train.head()


In [None]:
# Distinct values of the mass_shape column in the training table.
mass_train['mass_shape'].unique()


In [None]:
# Split the training rows by their pathology label.
benign = mass_train[mass_train.pathology == "BENIGN"]
benignwoclbk = mass_train[mass_train.pathology == "BENIGN_WITHOUT_CALLBACK"]
malignant = mass_train[mass_train.pathology == "MALIGNANT"]

# Kept as the last expression so the notebook actually displays the labels
# (as the first statement of the cell its result was silently discarded).
mass_train.pathology.unique()


In [None]:
# image_file_path column of each pathology subset.
benignimgs = benign['image_file_path']
malignantimgs = malignant['image_file_path']
benignwoclbkimgs = benignwoclbk['image_file_path']

# Destination folder for each class.
ben = '/content/class/benign'
benwcb = '/content/class/benignwithoutcallback'
mal = '/content/class/malignant'

# Only the benign and malignant folders are created here; benwcb is defined
# but no directory is made for it in this cell.
for class_dir in (ben, mal):
    os.makedirs(class_dir, exist_ok=True)

In [None]:
def _copy_into_class_dir(image_paths, class_dir):
    """Copy every existing file listed in ``image_paths`` into ``class_dir``.

    The destination file is named '<parent folder>_<file name>'. Copying under
    the bare file name lets two sources that share a file name but live in
    different folders overwrite each other; NOTE(review): source file names are
    presumably not unique across folders - confirm against the extracted data.

    Paths that are not strings or do not point to a file on disk are skipped
    and reported instead of aborting the whole copy.

    Returns
    -------
    tuple
        ``(copied, skipped)``: number of files copied and the list of skipped
        entries.
    """
    copied = 0
    skipped = []
    # dict.fromkeys drops repeated paths while keeping their order, so the same
    # source file is not copied more than once.
    for src_path in dict.fromkeys(image_paths):
        if not isinstance(src_path, str) or not os.path.isfile(src_path):
            skipped.append(src_path)
            continue
        parent_name = os.path.basename(os.path.dirname(src_path))
        dest_name = f"{parent_name}_{os.path.basename(src_path)}"
        shutil.copy(src_path, os.path.join(class_dir, dest_name))
        copied += 1
    if skipped:
        print(f"{class_dir}: skipped {len(skipped)} path(s) with no file on disk")
    return copied, skipped


benign_copied, benign_skipped = _copy_into_class_dir(benignimgs, ben)
malignant_copied, malignant_skipped = _copy_into_class_dir(malignantimgs, mal)
print(f"Copied {benign_copied} benign and {malignant_copied} malignant images")


In [None]:
# Root folder that holds the per-class image folders
data_dir = '/content/class'

# Images are fed to the model as 512 x 512, 32 per batch
image_height = image_width = 512
batch_size = 32


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout

In [None]:
# Data preprocessing
# (height, width) every image is resized to; also reused for the model input shape.
target_size = (image_height, image_width)

# One generator configuration with a 1/255 rescale factor and a 0.2 validation
# split; both subsets below are drawn from it.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation batches.
flow_options = dict(
    target_size=target_size,
    batch_size=batch_size,
    class_mode='binary',
)

# Generate data batches
train_generator = datagen.flow_from_directory(data_dir, subset='training', **flow_options)

val_generator = datagen.flow_from_directory(data_dir, subset='validation', **flow_options)

In [None]:
# Convolutional base: VGG16 without its classifier head, loaded with the
# ImageNet weights. With weights=None the base was randomly initialised, and
# because every base layer is frozen below it could never learn anything, so
# the head was trained on random features.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=target_size + (3,))

# Freeze the convolutional layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification layers
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    # Single sigmoid unit: one probability per image
    Dense(1, activation='sigmoid')
])


In [None]:

# Compile the model: Adam with its default settings, binary cross-entropy
# loss, accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Train the model for 10 epochs; each epoch runs through every training batch
# and then through every validation batch.
fit_options = dict(
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
)
history = model.fit(train_generator, **fit_options)


In [None]:
# Evaluate the model on every validation batch and report the two values it
# returns (loss first, then the accuracy metric).
val_metrics = model.evaluate(val_generator, steps=len(val_generator))
loss, accuracy = val_metrics
print(f'Validation Loss: {loss}, Validation Accuracy: {accuracy}')
