In [1]:
import sys
#!{sys.executable} -m pip install dicomsdl
!pip install /kaggle/input/rsnawheelspackages/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
#import pydicom as dicom
import dicomsdl as dicom
import tensorflow as tf
import tensorflow_io as tfio
from PIL import Image as im
import os
from tqdm import tqdm
import cv2
from PIL import Image
from joblib import Parallel, delayed
import gc
import random

Processing /kaggle/input/rsnawheelspackages/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
Installing collected packages: dicomsdl
Successfully installed dicomsdl-0.109.1
[0m

## Enter the path of the test dataset and image dataset here

In [2]:

# CSV describing the test images; the patient_id, image_id, laterality and
# prediction_id columns are read from it further down.
datasetPath = '/kaggle/input/rsna-breast-cancer-detection/test.csv'
# Root folder of the DICOM files, laid out as <patient_id>/<image_id>.dcm.
imgPath = '/kaggle/input/rsna-breast-cancer-detection/test_images/'


## Data processing pipeline

In [3]:



# Cropping geometry shared by crop() and crop_reverse(): only the first 512
# rows/columns of the input are kept, and every crop is resized to 128x128.
_CROP_SOURCE_LIMIT = 512
_CROP_OUTPUT_SIZE = (128, 128)


def _as_2d_pixels(imgName):
    """Return imgName as a numpy array, rejecting anything that is not 2-D."""
    pixels = np.asarray(imgName)
    if pixels.ndim != 2:
        raise ValueError("expected a 2-D (single-channel) image")
    return pixels


def _first_hit_per_row(mask):
    """Return the column of the first True in each row of `mask`, skipping rows without any True."""
    has_hit = mask.any(axis=1)
    # argmax reports 0 for an all-False row, so those rows are dropped explicitly.
    return mask.argmax(axis=1)[has_hit]


def _crop_before_first_zero(imgName):
    """Keep columns [0, c), where c is the largest per-row "first zero pixel" column."""
    pixels = _as_2d_pixels(imgName)
    first_zero = _first_hit_per_row(pixels == 0)
    if first_zero.size == 0:
        # Same exception type the original max([]) produced; img_process relies on
        # an exception here to fall back to the opposite crop direction.
        raise ValueError("no zero-valued pixel found in any row")
    crop_size = int(first_zero.max())
    region = pixels[0:_CROP_SOURCE_LIMIT, 0:crop_size]
    # cv2.resize raises on an empty region (crop_size == 0); that is left as is.
    return cv2.resize(region, _CROP_OUTPUT_SIZE)


def _crop_from_first_nonzero(imgName):
    """Keep columns [c, 512), where c is the smallest per-row "first non-zero pixel" column."""
    pixels = _as_2d_pixels(imgName)
    first_nonzero = _first_hit_per_row(pixels != 0)
    if first_nonzero.size == 0:
        raise ValueError("no non-zero pixel found in any row")
    crop_size = int(first_nonzero.min())
    region = pixels[0:_CROP_SOURCE_LIMIT, crop_size:_CROP_SOURCE_LIMIT]
    return cv2.resize(region, _CROP_OUTPUT_SIZE)


def crop(sideName, imgName):
    """
    This function is used to crop the breast images. It takes two arguments.

    For 'L' the image is cut off at the right-most column where a row first
    becomes zero; for 'R' it is cut from the left-most column where a row first
    becomes non-zero. The crop is then resized to 128x128.

    Input:-
    :sideName = Laterality of breast, 'L' or 'R'; any other value leaves the image untouched
    :imgName = 2-D image pixel data of the DCM image

    Output:-
    :return = Output after cropping the image (the input itself for an unknown side).

    Raises ValueError when no row contains the pixel value being searched for;
    cv2.resize raises when the resulting crop is empty.
    """
    if sideName == 'L':
        return _crop_before_first_zero(imgName)
    if sideName == 'R':
        return _crop_from_first_nonzero(imgName)
    return imgName


def crop_reverse(sideName, imgName):
    """
    This function is used to crop the breast images but in the reverse order.
    Because the laterality is defined wrongly for some images. It takes two arguments.

    It behaves exactly like crop() with 'L' and 'R' swapped.

    Input:-
    :sideName = Laterality of breast, 'L' or 'R'; any other value leaves the image untouched
    :imgName = 2-D image pixel data of the DCM image

    Output:-
    :return = Output after cropping the image (the input itself for an unknown side).

    Raises the same exceptions as crop().
    """
    if sideName == 'R':
        return _crop_before_first_zero(imgName)
    if sideName == 'L':
        return _crop_from_first_nonzero(imgName)
    return imgName
    

def img_process(i,filename,sides):
    
    """
    Load one DICOM file and convert it into a cropped 8-bit image.

    Steps: read the pixel data, min-max scale it to [0, 1], invert it when the
    photometric interpretation is MONOCHROME1, convert to uint8, resize to
    512x512 and crop with crop(); if crop() raises, crop_reverse() is tried.

    Input:-
    :i = Key of this image in `sides` (the index of the image in the dataframe)
    :filename = Path of the image
    :sides = Laterality of all images, looked up as sides[i]
    
    Output:-
    :return = Output after cropping the image.

    Any exception raised by crop_reverse() in the fallback path propagates.
    """
    
    dsraw = dicom.open(filename)
    # Work in float64 so the subtraction below cannot wrap around in the file's
    # native integer dtype.
    ds = dsraw.pixelData().astype(np.float64)
    
    lowest, highest = ds.min(), ds.max()
    if highest > lowest:
        ds = (ds - lowest) / (highest - lowest)
    else:
        # A constant image has no dynamic range; use zeros instead of dividing
        # by zero (NaN would then be cast to uint8 with an undefined result).
        ds = np.zeros(ds.shape, dtype=np.float64)
    if dsraw.PhotometricInterpretation == "MONOCHROME1":  
        # Flip the intensity scale so both photometric interpretations end up
        # with the same polarity.
        ds = 1 - ds
    ds = (ds * 255).astype(np.uint8)

    ds = cv2.resize(ds,(512,512))
    
    try:
        ds = np.array(crop(sides[i], ds))   
    except Exception:
        # crop() fails when it cannot find a boundary in the expected direction
        # (e.g. wrong laterality label); retry with the sides swapped. Only
        # Exception is caught so Ctrl-C / SystemExit still stop the run.
        ds = np.array(crop_reverse(sides[i], ds))

    return ds

def dcmToPix(datasetPath, imgPath, n_jobs=128, outputPath='/kaggle/working/imgData.pkl'):
    
    """
    This function is used to process all the images which will be used for the training/test dataset.

    It reads the CSV, builds the path <imgPath>/<patient_id>/<image_id>.dcm for
    every row, runs img_process() on each file in parallel, stores the results
    in a new 'img_data' column and pickles the dataframe to `outputPath`.
    
    Input:-
    :datasetPath = Path of the cancer dataset (CSV with patient_id, image_id and laterality columns)
    :imgPath = Path of the image dataset (with or without a trailing slash)
    :n_jobs = Number of joblib workers; defaults to the previously hard-coded 128
    :outputPath = Where the dataframe with the 'img_data' column is pickled
   
    Output:-
    :return = List of all the processed images, in CSV row order
    
    """
    
    dataset = pd.read_csv(datasetPath)
    sides = dataset['laterality']

    # os.path.join yields the same paths as the former string concatenation when
    # imgPath ends with '/', and also works when the trailing slash is missing.
    imgData = [
        os.path.join(imgPath, str(patient_id), str(image_id) + '.dcm')
        for patient_id, image_id in zip(dataset['patient_id'], dataset['image_id'])
    ]

    # enumerate(tqdm(...)) lets the progress bar run to completion; the index is
    # the key img_process uses to look up the laterality in `sides`.
    result = Parallel(n_jobs=n_jobs)(
        delayed(img_process)(i, fname, sides) for i, fname in enumerate(tqdm(imgData))
    )
    
    dataset['img_data'] = result
    dataset.to_pickle(outputPath)
    
    return result
    

## Applying the data processing pipeline to the data

This may take hours if the dataset is large.

In [4]:
# Decode, normalise and crop every test image; dcmToPix also pickles the result to disk.
testImgData = dcmToPix(datasetPath, imgPath)

 75%|███████▌  | 3/4 [00:00<00:00, 119.52it/s]


## Normalizing the data and processing the data to make it compatible with the model

In [5]:
#with (open('/kaggle/input/output/imgData.pkl', "rb")) as openfile:
#     imgData = pickle.load(openfile)


In [6]:

# Second name for the processed image list; the CLAHE cell further down iterates over it.
testdataset = testImgData


In [7]:
#from scipy.ndimage import zoom
#ids=76
#thresh = 0.1
#imgshow = testdataset['img_data'][ids]
#imgshow = np.clip(imgshow,thresh,1)
#imgshow = imgshow[25:105,25:105]
#print(testdataset['cancer'][ids])

#plt.imshow(imgshow)

In [8]:
def random_rotate(imgData):
    """
    Centre-crop a processed image and equalise its contrast with CLAHE.

    Despite the name, no rotation is applied.

    Input:-
    :imgData = Image array holding exactly 128*128 values

    Output:-
    :return = The 95x95 window [15:110, 15:110] of the image after CLAHE
              (clipLimit=7, 3x3 tile grid).
    """
    # Reshape first: an array of the wrong size fails here, before any OpenCV work.
    square = imgData.reshape(128, 128)
    window = square[15:110, 15:110]

    equaliser = cv2.createCLAHE(clipLimit=7, tileGridSize=(3,3))
    return equaliser.apply(window)

In [9]:
from scipy import ndimage
# Run every processed image through random_rotate, showing a progress bar.
imgDataList = [random_rotate(rawImg) for rawImg in tqdm(testdataset)]
testImgData = imgDataList

100%|██████████| 4/4 [00:00<00:00, 550.51it/s]


In [10]:
test = testImgData

# Scale the pixel values from [0, 255] to [0, 1] in one vectorised step instead
# of dividing image by image in a Python loop.
test_features = np.asarray(test) / 255

# Add a trailing axis of length 1 to every 95x95 image: (n_images, 95, 95, 1).
test_features = test_features.reshape(len(test_features), 95, 95, 1)


## Loading the trained model

I have already trained the convolutional neural network on the training dataset. Because the training notebook took very long to run, I am not uploading the earlier notebooks. The cells above must be run before the model can be loaded and used to predict the outcomes.

**Please set the path of the trained model in the cell below. The model is saved in the dataset named "pre trained model of breast cancer".**

In [11]:
from tensorflow import keras
# Load the previously trained network from its .h5 file.
model = keras.models.load_model("/kaggle/input/pre-trained-model-of-breast-cancer/trained_model_breast_cancer3.h5")

2023-02-04 23:21:06.870141: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 23:21:06.963347: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 23:21:06.964207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 23:21:06.967723: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

## Predictions

In [12]:

# Score every preprocessed image in a single predict() call.
pred = model.predict(test_features)
# Flatten to 1-D; reshape(len(pred)) only succeeds when the model emits exactly one value per image.
pred= pred.reshape(len(pred))

2023-02-04 23:21:11.727099: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2023-02-04 23:21:12.671444: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


## Saving the predictions to the working directory as submission.csv

In [13]:
testData = pd.read_csv(datasetPath)

# One row per image: the prediction_id it belongs to and the model's score for it.
submissionFrame={'prediction_id':testData['prediction_id'],\
                 'cancer':pred}
submission = pd.DataFrame(submissionFrame)
# Average the per-image scores of each prediction_id. The column is selected
# explicitly: the former mean('cancer') passed the string positionally as the
# `numeric_only` flag, which only worked by accident.
submission = submission.groupby('prediction_id')['cancer'].mean().reset_index()

# NOTE: `pred` is rebound from the per-image array to the per-prediction_id
# Series, so re-running this cell requires re-running the prediction cell first.
pred = submission['cancer']
# Hard 0/1 labels at a 0.5 threshold. They are not written to the submission;
# the name is kept for the optional evaluation code further down.
bin_pred = (pred >= 0.5).to_numpy().astype(int)

print(pred)

0    6.653023e-08
1    2.135496e-09
Name: cancer, dtype: float32


In [14]:
# Write the averaged per-prediction_id scores; index=False keeps the row index out of the CSV.
submission.to_csv('/kaggle/working/submission.csv',index=False)

In [15]:

#trues = testdataset['cancer']

#bin_pred = []
#for i in pred:
#    if i>=0.5:
#        bin_pred.append(1)
#    else:
#        bin_pred.append(0)
#bin_pred = np.array(bin_pred)


In [16]:

def recall_m(y_true, y_pred):
    """
    Recall: true positives divided by the number of actual positives.

    Both inputs are clipped to [0, 1] and rounded, so probabilities are
    thresholded by np.round before counting.

    Input:-
    :y_true = Array of ground-truth labels
    :y_pred = Array of predicted labels or scores, same shape as y_true

    Output:-
    :return = Recall, or 0.0 when y_true contains no positives (instead of the
              NaN a 0/0 division would give).
    """
    true_positives = np.sum(np.round(np.clip(y_true * y_pred, 0, 1)))
    possible_positives = np.sum(np.round(np.clip(y_true, 0, 1)))
    if possible_positives == 0:
        return 0.0
    return true_positives / possible_positives

def precision_m(y_true, y_pred):
    """
    Precision: true positives divided by the number of predicted positives.

    Both inputs are clipped to [0, 1] and rounded, so probabilities are
    thresholded by np.round before counting.

    Input:-
    :y_true = Array of ground-truth labels
    :y_pred = Array of predicted labels or scores, same shape as y_true

    Output:-
    :return = Precision, or 0.0 when nothing is predicted positive (instead of
              the NaN a 0/0 division would give).
    """
    true_positives = np.sum(np.round(np.clip(y_true * y_pred, 0, 1)))
    predicted_positives = np.sum(np.round(np.clip(y_pred, 0, 1)))
    if predicted_positives == 0:
        return 0.0
    return true_positives / predicted_positives

def f1_m(y_true, y_pred):
    """
    F1 score: harmonic mean of precision_m() and recall_m().

    Input:-
    :y_true = Array of ground-truth labels
    :y_pred = Array of predicted labels or scores, same shape as y_true

    Output:-
    :return = 2 * precision * recall / (precision + recall), or 0.0 when
              precision + recall is zero (instead of dividing by zero).
    """
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2*((precision*recall)/denominator)


#print(f1_m(trues,bin_pred))

Please let me know if there is anything missing.