In [1]:

!pip install --upgrade pip
!pip install python-gdcm

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Collecting pip
  Downloading pip-22.3.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.0.4
    Uninstalling pip-22.0.4:
      Successfully uninstalled pip-22.0.4
Successfully installed pip-22.3.1
[0mCollecting python-gdcm
  Downloading python_gdcm-3.0.20-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: python-gdcm
Successfully installed python-gdcm-3.0.20
[0m

In [6]:
!pip install -qU python-gdcm pydicom pylibjpeg

!pip install scikit-image

[0mCollecting scikit-image
  Downloading scikit_image-0.19.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m72.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting PyWavelets>=1.1.1
  Downloading PyWavelets-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m82.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hCollecting tifffile>=2019.7.26
  Downloading tifffile-2022.10.10-py3-none-any.whl (210 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.3/210.3 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting imageio>=2.4.1
  Downloading imageio-2.24.0-py3-none-any.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m82.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting networkx>=2.2
  Downloa

In [7]:
from functools import partial
import multiprocessing as mp
from pathlib import Path
import numpy as np
import pandas as pd
import pydicom
from skimage.transform import resize

In [8]:
# RESCALE_TO = (512, 512)
RESCALE_TO = (256, 256)
DATA_PATH = Path("/kaggle/input/rsna-breast-cancer-detection")
TRAINING_IMAGES = DATA_PATH.joinpath("train_images").iterdir()
TRAINING_DATA = DATA_PATH.joinpath("train.csv")
TEST_IMAGES = DATA_PATH.joinpath("test_images").iterdir()
TEST_DATA = DATA_PATH.joinpath("test.csv")

In [9]:
def dicom_file_to_data(path):
    dicom = pydicom.read_file(path)
    data = dicom.pixel_array
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    return data * dicom.RescaleSlope + dicom.RescaleIntercept

In [10]:
TR_IMAGES = []
TR_DATA = []
def process_training_image_directory(directory_path, training_data=None):
    print(f"processing image directory: {directory_path}")
    for image_path in directory_path.iterdir():
        print(f"\tTraining image: {image_path}")
        image_data = dicom_file_to_data(image_path)
        patient_id = image_path.parent.name
        image_id = image_path.stem
        resized_image = resize(image_data, (RESCALE_TO))
        df = training_data[(training_data.patient_id == int(patient_id)) & (training_data.image_id == int(image_id))]
        TR_IMAGES.append(resized_image)
        TR_DATA.append(df['cancer'].iloc[0])
    return TR_IMAGES, TR_DATA

In [11]:
training_data_df = pd.read_csv(TRAINING_DATA)
TRAINING_IMAGES = DATA_PATH.joinpath("train_images", "10130")
images, data = process_training_image_directory(TRAINING_IMAGES, training_data_df)

processing image directory: /kaggle/input/rsna-breast-cancer-detection/train_images/10130
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/1165309236.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/2110820077.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/388811999.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/1013166704.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/613462606.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/1672636630.dcm
	Training image: /kaggle/input/rsna-breast-cancer-detection/train_images/10130/1360338805.dcm


In [12]:
def test_model():
    testing_data_df = pd.read_csv(TEST_DATA)
    for image_path in TEST_IMAGES.iterdir():
        image = dicom_file_to_data(image_path)
        #TODO: call the model with image

In [16]:
import os 
os.system('pip install scikit-learn')

Collecting scikit-learn
  Downloading scikit_learn-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 54.5 MB/s eta 0:00:00
Collecting joblib>=1.1.1
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.0/298.0 kB 29.3 MB/s eta 0:00:00
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.0 threadpoolctl-3.1.0




0

In [17]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
import numpy as np

model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3),activation='relu',input_shape=(256,256,1)))

model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64,(3, 3), activation='relu'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='softmax'))

X_train, X_test, y_train, y_test = train_test_split(TR_IMAGES, TR_DATA, random_state=42, test_size=0.2,shuffle=True)
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
X_train = np.expand_dims(X_train, axis=-1)
X_test=np.expand_dims(X_train, axis=-1)
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

2023-01-10 06:04:00.868203: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-01-10 06:04:00.868262: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-10 06:04:00.868300: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (5a7a7a1502c0): /proc/driver/nvidia/version does not exist
2023-01-10 06:04:00.868685: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


ValueError: Failed to find data adapter that can handle input: <class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'numpy.int64'>"})