In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [2]:
train_data_dir = 'IDC_regular_ps50_idx5'

In [5]:
def is_valid_image(file_path):
    try:
        with Image.open(file_path) as img:
            img.verify()
            return True
    except (IOError, SyntaxError) as e:
        print(f"Invalid image: {file_path} ({e})")
        return False
    
valid_images = []
for image_file in os.listdir(train_data_dir):
    image_path = os.path.join(train_data_dir, image_file)
    for image in os.listdir(image_path):
        file = os.path.join(image_path, image)
        for f in os.listdir(file):
            photo = os.path.join(file, f)
            if is_valid_image(photo):
                valid_images.append(photo)

In [6]:
valid_images

['IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1001_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1051_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1101_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1151_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1201_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1251_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1301_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1351_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1501_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1551_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y1701_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y351_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y401_class0.png',
 'IDC_regular_ps50_idx5\\10253\\0\\10253_idx5_x1001_y451_class0.png',
 'IDC_reg

In [7]:
groups = []

In [8]:
for i in valid_images:
    groups.append(i.split("_")[7][5])

In [9]:
Fseries = pd.Series(valid_images, name= 'Fseries')
Lseries = pd.Series(groups, name='Lseries')
train_df = pd.concat([Fseries, Lseries], axis= 1)
train_df

Unnamed: 0,Fseries,Lseries
0,IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001...,0
1,IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001...,0
2,IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001...,0
3,IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001...,0
4,IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001...,0
...,...,...
277519,IDC_regular_ps50_idx5\9383\1\9383_idx5_x2051_y...,1
277520,IDC_regular_ps50_idx5\9383\1\9383_idx5_x2051_y...,1
277521,IDC_regular_ps50_idx5\9383\1\9383_idx5_x2101_y...,1
277522,IDC_regular_ps50_idx5\9383\1\9383_idx5_x2101_y...,1


In [10]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(train_df, test_size=0.20, random_state=42)
train_df = train_df.reset_index(drop=True)
test_df = test_df .reset_index(drop=True)

In [11]:
train_df

Unnamed: 0,Fseries,Lseries
0,IDC_regular_ps50_idx5\12898\0\12898_idx5_x651_...,0
1,IDC_regular_ps50_idx5\9125\1\9125_idx5_x1351_y...,1
2,IDC_regular_ps50_idx5\8863\0\8863_idx5_x1651_y...,0
3,IDC_regular_ps50_idx5\15902\1\15902_idx5_x2551...,1
4,IDC_regular_ps50_idx5\15513\0\15513_idx5_x1001...,0
...,...,...
222014,IDC_regular_ps50_idx5\13022\0\13022_idx5_x551_...,0
222015,IDC_regular_ps50_idx5\9267\0\9267_idx5_x2501_y...,0
222016,IDC_regular_ps50_idx5\13591\0\13591_idx5_x3051...,0
222017,IDC_regular_ps50_idx5\14153\0\14153_idx5_x901_...,0


In [12]:
test_df

Unnamed: 0,Fseries,Lseries
0,IDC_regular_ps50_idx5\9076\0\9076_idx5_x2901_y...,0
1,IDC_regular_ps50_idx5\14157\0\14157_idx5_x3151...,0
2,IDC_regular_ps50_idx5\9291\0\9291_idx5_x2401_y...,0
3,IDC_regular_ps50_idx5\13400\1\13400_idx5_x1501...,1
4,IDC_regular_ps50_idx5\12880\1\12880_idx5_x2551...,1
...,...,...
55500,IDC_regular_ps50_idx5\8955\0\8955_idx5_x601_y1...,0
55501,IDC_regular_ps50_idx5\10264\1\10264_idx5_x551_...,1
55502,IDC_regular_ps50_idx5\15840\0\15840_idx5_x351_...,0
55503,IDC_regular_ps50_idx5\14156\0\14156_idx5_x3301...,0


In [17]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

test_datagen = ImageDataGenerator(rescale = 1./255)

training_set = train_datagen.flow_from_dataframe(train_df, 
                                                 target_size = (50, 50),
                                                 batch_size = 15,
                                                 x_col='Fseries',
                                                 y_col='Lseries',
                                                 class_mode = 'binary')

test_set = test_datagen.flow_from_dataframe(test_df, 
                                            target_size = (50, 50),
                                            batch_size = 15,
                                            x_col='Fseries',
                                            y_col='Lseries',
                                            class_mode = 'binary')

Found 222019 validated image filenames belonging to 2 classes.
Found 55505 validated image filenames belonging to 2 classes.


In [18]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid')) 
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [19]:
model.fit(               training_set,
                         steps_per_epoch = 14000,
                         epochs = 25,
                         validation_data = test_set,
                         validation_steps = 2000)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x22fae904a50>