In [1]:
import glob
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cv2
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import categorical_crossentropy
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D
from keras.layers import Activation, Dropout, BatchNormalization, Flatten, Dense, AvgPool2D,MaxPool2D
from keras.models import Sequential, Model
import tensorflow as tf
from sklearn.model_selection import train_test_split
import torchvision
import torchvision.transforms as transforms
from keras.preprocessing.image import load_img
from keras.preprocessing.image import save_img
from keras.preprocessing.image import img_to_array

In [2]:
def train_directory(train_dir='/kaggle/input/histopathology-dataset/train/',
                    output_dir='/kaggle/working/training'):
    """Flatten the per-case training folders into one folder per cancer type.

    For each cancer type in ("LUAD", "LUSC", "MESO"), every file found at
    ``<train_dir>/<TYPE>/<case>/<file>`` is copied into
    ``<output_dir>/<TYPE>_TRAIN/``.

    Parameters
    ----------
    train_dir : str
        Root of the source dataset; expected to contain one sub-folder per
        cancer type, each holding one sub-folder per case.
    output_dir : str
        Root under which the ``<TYPE>_TRAIN`` folders are created. The default
        is the destination the notebook has always copied to.

    Returns
    -------
    pandas.DataFrame
        Columns ``directory`` (path of the copied file) and ``label`` (cancer
        type), with a fresh 0..n-1 index. Rows are grouped by label and sorted
        by path within each label.
    """
    cancers = ["LUAD", "LUSC", "MESO"]
    records = []

    for cancer in cancers:
        class_dir = os.path.join(output_dir, cancer + "_TRAIN")
        # exist_ok makes the cell safe to re-run; the folders are created
        # exactly where the files are copied to, not under the current cwd.
        os.makedirs(class_dir, exist_ok=True)

        # Index only what this call copied. Listing the destination folder
        # instead would also pick up any extra files written there later
        # (e.g. augmented images) when the cell is re-run.
        copied = set()
        for case_dir in glob.iglob(os.path.join(train_dir, cancer, "*")):
            for src in glob.iglob(os.path.join(case_dir, "*")):
                copied.add(shutil.copy(src, class_dir))

        # glob order is filesystem-dependent; sorting keeps the row order (and
        # therefore any seeded split made from this frame) reproducible.
        records.extend({"directory": path, "label": cancer} for path in sorted(copied))

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=["directory", "label"])

In [3]:
# Stage the training images and build the (directory, label) index of every copied file.
dir_label_df = train_directory()


In [4]:
# Display the training index (one row per image: file path and cancer-type label).
dir_label_df

In [5]:
# Number of images per cancer type, to check class balance before splitting.
print(dir_label_df['label'].value_counts())

In [6]:
# Hold out 20% of the images for evaluation. The split is stratified on the
# label so that every cancer type keeps the same proportion in both parts;
# a purely random split can leave a rare class under-represented in the
# held-out set. random_state fixes the shuffle for reproducibility.
train, test = train_test_split(
    dir_label_df,
    test_size=0.2,
    random_state=42,
    stratify=dir_label_df["label"],
)

print(f"train set shape: {train.shape}")
print(f"test set shape: {test.shape}")

In [7]:
# Offline augmentation pipeline: resize to 128x128, then (gated as one group by
# RandomApply with p=0.5) a rotation of up to 10 degrees plus random horizontal
# and vertical flips, and finally conversion to a tensor.
transform_train = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomApply(
        [
            transforms.RandomRotation(10),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
        ],
        p=0.5,
    ),
    transforms.ToTensor(),
])

In [None]:
# Smoke test of the augmentation pipeline: transform the first training image
# and write the result to disk so it can be inspected.
row = train.iloc[0]
row_directory = row['directory']
row_label = row['label']

img = load_img(row_directory)
trans_img = transform_train(img)
# Absolute output path: a path relative to the kernel's working directory only
# resolves when the cwd happens to be /kaggle, which a fresh kernel does not guarantee.
new_dir = '/kaggle/working/' + str(1) + '.jpg'
# transform_train ends with ToTensor, so the array is channels-first.
save_img(new_dir, img_to_array(trans_img), data_format='channels_first')

In [8]:
def data_augmentation(train, copies_per_label=None):
    """Oversample selected classes by writing augmented copies of their images.

    For every row of ``train`` whose label appears in ``copies_per_label``, the
    image is loaded once, passed through ``transform_train`` the requested
    number of times, and each result is saved next to the original as
    ``<original name>_aug<k>.jpg``.

    Parameters
    ----------
    train : pandas.DataFrame
        Must have the columns ``directory`` (image path) and ``label``.
    copies_per_label : dict[str, int], optional
        Number of augmented copies to create per image of each label. Labels
        that are absent get none. Defaults to ``{"LUAD": 2, "MESO": 25}``.

    Returns
    -------
    pandas.DataFrame
        The augmented rows followed by the original rows, with a fresh
        0..n-1 index.
    """
    if copies_per_label is None:
        copies_per_label = {"LUAD": 2, "MESO": 25}

    records = []
    for row_directory, row_label in zip(train["directory"], train["label"]):
        n_copies = copies_per_label.get(row_label, 0)
        if n_copies <= 0:
            continue

        # Load once per source image; every copy is an independent random
        # transform of the same input.
        img = load_img(row_directory)
        # splitext handles any extension length. The "_aug" marker keeps the
        # new name from colliding with a real image (a bare numeric suffix
        # would turn "img1" + "0" into the existing "img10").
        stem = os.path.splitext(row_directory)[0]
        for k in range(n_copies):
            trans_img = transform_train(img)
            new_dir = f"{stem}_aug{k}.jpg"
            save_img(new_dir, img_to_array(trans_img), data_format='channels_first')
            records.append({"directory": new_dir, "label": row_label})

    if not records:
        return train.reset_index(drop=True)

    # Build the augmented frame in one go instead of concatenating per image.
    aug_label_df = pd.DataFrame(records, columns=["directory", "label"])
    return pd.concat([aug_label_df, train], axis=0, ignore_index=True)

In [9]:
# Oversample the training split with augmented copies.
# NOTE: this rebinds `train` to its own output, so running the cell a second
# time would augment the already-augmented rows again; re-run the split cell first.
train = data_augmentation(train)

In [10]:
# Images per cancer type in the training split after augmentation.
print(train['label'].value_counts())

In [None]:
# Stage the dataset's "dev" split: flatten <dev>/<TYPE>/<case>/<file> into one
# folder per cancer type and index the result in test_label_df.
# Everything uses absolute paths and os.makedirs, so the kernel's working
# directory is never changed and the cell can be re-run safely.
test_dir = '/kaggle/input/histopathology-dataset/dev/'
cancers = ["LUAD", "LUSC", "MESO"]
luad_test_dir = '/kaggle/working/testing/LUAD_TEST/'
lusc_test_dir = '/kaggle/working/testing/LUSC_TEST/'
meso_test_dir = '/kaggle/working/testing/MESO_TEST/'
cat_test_dir = [luad_test_dir, lusc_test_dir, meso_test_dir]

for class_dir in cat_test_dir:
    os.makedirs(class_dir, exist_ok=True)

for cancer, class_dir in zip(cancers, cat_test_dir):
    for case_dir in glob.iglob(os.path.join(test_dir, cancer, "*")):
        for src in glob.iglob(os.path.join(case_dir, "*")):
            shutil.copy(src, class_dir)

labels = ["LUAD", "LUSC", "MESO"]
# Collect plain records and build the frame once; sorted() makes the row order
# independent of the filesystem's listing order.
test_records = [
    {"directory": path, "label": label}
    for label, class_dir in zip(labels, cat_test_dir)
    for path in sorted(glob.glob(os.path.join(class_dir, "*")))
]
test_label_df = pd.DataFrame(test_records, columns=["directory", "label"])

test_label_df.head(), test_label_df.tail()

In [13]:
# Show 5 randomly chosen images for each cancer type in a 3x5 grid.
plt.figure(figsize=(12, 8))
labels = ["LUAD", "LUSC", "MESO"]
pos = 1
for label in labels:
    # Index labels of all rows of this class. Sampling from these directly
    # works even if the rows are not contiguous, can pick the last row, and
    # does not fail when a class has a single image.
    label_index = dir_label_df.index[dir_label_df["label"] == label]
    for _ in range(5):
        sample_idx = np.random.choice(label_index)
        plt.subplot(3, 5, pos)
        pos += 1
        # cv2.imread returns BGR; convert so matplotlib shows the true colours.
        img_bgr = cv2.imread(dir_label_df.loc[sample_idx, "directory"])
        plt.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
        plt.title(dir_label_df.loc[sample_idx, "label"], size=15, color="white")
        plt.xticks([])
        plt.yticks([])

plt.show()

In [None]:

# Debug aid: print the kernel's current working directory.
! pwd

In [None]:
# Disabled experiment, kept as a string literal so none of it executes (the
# notebook only echoes the text): HSV colour thresholding of sample images with
# cv.inRange, followed by cv.bitwise_and to keep only the masked pixels.
'''from PIL import Image
import cv2 as cv
import numpy as np
dir_img_rgb = dir_label_df.iloc[83]['directory']
img_rbg = cv.imread(dir_img_rgb)
plt.imshow(img_rbg)
img_hsv = cv.cvtColor(img_rbg,cv.COLOR_BGR2HSV)

lower_blue = np.array([110,50,50])
upper_blue = np.array([130,255,255])

lower_red = np.array([150,135,100])
upper_red = np.array([170,255,255])

lower_notwhite = np.array([20,0,0])
upper_notwhite = np.array([102,255,255])
# Threshold the HSV image to get only blue colors
mask = cv.inRange(img_hsv, lower_red, upper_red)
# Bitwise-AND mask and original image
res = cv.bitwise_and(img_rbg,img_rbg, mask= mask)

plt.imshow(res)

lower_red = np.array([150,135,0])
upper_red = np.array([190,255,200])

plt.figure(figsize=(12,8))
pos = 1
for i in range(3):
    for j in range(5):
        labels_i = labels[i]
        index_i = dir_label_df[dir_label_df["label"] == labels_i].index
        random = np.random.randint(index_i[0],index_i[-1])
        plt.subplot(3,5,pos)
        pos+=1
        img_rbg = cv2.imread(dir_label_df.loc[random,"directory"])
        img_hsv = cv.cvtColor(img_rbg,cv.COLOR_BGR2HSV)
        mask = cv.inRange(img_hsv, lower_red, upper_red)
        res = cv.bitwise_and(img_rbg,img_rbg, mask= mask)
        plt.imshow(res)
        plt.title(dir_label_df.loc[random, "label"], size = 15, color = "white") 
        plt.xticks([])
        plt.yticks([])

plt.show()
'''

In [None]:
# Display the training frame that will be fed to the generators.
train

In [14]:


# Batch generators. Training batches get random geometric augmentation
# (rotation, shifts, shear, zoom, flips); evaluation batches are only rescaled
# and are served in a fixed order so predictions line up with test_gen.classes.
# NOTE(review): rescale is 1/128 rather than the more common 1/255 — confirm
# this is the intended input range for the pretrained backbone.
train_datagen = ImageDataGenerator(
    rescale=1./128.,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1.0/128.)

train_gen = train_datagen.flow_from_dataframe(
    dataframe=train,
    x_col='directory',
    y_col='label',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)
test_gen = test_datagen.flow_from_dataframe(
    dataframe=test,
    x_col='directory',
    y_col='label',
    target_size=(128, 128),
    batch_size=16,
    class_mode='categorical',
    shuffle=False,
)

In [15]:
from tensorflow import keras
# InceptionV3 backbone with ImageNet weights and without its classification
# head (include_top=False), sized for 128x128 RGB inputs.
base_model = keras.applications.InceptionV3(
    include_top=False,
    weights="imagenet",
    input_shape=(128, 128, 3),
)

In [16]:
# Freeze the base_model so its pretrained weights are not updated while the new head is trained
base_model.trainable = False

# Create new model on top: symbolic input with the same 128x128x3 shape the base model was built for
inputs = keras.Input(shape=(128, 128, 3))

In [19]:
# Classification head on top of the frozen backbone.
# training=False keeps the backbone in inference mode.
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout

# Hidden layers need a non-linearity: consecutive Dense layers without an
# activation collapse into a single linear map.
x = keras.layers.Dense(256, activation="relu")(x)
x = keras.layers.Dense(128, activation="relu")(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(32, activation="relu")(x)
# Each image has exactly one of 3 labels and the loss is categorical
# cross-entropy, so the output must be a softmax distribution; independent
# sigmoids do not sum to 1.
outputs = keras.layers.Dense(3, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model.summary()

In [None]:
# Keep on disk only the weights with the best validation loss seen so far
# (val_loss is ModelCheckpoint's default monitor, spelled out here).
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        "Tumor_classifier_model.h5",
        monitor="val_loss",
        save_best_only=True,
        verbose=0,
    )
]

model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): test_gen is used here for checkpoint selection and again later
# for the classification report, so that report is not a fully independent estimate.
model.fit(train_gen, validation_data=test_gen,
          use_multiprocessing=True,
          workers=6,
          epochs=40,
          callbacks=callbacks, verbose=1)

In [None]:
# Save the model in its current state to its own file, separate from the
# "Tumor_classifier_model.h5" checkpoint written during training.
model.save("InceptionV3_histo.h5")

In [None]:
# Number of batches needed to cover every held-out sample once (the last batch
# may be partial). np.ceil replaces np.math.ceil: the np.math alias is
# deprecated and absent from NumPy 2.
test_steps_per_epoch = int(np.ceil(test_gen.samples / test_gen.batch_size))

In [None]:
# Show the number of prediction batches.
test_steps_per_epoch

In [None]:
# Class-probability predictions for the held-out split. Model.predict accepts
# generators directly; predict_generator is deprecated.
predictions = model.predict(test_gen, steps=test_steps_per_epoch)

In [None]:
# Index of the highest-scoring class for each sample.
predicted_classes = predictions.argmax(axis=1)

In [None]:
# Ground-truth class indices and the class names, both taken from the generator.
true_classes = test_gen.classes
class_labels = list(test_gen.class_indices)

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
# Per-class precision / recall / F1 on the held-out split.
report = classification_report(
    y_true=true_classes,
    y_pred=predicted_classes,
    target_names=class_labels,
)
print(report)

In [None]:
# Confusion matrix on the held-out split (rows: true class, columns: predicted class).
conf_mat = confusion_matrix(y_true=true_classes, y_pred=predicted_classes)
print(conf_mat)

In [None]:
# Generator over the staged dev-set index: rescaling only, fixed order so the
# predictions line up with validation_gen.classes.
validation_datagen = ImageDataGenerator(rescale=1.0/128.)

validation_gen = validation_datagen.flow_from_dataframe(
    dataframe=test_label_df,
    x_col='directory',
    y_col='label',
    target_size=(128, 128),
    batch_size=16,
    class_mode='categorical',
    shuffle=False,
)

In [None]:
# Predict on the dev set. np.ceil replaces the deprecated np.math.ceil alias,
# and Model.predict replaces the deprecated predict_generator.
validation_steps_per_epoch = int(np.ceil(validation_gen.samples / validation_gen.batch_size))
validation_predictions = model.predict(validation_gen, steps=validation_steps_per_epoch)
validation_predicted_classes = np.argmax(validation_predictions, axis=1)

# Ground-truth class indices and class names, as reported by the generator.
validation_true_classes = validation_gen.classes
validation_class_labels = list(validation_gen.class_indices.keys())

In [None]:
# Per-class precision / recall / F1 on the dev set.
validation_report = classification_report(
    y_true=validation_true_classes,
    y_pred=validation_predicted_classes,
    target_names=validation_class_labels,
)
print(validation_report)

In [None]:
# Confusion matrix on the dev set (rows: true class, columns: predicted class).
v_conf_mat = confusion_matrix(
    y_true=validation_true_classes,
    y_pred=validation_predicted_classes,
)
print(v_conf_mat)