In [1]:
import os
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used later live under it.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
# Root folder of the image dataset on the mounted Drive.
# NOTE(review): "keukomia" looks like a misspelling — it has to match the real
# folder name on Drive, so confirm before renaming anything.
training_dir = "/content/drive/MyDrive/Colab Notebooks/Image Processing/keukomia/Original"
# Entries directly under the root; the labelling loop treats each one as a class folder.
img_dir_list = os.listdir(training_dir)

In [3]:
# Show the entries found under training_dir.
img_dir_list

['Pre', 'Pro', 'Early', 'Benign']

In [4]:
# Fixed integer label for every class folder (independent of os.listdir order).
class_names = {'Benign': 0, 'Pro': 1, 'Early': 2, 'Pre': 3}

# Build one [file_name, class_id, folder] record per image.
DATA_DF = []
for img_dir in img_dir_list:
  if img_dir not in class_names:
    # Stray entries (e.g. ".ipynb_checkpoints") are not classes; skipping them
    # avoids a KeyError on the label lookup.
    continue
  # sorted() makes the record order deterministic; os.listdir order is arbitrary.
  for file_name in sorted(os.listdir(os.path.join(training_dir, img_dir))):
    DATA_DF.append([file_name, class_names[img_dir], img_dir])

In [5]:
# Convert the record list to a pandas DataFrame and shuffle its rows.
import pandas as pd
from sklearn.utils import shuffle

Train_Data_Frame = pd.DataFrame(DATA_DF, columns = ["file_name","class","dir"])

# Seed the shuffle so the row order is reproducible across runs. Without it the
# fixed random_state of the later train/test split cannot give a repeatable
# split, because that split's input order would differ on every run.
Train_Data_Frame = shuffle(Train_Data_Frame, random_state=42)

Train_Data_Frame.head()

Unnamed: 0,file_name,class,dir
670,WBC-Malignant-Pre-459.jpg,3,Pre
1030,WBC-Malignant-Pro-603.jpg,1,Pro
2950,WBC-Benign-287.jpg,0,Benign
1921,WBC-Malignant-Early-465.jpg,2,Early
2066,WBC-Malignant-Early-697.jpg,2,Early


In [6]:
import numpy as np
# Use the public tf.keras path: `keras.utils.np_utils` is a legacy module that
# newer Keras releases no longer ship.
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels. num_classes is pinned to the label map so
# the width is always len(class_names), even if the highest-numbered class is
# missing from the data.
y_data = to_categorical(Train_Data_Frame["class"].to_numpy(), num_classes=len(class_names))

In [7]:
# Peek at the first five one-hot label rows.
print(y_data[:5])

[[0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]]


In [8]:
import time
# Wall-clock reference point. NOTE(review): no cell visible here reads `start` again.
start = time.time()

import numpy as np
# NOTE(review): ResNet50, image and Model are not referenced by any cell visible here.
from tensorflow.keras.applications.resnet50 import ResNet50
# NOTE(review): `tensorflow.python.*` is presumably a non-public namespace —
# confirm, and prefer the `tensorflow.keras.*` path if these are still needed.
from tensorflow.python.keras.preprocessing import image
from tensorflow.python.keras.models import Model
import tensorflow as tf

In [9]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall computed over the tensors passed in.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    clipped to [0, 1] and rounded before summing, so each entry counts as 0 or 1.

    Returns:
        sum(rounded true positives) / (sum(rounded actual positives) + epsilon).
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    # epsilon keeps the division defined when there are no positives at all
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed over the tensors passed in.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    clipped to [0, 1] and rounded before summing, so each entry counts as 0 or 1.

    Returns:
        sum(rounded true positives) / (sum(rounded predicted positives) + epsilon).
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    # epsilon keeps the division defined when nothing is predicted positive
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m`` on the same inputs.

    Returns:
        2 * (precision * recall) / (precision + recall + epsilon).
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # epsilon guards the case where precision and recall are both zero
    return 2*((p*r)/(p+r+K.epsilon()))

In [10]:
# Convolutional classifier for 224x224x3 images with a 4-way softmax output.
# Layout: two strided conv/pool stages (each followed by per-image
# standardisation), three 3x3 conv layers, a final pool, then a dense head.
model = tf.keras.models.Sequential([

    # Stage 1: 7x7 conv, stride 2 -> 3x3 max-pool, stride 2 -> standardise
    tf.keras.layers.Conv2D(96, (7, 7), strides=(2, 2), activation='relu',
                           input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D(3, strides=2),
    tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),

    # Stage 2: 5x5 conv, stride 2 -> 3x3 max-pool, stride 2 -> standardise
    tf.keras.layers.Conv2D(256, (5, 5), strides=(2, 2), activation='relu'),
    tf.keras.layers.MaxPooling2D(3, strides=2),
    tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),

    # Stage 3: three stacked 3x3 convolutions, then a final max-pool
    tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),
    tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),
    tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(3, strides=2),

    tf.keras.layers.Flatten(),

    # Dense head. The hidden layers need a non-linearity: with the default
    # (no activation) the two 4096-unit layers collapse into a single linear
    # map and add no modelling capacity ahead of the softmax.
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dense(4096, activation='relu'),

    # One output unit per class.
    tf.keras.layers.Dense(4, activation='softmax')
])

In [11]:
# Configure training: RMSprop optimiser, categorical cross-entropy loss, and the
# metrics reported each epoch (built-in ones plus the custom f1_m).
model.compile(
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        f1_m,
        tf.keras.metrics.AUC(),
    ],
)

In [12]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint

# Multiply the learning rate by 0.3 when val_accuracy has not improved for 3 epochs.
# The keyword is `min_delta`; the previous spelling `min_denta` was swallowed by
# **kwargs, so the threshold was never actually passed through.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=3, min_delta=0.0001, mode='auto', verbose=1)

# Write TensorBoard event files to ./logs
tensorboard = TensorBoard(log_dir='logs')

# Location where the model is saved during training, as 'cnn_classification.h5'
file_path = "/content/drive/MyDrive/Colab Notebooks/ML/images_train_test_val/cnn_classification.h5"
# Make sure the target folder exists, otherwise the first checkpoint write fails
# only after a full epoch has already been trained.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Keep only the model with the best val_accuracy seen so far
checkpoint = ModelCheckpoint(file_path, monitor='val_accuracy', save_best_only=True, mode='auto', verbose=1)

In [13]:
import cv2
from tqdm import tqdm

# Load every image listed in Train_Data_Frame (in row order, so x_data stays
# aligned with y_data) and resize it to the 224x224 input size of the model.
x_data = []
for dir_name, file_name in tqdm(zip(Train_Data_Frame['dir'], Train_Data_Frame['file_name']),
                                total=len(Train_Data_Frame)):
  img_path = os.path.join(training_dir, dir_name, file_name)
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread signals failure by returning None rather than raising. Fail
    # here with the offending path instead of an opaque error inside cv2.resize.
    # Skipping the file is not an option: it would misalign x_data and y_data.
    raise ValueError(f"Could not read image: {img_path}")
  x_data.append(cv2.resize(img, (224, 224), interpolation = cv2.INTER_AREA))




100%|██████████| 3256/3256 [00:26<00:00, 120.80it/s]


#### load data

In [14]:
from sklearn.model_selection import train_test_split

# Keep 90% of the samples for training; the remainder becomes x_test / y_test.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    train_size=0.9,
    shuffle=True,
    random_state=42,
)

In [15]:
# Drop the name for the full image list; x_train / x_test from the split are used from here on.
del x_data

In [16]:
# Stack the training images into one array just to inspect its shape (x_train itself stays a list).
np.array(x_train).shape

(2930, 224, 224, 3)

In [17]:
# Shape of the one-hot training labels.
y_train.shape

(2930, 4)

### If there is a separate test folder
 

In [18]:
# Disabled cell: the whole body is a single string literal, so none of it runs.
# NOTE(review): it targets a different dataset (kidney, 'Grade*' classes) and
# lists `training_dir` rather than `testing_dir` — fix both before re-enabling.
'''testing_dir = "/content/drive/MyDrive/Colab Notebooks/Image Processing/kidney/Test_images"
img_dir_list = os.listdir(training_dir)
class_names = {'Grade0': 0, 'Grade1': 1, 'Grade3': 2, 'Grade4': 3, 'Grade2': 4}
# list file address and class
DATA_DF = []
for img_dir in img_dir_list:
  file_dir = os.listdir(testing_dir+"/"+img_dir)
  for file_name in file_dir:
    DATA_DF.append([file_name,class_names[img_dir],img_dir])

# convert to pandas data frame
import pandas as pd
from sklearn.utils import shuffle

Test_Data_Frame = pd.DataFrame(DATA_DF, columns = ["file_name","class","dir"])

Test_Data_Frame = shuffle(Test_Data_Frame)

import numpy as np
from keras.utils.np_utils import to_categorical
y_test_data = np.array(to_categorical(Test_Data_Frame["class"]))

'''

'testing_dir = "/content/drive/MyDrive/Colab Notebooks/Image Processing/kidney/Test_images"\nimg_dir_list = os.listdir(training_dir)\nclass_names = {\'Grade0\': 0, \'Grade1\': 1, \'Grade3\': 2, \'Grade4\': 3, \'Grade2\': 4}\n# list file address and class\nDATA_DF = []\nfor img_dir in img_dir_list:\n  file_dir = os.listdir(testing_dir+"/"+img_dir)\n  for file_name in file_dir:\n    DATA_DF.append([file_name,class_names[img_dir],img_dir])\n\n# convert to pandas data frame\nimport pandas as pd\nfrom sklearn.utils import shuffle\n\nTest_Data_Frame = pd.DataFrame(DATA_DF, columns = ["file_name","class","dir"])\n\nTest_Data_Frame = shuffle(Test_Data_Frame)\n\nimport numpy as np\nfrom keras.utils.np_utils import to_categorical\ny_test_data = np.array(to_categorical(Test_Data_Frame["class"]))\n\n'

In [19]:
# Disabled cell: the whole body is a single string literal, so none of it runs.
# NOTE(review): `features.reshape(2048,)` does not fit the 4-unit softmax output
# of the `model` built in this notebook; this looks like leftover
# feature-extraction code — confirm before re-enabling.
'''import cv2
from tqdm import tqdm
x_test_data = []
for rows in tqdm(range(len(Test_Data_Frame))):
  get_file_name = Test_Data_Frame.iloc[rows]
  img = cv2.imread(testing_dir+'/'+get_file_name['dir']+'/'+get_file_name['file_name'])
  stretch_near = cv2.resize(img, (224, 224),
               interpolation = cv2.INTER_AREA)
  features = model.predict(stretch_near.reshape(1,224,224,3))
  features = features.reshape(2048,)
  x_test_data.append(features)'''

"import cv2\nfrom tqdm import tqdm\nx_test_data = []\nfor rows in tqdm(range(len(Test_Data_Frame))):\n  get_file_name = Test_Data_Frame.iloc[rows]\n  img = cv2.imread(testing_dir+'/'+get_file_name['dir']+'/'+get_file_name['file_name'])\n  stretch_near = cv2.resize(img, (224, 224),\n               interpolation = cv2.INTER_AREA)\n  features = model.predict(stretch_near.reshape(1,224,224,3))\n  features = features.reshape(2048,)\n  x_test_data.append(features)"

In [20]:
# Disabled (string literal): x_test_data / y_test_data are only created by the disabled cells above.
'''x_test, y_test = x_test_data, y_test_data'''

'x_test, y_test = x_test_data, y_test_data'

In [21]:
# Disabled (string literal): would train on the full dataset; note that `x_data` is removed earlier by `del x_data`.
'''x_train,y_train = x_data, y_data'''

'x_train,y_train = x_data, y_data'

In [22]:
#from sklearn.model_selection import train_test_split

#x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.15, random_state=42)


#### Model Training 


In [None]:
# Mini-batch size passed to model.fit
img_batch_size = 50

# Train for 50 epochs, using the held-out split as validation data; the callbacks
# log to TensorBoard, checkpoint the model and adjust the learning rate.
hist = model.fit(
    np.array(x_train),
    y_train,
    batch_size=img_batch_size,
    epochs=50,
    validation_data=(np.array(x_test), y_test),
    callbacks=[tensorboard, checkpoint, reduce_lr],
)

Epoch 1/50

In [None]:
import matplotlib.pyplot as plt
# List the metric names recorded by model.fit
print(hist.history.keys())

# Training vs. validation loss per epoch
plt.figure(figsize=(15,10))
plt.plot(hist.history["loss"],label = "training loss")
plt.plot(hist.history["val_loss"],label = "validation loss")
plt.title("Training Loss & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Training vs. validation accuracy per epoch
plt.figure(figsize=(15,10))
plt.plot(hist.history["accuracy"],label = "training acc")
plt.plot(hist.history["val_accuracy"],label = "validation acc")
plt.title("Training Accuracy & Validation Accuracy")
# The x axis is the epoch index (it was previously mislabelled "Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()