**Try Prediction using cv2 and preprocessing (crop,resize,convert to gray)**

In [1]:
import cv2
import glob
import imutils
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestClassifier

In [25]:
from google.colab import drive
drive.mount('/content/drive')
import pathlib
path = pathlib.Path("/content/drive/MyDrive/tumor_data")
%cd /content/drive/MyDrive/tumor_data
%ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/tumor_data
[0m[01;34mno[0m/  [01;34myes[0m/


In [28]:
ext = ['jpg', 'JPG', 'pnp', 'jpeg']


def get_files(path_, ext):
    temp_paths = []
    [temp_paths.extend(glob.glob(path_ + '*.' + e)) for e in ext]
    return temp_paths

files_yes = get_files('yes/', ext)
files_no = get_files('no/', ext)


def read_files(files):
    temp_images = []
    for file in files:
        temp_img = cv2.imread(file)
        if temp_img is not None:
            temp_images.append(temp_img)
    return temp_images

tumor_imgs_yes = read_files(files_yes)
tumor_imgs_no = read_files(files_no)


def crop_brain(image):
    
    # Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours in thresholded image, then grab the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)
    # extreme points
    extLeft = tuple(c[c[:, :, 0].argmin()][0])
    extRight = tuple(c[c[:, :, 0].argmax()][0])
    extTop = tuple(c[c[:, :, 1].argmin()][0])
    extBot = tuple(c[c[:, :, 1].argmax()][0])
    
    # crop new image out of the original image using the four extreme points (left, right, top, bottom)
    new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]            
    
    return new_image


tumor_imgs_croped_yes = []
tumor_imgs_croped_no = []



for image in tumor_imgs_yes:
    x = crop_brain(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    tumor_imgs_croped_yes.append(gray)


for image in tumor_imgs_no:
    x = crop_brain(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    tumor_imgs_croped_no.append(gray)


y_yes = np.ones(len(tumor_imgs_croped_yes), dtype="int8")
y_no = np.zeros(len(tumor_imgs_croped_no), dtype="int8")



X = np.concatenate((tumor_imgs_croped_yes, tumor_imgs_croped_no), axis=0)
y = np.concatenate((y_yes, y_no), axis=0)
print(X.shape)

d1, d2, d3 = X.shape

X = X.reshape((d1, d2 * d3))


(245, 128, 128)


In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)


# scale data before train model
scaler_ = StandardScaler()
X_train_sc = scaler_.fit_transform(X_train)
X_test_sc = scaler_.transform(X_test)


# random forest without pca
rf_model = RandomForestClassifier()
rf_model.fit(X_train_sc, y_train)
y_predict_rf = rf_model.predict(X_test_sc)


# SVC without pca
svc_model = SVC(kernel="linear")
svc_model.fit(X_train_sc, y_train)
y_predict_svc = svc_model.predict(X_test_sc)

# PCA
data_pca = PCA(n_components=12)
pca_components = data_pca.fit(X_train_sc)
X_train_pca = pca_components.fit_transform(X_train_sc)
X_test_pca = pca_components.transform(X_test_sc)

# KernelPCA
data_kpca = KernelPCA()
kpca_components = data_kpca.fit(X_train_sc)
X_train_kpca = kpca_components.fit_transform(X_train_sc)
X_test_kpca = kpca_components.transform(X_test_sc)


# RandomForest With PCA
rf_model_pca = RandomForestClassifier()
rf_model_pca.fit(X_train_pca, y_train)
y_predict_rf_pca = rf_model_pca.predict(X_test_pca)

# SVC With PCA
svc_model_pca = SVC(kernel="linear")
svc_model.fit(X_train_pca, y_train)
y_predict_pca = svc_model.predict(X_test_pca)


# RandomForest With KernelPCA
rf_model_kpca = RandomForestClassifier()
rf_model_kpca.fit(X_train_kpca, y_train)
y_predict_rf_kpca = rf_model_kpca.predict(X_test_kpca)

# SVC With KernelPCA
svc_model_kpca = SVC(kernel="linear")
svc_model.fit(X_train_kpca, y_train)
y_predict_kpca = svc_model.predict(X_test_kpca)


print("SVC without PCA: ", accuracy_score(y_test, y_predict_svc))
print("SVC with PCA: ", accuracy_score(y_test, y_predict_pca))

print("SVC without PCA:")
print(classification_report(y_test, y_predict_svc))
print("SVC with PCA:")
print(classification_report(y_test, y_predict_pca))


print("RandomForest without PCA:")
print(classification_report(y_test, y_predict_rf))

print("RandomForest with PCA:")
print(classification_report(y_test, y_predict_rf_pca))


print("RandomForest without KPCA:")
print(classification_report(y_test, y_predict_rf_kpca))

print("SVC with KPCA:")
print(classification_report(y_test, y_predict_kpca))


SVC without PCA:  0.7959183673469388
SVC with PCA:  0.6122448979591837
SVC without PCA:
              precision    recall  f1-score   support

           0       0.68      0.83      0.75        18
           1       0.89      0.77      0.83        31

    accuracy                           0.80        49
   macro avg       0.79      0.80      0.79        49
weighted avg       0.81      0.80      0.80        49

SVC with PCA:
              precision    recall  f1-score   support

           0       0.45      0.28      0.34        18
           1       0.66      0.81      0.72        31

    accuracy                           0.61        49
   macro avg       0.56      0.54      0.53        49
weighted avg       0.58      0.61      0.59        49

RandomForest without PCA:
              precision    recall  f1-score   support

           0       0.57      0.44      0.50        18
           1       0.71      0.81      0.76        31

    accuracy                           0.67        49


without crop function

In [30]:

ext = ['jpg', 'JPG', 'pnp', 'jpeg']

def get_files(path_, ext):
    temp_paths = []
    [temp_paths.extend(glob.glob(path_ + '*.' + e)) for e in ext]
    return temp_paths

files_yes = get_files('yes/', ext)
files_no = get_files('no/', ext)


def read_files(files):
    temp_images = []
    for file in files:
        temp_img = cv2.imread(file)
        if temp_img is not None:
            temp_images.append(temp_img)
    return temp_images

tumor_imgs_yes = read_files(files_yes)
tumor_imgs_no = read_files(files_no)

tumor_imgs_pp_yes = []
tumor_imgs_pp_no = []


for image in tumor_imgs_yes:
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  image_resize = cv2.resize(gray, (128, 128))
  tumor_imgs_pp_yes.append(image_resize)


for image in tumor_imgs_no:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_resize = cv2.resize(gray, (128, 128))
    tumor_imgs_pp_no.append(image_resize)


y_yes = np.ones(len(tumor_imgs_pp_yes), dtype="int8")
y_no = np.zeros(len(tumor_imgs_pp_no), dtype="int8")



X = np.concatenate((tumor_imgs_pp_yes, tumor_imgs_pp_no), axis=0)
y = np.concatenate((y_yes, y_no), axis=0)
print(X.shape)

d1, d2, d3 = X.shape

X = X.reshape((d1, d2 * d3))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)


# scale data before train model
scaler_ = StandardScaler()
X_train_sc = scaler_.fit_transform(X_train)
X_test_sc = scaler_.transform(X_test)


# random forest without pca
rf_model = RandomForestClassifier()
rf_model.fit(X_train_sc, y_train)
y_predict_rf = rf_model.predict(X_test_sc)


# SVC without pca
svc_model = SVC(kernel="linear")
svc_model.fit(X_train_sc, y_train)
y_predict_svc = svc_model.predict(X_test_sc)

# PCA
data_pca = PCA(n_components=12)
pca_components = data_pca.fit(X_train_sc)
X_train_pca = pca_components.fit_transform(X_train_sc)
X_test_pca = pca_components.transform(X_test_sc)

# KernelPCA
data_kpca = KernelPCA()
kpca_components = data_kpca.fit(X_train_sc)
X_train_kpca = kpca_components.fit_transform(X_train_sc)
X_test_kpca = kpca_components.transform(X_test_sc)


# RandomForest With PCA
rf_model_pca = RandomForestClassifier()
rf_model_pca.fit(X_train_pca, y_train)
y_predict_rf_pca = rf_model_pca.predict(X_test_pca)

# SVC With PCA
svc_model_pca = SVC(kernel="linear")
svc_model.fit(X_train_pca, y_train)
y_predict_pca = svc_model.predict(X_test_pca)


# RandomForest With KernelPCA
rf_model_kpca = RandomForestClassifier()
rf_model_kpca.fit(X_train_kpca, y_train)
y_predict_rf_kpca = rf_model_kpca.predict(X_test_kpca)

# SVC With KernelPCA
svc_model_kpca = SVC(kernel="linear")
svc_model.fit(X_train_kpca, y_train)
y_predict_kpca = svc_model.predict(X_test_kpca)


print("without PCA: ", accuracy_score(y_test, y_predict_svc))
print("with PCA: ", accuracy_score(y_test, y_predict_pca))

print("without PCA:")
print(classification_report(y_test, y_predict_svc))
print("with PCA:")
print(classification_report(y_test, y_predict_pca))


print("RandomForest without PCA:")
print(classification_report(y_test, y_predict_rf))

print("RandomForest with PCA:")
print(classification_report(y_test, y_predict_rf_pca))


print("RandomForest without KPCA:")
print(classification_report(y_test, y_predict_rf_kpca))

print("SVC with KPCA:")
print(classification_report(y_test, y_predict_kpca))


(245, 128, 128)
without PCA:  0.7755102040816326
with PCA:  0.7346938775510204
without PCA:
              precision    recall  f1-score   support

           0       0.71      0.67      0.69        18
           1       0.81      0.84      0.83        31

    accuracy                           0.78        49
   macro avg       0.76      0.75      0.76        49
weighted avg       0.77      0.78      0.77        49

with PCA:
              precision    recall  f1-score   support

           0       0.67      0.56      0.61        18
           1       0.76      0.84      0.80        31

    accuracy                           0.73        49
   macro avg       0.72      0.70      0.70        49
weighted avg       0.73      0.73      0.73        49

RandomForest without PCA:
              precision    recall  f1-score   support

           0       0.69      0.61      0.65        18
           1       0.79      0.84      0.81        31

    accuracy                           0.76        49


**Try CNN Model to Predict**

In [32]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory
import matplotlib.pyplot as plt

In [34]:
train_dataset = image_dataset_from_directory(
    "/content/drive/MyDrive/tumor_data", 
    labels='inferred',
    label_mode="int",
    color_mode='rgb',
    # color_mode='bgr',
    batch_size=32,
    image_size=(128, 128),
    shuffle=True,
    validation_split=0.1,
    subset="validation",
    seed=46,
    crop_to_aspect_ratio=True)

Found 253 files belonging to 2 classes.
Using 25 files for validation.


In [35]:
inputs = keras.Input(shape=(128, 128, 3))
x = keras.layers.Rescaling(1./255)(inputs)
x = keras.layers.Conv2D(filters=16, kernel_size=3, activation="selu")(x) 
x = keras.layers.MaxPooling2D(pool_size=2)(x)
x = keras.layers.Conv2D(filters=32, kernel_size=3, activation="elu")(x) 
x = keras.layers.MaxPooling2D(pool_size=2)(x)
x = keras.layers.Conv2D(filters=64, kernel_size=3, activation="tanh")(x) 
x = keras.layers.Flatten()(x)
outputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [36]:
model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
fitted_model =model.fit(train_dataset,epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [45]:
model.fit(train_dataset,epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


ValueError: ignored

In [39]:
print(fitted_model.history)

{'loss': [0.6637853384017944, 1.7887747287750244, 0.4055693447589874, 0.7521761059761047, 0.3716881573200226, 0.5033863186836243, 0.4942244589328766, 0.3046582043170929, 0.2605043649673462, 0.2945373058319092, 0.27706441283226013, 0.21766792237758636, 0.16892407834529877, 0.14894667267799377, 0.14693765342235565], 'accuracy': [0.7200000286102295, 0.5199999809265137, 0.8799999952316284, 0.47999998927116394, 0.8799999952316284, 0.6800000071525574, 0.6800000071525574, 0.8799999952316284, 1.0, 0.8799999952316284, 0.8799999952316284, 0.9200000166893005, 1.0, 1.0, 0.9599999785423279]}


In [46]:
print(model.history)

<keras.callbacks.History object at 0x7fc46007a690>


In [40]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 rescaling (Rescaling)       (None, 128, 128, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 126, 126, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 30, 32)       0         
 2D)                                                         