In [None]:
# import das bibliotecas
import numpy as np
import pandas as pd
import cv2
from glob import glob 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split

import xgboost as xgb

# Seed every random number generator the notebook relies on. Seeding NumPy
# alone leaves TensorFlow's weight initialisation, dropout masks and batch
# shuffling different on every "Restart & Run All".
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Root folder of the competition data.
path = "/kaggle/input/histopathologic-cancer-detection/"

# Folder that is globbed for the training *.tif tiles.
train_path = os.path.join(path, 'train/')

# Ground-truth table; later cells merge it on its `id` column and read `label`.
labels = pd.read_csv(os.path.join(path, 'train_labels.csv'))

In [None]:
# One row per training tile found on disk.
df = pd.DataFrame({'path': glob(os.path.join(train_path, '*.tif'))})

# The image id is the file name without its directory and extension.
# Using basename/splitext instead of splitting on the letter "n" keeps the
# extraction correct for any directory layout and any file name.
df['id'] = df['path'].map(lambda p: os.path.splitext(os.path.basename(p))[0])

# Attach the label of each tile (default inner join: tiles without a label
# row are dropped).
df = df.merge(labels, on="id")
df.head(3)

In [None]:
# Side length, in pixels, used for the network's square input_shape.
IMG_SIZE = 96
# Intended mini-batch size.
# NOTE(review): not referenced by any cell visible in this notebook.
BATCH_SIZE = 128

In [None]:
# Wedge caption and colour per class, keyed by label value so that they do not
# depend on the ordering returned by value_counts().
# NOTE(review): assumes label 0 = no cancer and 1 = cancer - confirm against
# the dataset description.
class_style = {
    0: ('Sem cancer', '#00ff99'),
    1: ('Com Cancer', '#FF96A7'),
}
class_counts = df['label'].value_counts().sort_index()

# The figure must be created *before* drawing; calling plt.figure() after
# plt.pie() opens a second, empty figure and leaves the pie at default size.
plt.figure(figsize=(16, 16))
plt.title("Distribuição das classes")
plt.pie(
    class_counts,
    labels=[class_style[k][0] for k in class_counts.index],
    colors=[class_style[k][1] for k in class_counts.index],
    startangle=180,
    autopct='%1.1f',
    shadow=True,
)
plt.show()

In [None]:
# #fig = plt.figure(figsize=(30, 6))
# #np.random.choice(df['path'], 20)
# X = []

# #plt.title("Exemplos de imagens \n")
# for idx, img in enumerate(np.random.choice(df['path'])):
#     #ax = fig.add_subplot(2, 20//2, idx+1, xticks=[], yticks=[])    
#     im = cv2.imread(img)    
#     X.append(im)
#     if()
#     #plt.imshow(im)
#     #lab = df.loc[df['id'] == img.split('/')[-1].split('.')[0], 'label'].values[0]
#     #ax.set_title('Label: %s'%lab) 



In [None]:
# Preview the first rows of the merged path / id / label table.
df.head()

In [None]:
# (rows, columns) of the merged table.
df.shape

In [None]:
import tqdm

In [None]:
# Number of tiles loaded into memory. The previous loop broke *after*
# appending index 10000 and therefore loaded 10001 images.
N_SAMPLES = 10000
subset = df.head(N_SAMPLES)

X = []
y = []
# Iterate over the two columns directly instead of building a row Series with
# df.iloc[idx] on every step; `total` makes the progress bar reflect the
# subset rather than the whole table.
for img_path, label in tqdm.tqdm(zip(subset['path'], subset['label']), total=len(subset)):
    img = cv2.imread(img_path)  # OpenCV returns the channels in BGR order
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # appending it would turn X into an object array and fail much later.
        raise IOError("Could not read image: {}".format(img_path))
    X.append(img)
    y.append(label)

X = np.array(X)
y = np.array(y)

In [None]:
# Show the first tile in grayscale. A 2-D array is otherwise rendered through
# matplotlib's default colormap, giving a false-colour picture.
imagem = cv2.imread(df['path'][0], cv2.IMREAD_GRAYSCALE)
plt.title("Primeira imagem do dataset")
plt.imshow(imagem, cmap='gray');

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Stage 1: keep 70 % for training and set 30 % aside.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Stage 2: cut the held-out 30 % again (70 / 30).
# Naming caveat: `X_test`/`y_test` is what `fit` receives as validation_data,
# while `X_valid`/`y_valid` is the set used for the final ROC / accuracy.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_holdout, y_holdout, test_size=0.3, random_state=42
)

# The intermediate hold-out arrays are not needed any more.
del X_holdout, y_holdout

In [None]:
import tensorflow
from tensorflow import keras

In [None]:
# Input side length for the network (re-states the value set in an earlier cell).
IMG_SIZE = 96

In [None]:
from keras.applications.resnet50 import ResNet50
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.layers import Conv2D, MaxPool2D

# Dropout rate applied in the fully connected head.
dropout_fc = 0.5

# ImageNet-pretrained ResNet50 without its classifier, used as feature extractor.
conv_base = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Head: flatten -> Dense(256) -> BatchNorm -> ReLU -> Dropout -> sigmoid unit.
# The Dense layer carries no bias because BatchNormalization follows it.
my_model = Sequential([
    conv_base,
    Flatten(),
    Dense(256, use_bias=False),
    BatchNormalization(),
    Activation("relu"),
    Dropout(dropout_fc),
    Dense(1, activation="sigmoid"),
])

In [None]:
# Print the architecture and parameter counts.
# NOTE(review): this cell runs before the layer-freezing cell, so the
# trainable / non-trainable split shown here reflects the state before freezing.
my_model.summary()

In [None]:
import warnings

# Fine-tune only the last residual stage (stage 5) of ResNet50 and keep every
# earlier layer frozen.
#
# The attribute is `trainable` (lower case); assigning `Trainable` only creates
# an unrelated attribute that Keras never reads.
conv_base.trainable = True

set_trainable = False
for layer in conv_base.layers:
    # First stage-5 layer under both naming schemes: 'res5a_branch2a' in the
    # older keras-applications ResNet50, 'conv5_*' in current Keras / tf.keras.
    # With only the old name the flag never flips on recent versions and the
    # whole base is silently frozen.
    if layer.name == 'res5a_branch2a' or layer.name.startswith('conv5_'):
        set_trainable = True
    layer.trainable = set_trainable

if not set_trainable:
    warnings.warn(
        "No stage-5 layer found in conv_base; every ResNet50 layer is frozen "
        "and only the classification head will be trained."
    )


In [None]:
from keras import optimizers

# Binary classification with a single sigmoid output: binary cross-entropy
# loss, Adam with step size 0.001, accuracy reported during training.
adam = optimizers.Adam(0.001)
my_model.compile(
    optimizer=adam,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs and monitor on the (X_test, y_test) split after each
# epoch. No batch_size is passed, so the framework default applies.
history = my_model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
def _plot_history_metric(history, metric, title, filename):
    """Plot one metric's training and validation curves, save and show them.

    Args:
        history: object returned by `model.fit`; its `.history` dict must
            contain `metric` and `'val_' + metric`.
        metric: key of the training series, e.g. 'loss' or 'accuracy'.
        title: figure title.
        filename: path the figure is saved to.
    """
    train_values = history.history[metric]
    valid_values = history.history['val_' + metric]
    epoch_axis = range(1, len(train_values) + 1)

    plt.plot(epoch_axis, train_values, color='blue', label="training_" + metric)
    plt.plot(epoch_axis, valid_values, color='red', label="validation_" + metric)
    plt.legend(loc='best')
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.savefig(filename, bbox_inches='tight')
    plt.show()


# 1-based epoch numbers, kept as a notebook-level name.
epochs = list(range(1, len(history.history['loss']) + 1))

# The titles now name the plotted quantity; previously the loss figure was
# titled 'training' and the accuracy figure 'validation', although both show
# training *and* validation curves. File names are unchanged so anything that
# picks up the saved images keeps working.
_plot_history_metric(history, 'loss', 'Loss (training vs. validation)', "training.png")
_plot_history_metric(history, 'accuracy', 'Accuracy (training vs. validation)', "validation.png")

In [None]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Score the X_valid split and derive the ROC curve with its area.
predictions = my_model.predict(X_valid)
false_positive_rate, true_positive_rate, threshold = roc_curve(y_valid, predictions)
area_under_curve = auc(false_positive_rate, true_positive_rate)

fig, ax = plt.subplots()
# Dashed diagonal = performance of a random classifier.
ax.plot([0, 1], [0, 1], 'k--')
ax.plot(
    false_positive_rate,
    true_positive_rate,
    label='AUC = {:.3f}'.format(area_under_curve),
)
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.set_title('ROC curve')
ax.legend(loc='best')
fig.savefig('ROC_PLOT.png', bbox_inches='tight')
plt.show()



In [None]:
# Model outputs for the X_valid split; thresholded into class labels in the next cell.
# NOTE(review): repeats the `my_model.predict(X_valid)` call whose result the
# ROC cell already stored in `predictions`.
y_pred = my_model.predict(X_valid)

In [None]:
# Turn each score into a hard label: below 0.5 -> 0, otherwise -> 1.
# The reshape enforces exactly one score per sample before thresholding.
scores = np.asarray(y_pred).reshape(len(y_pred))
y_pred = np.where(scores < 0.5, 0, 1)

In [None]:
# Share of X_valid samples whose thresholded prediction equals the true label.
# (accuracy_score is also imported in the notebook's first cell.)
from sklearn.metrics import accuracy_score
accuracy_score(y_valid, y_pred)

In [None]:
# import das bibliotecas
import numpy as np
import pandas as pd
import cv2
from glob import glob 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split

# NOTE(review): this trailing cell repeats the notebook's first import cell;
# re-seeding here only affects NumPy random draws made after this point.
np.random.seed(42)