**Import libraries**

In [None]:
import os
from os import listdir
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.utils import np_utils, to_categorical
from keras.preprocessing import image

%matplotlib inline
#************************************************

**Read the dataset**

In [None]:
# Create the working folders that receive a flat copy of the images, one folder per class.
# makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir raises
# FileExistsError as soon as the folders exist from an earlier run.
# NOTE(review): the first path is relative and the other two are absolute; they only refer
# to the same tree when the working directory is /kaggle/working — confirm.
os.makedirs('augmented', exist_ok=True)
os.makedirs('/kaggle/working/augmented/benign', exist_ok=True)
os.makedirs('/kaggle/working/augmented/malignant', exist_ok=True)

In [None]:
def getListOfFiles(dirName):
    """Return the paths of all files under ``dirName``, descending into sub-directories.

    Entries are visited in the order ``os.listdir`` reports them; the files of a
    sub-directory are placed at the position where that sub-directory was met.
    Each returned path is ``dirName`` joined with the relative location of the file.
    """
    collected = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if not os.path.isdir(candidate):
            # Plain file: record it and move on.
            collected.append(candidate)
            continue
        # Directory: splice in everything found beneath it.
        collected.extend(getListOfFiles(candidate))

    return collected

In [None]:
# Walk the benign source tree and copy every .png slide into the flat benign folder.
files_benign = getListOfFiles('../input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/benign')
benign_pngs = (path for path in files_benign if path.endswith('.png'))
for f in benign_pngs:
    shutil.copy(f, 'augmented/benign')

# Same for the malignant source tree.
files_malignant = getListOfFiles('../input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/malignant')
malignant_pngs = (path for path in files_malignant if path.endswith('.png'))
for f in malignant_pngs:
    shutil.copy(f, 'augmented/malignant')

In [None]:
# List the copied image paths for each class from the working folders.
benign_images, malignent_images = map(
    getListOfFiles,
    ('/kaggle/working/augmented/benign', '/kaggle/working/augmented/malignant'),
)

**Data Visualization**

**Benign slide image**

In [None]:
# Preview one benign slide resized to 120x120.
# target_size takes (height, width) only, and color_mode='rgb' is the supported
# replacement for the deprecated grayscale=False flag.
image.load_img(benign_images[3], target_size=(120, 120), color_mode='rgb')

**Malignant slide image**

In [None]:
# Preview one malignant slide resized to 120x120.
# target_size takes (height, width) only, and color_mode='rgb' is the supported
# replacement for the deprecated grayscale=False flag.
image.load_img(malignent_images[3], target_size=(120, 120), color_mode='rgb')

In [None]:
# Total number of image paths collected across both classes.
total_images = len(benign_images) + len(malignent_images)
total_images

In [None]:
# Build one table with a row per image: its file path ("image") and its class label
# ("target", 0 = benign, 1 = malignant). Malignant rows are listed first, then benign.
#
# The frame is constructed from complete columns instead of filling it cell by cell with
# `data.iloc[k]["image"] = ...`: that chained assignment writes to a temporary object and
# is silently dropped when pandas copy-on-write is active. It also gives "target" an
# integer dtype instead of object.
data = pd.DataFrame({
    "image": list(malignent_images) + list(benign_images),
    "target": [1] * len(malignent_images) + [0] * len(benign_images),
})

In [None]:
# Preview the first ten rows of the path/label table.
data.head(10)

In [None]:
# (rows, columns) of the path/label table.
data.shape

**Check for class imbalance**

In [None]:
# Number of rows per class label, to see how uneven the two classes are.
count_data = data["target"].value_counts()
count_data

In [None]:
# Bar chart of the number of images per class label.
# The data vector is passed as `x=` because current seaborn releases no longer accept it
# positionally. `order` pins the bar order so the fixed tick labels below always line up
# with the right bar.
target = sns.countplot(x=data["target"], order=[0, 1])
target.set_xticklabels(['0','1'])
plt.show()

In [None]:
# Balance the classes: draw as many benign rows as there are malignant rows
# (sklearn's resample samples with replacement by default), then stack both groups.
# NOTE(review): sampling happens before the train/test split further down, so the same
# benign image can end up in both the training and the evaluation subsets — confirm
# whether that is acceptable.
malignant_rows = data[data['target'] == 1]
benign_rows = data[data['target'] == 0]

ben_upsampled = resample(
    benign_rows,
    n_samples=malignant_rows.shape[0],
    random_state=42,
)

up_sampled = pd.concat([malignant_rows, ben_upsampled])

up_sampled['target'].value_counts()

In [None]:
# Preview the first ten resampled benign rows.
ben_upsampled.head(10)

In [None]:
# (rows, columns) of the balanced table.
up_sampled.shape

In [None]:
# Load every image of the balanced table as a 28x28 RGB array scaled to [0, 1].
train_image = []

for path in tqdm(up_sampled['image']):
    # target_size is (height, width); color_mode='rgb' replaces the deprecated
    # grayscale=False flag.
    img = image.load_img(path, target_size=(28, 28), color_mode='rgb')
    img = image.img_to_array(img)
    img = img/255
    train_image.append(img)

X = np.array(train_image)
# Select the label column by name, not by position, and force an integer dtype so the
# labels compare cleanly with predicted class indices later on.
y = up_sampled['target'].values.astype(int)

# First split: 80% training, 20% held out.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
# Second split of the held-out 20%: 80% stays in X_test, 20% goes to X_val.
# Note the naming: X_test is what model.fit receives as validation data, while X_val is
# the subset the final accuracy is computed on.
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, random_state=42, test_size=0.2 , shuffle=True)

# One-hot encode the labels for the 2-unit softmax output.
Y_train = to_categorical(y_train, 2)
Y_test = to_categorical(y_test, 2)
Y_val = to_categorical(y_val, 2)

print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

In [None]:
# Small CNN: convolution -> pooling -> flatten -> dense(150) -> 2-way softmax.
model = Sequential([
    # Convolution layer: 30 filters of 3x3, stride 1, no padding, ReLU activation,
    # fed with 28x28 three-channel inputs.
    Conv2D(30, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu', input_shape=(28,28,3)),
    # Pooling layer.
    # NOTE(review): a 1x1 pooling window leaves the feature map size unchanged —
    # confirm whether a larger window such as (2,2) was intended.
    MaxPool2D(pool_size=(1,1)),
    # Flatten the convolution output into a vector.
    Flatten(),
    # Hidden layer of 150 neurons.
    Dense(150, activation='relu'),
    # Output layer: one probability per class.
    Dense(2, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 20 epochs in batches of 20, monitoring the X_test/Y_test subset after each epoch.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=20,
    batch_size=20,
)

In [None]:
# Tabulate the metrics recorded by model.fit and plot each column as a line.
history_df = pd.DataFrame(history.history)
history_df.plot()

In [None]:
# Predict a class index for every held-out sample.
# Sequential.predict_classes has been removed from recent Keras/TensorFlow releases;
# taking the argmax over the softmax outputs yields the same class indices.
y_pred = np.argmax(model.predict(X_val), axis=-1)

# Number of correct predictions, counted with one vectorised comparison.
acc_test = int(np.sum(y_pred == np.asarray(y_val).astype(int)))

print("Accuracy test : "  , acc_test/X_val.shape[0]*100)