<a href="https://colab.research.google.com/github/reynoso811/Image-Classification-Pneumonia-X-rays-/blob/main/Phase_04_Talos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd 
import random as rn

# tensorflow
import tensorflow.random as tfr
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Lambda, Input
from tensorflow.keras.layers import Conv2D, MaxPool2D, MaxPooling2D, BatchNormalization
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Plotting
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import seaborn as sns

from skimage import color, exposure
from sklearn.metrics import classification_report

import os
import cv2

# Setting the same seed for repeatability

seed = 0

np.random.seed(seed) 
rn.seed(seed)
tfr.set_seed(seed)

# Display graphs in a Jupyter
%matplotlib inline

print("Imported")

Imported


In [None]:
# Google Drive connection
# Mounts Google Drive at /content/drive (Colab-only API) so that the dataset
# stored under /content/drive/MyDrive can be read like a local folder.

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folder of the chest X-ray dataset on the mounted Drive.
data_path = '/content/drive/MyDrive/Phase_04_project/chest_xray/chest_xray/'

# One sub-folder per split. The trailing slash is part of each value so that
# a class-folder name can be appended to it directly.
train_path, test_path, val_path = (
    data_path + split_dir for split_dir in ('train/', 'test/', 'val/')
)

In [None]:
# Side length (pixels) of the square that read_data() resizes every image to.
# NOTE(review): the ImageDataGenerator cells further down use 224x224
# (IMG_HEIGHT / IMG_WIDTH) instead — confirm which size is intended.

img_size = 200

In [None]:
# Custom function to read data from folders
# Returns numpy array [img, label]

def read_data(data_paths):
    """Load and resize every readable image found under the given directories.

    Each directory in ``data_paths`` is expected to contain one sub-folder per
    class ('PNEUMONIA' and 'NORMAL'). Images from *all* directories are
    accumulated into a single result (the accumulator used to be reset for
    every directory, so only the last one was returned).

    Parameters
    ----------
    data_paths : iterable of str
        Directories to scan, e.g. ``[val_path, test_path]``.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_images, 2)``; column 0 holds the image
        resized to ``(img_size, img_size)``, column 1 the class-folder name.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a class folder does not exist.
    """
    labels = ['PNEUMONIA', 'NORMAL']
    samples = []
    for data_path in data_paths:
        for label in labels:
            # os.path.join yields the same path as plain concatenation when
            # data_path ends with a separator, and also works when it does not.
            curr_path = os.path.join(data_path, label)
            for img in os.listdir(curr_path):
                # Skip entries whose name contains 'DS' (presumably macOS
                # .DS_Store metadata files).
                if 'DS' in img:
                    continue
                image_path = os.path.join(curr_path, img)
                raw = cv2.imread(image_path)
                # cv2.imread signals an unreadable file by returning None;
                # check before resizing, otherwise cv2.resize raises.
                if raw is None:
                    continue
                samples.append((cv2.resize(raw, (img_size, img_size)), label))

    # Fill an object array explicitly: asking NumPy to infer a shape from
    # ragged [ndarray, str] rows is deprecated and an error on newer versions.
    images = np.empty((len(samples), 2), dtype=object)
    for row, (image, label) in enumerate(samples):
        images[row, 0] = image
        images[row, 1] = label
    return images

In [None]:
%%time
# Load the images found under the training folder.
train = read_data([train_path])
# Held-out set: both the validation and the test folders are requested.
test = read_data([val_path, test_path])

  return array(a, dtype, copy=False, order=order)


CPU times: user 1min 28s, sys: 4.54 s, total: 1min 33s
Wall time: 1min 52s


In [None]:
# Shuffle the data
# np.random.shuffle reorders the rows of each array in place; the shuffle is
# applied ten times in a row.
# NOTE(review): a single shuffle already yields a uniformly random order, so
# the repetition presumably adds nothing. Simplifying it would change the
# random stream, so confirm before touching it.

for i in range(10):
    np.random.shuffle(train)
    np.random.shuffle(test)

In [None]:
# Create data frame for visualization
# Each row pairs one image array ('image') with its class-folder name ('label').

train_df = pd.DataFrame(train, columns=['image', 'label'])
test_df = pd.DataFrame(test, columns = ['image', 'label'])

In [None]:
def lung_condition(label):
    """Encode a class-folder name as a binary target.

    Returns 0 for the exact string 'NORMAL' and 1 for any other value.
    """
    return 0 if label == 'NORMAL' else 1

In [None]:
# Function for dividing data into arrays X and y

def splitdata(data):
    """Separate (image, label) pairs into a feature array and a target array.

    Parameters
    ----------
    data : iterable of 2-item rows
        Each row is ``(image, label)``; the label is encoded with
        ``lung_condition``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` — the images and their encoded labels, in input order.
    """
    # Single pass over the input, encoding each label as it is read.
    encoded = [(sample, lung_condition(name)) for sample, name in data]
    X = [sample for sample, _ in encoded]
    y = [target for _, target in encoded]
    return np.array(X), np.array(y)

In [None]:
# Split the data
# Shuffle the rows once more in place, then separate the (image, label) pairs
# into feature and target arrays.
# NOTE: X_train / y_train / X_test / y_test are rebound further down from the
# ImageDataGenerator batches, so the arrays created here are later replaced.

np.random.shuffle(train)
np.random.shuffle(test)
X_train, y_train = splitdata(train)
X_test, y_test = splitdata(test)


In [None]:
# Function for drawing learning curve history on learning neural network

def draw_learning_curve(history, keys=('accuracy', 'loss')):
    """Plot training and validation curves for each requested metric.

    Parameters
    ----------
    history : object with ``epoch`` and ``history`` attributes
        ``history.history`` must contain every ``key`` and its ``'val_' + key``
        counterpart (as the object returned by Keras ``Model.fit`` does).
    keys : sequence of str, optional
        Metric names to plot, one panel per name. The default is an immutable
        tuple so that it cannot be altered between calls.

    Raises
    ------
    KeyError
        If a metric or its ``val_`` counterpart is missing from the history.
    """
    # Keep the original two-column layout for up to two metrics, and widen the
    # grid when more are requested (a fixed 1x2 grid rejects a third panel).
    n_cols = max(2, len(keys))
    plt.figure(figsize=(20, 8))
    for i, key in enumerate(keys):
        plt.subplot(1, n_cols, i + 1)
        sns.lineplot(x=history.epoch, y=history.history[key])
        sns.lineplot(x=history.epoch, y=history.history['val_' + key])
        plt.title('Learning Curve')
        plt.ylabel(key.title())
        plt.xlabel('Epoch')
        # Second entry labels the 'val_' curve.
        plt.legend(['train', 'test'], loc='best')
    plt.show()

In [None]:
# Lee code

In [None]:
#create function that helps us visualize our model
def visualize_training_results(results):
    """Show two figures — loss, then accuracy — for a finished training run.

    ``results.history`` must provide 'loss', 'val_loss', 'accuracy' and
    'val_accuracy'. In each figure the validation curve is drawn first and the
    training curve second.
    """
    history = results.history
    # (metric key, title / y-axis label, legend entries) for each figure.
    panels = (
        ('loss', 'Loss', ['val_loss', 'loss']),
        ('accuracy', 'Accuracy', ['val_acc_accuracy', 'accuracy']),
    )
    for metric, heading, legend_entries in panels:
        plt.figure()
        plt.plot(history['val_' + metric])
        plt.plot(history[metric])
        plt.legend(legend_entries)
        plt.title(heading)
        plt.xlabel('Epochs')
        plt.ylabel(heading)
        plt.show()

In [None]:
#set image dimensions
# Target height and width (pixels) handed to flow_from_directory below.
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Batch size for the test generator. 624 equals the number of test images
# reported by flow_from_directory, so a single batch holds the whole test set.
batch_size = 624

In [None]:
# Image generator configured only with a 1/255 rescale factor; no other
# options are set.
image_gen = ImageDataGenerator(rescale = 1./255)

In [None]:
# Iterator over the training folder with binary labels.
# batch_size is hard-coded to 5216, the number of images reported for this
# directory, so the first batch contains the entire training set.
train_data_gen = image_gen.flow_from_directory(
    directory=train_path,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size= 5216,
    class_mode='binary')

Found 5216 images belonging to 2 classes.


In [None]:
# Iterator over the test folder with binary labels, resized like the training
# images. `shuffle` is not passed here, so the library default applies.
test_data_gen = image_gen.flow_from_directory(
    directory = test_path,
    target_size = (IMG_HEIGHT, IMG_WIDTH),
    batch_size = batch_size,
    class_mode='binary')

Found 624 images belonging to 2 classes.


In [None]:
# Create the X / y train and test arrays by pulling one batch from each
# generator. With the batch sizes chosen above, each batch is expected to hold
# the complete split.
# NOTE: this rebinds X_train / y_train / X_test / y_test, replacing the arrays
# produced earlier by splitdata().
X_train, y_train = next(train_data_gen)
X_test, y_test = next(test_data_gen)

In [None]:
# Hyper-parameter grid for tuning the CNN. Each key is read by tuned_model():
#   'dropout'     -> rate of the Dropout layer
#   'optimizer'   -> optimizer name given to model.compile
#   'activation1' -> activation of the convolutional and hidden dense layers
# 3 x 2 x 2 values give 12 combinations.
params = {'dropout': [0.1, 0.3, 0.5],
          'optimizer': ['adam', 'sgd'],
          'activation1': ['relu', 'tanh']}

In [None]:
#use generic CNN model with parameters options.
def tuned_model(X_train, y_train, X_test, y_test, params):
    """Build, compile and train one CNN candidate for the hyper-parameter scan.

    The network has three Conv2D + MaxPool2D stages (32, 64, 128 filters),
    followed by Dense(120), Dense(60), Dropout and a single sigmoid output
    unit for binary classification.

    Parameters
    ----------
    X_train, y_train
        Training images of shape ``(n, IMG_HEIGHT, IMG_WIDTH, 3)`` and their
        binary labels.
    X_test, y_test
        Images and labels passed to ``fit`` as ``validation_data``.
    params : dict
        Must provide 'activation1' (activation of the conv and hidden dense
        layers), 'dropout' (dropout rate) and 'optimizer' (optimizer name).

    Returns
    -------
    tuple
        ``(history, model)`` — the object returned by ``model.fit`` and the
        trained model.
    """
    activation = params['activation1']

    model = Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation=activation,
                            input_shape=(IMG_HEIGHT, IMG_WIDTH, 3), padding='same'))
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation=activation, padding='same'))
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation=activation, padding='same'))
    model.add(layers.MaxPool2D(pool_size=(3, 3)))
    model.add(layers.Flatten())
    model.add(layers.Dense(120, activation=activation))
    model.add(layers.Dense(60, activation=activation))
    model.add(layers.Dropout(params['dropout']))
    model.add(layers.Dense(1, activation='sigmoid'))

    # `metrics` is documented as a list; a bare string is not accepted by
    # every Keras version.
    model.compile(loss='binary_crossentropy',
                  optimizer=params['optimizer'],
                  metrics=['accuracy'])

    # With patience=20 and epochs=30, early stopping can only trigger during
    # the last 10 epochs; the best weights seen are restored when it does.
    es = EarlyStopping(patience=20, monitor='val_accuracy', restore_best_weights=True)

    # `callbacks` is documented as a list of callback instances.
    out = model.fit(X_train,
                    y_train,
                    batch_size=50,
                    epochs=30,
                    validation_data=(X_test, y_test),
                    callbacks=[es])

    return out, model

In [None]:
# Install Talos, the library imported below for the hyper-parameter scan.
# NOTE(review): consider `%pip install talos==<version>` in a cell at the top
# of the notebook so that the install targets the kernel's environment and the
# version is pinned.
!pip install talos



In [None]:
import talos

In [None]:
%%time
# Run the scan over the `params` grid, using tuned_model to build and train
# each candidate.
# The captured progress bar shows 12 rounds, the first taking more than three
# hours, so a full run is very expensive.
# NOTE(review): only X_train / y_train are passed; presumably Talos carves the
# validation data out of them — confirm against the Scan documentation.
results = talos.Scan(X_train, y_train, params=params, model=tuned_model, experiment_name='grid')

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


  8%|▊         | 1/12 [3:12:24<35:16:33, 11544.83s/it]

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
 6/74 [=>............................] - ETA: 5:21 - loss: 0.1090 - accuracy: 0.9633

In [None]:
# Ask the scan result for its best model, ranked by the 'val_accuracy' metric.
# The return value is not stored; as the last expression of the cell it is
# only displayed.
results.best_model(metric='val_accuracy')