# Alzheimers Multi Class Classification

# Process Data

## Import Packages

In [None]:
import os
import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
from PIL import Image
import tensorflow as tf
import keras_cv
from tensorflow.data import AUTOTUNE
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from tensorflow.keras.layers import (
    RandomBrightness, RandomZoom, RandomFlip,
    Input, Conv2D, BatchNormalization, MaxPool2D, Dropout, Flatten, Dense
)

## Load Data

In [None]:
# Class sub-folder names; the same list is handed to the loader as class_names.
class_names = ['MildDemented', 'ModerateDemented', 'NonDemented', 'VeryMildDemented']
# Size every image is resized to on load.
# NOTE(review): Keras reads image_size as (height, width) - confirm this
# matches the orientation of the source images.
image_size = (176, 208)


def _load_split(split_dir):
    """Load one split folder as a grayscale dataset with one-hot labels.

    The batch size of 10000 is presumably chosen so a whole split arrives in
    a single batch - confirm against the dataset size.
    """
    return tf.keras.utils.image_dataset_from_directory(
        '/kaggle/input/alzheimers-dataset-4-class-of-images/Alzheimer_s Dataset/' + split_dir,
        color_mode='grayscale',
        class_names=class_names,
        image_size=image_size,
        label_mode='categorical',
        batch_size=10000,
    )


# The dataset's "test" folder serves as the validation split here.
train_data = _load_split('train')
val_data = _load_split('test')

## Convert to Numpy Arrays

In [None]:
# Take the first batch of each dataset as (images, labels) NumPy arrays.
X_train, y_train = next(train_data.as_numpy_iterator())
X_val, y_val = next(val_data.as_numpy_iterator())

# The tf.data objects are no longer needed once the arrays are extracted.
del train_data, val_data

# Pool both splits into one image array and one label array.
X = np.concatenate((X_train, X_val), axis=0)
y = np.concatenate((y_train, y_val), axis=0)

## View Class Imbalance

In [None]:
def plot_imbalance(y):
    """Draw a bar chart of how many samples belong to each class.

    Args:
        y: Label array with one row per sample and one column per entry of
            the module-level ``class_names``; the largest entry in a row
            marks that sample's class.
    """
    # Collapse every label row to the index of its largest entry.
    y_sparse = np.argmax(y, axis=-1)
    # Count all classes in one pass. minlength keeps classes with no samples
    # at zero, and the slice ignores indices beyond the known classes.
    n_classes = len(class_names)
    counts = np.bincount(np.ravel(y_sparse), minlength=n_classes)[:n_classes]
    class_totals = pd.Series(counts, index=class_names)
    class_totals.plot.bar()

plot_imbalance(y)

## View Data

In [None]:
def show_images(X,y, random = True):
    """Display images in a 3x3 grid, each titled with its class name.

    Args:
        X: Indexable collection of images (NumPy array or list).
        y: Indexable collection of label vectors aligned with ``X``; the
            position of the largest entry selects the class name.
        random: When true, nine images are drawn at random (with
            replacement). Otherwise the first images are shown in order,
            at most nine and fewer if the collection is shorter.
    """
    # Map class index -> class name for the subplot titles.
    labels = dict(enumerate(class_names))

    # len() works for both arrays and plain lists, unlike .shape.
    n_available = len(y)
    if n_available == 0:
        n_shown = 0
    elif random:
        n_shown = 9
    else:
        # Ordered mode must not index past the end of a short collection.
        n_shown = min(9, n_available)

    plt.figure(figsize=(10, 10))
    for i in range(n_shown):
        idx = np.random.randint(0, n_available) if random else i
        plt.subplot(3, 3, i + 1)
        plt.imshow(X[idx], cmap = 'gray')
        plt.axis("off")
        plt.title("Class:{}".format(labels[np.argmax(y[idx])]))

# Display Train Images
show_images(X,y)

## Plot Brightness

Some of the examples look abnormally dark.

In [None]:
def plot_brightness(X,y):
    """Plot the distribution of per-image mean pixel values.

    Draws a histogram and a box plot side by side. ``y`` is only used to
    pair up with ``X`` while iterating; its values are not read.
    """
    # One mean pixel value per (image, label) pair.
    brightness = [np.mean(image) for image, _ in zip(X, y)]

    plt.figure(figsize=(15, 5))
    # Left panel: histogram; right panel: box plot.
    for panel, draw in enumerate((sns.histplot, sns.boxplot), start=1):
        plt.subplot(1, 2, panel)
        draw(brightness)

plot_brightness(X,y)

## View Outliers

In [None]:
def show_outliers(X,y):
    """Print the lower brightness fence and display the images below it.

    An image is treated as an outlier when its mean pixel value is below
    Q1 - 1.5*IQR of all per-image means. The fence value and the number of
    outliers are printed, then the outliers are passed to ``show_images``
    in order (not randomly sampled).
    """
    pairs = list(zip(X, y))
    brightness = [np.mean(image) for image, _ in pairs]

    # Lower fence from the quartiles of the brightness values.
    first_quartile = np.quantile(brightness, .25)
    upper, lower = np.percentile(brightness, [75, 25])
    iqr = np.subtract(upper, lower)
    lower_fence = first_quartile - 1.5*iqr
    print('lower fence:',lower_fence)

    # Keep only the (image, label) pairs whose brightness is under the fence.
    dark = [pair for pair, level in zip(pairs, brightness) if level < lower_fence]
    X_out = [image for image, _ in dark]
    y_out = [label for _, label in dark]

    print('number of outliers:', len(y_out))
    show_images(X_out,y_out, False)

show_outliers(X,y)

## Reassign Outlier Brightness

In [None]:
def unskew_brightness(X):
    """Rescale abnormally dark images toward the median brightness.

    An image is abnormally dark when its mean pixel value lies below the
    lower fence Q1 - 1.5*IQR of all per-image means. Each such image is
    divided by its own mean, multiplied by the median mean, and shifted by a
    random offset of at most IQR/1.35 in magnitude, so corrected images do
    not all end up with exactly the same brightness. All other images are
    copied through unchanged.

    Args:
        X: NumPy array of images; the first axis indexes the images.

    Returns:
        A new float array with the same shape as ``X``. ``X`` itself is not
        modified. An empty ``X`` yields an empty array.
    """
    # Start from a float copy; only the outliers are overwritten below.
    unskewed = np.array(X, dtype=float)
    if len(X) == 0:
        # The quantile computations below are undefined without data.
        return unskewed

    brightness = [np.mean(image) for image in X]

    mew = np.median(brightness)
    q1 = np.quantile(brightness,.25)
    iqr = np.subtract(*np.percentile(brightness, [75, 25]))
    lower_fence = q1 - 1.5*iqr
    # IQR/1.35 is used as the spread of the random brightness offset.
    sigma = iqr/1.35

    for i,image in enumerate(X):
        level = brightness[i]
        # A completely black image (mean 0) cannot be rescaled: dividing by
        # its mean would fill it with NaN, so it is left as it is.
        if level < lower_fence and level != 0:
            unskewed[i] = sigma*np.clip(np.random.randn(1),-1,1)+mew*(image/level)

    return unskewed
    
    
# Replace X with its brightness-corrected copy, then plot the brightness
# distribution again to check the effect of the correction.
X = unskew_brightness(X)
plot_brightness(X,y)

Most of the darkened images have been corrected, but some of the brighter outliers blend in with the brightness of normal images, so not all of them are accounted for.

In [None]:
# Show a grid of randomly chosen images from the brightness-corrected X.
show_images(X,y)

## Process Data

Resample minority classes to account for class imbalance.

In [None]:
# Fraction of the pooled data used for training (5121 samples).
split_percent = 5121/len(y)


sm = SMOTE()

# Shuffle images and labels with the same random permutation, so the pooled
# samples are mixed before they are split again.
shuffle = np.arange(len(y))
np.random.shuffle(shuffle)

# Scale pixel values by 1/255 and make the labels float.
X = X[shuffle]/255.
y = y[shuffle].astype('float')

# Use the integer count directly: int(split_percent*len(y)) round-trips
# through floating point and can truncate to one sample short.
split = min(5121, len(y))

X_train = X[:split]
y_train = y[:split]

X_val = X[split:]
y_val = y[split:]

# Oversample the training portion only; the validation portion is left
# untouched. SMOTE is given one flat feature row per image.
X_train, y_train = sm.fit_resample(X_train.reshape(X_train.shape[0],-1),y_train)

# Restore the (n, height, width, 1) image layout after resampling.
X_train = X_train.reshape(-1,*image_size,1)

# Free the pooled arrays; only the train/validation splits are used below.
del X
del y

In [None]:
def make_image_directory(X, labels, split_name, class_names = class_names, name = 'Alzheimer_s Dataset',):
    """Write images to ``name/split_name/<class name>/`` as JPEG files.

    Args:
        X: NumPy array of images; values are multiplied by 255 before
            saving, so they are expected to lie in [0, 1].
        labels: Label array aligned with ``X``; the position of the largest
            entry in each row selects the class folder.
        split_name: Sub-folder for this split (e.g. ``'training'``).
        class_names: Class folder names, indexed by class number.
        name: Root folder of the exported dataset.

    Files are named ``<class name><running index>.jpg`` within each class
    folder. Each image is reshaped to the module-level ``image_size``.
    """
    y_sparse = np.argmax(labels, axis = -1)

    for class_index, class_name in enumerate(class_names):
        output_path = os.path.join(name, split_name, class_name)
        os.makedirs(output_path, exist_ok=True)

        X_class = X[y_sparse == class_index]

        for image_index, x in enumerate(X_class):
            # Round and clip before the uint8 cast: a plain cast truncates
            # (e.g. 99.9999 -> 99) and wraps values outside 0..255.
            pixels = np.clip(np.rint(x*255), 0, 255).astype('uint8')
            image = Image.fromarray(pixels.reshape(*image_size))
            image.save(os.path.join(output_path, class_name + str(image_index) + '.jpg'))

make_image_directory(X_train,y_train, 'training')
make_image_directory(X_val,y_val, 'validation')

In [None]:
# Plot the per-class counts of the training labels after resampling.
plot_imbalance(y_train)

We can now train on a balanced dataset.

## Export Files

In [None]:
import shutil

# Define the source directory to be zipped
source_directory = '/kaggle/working/Alzheimer_s Dataset'

# Define the target zip file
target_zip_file = '/kaggle/working/AlzheimersDataset.zip'

# Create a zip file of the source directory.
# make_archive appends the ".zip" extension itself, so it is given the target
# path without it. The target path must not be created as a directory first:
# that would leave a folder named "AlzheimersDataset.zip" and fail on a rerun.
archive_base, _ = os.path.splitext(target_zip_file)
shutil.make_archive(archive_base, 'zip', source_directory)