# Deep Neural Network

## Data Download and Processing

### RUN THE BELOW CELLS IF USING LOCAL MACHINE

In [1]:
import sys, os
# Put the parent directory on the import path: dirname("../Modules") is "..",
# so packages that sit next to this notebook's folder become importable.
sys.path.append(os.path.dirname(os.path.join(os.path.pardir, "Modules")))

# Source and destination directories, both relative to the parent directory.
origin_dir = os.path.join(os.path.pardir, 'Data')
new_dir_path = os.path.join(os.path.pardir, 'Datasets')

# Local runs: image size and the CSV splits stored for that size.
height, width = 25, 25
csv_dir = os.path.join(new_dir_path, f'Resized_data_{height}_{width}')
train_csv, test_csv, val_csv = (
    os.path.join(csv_dir, f'{split}.csv') for split in ('train', 'test', 'val')
)

### **RUN THE BELOW CELL IF USING COLAB** (overrides paths and installs packages)

In the `git clone` command below, replace the `{pat}` placeholder with your own GitHub Personal Access Token. Never commit a real token to the notebook.

In [2]:
# # RUN THIS CELL IF COLAB

# from google.colab import drive

# drive.mount('/content/gdrive')
# !git clone "https://{pat}@github.com/madhava20217/Malaria-Detection-from-Cells.git"

# !pip install -q -r "/content/Malaria-Detection-from-Cells/requirements_versionless.txt" 

# sys.path.append(os.path.dirname(os.path.join(os.path.curdir, "Malaria-Detection-from-Cells", "Modules")))

# origin_dir = "/content/Data"
# new_dir_path = "/content/Datasets/"

In [3]:
# from Modules.data_download import Data_Download
# from Modules.labelling import Labelling

# download = Data_Download(origin_dir)
# data_dir = download.resize_image(new_dir_path, height, width)

# lab = Labelling()
# lab.label(data_dir)
# train_csv, val_csv, test_csv = lab.train_test_val_split(data_dir, train_split = 0.7, test_split = 0.15, labels = "labels.csv")

### Image Mode

In [4]:
IMAGE_MODE = 1      # flag passed to cv2.imread: 1 for colour, 0 for grayscale, -1 to load the file unchanged

### Number of classes

In [5]:
# Number of target classes; used as the width of the model's final Dense layer.
N_CLASSES = 2

## Imports

In [6]:
import tensorflow as tf

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cv2

## Loading the dataset

In [7]:
# Read the training and validation splits.
# The validation frame must come from val_csv: reading test_csv here would let
# the held-out test split drive early stopping and model selection.
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

In [8]:
train_df.head()

Unnamed: 0,Image_Path,Parasitized
0,..\Datasets\Resized_data_25_25\Uninfected\25x2...,0.0
1,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0
2,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0
3,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0
4,..\Datasets\Resized_data_25_25\Uninfected\25x2...,0.0


In [9]:
train_df.dtypes

Image_Path      object
Parasitized    float64
dtype: object

## Data and Labels

In [10]:
# Pull the image paths (inputs) and infection labels (targets) out of each frame.
train_x, train_y = (train_df[col].to_numpy() for col in ('Image_Path', 'Parasitized'))

val_x, val_y = (val_df[col].to_numpy() for col in ('Image_Path', 'Parasitized'))

#### Load images from paths

In [11]:
def load_images(path_arr):
    '''Reads and loads images into a numpy array

    Each path is decoded with cv2.imread using the module-level IMAGE_MODE flag.
    Images with colour channels are converted from OpenCV's BGR(A) channel order
    to RGB(A); single-channel images are left as they are. Pixel values are then
    divided by 255.0.

    Args:
        path_arr: iterable of image file paths.

    Returns: a numpy array with one entry per path.

    Raises:
        OSError: if a path is missing or cannot be decoded by OpenCV.
    '''
    arr = []
    for path in path_arr:
        img = cv2.imread(path, IMAGE_MODE)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising
            raise OSError('Could not read image (missing or unreadable): {}'.format(path))

        if img.ndim == 3:
            n_channels = img.shape[2]
            if n_channels == 3:
                img = img[..., ::-1]            # BGR -> RGB
            elif n_channels == 4:
                img = img[..., [2, 1, 0, 3]]    # BGRA -> RGBA, alpha stays last
        # 2-D (grayscale) images have no channel axis; reversing their last
        # axis would mirror the image horizontally, so they are left untouched

        arr.append(img / 255.0)

    return np.array(arr)


In [12]:
# Replace the arrays of file paths with the decoded image arrays.
# NOTE: this rebinds train_x / val_x, so re-running this cell on its own hands
# image arrays (not paths) to load_images; re-run the "Data and Labels" cell first.
train_x = load_images(train_x)
val_x = load_images(val_x)

#### Checking shape

In [13]:
print(train_x.shape, val_x.shape, sep = '\n')

(17471, 25, 25, 3)
(3925, 25, 25, 3)


## DNN

In [14]:
class DataGenerator(tf.keras.utils.Sequence):
    '''Serves (x, y) mini-batches to Keras by slicing in-memory collections.

    Args:
        x_set: samples; must support len() and slicing.
        y_set: labels aligned with x_set; must have the same length.
        batch_size: positive number of samples per batch. The final batch is
            smaller when the sample count is not a multiple of batch_size.

    Raises:
        ValueError: if batch_size is not positive, or x_set and y_set differ in length.
    '''
    def __init__(self, x_set, y_set, batch_size):
        # Let the Keras base class initialise its own state; newer Keras
        # releases warn when a Sequence subclass skips this call.
        super().__init__()

        if batch_size <= 0:
            raise ValueError('batch_size must be positive, got {}'.format(batch_size))
        if len(x_set) != len(y_set):
            # Mismatched lengths would silently misalign samples and labels
            raise ValueError(
                'x_set and y_set must have the same length, got {} and {}'.format(
                    len(x_set), len(y_set)))

        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        '''Number of batches per epoch: ceil(n_samples / batch_size).'''
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        '''Returns batch number idx as a (batch_x, batch_y) tuple.

        Raises:
            IndexError: if idx is outside [0, len(self)).
        '''
        if not 0 <= idx < len(self):
            raise IndexError('batch index {} out of range for {} batches'.format(idx, len(self)))

        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]
        return batch_x, batch_y

In [15]:
# batch sizes
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE  = 32

In [16]:
# Wrap the in-memory arrays in batch generators for Keras.
# test_gen serves the validation arrays; the name is kept because the
# training cell refers to it.
train_gen = DataGenerator(train_x, train_y, TRAIN_BATCH_SIZE)
test_gen  = DataGenerator(val_x, val_y, VAL_BATCH_SIZE)

# train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
# test_ds  = tf.data.Dataset.from_tensor_slices((test_x, test_y))

# train_ds = train_ds.batch(TRAIN_BATCH_SIZE)
# test_ds  = test_ds.batch(TEST_BATCH_SIZE)

In [17]:
# Read the image size back from the loaded array (axis 0 indexes the samples).
height = train_x.shape[1]
width = train_x.shape[2]
# Number of axes per sample, i.e. everything except the sample axis.
dims = train_x.ndim - 1

In [18]:
# Per-sample shape handed to the first layer of the model.
input_shape = [*train_x.shape[1:]]
if len(input_shape) == 2:
    # Two axes means there is no channel axis, so declare a single channel.
    # NOTE(review): train_x itself is not reshaped here - confirm grayscale
    # inputs get a channel axis before training.
    input_shape.append(1)

input_shape

[25, 25, 3]

In [20]:
# Small convolutional network: two conv/pool stages followed by a
# dropout-regularised dense head.
dnn_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), padding = 'same', activation = 'relu', input_shape = input_shape),
    tf.keras.layers.MaxPool2D((3,3), padding = 'same'),

    tf.keras.layers.Conv2D(32, (2,2), padding = 'same', activation = 'relu'),
    tf.keras.layers.MaxPool2D((2,2), padding = 'same'),


    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(512, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    # softmax (not sigmoid): sparse_categorical_crossentropy expects the
    # N_CLASSES outputs to form a probability distribution over the classes
    tf.keras.layers.Dense(N_CLASSES, activation = 'softmax')
])

dnn_model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.003),
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy']
)

# Stop once val_loss has not improved for 5 epochs, and roll the model back to
# the best epoch instead of keeping the weights from 5 epochs past it.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 5,
    restore_best_weights = True
)

# Keep the History object so the loss/accuracy curves can be inspected later.
history = dnn_model.fit(train_gen,
                shuffle = True,
                epochs = 50,
                validation_data = test_gen,
                callbacks = [early_stopping]
                )

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


<keras.callbacks.History at 0x1f5701dad00>