# Deep Neural Network

## Data Download and Processing

### RUN THE BELOW CELLS IF USING LOCAL MACHINE

In [49]:
import sys, os
sys.path.append(os.path.dirname(os.path.join((os.path.pardir), "Modules")))

origin_dir = os.path.join(os.path.pardir, 'Data')
new_dir_path = os.path.join(os.path.pardir, 'Datasets')

#for local systems

height, width = 25, 25
csv_dir = os.path.join(os.path.pardir, 'Datasets', 'Resized_data_{}_{}'.format(height, width))
train_csv = os.path.join(csv_dir, 'train.csv')
test_csv = os.path.join(csv_dir, 'test.csv')
val_csv = os.path.join(csv_dir, 'val.csv')

### **RUN THE BELOW CELL IF USING COLAB** (overrides paths and installs packages)

For the git clone segment, please add the Personal Access Token instead of the {pat} segment 

In [50]:
# # RUN THIS CELL IF COLAB

# from google.colab import drive

# drive.mount('/content/gdrive')
# !git clone "https://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@github.com/madhava20217/Malaria-Detection-from-Cells.git"

# !pip install -q -r "/content/Malaria-Detection-from-Cells/requirements_versionless.txt" 

# sys.path.append(os.path.dirname(os.path.join(os.path.curdir, "Malaria-Detection-from-Cells", "Modules")))

# origin_dir = "/content/Data"
# new_dir_path = "/content/Datasets/"

In [51]:
# from Modules.data_download import Data_Download
# from Modules.labelling import Labelling

# download = Data_Download(origin_dir)
# data_dir = download.resize_image(new_dir_path, height, width)

# lab = Labelling()
# lab.label(data_dir)
# train_csv, val_csv, test_csv = lab.train_test_val_split(data_dir, train_split = 0.7, test_split = 0.15, labels = "labels.csv")

### Image Mode

In [52]:
IMAGE_MODE = 1      #1 for colour, 0 for grayscale, unchanged for -1

### Datasets locations:

In [53]:
N_CLASSES = 2

## Imports

In [54]:
import tensorflow as tf

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cv2

## Loading the dataset

In [55]:
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

In [56]:
train_df.head()

Unnamed: 0,Image_Path,Parasitized
0,..\Datasets\Resized_data_25_25\Uninfected\25x2...,0.0
1,..\Datasets\Resized_data_25_25\Uninfected\25x2...,0.0
2,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0
3,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0
4,..\Datasets\Resized_data_25_25\Parasitized\25x...,1.0


In [57]:
train_df.dtypes

Image_Path      object
Parasitized    float64
dtype: object

## Data and Labels

In [58]:
train_x = train_df['Image_Path'].to_numpy()
train_y = train_df['Parasitized'].to_numpy()

val_x  = val_df['Image_Path'].to_numpy()
val_y  = val_df['Parasitized'].to_numpy()

#### Load images from paths

In [59]:
def load_images(path_arr):
    '''Reads and loads images into a numpy array
    Returns: a numpy array'''
    arr = []
    for path in path_arr:
        arr.append(cv2.imread(path, IMAGE_MODE)[..., ::-1]/255.0)
    
    return np.array(arr)


In [60]:
train_x = load_images(train_x)
val_x = load_images(val_x)

#### Checking shape

In [61]:
print(train_x.shape, val_x.shape, sep = '\n')

(21701, 25, 25, 3)
(3101, 25, 25, 3)


## DNN

In [62]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_x, batch_y

In [63]:
# batch sizes
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE  = 32

In [64]:
train_x = train_x
val_x = val_x

train_gen = DataGenerator(train_x, train_y, TRAIN_BATCH_SIZE)
test_gen  = DataGenerator(val_x, val_y, VAL_BATCH_SIZE)

# train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
# test_ds  = tf.data.Dataset.from_tensor_slices((test_x, test_y))

# train_ds = train_ds.batch(TRAIN_BATCH_SIZE)
# test_ds  = test_ds.batch(TEST_BATCH_SIZE)

In [65]:
height, width = train_x.shape[1], train_x.shape[2]
dims = len(train_x.shape)-1

In [66]:
input_shape = list(train_x.shape[1:])
if(len(input_shape) == 2): input_shape.append(1)

input_shape

[25, 25, 3]

In [67]:
import tensorflow as tf
import numpy as np
# load the data
# (train_x, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# create a TensorFlow Dataset object from the training data
# unpack the zip object into two separate tensors
train_x, train_y = zip(*train_x[:min(len(train_x), len(train_y))])

# create a TensorFlow dataset from the tensors
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
# define a function that performs data augmentation on a single image
def augment(image, label):
  # randomly choose a number between 0 and 1
  p = tf.random.uniform(shape=(), minval=0, maxval=1)

  # if p is less than 0.25, flip the image horizontally
  if p < 0.25:
    image = tf.image.flip_left_right(image)

  # if p is between 0.25 and 0.5, flip the image vertically
  elif p >= 0.25 and p < 0.5:
    image = tf.image.flip_up_down(image)

  # if p is between 0.5 and 0.75, rotate the image by 90 degrees
  elif p >= 0.5 and p < 0.75:
    image = tf.image.rot90(image, k=1)

  # if p is between 0.75 and 1.0, rotate the image by 180 degrees
  elif p >= 0.75 and p < 1.0:
    image = tf.image.rot90(image, k=2)

  # return the augmented image and label as a tuple
  return image, label


# apply the data augmentation function to each element in the dataset
augmented_train_dataset = train_dataset.map(augment)

# create input arrays for scikit-learn by concatenating the augmented data
train_x = np.concatenate([x for x, y in augmented_train_dataset])
train_y = np.concatenate([y for x, y in augmented_train_dataset])

# use the input arrays with scikit-learn
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(train_x,train_y)

ValueError: too many values to unpack (expected 2)

<MapDataset element_spec=(TensorSpec(shape=(25, 25, 3), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.float64, name=None))>
