# INFORMATION TO USER
The notebook is used to generate the results at the following GitHub folders via the **Google Cloud Computing Platform**:

- https://github.com/oliver29063/MalariaDiagnosis/tree/master/ImageUpscaling/FSRCNN

These results are also referred to in **Table 4** of the manuscript in the **Results** subsection titled **Image Resolution Upscaling**. In short, this notebook is used to determine the ideal high dimension feature space, low dimension feature space, and number of mapping layers.

Please note that this notebook requires about 24 GB of RAM to run properly, which exceeds the capacity of Google Colab and most personal laptops. Consequently, please reduce the number of images used, if necessary, to lower RAM usage when testing the functionality of this notebook.



# Package Versions
Standard Libraries for Python 3.6.9
- os
- shutil
- zipfile
- sys
- csv

Imported Libraries
- numpy: 1.18.5
- tensorflow: 2.2.0
- keras: 2.3.1
- sklearn: 0.22.2.post1
- cv2: 4.1.2
- PIL: 7.0.0
- skimage: 0.16.2

### Download NIH Malaria Dataset
For more information about the dataset, visit https://lhncbc.nlm.nih.gov/publication/pub9932

In [None]:
# Import relevant packages
import numpy as np
import os
from shutil import copyfile
from zipfile import ZipFile

# Download NIH dataset zip file
!wget -nc ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip

# Extract images if not already extracted
ROOT_DIR = os.path.join("/", "content")
if not os.path.isdir("cell_images"):
    print("Extracting images...")
    with ZipFile(os.path.join("cell_images.zip"), "r") as zipObj:
        zipObj.extractall()
    print("Done!")

### Specify Number of Images to Use
In the manuscript, we used 10000 images instead of the full set of approximately 22000 images to reduce the computational burden. To check that the code runs properly on your device, we first recommend setting ```numImg``` to a small value, such as 1000, to prevent RAM overload.

In [2]:
# Total number of images to use across both classes
numImg = 1000

# Images taken from each class (integer division: half of the total, rounded down)
numClass = numImg // 2

### Basic Image Preprocessing
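Here we load the image data into two 4D NumPy arrays, ```Parasitized``` and ```Uninfected```, one per class. Each array holds that class's images resized to 128x128 pixels with three color channels (note that OpenCV's ```imread``` returns the channels in BGR order rather than RGB).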

In [None]:
# Install and import relevant packages
import numpy as np
import os
!pip install opencv-python
!apt update && apt install -y libsm6 libxext6 libxrender1
import cv2
from PIL import Image

# Create folders for rescaled images (makedirs also creates the parent "RescaledSet").
# NOTE: nothing in this cell writes into these folders.
for rescaled_dir in ("RescaledSet/Parasitized", "RescaledSet/Uninfected"):
    os.makedirs(rescaled_dir, exist_ok=True)


def _load_class_images(class_dir, file_names, count, size=(128, 128)):
    """Load the first `count` images listed in `file_names` from `class_dir`.

    Parameters
    ----------
    class_dir : str
        Folder containing the image files.
    file_names : list of str
        File names (relative to `class_dir`) to read, in the order they are used.
    count : int
        Number of images to load.
    size : tuple of int
        Target (width, height) passed to `cv2.resize`.

    Returns
    -------
    numpy.ndarray
        Array of shape (count, height, width, 3) with NumPy's default float dtype,
        holding the resized images in OpenCV's BGR channel order.

    Raises
    ------
    ValueError
        If fewer than `count` file names are available.
    IOError
        If OpenCV cannot read one of the files (`cv2.imread` returns None).
    """
    if count > len(file_names):
        raise ValueError(
            "Requested %d images from %s but only %d files are available"
            % (count, class_dir, len(file_names))
        )

    # Pre-allocate memory space for images
    width, height = size
    images = np.empty([count, height, width, 3])

    for i, file_name in enumerate(file_names[:count]):
        path = os.path.join(class_dir, file_name)
        image = cv2.imread(path)
        if image is None:
            # Fail with the offending path instead of an opaque error inside cv2.resize
            raise IOError("Could not read image file: " + path)
        images[i, :, :, :] = cv2.resize(image, dsize=size)
    return images


# Generate lists of parasitized and uninfected file names.
# Thumbs.db files are dropped, and the names are sorted because os.listdir
# returns entries in arbitrary order; sorting makes the selected subset
# the same on every machine.
ParasitizedFiles = sorted(
    name for name in os.listdir("cell_images/Parasitized/") if name != 'Thumbs.db'
)
UninfectedFiles = sorted(
    name for name in os.listdir("cell_images/Uninfected/") if name != 'Thumbs.db'
)

# Resize and load numClass images of each class
Parasitized = _load_class_images("cell_images/Parasitized/", ParasitizedFiles, numClass)
Uninfected = _load_class_images("cell_images/Uninfected/", UninfectedFiles, numClass)

print('Uninfected Dataset size is:',np.shape(Uninfected))
print('Parasitized Dataset size is:',np.shape(Parasitized))

# Generate image dataset: parasitized images first, then uninfected
Dataset = np.concatenate((Parasitized, Uninfected), axis=0)

### Create Downscaled Images

In [12]:
# Downscale every 128x128 image to 32x32 by averaging 4x4 pixel blocks (low-resolution network inputs)
from skimage.transform import rescale, resize, downscale_local_mean

# Allocate the low-resolution inputs: one 32x32x3 image per dataset entry.
# NOTE: the name `In` shadows IPython's input-history list; it is kept because
# later cells refer to it.
num_images = Dataset.shape[0]
In = np.zeros((num_images, 32, 32, 3))

# Average non-overlapping 4x4 pixel blocks within each channel (factor 1 on the channel axis)
for idx, full_res in enumerate(Dataset):
    In[idx] = downscale_local_mean(full_res, (4, 4, 1))

### Create Cross-Validation Groups

In [13]:
# Generate 5-fold cross-validation groups.
# A dedicated, seeded generator makes the folds reproducible across kernel
# restarts without touching NumPy's global random state.
CV_SEED = 0
NUM_FOLDS = 5

# Fold boundaries are computed from the actual dataset size so they always
# agree with the permuted indices (Dataset holds 2 * numClass images, which is
# smaller than numImg when numImg is odd).
num_samples = Dataset.shape[0]
Spaces = np.linspace(0, num_samples, NUM_FOLDS + 1).astype('int')
CVIndices = np.random.RandomState(CV_SEED).permutation(num_samples)

# Cut the shuffled indices at the interior boundaries -> five consecutive index groups
Index1, Index2, Index3, Index4, Index5 = np.split(CVIndices, Spaces[1:-1])

### List High and Low Dimension Numbers

In [15]:
# Filter counts tried for the feature-extraction and expansion layers
high_dimension = [48, 56]

# Filter counts tried for the shrinking layer
low_dimension = [12, 16]

### Import Relevant Packages for Neural Network Training

In [21]:
# Import relevant packages for neural network training
import sys
import csv
if 'tensorflow' in sys.modules == False:
    %tensorflow_version 2.x
    import tensorflow as tf
import keras
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, BatchNormalization
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

!pip install scikit-learn
import sklearn
from sklearn.metrics import roc_curve
from sklearn.metrics import auc



### Create FSRCNN Architecture w/ 4 Mapping Layers



In [None]:
## Create FSRCNN architecture
from keras import optimizers
from keras.models import load_model
from keras.models import Sequential, Model
from keras.layers import Dense, Activation
from keras.layers import Conv2D, MaxPooling2D, Input, ZeroPadding2D, Conv2DTranspose, merge 
from keras.layers.advanced_activations import PReLU
from keras.preprocessing import image

# Grid search over (high dimension, low dimension) for FSRCNN with 4 mapping layers.
# Every configuration is trained with 5-fold cross-validation, and the per-epoch
# training/validation MSE of each fold is exported to CSV.
num_maps = 4  # number of 3x3 mapping layers; also used in the CSV file names

# NOTE(review): the mapping layers use a fixed 12 filters regardless of l_d.
# FSRCNN is usually described with mapping layers sized by the shrunken (low)
# dimension; 12 is kept here so results match the original notebook. Confirm
# whether l_d was intended.
map_filters = 12

# Fold i holds out folds[4 - i] (Index5 first, Index1 last) and trains on the
# remaining four groups in ascending order.
folds = [Index1, Index2, Index3, Index4, Index5]

for h_d in high_dimension:
    for l_d in low_dimension:

        # Per-configuration results: one row of per-epoch MSE values per fold
        TrainLoss = []
        TestLoss = []

        for i in range(5):

            # Create the appropriate training and testing sets.
            # Indexing once with the concatenated indices yields the same rows in the
            # same order as concatenating four separately indexed arrays.
            held_out = len(folds) - 1 - i
            test_idx = folds[held_out]
            train_idx = np.concatenate([fold for j, fold in enumerate(folds) if j != held_out])
            TrainOut, TrainIn = Dataset[train_idx], In[train_idx]
            TestOut, TestIn = Dataset[test_idx], In[test_idx]

            # Release the previous fold's model from Keras' global state so the
            # models built in this loop do not accumulate in memory
            keras.backend.clear_session()

            # Feature extraction
            input_img = Input(shape=(32,32,3))
            model = Conv2D(filters = h_d, kernel_size = (5, 5), padding='same', kernel_initializer='he_normal')(input_img)
            model = PReLU()(model)

            # Shrink
            model = Conv2D(filters = l_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Mapping
            for _ in range(num_maps):
                model = Conv2D(filters = map_filters, kernel_size = (3, 3), padding='same', kernel_initializer='he_normal')(model)
                model = PReLU()(model)

            # Expansion
            model = Conv2D(filters = h_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Deconvolution: stride 4 upsamples 32x32 feature maps to a 128x128, 3-channel output
            output_img = Conv2DTranspose(filters = 3, kernel_size = (9, 9), strides=(4, 4), padding='same')(model)

            model = Model(input_img, output_img) # Create the model object
            adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) # Training optimizer
            model.compile(loss = "mean_squared_error", optimizer = adam, metrics=["mean_squared_error"]) # How we measure error

            print("High Dimension " + str(h_d))
            print("Low Dimension "+ str(l_d))

            # Train model and evaluate performance
            print('We are now training cross-validation set #',i+1)
            Results = model.fit(y=TrainOut, x=TrainIn, validation_data = (TestIn,TestOut), epochs=5, batch_size = 32, validation_freq=1)

            # Display and store performance results (rounded to 4 decimals)
            Results.history['loss'] = [round(value, 4) for value in Results.history['loss']]
            Results.history['val_loss'] = [round(value, 4) for value in Results.history['val_loss']]

            print('Training MSE:',Results.history['loss'])
            print('Validation MSE:',Results.history['val_loss'])

            TrainLoss.append(Results.history['loss'])
            TestLoss.append(Results.history['val_loss'])
            print('')

            # Save and export as CSV files. The files are rewritten after every fold so
            # partial results survive an interrupted run; newline="" is what the csv
            # module expects and avoids blank rows on Windows.
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TrainLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TrainLoss)
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TestLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TestLoss)

### Create FSRCNN Architecture w/ 3 Mapping Layers

In [None]:
## Create FSRCNN architecture
from keras import optimizers
from keras.models import load_model
from keras.models import Sequential, Model
from keras.layers import Dense, Activation
from keras.layers import Conv2D, MaxPooling2D, Input, ZeroPadding2D, Conv2DTranspose, merge 
from keras.layers.advanced_activations import PReLU
from keras.preprocessing import image

# Grid search over (high dimension, low dimension) for FSRCNN with 3 mapping layers.
# Every configuration is trained with 5-fold cross-validation, and the per-epoch
# training/validation MSE of each fold is exported to CSV.
num_maps = 3  # number of 3x3 mapping layers; also used in the CSV file names

# NOTE(review): the mapping layers use a fixed 12 filters regardless of l_d.
# FSRCNN is usually described with mapping layers sized by the shrunken (low)
# dimension; 12 is kept here so results match the original notebook. Confirm
# whether l_d was intended.
map_filters = 12

# Fold i holds out folds[4 - i] (Index5 first, Index1 last) and trains on the
# remaining four groups in ascending order.
folds = [Index1, Index2, Index3, Index4, Index5]

for h_d in high_dimension:
    for l_d in low_dimension:

        # Per-configuration results: one row of per-epoch MSE values per fold
        TrainLoss = []
        TestLoss = []

        for i in range(5):

            # Create the appropriate training and testing sets.
            # Indexing once with the concatenated indices yields the same rows in the
            # same order as concatenating four separately indexed arrays.
            held_out = len(folds) - 1 - i
            test_idx = folds[held_out]
            train_idx = np.concatenate([fold for j, fold in enumerate(folds) if j != held_out])
            TrainOut, TrainIn = Dataset[train_idx], In[train_idx]
            TestOut, TestIn = Dataset[test_idx], In[test_idx]

            # Release the previous fold's model from Keras' global state so the
            # models built in this loop do not accumulate in memory
            keras.backend.clear_session()

            # Feature extraction
            input_img = Input(shape=(32,32,3))
            model = Conv2D(filters = h_d, kernel_size = (5, 5), padding='same', kernel_initializer='he_normal')(input_img)
            model = PReLU()(model)

            # Shrink
            model = Conv2D(filters = l_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Mapping
            for _ in range(num_maps):
                model = Conv2D(filters = map_filters, kernel_size = (3, 3), padding='same', kernel_initializer='he_normal')(model)
                model = PReLU()(model)

            # Expansion
            model = Conv2D(filters = h_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Deconvolution: stride 4 upsamples 32x32 feature maps to a 128x128, 3-channel output
            output_img = Conv2DTranspose(filters = 3, kernel_size = (9, 9), strides=(4, 4), padding='same')(model)

            model = Model(input_img, output_img) # Create the model object
            adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) # Training optimizer
            model.compile(loss = "mean_squared_error", optimizer = adam, metrics=["mean_squared_error"]) # How we measure error

            print("High Dimension " + str(h_d))
            print("Low Dimension "+ str(l_d))

            # Train model and evaluate performance
            print('We are now training cross-validation set #',i+1)
            Results = model.fit(y=TrainOut, x=TrainIn, validation_data = (TestIn,TestOut), epochs=5, batch_size = 32, validation_freq=1)

            # Display and store performance results (rounded to 4 decimals)
            Results.history['loss'] = [round(value, 4) for value in Results.history['loss']]
            Results.history['val_loss'] = [round(value, 4) for value in Results.history['val_loss']]

            print('Training MSE:',Results.history['loss'])
            print('Validation MSE:',Results.history['val_loss'])

            TrainLoss.append(Results.history['loss'])
            TestLoss.append(Results.history['val_loss'])
            print('')

            # Save and export as CSV files. The files are rewritten after every fold so
            # partial results survive an interrupted run; newline="" is what the csv
            # module expects and avoids blank rows on Windows.
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TrainLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TrainLoss)
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TestLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TestLoss)

### Create FSRCNN Architecture w/ 2 Mapping Layers

In [None]:
## Create FSRCNN architecture
from keras import optimizers
from keras.models import load_model
from keras.models import Sequential, Model
from keras.layers import Dense, Activation
from keras.layers import Conv2D, MaxPooling2D, Input, ZeroPadding2D, Conv2DTranspose, merge 
from keras.layers.advanced_activations import PReLU
from keras.preprocessing import image

# Grid search over (high dimension, low dimension) for FSRCNN with 2 mapping layers.
# Every configuration is trained with 5-fold cross-validation, and the per-epoch
# training/validation MSE of each fold is exported to CSV.
num_maps = 2  # number of 3x3 mapping layers; also used in the CSV file names

# NOTE(review): the mapping layers use a fixed 12 filters regardless of l_d.
# FSRCNN is usually described with mapping layers sized by the shrunken (low)
# dimension; 12 is kept here so results match the original notebook. Confirm
# whether l_d was intended.
map_filters = 12

# Fold i holds out folds[4 - i] (Index5 first, Index1 last) and trains on the
# remaining four groups in ascending order.
folds = [Index1, Index2, Index3, Index4, Index5]

for h_d in high_dimension:
    for l_d in low_dimension:

        # Per-configuration results: one row of per-epoch MSE values per fold
        TrainLoss = []
        TestLoss = []

        for i in range(5):

            # Create the appropriate training and testing sets.
            # Indexing once with the concatenated indices yields the same rows in the
            # same order as concatenating four separately indexed arrays.
            held_out = len(folds) - 1 - i
            test_idx = folds[held_out]
            train_idx = np.concatenate([fold for j, fold in enumerate(folds) if j != held_out])
            TrainOut, TrainIn = Dataset[train_idx], In[train_idx]
            TestOut, TestIn = Dataset[test_idx], In[test_idx]

            # Release the previous fold's model from Keras' global state so the
            # models built in this loop do not accumulate in memory
            keras.backend.clear_session()

            # Feature extraction
            input_img = Input(shape=(32,32,3))
            model = Conv2D(filters = h_d, kernel_size = (5, 5), padding='same', kernel_initializer='he_normal')(input_img)
            model = PReLU()(model)

            # Shrink
            model = Conv2D(filters = l_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Mapping
            for _ in range(num_maps):
                model = Conv2D(filters = map_filters, kernel_size = (3, 3), padding='same', kernel_initializer='he_normal')(model)
                model = PReLU()(model)

            # Expansion
            model = Conv2D(filters = h_d, kernel_size = (1, 1), padding='same', kernel_initializer='he_normal')(model)
            model = PReLU()(model)

            # Deconvolution: stride 4 upsamples 32x32 feature maps to a 128x128, 3-channel output
            output_img = Conv2DTranspose(filters = 3, kernel_size = (9, 9), strides=(4, 4), padding='same')(model)

            model = Model(input_img, output_img) # Create the model object
            adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) # Training optimizer
            model.compile(loss = "mean_squared_error", optimizer = adam, metrics=["mean_squared_error"]) # How we measure error

            print("High Dimension " + str(h_d))
            print("Low Dimension "+ str(l_d))

            # Train model and evaluate performance
            print('We are now training cross-validation set #',i+1)
            Results = model.fit(y=TrainOut, x=TrainIn, validation_data = (TestIn,TestOut), epochs=5, batch_size = 32, validation_freq=1)

            # Display and store performance results (rounded to 4 decimals)
            Results.history['loss'] = [round(value, 4) for value in Results.history['loss']]
            Results.history['val_loss'] = [round(value, 4) for value in Results.history['val_loss']]

            print('Training MSE:',Results.history['loss'])
            print('Validation MSE:',Results.history['val_loss'])

            TrainLoss.append(Results.history['loss'])
            TestLoss.append(Results.history['val_loss'])
            print('')

            # Save and export as CSV files. The files are rewritten after every fold so
            # partial results survive an interrupted run; newline="" is what the csv
            # module expects and avoids blank rows on Windows.
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TrainLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TrainLoss)
            with open(f"{h_d}_{l_d}_{num_maps}Maps_TestLoss.csv", "w", newline="") as f:
                csv.writer(f).writerows(TestLoss)