## Calculating PSNR and MSE of NIH Dataset
Here we download the NIH malaria cell-image dataset and resize every image to 128x128. Each image is then downsampled by a factor of 4 to 32x32 and upscaled back to 128x128 via bicubic interpolation. Finally, we calculate the MSE and PSNR between the upscaled images and the 128x128 originals.

In [0]:
# Import relevant packages
import numpy as np
import os
from shutil import copyfile
from zipfile import ZipFile

# Download NIH dataset zip file
!wget -nc ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip

# Extract images if not already extracted
ROOT_DIR = os.path.join("/", "content")
if not os.path.isdir("cell_images"):
    print("Extracting images...")
    with ZipFile(os.path.join("cell_images.zip"), "r") as zipObj:
        zipObj.extractall()
    print("Done!")

# Install and import relevant packages
# (numpy and os were already imported at the top of this cell; the repeated
# imports are harmless)
import numpy as np
import os
# OpenCV Python bindings, used below through cv2.imread and cv2.resize
!pip install opencv-python
# System packages installed next to OpenCV -- presumably shared libraries
# that cv2 needs on this machine; TODO confirm they are still required
!apt update && apt install -y libsm6 libxext6 libxrender1
import cv2
# NOTE(review): Image is not referenced in the code visible in this section
from PIL import Image

# Make sure the output folders for the rescaled images exist
# (parent first, then one sub-folder per class)
for output_dir in ("RescaledSet",
                   "RescaledSet/Parasitized",
                   "RescaledSet/Uninfected"):
    os.makedirs(output_dir, exist_ok=True)

# List the file names of each class, leaving out any 'Thumbs.db' entry
# (it sits in the image folders but is not a cell image)
ParasitizedFiles = [
    name for name in os.listdir("cell_images/Parasitized/")
    if name != 'Thumbs.db'
]
UninfectedFiles = [
    name for name in os.listdir("cell_images/Uninfected/")
    if name != 'Thumbs.db'
]

def _load_resized_images(folder, file_names, size=128):
    """Load the given image files from *folder*, resized to size x size.

    Args:
        folder: Directory that contains the image files.
        file_names: Names of the files inside *folder* to load.
        size: Side length in pixels of the square output images.

    Returns:
        Array of shape (len(file_names), size, size, 3) holding one resized
        3-channel image per file, in the order of *file_names*. The array uses
        NumPy's default float dtype, as the original pre-allocation did.

    Raises:
        IOError: If a file cannot be read as an image.
    """
    # Size the buffer from the file list so that every row gets filled;
    # np.empty leaves unfilled rows with arbitrary contents.
    images = np.empty([len(file_names), size, size, 3])
    for index, file_name in enumerate(file_names):
        path = os.path.join(folder, file_name)
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising
        if image is None:
            raise IOError("Could not read image: " + path)
        images[index] = cv2.resize(image, dsize=(size, size))
    return images


# Resize and load parasitized images
Parasitized = _load_resized_images("cell_images/Parasitized", ParasitizedFiles)

# Resize and load uninfected images (all of them: previously only the first
# 2500 rows of a 13779-row buffer were filled, so uninitialized rows leaked
# into the dataset and into the MSE/PSNR computed from it)
Uninfected = _load_resized_images("cell_images/Uninfected", UninfectedFiles)

# Stack both classes into one array: parasitized images first, then uninfected
Dataset = np.concatenate((Parasitized, Uninfected), axis=0)

# Build the low-resolution copies: every 128x128 image is reduced to 32x32 by
# averaging each 4x4 pixel neighbourhood, channel by channel
from skimage.transform import rescale, resize, downscale_local_mean

Small = np.zeros([Dataset.shape[0], 32, 32, 3])
for index in range(Small.shape[0]):
    full_res = Dataset[index]
    Small[index] = downscale_local_mean(full_res, (4, 4, 1))

### Calculate MSE and PSNR

In [0]:
import cv2
from math import log, log10

# Largest value an 8-bit pixel can take; PSNR = 10 * log10(PEAK_VALUE**2 / MSE)
PEAK_VALUE = 255.0

# Sum the per-image mean squared error between each low-resolution image,
# upscaled back to 128x128 with bicubic interpolation, and its original
SE = 0
for i in range(np.shape(Small)[0]):
    # cv2.resize handles all three channels of the image in a single call
    Temp = cv2.resize(Small[i], dsize=(128, 128), interpolation=cv2.INTER_CUBIC)
    SE = SE + ((Temp - Dataset[i]) ** 2).mean()

# Average of the per-image errors over the whole dataset
MSE = SE / np.shape(Small)[0]

# A zero error means a perfect reconstruction, for which PSNR is infinite
PSNR = float("inf") if MSE == 0 else 10 * log10(PEAK_VALUE ** 2 / MSE)

print("The MSE is:", MSE)
print("The PSNR is:", PSNR)