In [0]:
import os
import pickle
import random
import time
from collections import Counter

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from PIL import Image

In [0]:
# Hold-out split: a fixed number of files per class is reserved for testing and
# every remaining file of that class is used for training.
DATASET_DIR = 'drive/My Drive/dataset_498l'
N_TEST_NONCOVID = 78
N_TEST_COVID = 72
SPLIT_SEED = 28  # fixed so that Restart & Run All reproduces the same split


def _split_filenames(directory, n_test, rng):
  """Split the files of `directory` into test and train file-name lists.

  Args:
    directory: folder whose entries are listed with `os.listdir`.
    n_test: number of file names to hold out for testing.
    rng: `random.Random` instance used to draw the test sample.

  Returns:
    A `(all_files, test_files, train_files)` tuple of lists of file names.
    `train_files` holds every name that was not sampled into `test_files`.

  Raises:
    ValueError: if `n_test` is larger than the number of files (raised by
      `rng.sample`).
  """
  # os.listdir returns entries in arbitrary order; sorting makes the seeded
  # sample independent of the filesystem.
  all_files = sorted(os.listdir(directory))
  test_files = rng.sample(all_files, n_test)
  held_out = set(test_files)  # set membership instead of scanning a list
  train_files = [name for name in all_files if name not in held_out]
  return all_files, test_files, train_files


split_rng = random.Random(SPLIT_SEED)

noncovid_filenames, test_noncovid_files, train_noncovid_files = _split_filenames(
    os.path.join(DATASET_DIR, 'NonCOVID'), N_TEST_NONCOVID, split_rng)

print(len(test_noncovid_files), len(train_noncovid_files))


covid_filenames, test_covid_files, train_covid_files = _split_filenames(
    os.path.join(DATASET_DIR, 'COVID'), N_TEST_COVID, split_rng)

print(len(test_covid_files), len(train_covid_files))

78 319
72 277


In [0]:
# create test datasets

def create_testdataset(noncovid_filenames, covid_filenames):
  """Load the held-out images as resized greyscale arrays paired with labels.

  Args:
    noncovid_filenames: file names inside the ``NonCOVID`` folder (label 0).
    covid_filenames: file names inside the ``COVID`` folder (label 1).

  Returns:
    A list of ``[image_array, label]`` pairs. ``image_array`` is the NumPy
    array of the image after a 152x152 resize and conversion to a single
    greyscale channel. The list is shuffled with a fixed seed (28), so the
    order is the same on every run for the same inputs.
  """
  datadir = 'drive/My Drive/dataset_498l'
  categories = ["NonCOVID", "COVID"]

  transform_resize = torchvision.transforms.Resize((152, 152))
  transform_greyscale = torchvision.transforms.Grayscale(num_output_channels=1)

  test_dataset = []

  # The label of each class is its index in `categories`; NonCOVID files are
  # processed first, then COVID files.
  for class_num, filenames in enumerate((noncovid_filenames, covid_filenames)):
    path = datadir+"/"+categories[class_num]

    for img in filenames:
      # The context manager releases the file handle as soon as the pixel
      # data has been copied into the array.
      with Image.open(os.path.join(path, img)) as image:
        resized = transform_resize(image)
        grey = transform_greyscale(resized)
        img_array = np.array(grey)

      test_dataset.append([img_array, class_num])

  random.Random(28).shuffle(test_dataset)

  return test_dataset

In [0]:
# create train datasets

def create_aug_training_dataset(noncovid_filenames, covid_filenames, jitter=0.2):
  """Build the augmented training set: five greyscale variants per image.

  For every file the following variants are produced, in this order: the
  original, a brightness/contrast-jittered copy, a horizontal flip, a vertical
  flip and a random rotation of up to 10 degrees. Each variant is resized to
  152x152, converted to one greyscale channel and stored as a NumPy array.

  Args:
    noncovid_filenames: file names inside the ``NonCOVID`` folder (label 0).
    covid_filenames: file names inside the ``COVID`` folder (label 1).
    jitter: strength passed to ``ColorJitter`` as both ``brightness`` and
      ``contrast``. ``0`` disables the jitter, in which case the second
      variant is a plain copy of the original.

  Returns:
    A list of ``[image_array, label]`` pairs, five per input file, shuffled
    with a fixed seed (28). The jitter and the rotation are random and are
    not seeded by this function, so the pixel content of those two variants
    can differ between runs.
  """
  datadir = 'drive/My Drive/dataset_498l'
  categories = ["NonCOVID", "COVID"]

  # ColorJitter() with its default arguments leaves the image unchanged, so a
  # non-zero strength is required for this variant to differ from the original.
  transform_color_jitter = torchvision.transforms.ColorJitter(
      brightness=jitter, contrast=jitter)
  # p=1.0: always flip. With the default p=0.5 about half of the "flipped"
  # variants would be unmodified duplicates of the original image.
  transform_hflip = torchvision.transforms.RandomHorizontalFlip(p=1.0)
  transform_vflip = torchvision.transforms.RandomVerticalFlip(p=1.0)
  transform_affine = torchvision.transforms.RandomAffine(degrees = 10)

  transform_resize = torchvision.transforms.Resize((152, 152))
  transform_greyscale = torchvision.transforms.Grayscale(num_output_channels=1)

  train_dataset = []

  # The label of each class is its index in `categories`; NonCOVID files are
  # processed first, then COVID files.
  for class_num, filenames in enumerate((noncovid_filenames, covid_filenames)):
    path = datadir+"/"+categories[class_num]

    for img in filenames:
      # Everything that reads pixel data happens inside the `with` block so
      # the file handle is released once the arrays have been created.
      with Image.open(os.path.join(path, img)) as image:
        variants = [
          image,
          # Jitter a greyscale copy: the stored image is single-channel
          # anyway, and the enhancement is then applied to a plain greyscale
          # image regardless of the source file's colour mode.
          transform_color_jitter(transform_greyscale(image)),
          transform_hflip(image),
          transform_vflip(image),
          transform_affine(image),
        ]

        for variant in variants:
          variant = transform_resize(variant)
          variant = transform_greyscale(variant)
          train_dataset.append([np.array(variant), class_num])

  random.Random(28).shuffle(train_dataset)
  return train_dataset

In [0]:
# Materialise both datasets from the file-name split made above. The test set
# holds one array per file; the training set holds five variants per file.
test_dataset = create_testdataset(
    noncovid_filenames=test_noncovid_files,
    covid_filenames=test_covid_files,
)
train_dataset = create_aug_training_dataset(
    noncovid_filenames=train_noncovid_files,
    covid_filenames=train_covid_files,
)

In [0]:
def _dump_pickle(obj, path):
  """Serialize `obj` to `path` with pickle; the file is closed even on error."""
  with open(path, "wb") as pickle_out:
    pickle.dump(obj, pickle_out)


# Separate the [image_array, label] pairs into parallel lists so images and
# labels can be stored (and later loaded) independently.
train_images = [image for image, _ in train_dataset]
train_labels = [label for _, label in train_dataset]

print("training >> ", Counter(train_labels))

_dump_pickle(train_images, "drive/My Drive/project_cmsc498l/train_images_152x152.pickle")
_dump_pickle(train_labels, "drive/My Drive/project_cmsc498l/train_labels_152x152.pickle")


test_images = [image for image, _ in test_dataset]
test_labels = [label for _, label in test_dataset]

print("testing >> ", Counter(test_labels))

_dump_pickle(test_images, "drive/My Drive/project_cmsc498l/test_images_152x152.pickle")
_dump_pickle(test_labels, "drive/My Drive/project_cmsc498l/test_labels_152x152.pickle")

training >>  Counter({0: 1595, 1: 1385})
testing >>  Counter({0: 78, 1: 72})
