In [1]:
import pandas as pd
import numpy as np
import os
import glob
import cv2
from keras.preprocessing import image
from PIL import Image

In [2]:
# Getting the original number of images in each category
train_BP = len(glob.glob("TrainData/BacterialPneumonia/*"))
train_CV = len(glob.glob("TrainData/COVID-19/*.*"))
train_N = len(glob.glob("TrainData/Normal/*.*"))
train_VP = len(glob.glob("TrainData/ViralPneumonia/*.*"))
print(train_BP, train_N, train_VP, train_CV)

1300 1781 824 120


### Some augmentation techniques to balance each set of images

In [3]:
def flip_and_rotate_img(image):
    augmented_images = []

    # Appending image itself
    augmented_images.append(image)

    # Rotating image to 90, 180, 270 and appending them
    rows,cols = image.shape[:2]
    for i_angle in [90, 180, 270]:
        M = cv2.getRotationMatrix2D((cols/2, rows/2), i_angle, 1)
        dst = cv2.warpAffine(image, M, (cols, rows))  
        augmented_images.append(dst)

    # First flipping and then rotating the image to 90, 180, 270 and appending them
    flipped_image = cv2.flip(image, 1)
    augmented_images.append(flipped_image)

    rows,cols = flipped_image.shape[:2]
    for i_angle in [90, 180, 270]:
        M = cv2.getRotationMatrix2D((cols/2, rows/2), i_angle, 1)
        dst = cv2.warpAffine(flipped_image, M, (cols, rows))  
        augmented_images.append(dst)
    return augmented_images

In [4]:
def flip_and_rotate_img_saving_to_dir(image_pixels, image_name = "00001-", output_dir = "./folder_to_be_deleted/"):
    
    # Rotating image to 90, 180, 270 and appending them
    rows,cols = image_pixels.shape[:2]
    for i_angle in [90, 180, 270]:
        M = cv2.getRotationMatrix2D((cols/2, rows/2), i_angle, 1)
        dst = cv2.warpAffine(image_pixels, M, (cols, rows))  
        # Converting numpy array to image pil
        image = Image.fromarray(dst.astype('uint8'), 'RGB')
        image.save(output_dir + image_name + "_" + str(i_angle) + "degree" + ".jpg")

    # First flipping and then rotating the image to 90, 180, 270 and appending them
    flipped_image = cv2.flip(image_pixels, 1)
    # Converting numpy array to image pil
    image = Image.fromarray(flipped_image.astype('uint8'), 'RGB')
    image.save(output_dir + image_name + "_" + "flipped.jpg")

    rows,cols = flipped_image.shape[:2]
    for i_angle in [90, 180, 270]:
        M = cv2.getRotationMatrix2D((cols/2, rows/2), i_angle, 1)
        dst = cv2.warpAffine(flipped_image, M, (cols, rows))  
        # Converting numpy array to image pil
        image = Image.fromarray(dst.astype('uint8'), 'RGB')
        image.save(output_dir + image_name + "_" + "flipped" + "_" + str(i_angle) +"degree" + ".jpg")

In [5]:
def augmenting_image(path, output_dir):
    # Loading the image
    img = image.load_img(path, target_size=(512, 512))
    # Converting the image to array
    x = image.img_to_array(img)
    # Getting the image name
    image_name = path.split('/')[-1].split('.')[0]
    # Augmenting the image
    flip_and_rotate_img_saving_to_dir(x, image_name, output_dir)

In [6]:
for i_image in os.listdir("./TrainData/COVID-19"):
    im = Image.open("./TrainData/COVID-19/"+i_image)
    img = im.rotate(15)
    img.save("./TrainData/COVID-19/" + "rotated15_" + i_image)

In [7]:
len(glob.glob("TrainData/COVID-19/*.*"))

240

In [8]:
# Augmenting the "COVID-19" Images
for i_img in os.listdir("./TrainData/COVID-19"):
    augmenting_image(path="./TrainData/COVID-19/"+i_img, output_dir="./Dataset/COVID-19/")

In [16]:
#Augmenting the ViralPneumonia" Images
for i_image in os.listdir("./Dataset/ViralPneumonia"):
    im = Image.open("./Dataset/ViralPneumonia/"+i_image)
    img = im.rotate(15)"
    img.save("./Dataset/ViralPneumonia/" + "rotated15_" + i_image)

In [19]:
# Balanced dataset after applying some data augmnetation techniques

CV=len(glob.glob("./Dataset/COVID-19/*"))
No=len(glob.glob("./Dataset/Normal/*"))
BP=len(glob.glob("./Dataset/BacterialPneumonia/*"))
VP=len(glob.glob("./Dataset/ViralPneumonia/*"))
print(CV, No, BP, VP)

1680 1781 1300 1648


### Prepare test data in csv file

In [22]:
# Appending data in a list
images = []
for folder in os.listdir("./Dataset"):
    for img in os.listdir("./Dataset/"+folder):
        images.append(folder+"/"+img)

'BacterialPneumonia/0.jpeg'

In [23]:
len(images)

6409

In [26]:
data = pd.DataFrame({"Images": images, "Normal": np.nan, "COVID-19": np.nan, "BacterialPneumonia": np.nan, 
                     "ViralPneumonia": np.nan})
data.head()

Unnamed: 0,Images,Normal,COVID-19,BacterialPneumonia,ViralPneumonia
0,BacterialPneumonia/0.jpeg,,,,
1,BacterialPneumonia/1.jpeg,,,,
2,BacterialPneumonia/10.jpeg,,,,
3,BacterialPneumonia/100.jpeg,,,,
4,BacterialPneumonia/101.jpeg,,,,


In [33]:
for i in range(len(data['Images'])):
    category = data['Images'][i].split('/')[0]
    data[category][i] = 1.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [36]:
data.fillna(0.0)

Unnamed: 0,Images,Normal,COVID-19,BacterialPneumonia,ViralPneumonia
0,BacterialPneumonia/0.jpeg,0.0,0.0,1.0,0.0
1,BacterialPneumonia/1.jpeg,0.0,0.0,1.0,0.0
2,BacterialPneumonia/10.jpeg,0.0,0.0,1.0,0.0
3,BacterialPneumonia/100.jpeg,0.0,0.0,1.0,0.0
4,BacterialPneumonia/101.jpeg,0.0,0.0,1.0,0.0
...,...,...,...,...,...
6404,ViralPneumonia/_96_3583409.jpeg,0.0,0.0,0.0,1.0
6405,ViralPneumonia/_97_6809245.jpeg,0.0,0.0,0.0,1.0
6406,ViralPneumonia/_98_7527018.jpeg,0.0,0.0,0.0,1.0
6407,ViralPneumonia/_99_1223038.jpeg,0.0,0.0,0.0,1.0


In [38]:
data.to_csv("./Dataset/train_data.csv")

### Prepare Val data in csv file

In [40]:
# Appending data in a list
images = []
for folder in os.listdir("./ValData"):
    for img in os.listdir("./ValData/"+folder):
        images.append(folder+"/"+img)

In [41]:
len(images)

988

In [42]:
val_data = pd.DataFrame({"Images": images, "Normal": np.nan, "COVID-19": np.nan, "BacterialPneumonia": np.nan, 
                     "ViralPneumonia": np.nan})
val_data.head()

Unnamed: 0,Images,Normal,COVID-19,BacterialPneumonia,ViralPneumonia
0,BacterialPneumonia/0.jpeg,,,,
1,BacterialPneumonia/1.jpeg,,,,
2,BacterialPneumonia/10.jpeg,,,,
3,BacterialPneumonia/100.jpeg,,,,
4,BacterialPneumonia/101.jpeg,,,,


In [44]:
for i in range(len(val_data['Images'])):
    category = val_data['Images'][i].split('/')[0]
    val_data[category][i] = 1.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [45]:
val_data.head()

Unnamed: 0,Images,Normal,COVID-19,BacterialPneumonia,ViralPneumonia
0,BacterialPneumonia/0.jpeg,,,1.0,
1,BacterialPneumonia/1.jpeg,,,1.0,
2,BacterialPneumonia/10.jpeg,,,1.0,
3,BacterialPneumonia/100.jpeg,,,1.0,
4,BacterialPneumonia/101.jpeg,,,1.0,


In [46]:
val_data.fillna(0.0)

Unnamed: 0,Images,Normal,COVID-19,BacterialPneumonia,ViralPneumonia
0,BacterialPneumonia/0.jpeg,0.0,0.0,1.0,0.0
1,BacterialPneumonia/1.jpeg,0.0,0.0,1.0,0.0
2,BacterialPneumonia/10.jpeg,0.0,0.0,1.0,0.0
3,BacterialPneumonia/100.jpeg,0.0,0.0,1.0,0.0
4,BacterialPneumonia/101.jpeg,0.0,0.0,1.0,0.0
...,...,...,...,...,...
983,ViralPneumonia/95.jpeg,0.0,0.0,0.0,1.0
984,ViralPneumonia/96.jpeg,0.0,0.0,0.0,1.0
985,ViralPneumonia/97.jpeg,0.0,0.0,0.0,1.0
986,ViralPneumonia/98.jpeg,0.0,0.0,0.0,1.0


In [47]:
val_data.to_csv("./Dataset/val_data.csv")