In [1]:
import pandas as pd

# Column names assigned to both split files after reading.
SPLIT_COLUMNS = ["number", "filename", "illness", "data"]


def _read_split(path):
    """Read one space-delimited split file and rename its columns.

    NOTE(review): read_csv is called without ``header=None``, so the first
    line of the file is consumed as a header row before the column names are
    overwritten below. Confirm the split files really start with a header
    line; otherwise the first record of each file is silently dropped.
    """
    frame = pd.read_csv(path, sep=" ", index_col=False)
    frame.columns = SPLIT_COLUMNS
    return frame


df_test = _read_split('test_split.txt')
df_train = _read_split('train_split.txt')

# filename -> illness lookup tables, one per split.
dict_test = df_test.set_index("filename")["illness"].to_dict()
dict_train = df_train.set_index("filename")["illness"].to_dict()

In [2]:
import os
from PIL import Image
from numpy import asarray
import numpy as np

# Directory that load_images() joins each filename onto; the function reads
# this module-level name rather than taking it as an argument.
img_path = os.path.join("data", "train")
# The "filename" column of the training split.
names = df_train["filename"]

def load_images(names):
    """Load the named images as 224x224 RGB arrays together with their labels.

    Reads two module-level names: ``img_path`` (directory joined onto each
    filename) and ``dict_train`` (filename -> label lookup).

    Parameters
    ----------
    names : sized iterable of str
        Image filenames. Iterated positionally, so a pandas Series works
        whatever its index labels are (the previous ``names[i]`` lookup
        required a 0..n-1 index).

    Returns
    -------
    images : numpy.ndarray
        Shape ``(len(names), 224, 224, 3)``, NumPy's default float dtype.
    labels : numpy.ndarray
        Object array of length ``len(names)`` holding ``dict_train[name]``.

    Raises
    ------
    KeyError
        If a filename is missing from ``dict_train``.
    """
    n_images = len(names)
    images = np.zeros((n_images, 224, 224, 3))
    labels = np.empty(n_images, dtype='object')
    for i, name in enumerate(names):
        # Open inside a context manager so the file handle is released as
        # soon as the pixels have been copied out.
        with Image.open(os.path.join(img_path, name)) as raw:
            # resize() forces every image to exactly 224x224 (aspect ratio
            # is not preserved).
            pixels = np.asarray(raw.convert('RGB').resize((224, 224)))
        images[i] = pixels
        labels[i] = dict_train[name]
    return images, labels

In [3]:
# Seed NumPy's global RNG before sampling; sample() is called below without
# an explicit random_state.
np.random.seed(2137)

# Draw 110 filenames per class, in this fixed order so the RNG stream is
# consumed the same way on every run.
df1, df2, df3 = (
    df_train[df_train["illness"] == label].sample(110)["filename"]
    for label in ("normal", "COVID-19", "pneumonia")
)

# First 100 of each draw go to the training subset, last 10 to the held-out subset.
frames = [picked.head(100) for picked in (df1, df2, df3)]
frames2 = [picked.tail(10) for picked in (df1, df2, df3)]

# Drop the original row labels so both Series are indexed 0..n-1.
names_train = pd.concat(frames).reset_index(drop=True)
names_test = pd.concat(frames2).reset_index(drop=True)

In [4]:
def encode(y):
    """Map illness labels to integer class ids without modifying the input.

    "normal" -> 0, "COVID-19" -> 1, "pneumonia" -> 2.

    Parameters
    ----------
    y : array-like supporting ``.copy()``, boolean-mask assignment and ``.astype``
        Label container, e.g. the object array returned by ``load_images``.

    Returns
    -------
    Same container type as ``y``, cast with ``astype(int)``.

    Raises
    ------
    ValueError
        From ``astype(int)`` when an element is neither one of the three
        labels nor otherwise convertible to an integer.
    """
    # Work on a copy: the previous version overwrote the caller's array in
    # place, leaving it holding a mix of codes and any unmapped labels.
    encoded = y.copy()
    for label, code in (("normal", 0), ("COVID-19", 1), ("pneumonia", 2)):
        # Masks are computed on the untouched input.
        encoded[y == label] = code
    return encoded.astype(int)

In [5]:
# Directory that load_images() reads through the module-level img_path.
img_path = os.path.join("data", "train")

# Training subset: load pixels and labels, then turn labels into class ids.
x_train, y_train = load_images(names_train)
y_train = encode(y_train)

# Held-out subset, drawn from the same image directory.
x_test, y_test = load_images(names_test)
y_test = encode(y_test)

# Leave the module-level `names` pointing at the last list that was loaded.
names = names_test

In [6]:
# Persist the four arrays under data/; np.save appends ".npy" to each path.
for _target, _array in (
    ("data/x_train", x_train),
    ("data/y_train", y_train),
    ("data/x_test", x_test),
    ("data/y_test", y_test),
):
    np.save(_target, _array)